def get_education_profile(geo, session):
    """
    Build the education section of the profile for ``geo``.

    Three queries are made against the "highest educational level" field in
    the "Individuals 20 and older" universe: the full (recoded) distribution,
    the subset counted as general education or higher, and the subset counted
    as further education or higher. Which category lists define the two
    subsets depends on whether the current context year is 2011.

    :param geo: the geography to profile
    :param session: database session passed through to get_stat_data
    :return: dict with ``educational_attainment_distribution`` and
             ``educational_attainment`` entries
    """
    is_2011 = str(current_context().get("year")) == "2011"

    edu_dist_data, total_over_20 = get_stat_data(
        ["highest educational level"],
        geo,
        session,
        recode=COLLAPSED_EDUCATION_CATEGORIES,
        table_universe="Individuals 20 and older",
        key_order=EDUCATION_KEY_ORDER,
    )

    general_edu, total_general_edu = get_stat_data(
        ["highest educational level"],
        geo,
        session,
        table_universe="Individuals 20 and older",
        only=EDUCATION_GET_OR_HIGHER if is_2011 else EDUCATION_GET_OR_HIGHER_2016,
    )

    further_edu, total_further_edu = get_stat_data(
        ["highest educational level"],
        geo,
        session,
        table_universe="Individuals 20 and older",
        only=EDUCATION_FET_OR_HIGHER if is_2011 else EDUCATION_FET_OR_HIGHER_2016,
    )

    def share_of_over_20(count):
        # A geography with nobody in the universe would otherwise raise
        # ZeroDivisionError and take the whole profile down with it.
        if not total_over_20:
            return 0
        return round(count / total_over_20 * 100, 2)

    edu_split_data = {
        "percent_general_edu": {
            "name": "Completed Grade 9 or higher",
            "numerators": {"this": total_general_edu},
            "values": {"this": share_of_over_20(total_general_edu)},
        },
        "percent_further_edu": {
            "name": "Completed Matric or higher",
            "numerators": {"this": total_further_edu},
            "values": {"this": share_of_over_20(total_further_edu)},
        },
        "metadata": general_edu["metadata"],
    }

    return {
        "educational_attainment_distribution": edu_dist_data,
        "educational_attainment": edu_split_data,
    }
def get_demographics_profile(geo, session):
    """
    Build the demographics section (gender, age group, rural/urban) for ``geo``.

    All queries run inside a dataset context pinned to the current context
    year. The gender query also supplies the total population figure.

    :param geo: the geography to profile
    :param session: database session passed through to get_stat_data
    :return: dict of distributions plus ``under_15`` and ``total_population``
    """
    release_year = current_context().get('year')

    with dataset_context(year=release_year):
        # gender (its total is the total population)
        by_gender, population = get_stat_data(
            'gender', geo, session,
            table_fields=['gender', 'age group'])

        # age group
        by_age_group, _ = get_stat_data(
            'age group', geo, session,
            table_fields=['gender', 'age group'])
        under_15_count = by_age_group['0-14 Years']['numerators']['this']

        # rural or urban, split by gender
        by_rural_urban, _ = get_stat_data(
            ['rural or urban', 'gender'], geo, session,
            table_fields=['gender', 'rural or urban'])

    return {
        'gender_ratio': by_gender,
        'age_group_distribution': by_age_group,
        'under_15': {
            'name': 'Under 15 years',
            'values': {'this': under_15_count},
        },
        'rural_distribution': by_rural_urban,
        'total_population': {
            "name": "People",
            "values": {"this": population},
        },
    }
def get_profile(geo, profile_name, request):
    """
    Assemble the profile for ``geo`` from every available section builder.

    For each name in PROFILE_SECTIONS, a module-level function called
    ``get_<section>_profile`` is looked up and, when present, called with
    ``(geo, session)``; its result is stored under the section name.

    ``profile_name`` and ``request`` are not used by this function.

    :return: dict with ``primary_release_year`` plus one entry per section
    """
    session = get_session()

    try:
        data = {"primary_release_year": current_context().get("year")}

        # NOTE: merging in data for comparative geographies is currently
        # disabled, so the comparative-geography lookup is not performed here.
        # Restore the lookup together with the merge if it is re-enabled.
        for section in list(PROFILE_SECTIONS):
            function_name = "get_%s_profile" % section
            if function_name in globals():
                func = globals()[function_name]
                data[section] = func(geo, session)
    finally:
        session.close()

    return data
def get_db_table(self, release=None, year=None):
    """
    Get a DBTable instance for a particular year or release.

    If neither is given, the year of the current dataset context is used.
    When a year is available (given or from the context) it is resolved to a
    release via ``self.get_release`` and takes precedence over ``release``.

    :param release: the release to look the table up for
    :param year: release year to resolve via ``self.get_release``
    :return: the matching table, with ``active_release`` set and
             ``self.setup_model`` applied to it
    :raises ValueError: if no release can be determined, or if there is no
                        table linked to the release
    """
    if year is None and release is None:
        from wazimap.data.utils import current_context

        # use the current context
        year = current_context().get("year")

    if year:
        release = self.get_release(year)

    if not release:
        raise ValueError(
            "Unclear which release year to use. Specify a release or a year, or use dataset_context(year=...)"
        )

    # get the db_table
    fieldname = self.release_class.__name__.lower() + "__release"
    db_table = self.db_table_releases.filter(**{fieldname: release}).first()
    if db_table is None:
        # Fail with a clear message rather than an AttributeError on None below.
        raise ValueError("No database table found for release %r." % (release,))

    db_table.active_release = release
    self.setup_model(db_table)

    return db_table
def get_census_profile(geo, profile_name, request):
    """
    Assemble the census profile for ``geo``.

    Every profile name listed under any category in SECTIONS is normalised
    (lower-cased, spaces to underscores) and, if a module-level
    ``get_<name>_profile`` function exists, built with ``(geo, session)``.
    The afrobarometer profile is always added. ``geo.version`` is coerced to
    a string in place. ``profile_name`` and ``request`` are not used.

    :return: dict of section data plus section listings and release year
    """
    geo.version = str(geo.version)
    session = get_session()
    release_year = current_context().get('year')

    try:
        data = {}

        # Flatten the per-category profile names into one list.
        sections = []
        for category in SECTIONS:
            sections.extend(SECTIONS[category]['profiles'])

        for raw_name in sections:
            key = raw_name.lower().replace(' ', '_')
            builder_name = 'get_%s_profile' % key
            if builder_name not in globals():
                continue
            data[key] = globals()[builder_name](geo, session)

        # tweaks to make the data nicer
        # show X largest groups on their own and group the rest as 'Other'
        if 'households' in sections:
            households = data['households']
            group_remainder(households['roofing_material_distribution'], 5)
            group_remainder(households['wall_material_distribution'], 5)

        data['all_sections'] = SECTIONS
        data['primary_release_year'] = release_year

        # No sub-selection is made here, so every section counts as selected.
        data['raw_selected_sections'] = sections
        data['selected_sections'] = [
            name.replace(' ', '_').lower() for name in sections
        ]

        data['afrobarometer'] = get_afrobarometer_profile(geo, session)
        return data
    finally:
        session.close()
def get_profile(geo, profile_name, request):
    """
    Build a profile holding population and afrobarometer data for ``geo``.

    ``profile_name`` and ``request`` are not used. The session is always
    closed, whether or not building the profile succeeds.

    :return: dict with ``demographics``, ``primary_release_year`` and
             ``afrobarometer`` entries
    """
    release_year = current_context().get('year')
    session = get_session()

    try:
        return {
            'demographics': get_population(geo, session),
            'primary_release_year': release_year,
            'afrobarometer': get_afrobarometer_profile(geo, session),
        }
    finally:
        session.close()
def get_profile(geo, profile_name, request):
    """
    Assemble the profile for ``geo`` and merge in its comparative geographies.

    Each section in PROFILE_SECTIONS that has a module-level
    ``get_<section>_profile`` function is built for ``geo`` and then, for
    every comparative geography, built again with ``comparative=True`` and
    merged in under that geography's level. ``profile_name`` and ``request``
    are not used.

    :return: dict with ``display_profile``, ``primary_release_year`` and one
             entry per built section
    """
    session = get_session()

    try:
        comp_geos = geo_data.get_comparative_geos(geo)

        # Data only exists for the country, province, district and
        # municipality levels; other levels are not rejected here (yet).

        # Temporary: decide what to display for a geography.
        #   Within WC:  all indicators, with WC as root comparison geo
        #   Outside WC: some indicators, with ZA as root comparison geo
        # This is because Wazimap expects data for all geos, and some
        # datasets only have WC information. To be removed once all the data
        # has been imported.
        within_wc = geo.geo_code == 'WC' or any(
            cg.geo_code == 'WC' for cg in comp_geos)
        display_profile = 'WC' if within_wc else 'ZA'

        data = {
            'display_profile': display_profile,
            'primary_release_year': current_context().get('year'),
        }

        for section in list(PROFILE_SECTIONS):
            builder_name = 'get_%s_profile' % section
            if builder_name not in globals():
                continue
            builder = globals()[builder_name]
            data[section] = builder(geo, session, display_profile)

            # get profiles for province and/or country and merge each
            # summary profile into the current geo's profile
            for comp_geo in comp_geos:
                comparative_data = builder(
                    comp_geo, session, display_profile, comparative=True)
                merge_dicts(data[section], comparative_data, comp_geo.geo_level)

        # Make data look nicer on profile page
        group_remainder(data['demographics']['youth_population_by_language'], 11)

        return data
    finally:
        session.close()
def get_land_audit_profile(geo, session):
    """
    Build the land audit section of the profile for ``geo``.

    Four tables are queried inside a dataset context pinned to the current
    context year. Each lookup is best-effort: if it fails for any reason the
    LOCATIONNOTFOUND placeholder is used for that table instead, and the
    failure is logged at debug level rather than silently discarded.

    :param geo: the geography to profile
    :param session: database session passed through to get_stat_data
    :return: dict with the four distributions and an overall ``is_missing``
             flag that is truthy only when every distribution is missing
    """
    import logging

    logger = logging.getLogger(__name__)

    # (result key, field, table name), in the order the tables are queried.
    lookups = (
        ('land_use_dist', 'land_use', 'landuse'),
        ('land_user_dist', 'land_user', 'landuser'),
        ('land_distribution_gender', 'land_ownership_by_gender',
         'privatelanddistributionbygender'),
        ('land_ownership', 'private_vs_state_ownership', 'landownership'),
    )

    year = current_context().get('year')
    found = {}
    with dataset_context(year=year):
        for key, field, table_name in lookups:
            try:
                dist, _ = get_stat_data(field, geo, session,
                                        table_name=table_name,
                                        table_fields=[field])
            except Exception:
                # Deliberately broad: any failure means "no data for this
                # geography", but keep a trace of what went wrong.
                logger.debug("No '%s' data available for %s", field, geo,
                             exc_info=True)
                dist = LOCATIONNOTFOUND
            found[key] = dist

    is_missing = found['land_user_dist'].get('is_missing') and \
        found['land_use_dist'].get('is_missing') and \
        found['land_distribution_gender'].get('is_missing') and \
        found['land_ownership'].get('is_missing')

    return {
        'is_missing': is_missing,
        'land_user_dist': found['land_user_dist'],
        'land_use_dist': found['land_use_dist'],
        'land_distribution_gender': found['land_distribution_gender'],
        'land_ownership': found['land_ownership'],
    }
def get_profile(geo, profile_name, request):
    """
    Build a profile from a fixed set of sections for ``geo``.

    The demographics, households and disability builders also receive the
    current context year; the elections and afrobarometer builders do not.
    ``profile_name`` and ``request`` are not used. The session is always
    closed.

    :return: dict with the release year and one entry per section
    """
    session = get_session()
    release_year = current_context().get('year')

    try:
        return {
            'primary_release_year': release_year,
            'demographics': get_demographics_profile(geo, session, release_year),
            'households': get_households_profile(geo, session, release_year),
            'disability': get_disabilities_profile(geo, session, release_year),
            'elections2016': get_elections2016_profile(geo, session),
            'afrobarometer': get_afrobarometer_profile(geo, session),
        }
    finally:
        session.close()
def get_profile(geo, profile_name, request):
    """
    Build every profile section for ``geo`` and fold in comparative data.

    Sections come from PROFILE_SECTIONS; a section is only built when a
    module-level ``get_<section>_profile`` function exists. After building a
    section for ``geo`` itself, the same builder runs for each comparative
    geography (with ``comparative=True``) and the result is merged in with
    merge_dicts, keyed by the comparative geography's level.
    ``profile_name`` and ``request`` are not used.

    :return: dict with ``display_profile``, ``primary_release_year`` and the
             built sections
    """
    session = get_session()

    try:
        comp_geos = geo_data.get_comparative_geos(geo)
        data = {}

        # We only have data for country, province, district and municipality
        # levels. Other levels are currently let through without an error.

        # The following is temporary and determines what to display:
        #   Within WC:  all indicators, with WC as the root comparison geo
        #   Outside WC: some indicators, with ZA as the root comparison geo
        # Wazimap expects data for all geos, but some datasets only cover WC.
        # This goes away once all the data has been imported.
        if geo.geo_code == 'WC' or 'WC' in [cg.geo_code for cg in comp_geos]:
            display_profile = 'WC'
        else:
            display_profile = 'ZA'

        data['display_profile'] = display_profile
        data['primary_release_year'] = current_context().get('year')

        module_names = globals()
        for section in list(PROFILE_SECTIONS):
            function_name = 'get_%s_profile' % section
            if function_name in module_names:
                build_section = module_names[function_name]
                section_data = build_section(geo, session, display_profile)
                data[section] = section_data

                # merge the province and/or country summary profiles into
                # the current geo's profile
                for comp_geo in comp_geos:
                    merge_dicts(
                        section_data,
                        build_section(comp_geo, session, display_profile,
                                      comparative=True),
                        comp_geo.geo_level)

        # Make data look nicer on profile page
        languages = data['demographics']['youth_population_by_language']
        group_remainder(languages, 11)

        return data
    finally:
        session.close()
def get_sectionaltitleland_profile(geo, session):
    """
    Build the sectional title land section of the profile for ``geo``.

    One table is queried per profile listed under
    ``SECTIONS['sectionaltitleland']['profiles']``, inside a dataset context
    pinned to the current context year. The lower-cased profile name is the
    field queried; the same name with spaces replaced by underscores is the
    key in the result. A failed lookup is logged at debug level and replaced
    by the LOCATIONNOTFOUND placeholder.

    :param geo: the geography to profile
    :param session: database session passed through to get_stat_data
    :return: dict of per-profile data plus ``is_missing``, which stays truthy
             only while every profile seen so far is missing
    """
    import logging

    logger = logging.getLogger(__name__)

    year = current_context().get('year')
    profiles_data = {'is_missing': True}

    with dataset_context(year=year):
        for profile in SECTIONS['sectionaltitleland']['profiles']:
            # Derive the names outside the try block so the bookkeeping below
            # never reads a stale or unbound name after a failed lookup.
            profile_table = profile.lower()
            profile_name = profile_table.replace(' ', '_')

            try:
                dist, _ = get_stat_data([profile_table], geo, session)
            except Exception:
                # Deliberately broad: any failure means "no data here", but
                # keep a trace of what went wrong.
                logger.debug("No '%s' data available for %s", profile_table,
                             geo, exc_info=True)
                dist = LOCATIONNOTFOUND

            profiles_data[profile_name] = dist
            profiles_data['is_missing'] = profiles_data.get('is_missing') and \
                dist.get('is_missing')

    return profiles_data
def get_profile(geo, profile_name, request):
    """
    Assemble the profile for ``geo`` and merge in its comparative geographies.

    For each name in PROFILE_SECTIONS with a module-level
    ``get_<section>_profile`` function, the section is built for ``geo`` and
    then merged (merge_dicts) with the same section built for every
    comparative geography, keyed by that geography's level.

    ``profile_name`` and ``request`` are not used by this function.

    :return: dict with ``primary_release_year`` plus one entry per section
    :raises ValueError: if merging a comparative geography's data raises
                        KeyError; the failure is logged first
    """
    session = get_session()

    try:
        comparative_geos = geo_data.get_comparative_geos(geo)
        data = {}
        data["primary_release_year"] = current_context().get("year")

        for section in list(PROFILE_SECTIONS):
            function_name = "get_%s_profile" % section
            if function_name not in globals():
                continue

            func = globals()[function_name]
            data[section] = func(geo, session)

            # get profiles for comparative geometries
            for comp_geo in comparative_geos:
                try:
                    merge_dicts(
                        data[section], func(comp_geo, session), comp_geo.geo_level
                    )
                except KeyError as e:
                    msg = (
                        "Error merging data into %s for section '%s' from %s: KeyError: %s"
                        % (geo.geoid, section, comp_geo.geoid, e)
                    )
                    log.fatal(msg, exc_info=e)
                    raise ValueError(msg)
    finally:
        session.close()

    # The profile is returned as-is. It is no longer dumped to a JSON file in
    # the working directory, which was a debugging leftover that ran on every
    # call.
    return data
def get_db_table(self, release=None, year=None):
    """
    Get a DBTable instance for a particular year or release.

    If neither is given, the year of the current dataset context is used.
    When a year is available (given or from the context) it is resolved to a
    release via ``self.get_release`` and takes precedence over ``release``.

    :param release: the release to look the table up for
    :param year: release year to resolve via ``self.get_release``
    :return: the matching table, with ``active_release`` set and
             ``self.setup_model`` applied to it
    :raises ValueError: if no release can be determined, or if there is no
                        table linked to the release
    """
    if year is None and release is None:
        from wazimap.data.utils import current_context

        # use the current context
        year = current_context().get('year')

    if year:
        release = self.get_release(year)

    if not release:
        raise ValueError("Unclear which release year to use. Specify a release or a year, or use dataset_context(year=...)")

    # get the db_table
    fieldname = self.release_class.__name__.lower() + '__release'
    query = self.db_table_releases.filter(**{fieldname: release})
    db_table = query.first()
    if db_table is None:
        # Without this guard the attribute assignment below fails with an
        # unhelpful AttributeError on None.
        raise ValueError('No database table found for release %r.' % (release,))

    db_table.active_release = release
    self.setup_model(db_table)

    return db_table
def get_living_environment_profile(geo, session, display_profile, comparative=False):
    """
    Build the living environment section of the youth profile for ``geo``.

    All tables come from the 'Census and Community Survey' dataset. The
    internet access indicator differs by release: for the 2011 release the
    headline figure is "no access to internet", otherwise it is access
    through a cell phone. ``display_profile`` and ``comparative`` are
    accepted but not used here.

    :param geo: the geography to profile
    :param session: database session passed through to get_stat_data
    :return: dict of distributions and headline figures
    """
    year = current_context().get('year')
    final_data = {}

    youth_electricity_access, _ = get_stat_data(
        ['electricity access'], geo, session,
        table_universe='Youth living in households',
        table_dataset='Census and Community Survey',
        key_order=ELECTRICITY_ACCESS_KEY_ORDER)

    youth_toilet_access, _ = get_stat_data(
        ['toilet access'], geo, session,
        table_universe='Youth living in households',
        table_dataset='Census and Community Survey',
        key_order=TOILET_ACCESS_KEY_ORDER)

    youth_water_access, _ = get_stat_data(
        ['water access'], geo, session,
        table_universe='Youth living in households',
        table_dataset='Census and Community Survey',
        key_order=WATER_ACCESS_KEY_ORDER)

    youth_type_of_dwelling, _ = get_stat_data(
        ['type of dwelling'], geo, session,
        table_universe='Youth living in households',
        table_dataset='Census and Community Survey',
        key_order=TYPE_OF_DWELLING_ORDER)

    # Informal dwellings = shacks both in and not in a backyard. The combined
    # figure stays None unless at least one of the two is present and non-zero.
    informal_not_in_backyard = youth_type_of_dwelling.get(
        'Informal not in backyard', {}).get('values', {}).get('this', None)
    informal_in_backyard = youth_type_of_dwelling.get(
        'Informal in backyard', {}).get('values', {}).get('this', None)
    youth_dwelling_informal = None
    if informal_not_in_backyard or informal_in_backyard:
        youth_dwelling_informal = (informal_not_in_backyard or 0) + (informal_in_backyard or 0)

    type_of_area_order = (TYPE_OF_AREA_ORDER_2016 if year == 'latest'
                          else TYPE_OF_AREA_ORDER)
    youth_type_of_area, _ = get_stat_data(
        ['type of area'], geo, session,
        table_universe='Youth living in households',
        table_dataset='Census and Community Survey',
        key_order=type_of_area_order)

    youth_household_crowded, _ = get_stat_data(
        ['household crowded'], geo, session,
        table_universe='Youth living in households',
        table_dataset='Census and Community Survey',
        key_order=HH_CROWDED_KEY_ORDER)

    if str(year) == '2011':
        # The releases have different indicators for internet access
        youth_access_to_internet, _ = get_stat_data(
            ['access to internet'], geo, session,
            table_universe='Youth living in households',
            table_dataset='Census and Community Survey',
            table_name='youth_access_to_internet_gender',
            key_order=INTERNET_ACCESS_ORDER)

        final_data.update({
            'youth_no_access_to_internet': {
                "name": "Of youth live in households with no access to internet",
                "values": {
                    "this": youth_access_to_internet['No access to internet']['values']['this']
                }
            }
        })
    else:
        youth_access_to_internet, _ = get_stat_data(
            ['access to internet'], geo, session,
            table_universe='Youth living in households',
            table_dataset='Census and Community Survey',
            order_by='-total')

        final_data.update({
            'youth_cell_phone_access_internet': {
                "name": "Of youth access the internet through a cell phone",
                "values": {
                    "this": youth_access_to_internet['Cell phone']['values']['this']
                }
            }
        })

    youth_by_living_with_parents_status, _ = get_stat_data(
        ['living with parents'], geo, session,
        table_universe='Youth aged 15-19 living in households',
        table_dataset='Census and Community Survey',
        key_order=LIVING_WITH_PARENTS_KEY_ORDER)

    # .items() (not the Python-2-only .iteritems()) so this runs on both
    # Python 2 and Python 3.
    living_with_parent_keys = ('Both parents', 'Mother only', 'Father only')
    living_with_parents_stat = sum(
        v['values']['this'] or 0
        for k, v in youth_by_living_with_parents_status.items()
        if k in living_with_parent_keys)

    final_data.update({
        'youth_electricity_access': youth_electricity_access,
        'youth_toilet_access': youth_toilet_access,
        'youth_water_access': youth_water_access,
        'youth_dwelling_informal': {
            "name": "Of youth live in households that are informal dwellings (shacks)",
            "values": {"this": youth_dwelling_informal}
        },
        'youth_type_of_dwelling': youth_type_of_dwelling,
        'youth_type_of_area': youth_type_of_area,
        'youth_households_overcrowded': {
            "name": "Of youth live in households that are overcrowded *",
            "values": {
                "this": youth_household_crowded['Overcrowded']['values']['this']
            }
        },
        'youth_household_crowded': youth_household_crowded,
        'youth_access_to_internet': youth_access_to_internet,
        'youth_living_with_parents': {
            "name": "Of youth aged 15-19 live with at least one biological parent",
            "values": {"this": living_with_parents_stat}
        },
        'youth_by_living_with_parents_status': youth_by_living_with_parents_status
    })

    return final_data
def get_demographics_profile(geo, session, display_profile, comparative=False):
    """
    Build the demographics section of the youth profile for ``geo``.

    The youth population figures come from the 'youth_population' data table;
    the other distributions come from get_stat_data against the
    'Census and Community Survey' dataset. ``display_profile`` and
    ``comparative`` are accepted but not used here.

    :param geo: the geography to profile
    :param session: database session passed through to get_stat_data
    :return: dict of distributions and headline figures;
             ``population_density`` is only included when ``geo.square_kms``
             is truthy
    """
    youth_pop_table = get_datatable('youth_population')
    # Percentages are required: both the 'numerators' and the percentage
    # 'values' of the result are read below. This used to pass the string
    # 'False', which is truthy and so only worked by accident.
    youth_pop, pop_total = youth_pop_table.get_stat_data(
        geo, total='total_pop', percent=True)

    youth_age_group_data, _ = get_stat_data(
        ['age groups in 10 years'], geo, session,
        table_universe='Population',
        table_dataset='Census and Community Survey')

    youth_gender_data, _ = get_stat_data(
        ['gender'], geo, session,
        table_fields=['gender', 'population group'],
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        key_order=GENDER_ORDER)

    population_group_order = (
        POPULATION_GROUP_ORDER_2016
        if current_context().get('year') == 'latest'
        else POPULATION_GROUP_ORDER)
    youth_pop_group_data, _ = get_stat_data(
        ['population group'], geo, session,
        table_fields=['population group', 'gender'],
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        table_name='youth_population_group_gender',
        key_order=population_group_order)

    youth_language_data, _ = get_stat_data(
        ['language'], geo, session,
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        order_by='-total')
    # The data is ordered by '-total', so the first entry is the most spoken
    # language. list(...) keeps this working where .values() is a view
    # (Python 3) as well as where it is a list (Python 2).
    youth_language_most_spoken = list(youth_language_data.values())[0]

    youth_province_birth_data, _ = get_stat_data(
        ['province of birth'], geo, session,
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        key_order=PROVINCE_ORDER)

    youth_region_birth_data, _ = get_stat_data(
        ['region of birth'], geo, session,
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        key_order=REGION_ORDER)
    youth_region_birth_data['SADC']['name'] = 'SADC*'

    youth_citizenship_data, _ = get_stat_data(
        ['citizenship'], geo, session,
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        key_order=CITIZENSHIP_ORDER)

    youth_count = youth_pop['youth_pop']['numerators']['this']

    final_data = {
        'total_population': {
            "name": "People",
            "values": {"this": pop_total}
        },
        'youth_population_total': {
            "name": "Youth aged 15-24",
            "values": {"this": youth_count}
        },
        'youth_population_perc': {
            "name": "Of the population are youth aged 15-24",
            "values": {"this": youth_pop['youth_pop']['values']['this']},
        },
        'youth_population_by_age_group': youth_age_group_data,
        'youth_population_by_gender': youth_gender_data,
        'youth_population_by_pop_group': youth_pop_group_data,
        'youth_language_most_spoken': youth_language_most_spoken,
        'youth_population_by_language': youth_language_data,
        'youth_born_in_sa': {
            "name": "Of the youth population were born in South Africa",
            "values": {
                "this": youth_region_birth_data['South Africa']['values']['this']
            },
        },
        'youth_by_province_of_birth': youth_province_birth_data,
        'youth_by_region_of_birth': youth_region_birth_data,
        'youth_sa_citizenship': {
            'name': 'of the youth population are South African citizens',
            'values': {
                'this': youth_citizenship_data['Yes']['values']['this']
            }
        },
        'youth_by_citizenship': youth_citizenship_data,
    }

    # The following info is displayed in the block over the map
    if geo.square_kms:
        final_data['population_density'] = {
            'name': "youth per square kilometre",
            'values': {"this": youth_count / geo.square_kms}
        }

    return final_data
def get_living_environment_profile(geo, session, display_profile, comparative=False):
    """
    Build the living environment section of the youth profile for ``geo``.

    Every table is read from the 'Census and Community Survey' dataset. Which
    internet access indicator is reported depends on the release year of the
    current context: "no access to internet" for 2011, access through a cell
    phone otherwise. ``display_profile`` and ``comparative`` are accepted but
    not used here.

    :param geo: the geography to profile
    :param session: database session passed through to get_stat_data
    :return: dict of distributions and headline figures
    """
    year = current_context().get('year')

    def household_stat(field, **kwargs):
        # All but one table share the same universe; every table shares the
        # dataset. Only the data dictionary is needed, not the total.
        kwargs.setdefault('table_universe', 'Youth living in households')
        kwargs.setdefault('table_dataset', 'Census and Community Survey')
        stats, _ = get_stat_data([field], geo, session, **kwargs)
        return stats

    def this_value(stats, key):
        # Value of ``key`` for this geography, or None when it is absent.
        return stats.get(key, {}).get('values', {}).get('this', None)

    final_data = {}

    youth_electricity_access = household_stat(
        'electricity access', key_order=ELECTRICITY_ACCESS_KEY_ORDER)
    youth_toilet_access = household_stat(
        'toilet access', key_order=TOILET_ACCESS_KEY_ORDER)
    youth_water_access = household_stat(
        'water access', key_order=WATER_ACCESS_KEY_ORDER)
    youth_type_of_dwelling = household_stat(
        'type of dwelling', key_order=TYPE_OF_DWELLING_ORDER)

    # Shacks, whether or not they are in a backyard. Left as None unless at
    # least one of the two figures is present and non-zero.
    informal_not_in_backyard = this_value(
        youth_type_of_dwelling, 'Informal not in backyard')
    informal_in_backyard = this_value(
        youth_type_of_dwelling, 'Informal in backyard')
    youth_dwelling_informal = None
    if informal_not_in_backyard or informal_in_backyard:
        youth_dwelling_informal = (
            (informal_not_in_backyard or 0) + (informal_in_backyard or 0))

    if year == 'latest':
        type_of_area_order = TYPE_OF_AREA_ORDER_2016
    else:
        type_of_area_order = TYPE_OF_AREA_ORDER
    youth_type_of_area = household_stat(
        'type of area', key_order=type_of_area_order)

    youth_household_crowded = household_stat(
        'household crowded', key_order=HH_CROWDED_KEY_ORDER)

    # The releases have different indicators for internet access
    if str(year) == '2011':
        youth_access_to_internet = household_stat(
            'access to internet',
            table_name='youth_access_to_internet_gender',
            key_order=INTERNET_ACCESS_ORDER)
        final_data['youth_no_access_to_internet'] = {
            "name": "Of youth live in households with no access to internet",
            "values": {"this": youth_access_to_internet['No access to internet']['values']['this']}
        }
    else:
        youth_access_to_internet = household_stat(
            'access to internet', order_by='-total')
        final_data['youth_cell_phone_access_internet'] = {
            "name": "Of youth access the internet through a cell phone",
            "values": {"this": youth_access_to_internet['Cell phone']['values']['this']}
        }

    youth_by_living_with_parents_status = household_stat(
        'living with parents',
        table_universe='Youth aged 15-19 living in households',
        key_order=LIVING_WITH_PARENTS_KEY_ORDER)

    # Iterate with .items(): .iteritems() only exists on Python 2 dicts.
    living_with_parent_keys = ('Both parents', 'Mother only', 'Father only')
    living_with_parents_stat = sum(
        entry['values']['this'] or 0
        for status, entry in youth_by_living_with_parents_status.items()
        if status in living_with_parent_keys)

    final_data.update({
        'youth_electricity_access': youth_electricity_access,
        'youth_toilet_access': youth_toilet_access,
        'youth_water_access': youth_water_access,
        'youth_dwelling_informal': {
            "name": "Of youth live in households that are informal dwellings (shacks)",
            "values": {"this": youth_dwelling_informal}
        },
        'youth_type_of_dwelling': youth_type_of_dwelling,
        'youth_type_of_area': youth_type_of_area,
        'youth_households_overcrowded': {
            "name": "Of youth live in households that are overcrowded *",
            "values": {"this": youth_household_crowded['Overcrowded']['values']['this']}
        },
        'youth_household_crowded': youth_household_crowded,
        'youth_access_to_internet': youth_access_to_internet,
        'youth_living_with_parents': {
            "name": "Of youth aged 15-19 live with at least one biological parent",
            "values": {"this": living_with_parents_stat}
        },
        'youth_by_living_with_parents_status': youth_by_living_with_parents_status
    })

    return final_data
def get_stat_data(self, geo, fields=None, key_order=None, percent=True, total=None, recode=None, year=None):
    """ Get a data dictionary for a place from this table.

    This fetches the values for each column in this table and returns a data
    dictionary for those values, with appropriate names and metadata.

    :param geo: the geography
    :param str or list fields: the columns to fetch stats for. By default, all columns except
                               geo-related and the total column (if any) are used.
    :param list key_order: explicit ordering of the fields to report, or None for the default order.
                           Default order is the order in +fields+ if given, otherwise
                           it's the natural column order from the DB.
    :param bool percent: should we calculate percentages, or just include raw values?
    :param int total: the total value to use for percentages, name of a
                      field, or None to use the sum of all retrieved fields (default)
    :param dict recode: map from field names to strings to recode column names. Many fields
                        can be recoded to the same thing, their values will be summed.
                        A callable taking a field name is also accepted.
    :param str year: release year to use. None will try to use the current dataset context, and
                     'latest' will use the latest release.

    :return: (data-dictionary, total)
    :raises ValueError: if a requested field or the total column is not one
                        of this table's columns
    """
    db_table = self.get_db_table(year=year or current_context().get('year'))
    model = db_table.model
    columns = self.columns(db_table)

    session = get_session()
    try:
        if fields is not None and not isinstance(fields, list):
            fields = [fields]
        if fields:
            for f in fields:
                if f not in columns:
                    raise ValueError("Invalid field/column '%s' for table '%s'. Valid columns are: %s" % (
                        f, self.id, ', '.join(columns.keys())))
        else:
            # Take an explicit copy: it is modified below, and keys() is not
            # a list everywhere.
            fields = list(columns.keys())
            if self.total_column:
                fields.remove(self.total_column)

        recode = recode or {}
        if recode:
            # change lambda to dicts
            if not isinstance(recode, dict):
                recode = {f: recode(f) for f in fields}

        # is the total column valid?
        if isinstance(total, basestring) and total not in columns:
            raise ValueError("Total column '%s' isn't one of the columns for table '%s'. Valid columns are: %s" % (
                total, self.id, ', '.join(columns.keys())))

        # table columns to fetch
        table_columns = model.__table__.columns
        cols = [table_columns[c] for c in fields]

        # Also fetch the total column when it is named and not already
        # selected. The check is made against the field *names* and the real
        # column object is queried, rather than comparing a string with
        # column objects and adding the bare string to the query.
        if isinstance(total, basestring) and total not in fields:
            cols.append(table_columns[total])

        # do the query. If this returns no data, row is None
        row = session\
            .query(*cols)\
            .filter(model.geo_level == geo.geo_level,
                    model.geo_code == geo.geo_code,
                    model.geo_version == geo.version)\
            .first()

        if row is None:
            row = ZeroRow()

        # what's our denominator?
        if total is None:
            # sum of all columns
            total = sum(getattr(row, f) or 0 for f in fields)
        elif isinstance(total, basestring):
            total = getattr(row, total)

        # Now build a data dictionary based on the columns in +row+.
        # Multiple columns may be recoded into one, so we have to
        # accumulate values as we go.
        results = OrderedDict()

        key_order = key_order or fields  # default key order is just the list of fields

        for field in key_order:
            val = getattr(row, field) or 0

            # recode the key for this field, default is to keep it the same
            key = recode.get(field, field)

            # set the recoded field name, noting that the key may already
            # exist if another column recoded to it
            field_info = results.setdefault(key, {'name': recode.get(field, columns[field]['name'])})

            if percent:
                # sum up existing values, if any
                val = val + field_info.get('numerators', {}).get('this', 0)
                field_info['values'] = {'this': p(val, total)}
                field_info['numerators'] = {'this': val}
            else:
                # sum up existing values, if any
                val = val + field_info.get('values', {}).get('this', 0)
                field_info['values'] = {'this': val}

        add_metadata(results, self, db_table.active_release)
        return results, total
    finally:
        session.close()
def get_service_delivery_profile(geo, session):
    """
    Build the service delivery section (water, refuse, electricity, toilets).

    What is reported depends on the current context year and on the
    geography version:

    * when the year is "latest", water supplier data and the
      "access to electricity" table are included;
    * when both the geography version and the release year are 2011, the
      combined cooking/heating/lighting electricity table is summarised.

    :param geo: the geography to profile
    :param session: database session passed through to the data helpers
    :return: dict of distributions and headline figures
    """
    year = current_context().get("year")
    is_latest = year == "latest"
    is_2011_release = str(year) == "2011"
    # One flag guards both the computation and the use of the 2011
    # electricity figures. Previously the use was guarded by the geography
    # version alone, so a 2011 geography viewed with any other release read
    # variables that had never been assigned.
    has_2011_electricity = geo.version == "2011" and is_2011_release

    # water source
    water_src_data, total_wsrc = get_stat_data(
        ["source of water"],
        geo,
        session,
        recode=SHORT_WATER_SOURCE_CATEGORIES,
        order_by="-total",
    )

    # water from a service provider
    total_water_sp = 0.0
    perc_water_sp = 0.0

    if is_latest:
        water_supplier_data, total_wspl = get_stat_data(
            ["supplier of water"],
            geo,
            session,
            recode=SHORT_WATER_SUPPLIER_CATEGORIES,
            order_by="-total",
        )
        for key in ("Service provider", "Water scheme"):
            if key in water_supplier_data:
                total_water_sp += water_supplier_data[key]["numerators"]["this"]
        perc_water_sp = percent(total_water_sp, total_wspl)
    elif "Service provider" in water_src_data:
        total_water_sp = water_src_data["Service provider"]["numerators"]["this"]
        perc_water_sp = percent(total_water_sp, total_wsrc)

    percentage_water_from_service_provider = {
        "name": "Are getting water from a regional or local service provider",
        "numerators": {"this": total_water_sp},
        "values": {"this": perc_water_sp},
    }

    # refuse disposal
    refuse_disp_data, total_ref = get_stat_data(
        ["refuse disposal"],
        geo,
        session,
        recode=SHORT_REFUSE_DISPOSAL_CATEGORIES,
        order_by="-total",
    )

    # .items() rather than the Python-2-only .iteritems()
    total_ref_sp = 0.0
    for k, v in refuse_disp_data.items():
        if k.startswith("Service provider"):
            total_ref_sp += v["numerators"]["this"]

    sp_name_2011 = (
        "Are getting refuse disposal from a local authority or private company"
    )
    sp_name_2016 = "Are getting refuse disposal from a local authority, private company or community members"

    percentage_ref_disp_from_service_provider = {
        "name": sp_name_2011 if is_2011_release else sp_name_2016,
        "numerators": {"this": total_ref_sp},
        "values": {"this": percent(total_ref_sp, total_ref)},
    }

    # electricity
    if has_2011_electricity:
        elec_attrs = [
            "electricity for cooking",
            "electricity for heating",
            "electricity for lighting",
        ]

        elec_table = get_datatable("electricityforcooking_electricityforheating_electr")
        objects = elec_table.get_rows_for_geo(geo, session)

        total_elec = 0.0
        total_some_elec = 0.0
        elec_access_data = {
            "total_all_elec": {
                "name": "Have electricity for everything",
                "numerators": {"this": 0.0},
            },
            "total_some_not_all_elec": {
                "name": "Have electricity for some things",
                "numerators": {"this": 0.0},
            },
            "total_no_elec": {"name": "No electricity", "numerators": {"this": 0.0}},
        }

        for obj in objects:
            total_elec += obj.total

            # A use counts as electrified unless its value starts with "no ".
            uses = [not getattr(obj, attr).startswith("no ") for attr in elec_attrs]
            has_all = all(uses)
            has_some = any(uses)

            if has_some:
                total_some_elec += obj.total

            if has_all:
                bucket = "total_all_elec"
            elif has_some:
                bucket = "total_some_not_all_elec"
            else:
                bucket = "total_no_elec"
            elec_access_data[bucket]["numerators"]["this"] += obj.total

        set_percent_values(elec_access_data, total_elec)
        add_metadata(
            elec_access_data,
            elec_table,
            elec_table.get_release(year),
        )

    if is_latest:
        # We don't have this data for 2011
        elec_access, _ = get_stat_data(
            ["access to electricity"],
            geo,
            session,
            table_universe="Population",
            recode=ELECTRICITY_ACCESS_RECODE,
            order_by="-total",
        )

    # toilets
    toilet_data, total_toilet = get_stat_data(
        ["toilet facilities"],
        geo,
        session,
        exclude_zero=True,
        recode=COLLAPSED_TOILET_CATEGORIES,
        order_by="-total",
    )

    total_flush_toilet = 0.0
    total_no_toilet = 0.0
    for key, data in toilet_data.items():
        if key.startswith(("Flush", "Chemical")):
            total_flush_toilet += data["numerators"]["this"]
        if key == "None":
            total_no_toilet += data["numerators"]["this"]

    profile = {
        "water_source_distribution": water_src_data,
        "percentage_water_from_service_provider": percentage_water_from_service_provider,
        "refuse_disposal_distribution": refuse_disp_data,
        "percentage_ref_disp_from_service_provider": percentage_ref_disp_from_service_provider,
        "percentage_flush_toilet_access": {
            "name": "Have access to flush or chemical toilets",
            "numerators": {"this": total_flush_toilet},
            "values": {"this": percent(total_flush_toilet, total_toilet)},
        },
        "percentage_no_toilet_access": {
            "name": "Have no access to any toilets",
            "numerators": {"this": total_no_toilet},
            "values": {"this": percent(total_no_toilet, total_toilet)},
        },
        "toilet_facilities_distribution": toilet_data,
    }

    if is_latest:
        profile.update(
            {
                "water_supplier_distribution": water_supplier_data,
                "electricity_access": elec_access,
                "percentage_no_electricity_access": {
                    "name": "Have no access to electricity",
                    "numerators": elec_access["No access to electricity"]["numerators"],
                    "values": elec_access["No access to electricity"]["values"],
                },
            }
        )

    if has_2011_electricity:
        profile.update(
            {
                "percentage_electricity_access": {
                    "name": "Have electricity for at least one of cooking, heating or lighting",
                    "numerators": {"this": total_some_elec},
                    "values": {"this": percent(total_some_elec, total_elec)},
                },
                "electricity_access_distribution": elec_access_data,
            }
        )

    return profile
def get_economics_profile(geo, session):
    """
    Build the economics section of a profile for a geography.

    The section covers the individual income distribution together with
    the estimated income of its median bracket, official employment
    status, type of sector, and internet access. The income table used
    (monthly or annual) is selected by ``geo.version``; the internet
    access data depends on the ``year`` of the current dataset context.

    :param geo: the geography being profiled
    :param session: database session passed through to ``get_stat_data``
    :return: dict mapping profile keys to their data dictionaries
    """
    # Pick the income configuration up front; everything that follows is
    # identical for the monthly (2011) and annual variants.
    if geo.version == "2011":
        income_recode = COLLAPSED_MONTHLY_INCOME_CATEGORIES
        income_field = "employed individual monthly income"
        income_estimates = ESTIMATED_MONTHLY_INCOME_CATEGORIES
        distribution_key = "individual_income_distribution"
        median_key = "median_individual_income"
        median_label = "Average monthly income"
    else:
        income_recode = COLLAPSED_ANNUAL_INCOME_CATEGORIES
        income_field = "employed individual annual income"
        income_estimates = ESTIMATED_ANNUAL_INCOME_CATEGORIES
        distribution_key = "individual_annual_income_distribution"
        median_key = "median_annual_individual_income"
        median_label = "Average annual income"

    # income distribution
    income_distribution, _ = get_stat_data(
        [income_field],
        geo,
        session,
        exclude=["Not applicable"],
        recode=income_recode,
        key_order=income_recode.values(),
    )

    # the median bracket is translated into an estimated income figure
    median_bracket = calculate_median_stat(income_distribution)

    profile = {
        distribution_key: income_distribution,
        median_key: {
            "name": median_label,
            "values": {"this": income_estimates[median_bracket]},
        },
    }

    # employment status
    employment_status, _ = get_stat_data(
        ["official employment status"],
        geo,
        session,
        exclude=["Age less than 15 years", "Not applicable"],
        order_by="official employment status",
        table_name="officialemploymentstatus",
    )
    profile["employment_status"] = employment_status

    # sector
    sector_distribution, _ = get_stat_data(
        ["type of sector"],
        geo,
        session,
        exclude=["Not applicable"],
        order_by="type of sector",
    )
    profile["sector_type_distribution"] = sector_distribution

    # access to internet
    if current_context().get("year") == "latest":
        internet_distribution, _ = get_stat_data(
            ["access to internet"],
            geo,
            session,
            recode=INTERNET_ACCESS_RECODE,
            table_name="accesstointernet_2016",
        )
        profile["internet_access_distribution"] = internet_distribution
        return profile

    # Other years: households with and without access are fetched
    # separately so a single "has access" percentage can be derived.
    internet_distribution, with_access = get_stat_data(
        ["access to internet"],
        geo,
        session,
        exclude=["No access to internet"],
        order_by="access to internet",
    )
    _, without_access = get_stat_data(
        ["access to internet"], geo, session, only=["No access to internet"]
    )
    all_households = with_access + without_access

    profile["internet_access_distribution"] = internet_distribution
    profile["internet_access"] = {
        "name": "Households with internet access",
        "values": {"this": percent(with_access, all_households)},
        "numerators": {"this": with_access},
    }
    return profile
def get_stat_data(
    self,
    geo,
    fields=None,
    key_order=None,
    percent=True,
    total=None,
    recode=None,
    year=None,
):
    """
    Get a data dictionary for a place from this table.

    This fetches the values for each column in this table and returns a data
    dictionary for those values, with appropriate names and metadata.

    :param geo: the geography
    :param str or list fields: the columns to fetch stats for. By default, all
        columns except geo-related and the total column (if any) are used.
    :param list key_order: the columns to include in the result, in the
        order they should appear, or None to use +fields+. Entries are
        looked up as column names (before recoding), so they must be among
        the fetched fields.
    :param bool percent: should we calculate percentages, or just include
        raw values? Any truthy value enables percentages; in that case each
        entry carries both ``values`` and ``numerators``, otherwise only
        ``values``.
    :param int total: the total value to use for percentages, name of a
        field, or None to use the sum of all retrieved fields (default)
    :param dict recode: map from field names to strings to recode column
        names, or a callable applied to each field name to build that map.
        Many fields can be recoded to the same thing, their values will be
        summed.
    :param str year: release year to use. None will try to use the current
        dataset context, and 'latest' will use the latest release.

    :return: (data-dictionary, total)
    :raises ValueError: if a requested field, or a total given by name, is
        not one of this table's columns.
    """
    db_table = self.get_db_table(year=year or current_context().get("year"))
    model = db_table.model
    columns = self.columns(db_table)

    session = get_session()
    try:
        if fields is not None and not isinstance(fields, list):
            fields = [fields]

        if fields:
            for f in fields:
                if f not in columns:
                    raise ValueError(
                        "Invalid field/column '%s' for table '%s'. Valid columns are: %s"
                        % (f, self.id, ", ".join(columns.keys()))
                    )
        else:
            # Copy into a real list: keys() may be a view that has no
            # remove(), and the table's own column mapping must not change.
            fields = list(columns.keys())
            if self.total_column:
                fields.remove(self.total_column)

        recode = recode or {}
        # change lambda to dicts
        if recode and not isinstance(recode, dict):
            recode = {f: recode(f) for f in fields}

        # is the total column valid?
        total_is_column = isinstance(total, basestring)
        if total_is_column and total not in columns:
            raise ValueError(
                "Total column '%s' isn't one of the columns for table '%s'. Valid columns are: %s"
                % (total, self.id, ", ".join(columns.keys()))
            )

        # table columns to fetch
        table_columns = model.__table__.columns
        cols = [table_columns[c] for c in fields]

        # Fetch the total column too, unless it is already being fetched.
        # The check is made against the field names: comparing the name
        # with the Column objects in +cols+ can never match. The real
        # Column is queried rather than a bare string.
        if total_is_column and total not in fields:
            cols.append(table_columns[total])

        # do the query. If this returns no data, row is None
        row = (
            session.query(*cols)
            .filter(
                model.geo_level == geo.geo_level,
                model.geo_code == geo.geo_code,
                model.geo_version == geo.version,
            )
            .first()
        )

        if row is None:
            row = ZeroRow()

        # what's our denominator?
        if total is None:
            # sum of all columns
            total = sum(getattr(row, f) or 0 for f in fields)
        elif total_is_column:
            total = getattr(row, total)

        # Now build a data dictionary based on the columns in +row+.
        # Multiple columns may be recoded into one, so we have to
        # accumulate values as we go.
        results = OrderedDict()

        # default key order is just the list of fields
        key_order = key_order or fields

        for field in key_order:
            val = getattr(row, field) or 0

            # recode the key for this field, default is to keep it the same
            key = recode.get(field, field)

            # set the recoded field name, noting that the key may already
            # exist if another column recoded to it
            field_info = results.setdefault(
                key, {"name": recode.get(field, columns[field]["name"])}
            )

            if percent:
                # sum up existing values, if any
                val = val + field_info.get("numerators", {}).get("this", 0)
                field_info["values"] = {"this": p(val, total)}
                field_info["numerators"] = {"this": val}
            else:
                # sum up existing values, if any
                val = val + field_info.get("values", {}).get("this", 0)
                field_info["values"] = {"this": val}

        add_metadata(results, self, db_table.active_release)

        return results, total
    finally:
        # always release the session obtained above
        session.close()
def get_demographics_profile(geo, session, display_profile, comparative=False):
    """
    Build the youth demographics section of a profile for a geography.

    Gathers the total and youth population, and youth breakdowns by age
    group, gender, population group, language, province and region of
    birth, and citizenship. When the geography has a non-zero area, a
    population density entry is added as well.

    :param geo: the geography being profiled
    :param session: database session passed through to ``get_stat_data``
    :param display_profile: accepted but not used by this function
    :param bool comparative: accepted but not used by this function
    :return: dict mapping profile keys to their data dictionaries
    """
    youth_pop_table = get_datatable('youth_population')
    # Percentages must be enabled: both the numerators and the percentage
    # values of the result are read further down. This used to be passed as
    # the string 'False', which is truthy and so also enabled percentages;
    # True states the same thing without the misleading spelling.
    youth_pop, pop_total = youth_pop_table.get_stat_data(
        geo, total='total_pop', percent=True)

    youth_age_group_data, _ = get_stat_data(
        ['age groups in 10 years'], geo, session,
        table_universe='Population',
        table_dataset='Census and Community Survey')

    youth_gender_data, _ = get_stat_data(
        ['gender'], geo, session,
        table_fields=['gender', 'population group'],
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        key_order=GENDER_ORDER)

    population_group_order = (
        POPULATION_GROUP_ORDER_2016
        if current_context().get('year') == 'latest'
        else POPULATION_GROUP_ORDER)

    youth_pop_group_data, _ = get_stat_data(
        ['population group'], geo, session,
        table_fields=['population group', 'gender'],
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        table_name='youth_population_group_gender',
        key_order=population_group_order)

    youth_language_data, _ = get_stat_data(
        ['language'], geo, session,
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        order_by='-total')
    # The data is ordered by descending total, so the first entry is the
    # most spoken language. Going through list() avoids indexing keys()
    # directly, which only works when keys() returns a list.
    youth_language_most_spoken = list(youth_language_data.values())[0]

    youth_province_birth_data, _ = get_stat_data(
        ['province of birth'], geo, session,
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        key_order=PROVINCE_ORDER)

    youth_region_birth_data, _ = get_stat_data(
        ['region of birth'], geo, session,
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        key_order=REGION_ORDER)
    # relabel the SADC entry with a trailing asterisk
    youth_region_birth_data['SADC']['name'] = 'SADC*'

    youth_citizenship_data, _ = get_stat_data(
        ['citizenship'], geo, session,
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        key_order=CITIZENSHIP_ORDER)

    youth_total = youth_pop['youth_pop']['numerators']['this']

    final_data = {
        'total_population': {
            "name": "People",
            "values": {"this": pop_total}
        },
        'youth_population_total': {
            "name": "Youth aged 15-24",
            "values": {"this": youth_total}
        },
        'youth_population_perc': {
            "name": "Of the population are youth aged 15-24",
            "values": {"this": youth_pop['youth_pop']['values']['this']},
        },
        'youth_population_by_age_group': youth_age_group_data,
        'youth_population_by_gender': youth_gender_data,
        'youth_population_by_pop_group': youth_pop_group_data,
        'youth_language_most_spoken': youth_language_most_spoken,
        'youth_population_by_language': youth_language_data,
        'youth_born_in_sa': {
            "name": "Of the youth population were born in South Africa",
            "values": {"this": youth_region_birth_data['South Africa']['values']['this']},
        },
        'youth_by_province_of_birth': youth_province_birth_data,
        'youth_by_region_of_birth': youth_region_birth_data,
        'youth_sa_citizenship': {
            'name': 'of the youth population are South African citizens',
            'values': {'this': youth_citizenship_data['Yes']['values']['this']}
        },
        'youth_by_citizenship': youth_citizenship_data,
    }

    # The following info is displayed in the block over the map
    if geo.square_kms:
        final_data['population_density'] = {
            'name': "youth per square kilometre",
            'values': {"this": youth_total / geo.square_kms}
        }

    return final_data