def get_education_profile(geo_code, geo_level, session):
    """
    Build the educational attainment profile for a place.

    Reads the 'highest educational level 20 and older' rows for the place,
    collapses the per-category counts into broader categories and works out
    how many people completed Grade 9 or higher and Matric or higher.

    :param str geo_code: the geographical code
    :param str geo_level: the geographical level
    :param dbsession session: sqlalchemy session
    :return: dict with 'educational_attainment_distribution' (the collapsed
             per-category distribution) and 'educational_attainment' (the two
             "or higher" summary figures). Every entry carries 'numerators'
             and percentage 'values'.
    """
    db_model = get_model_from_fields(
        ['highest educational level 20 and older'], geo_level)
    objects = get_objects_by_geo(db_model, geo_code, geo_level, session)

    edu_dist_data = {}
    get_or_higher = 0.0
    fet_or_higher = 0.0
    total = 0.0

    for i, obj in enumerate(objects):
        category_val = getattr(obj, 'highest educational level 20 and older')

        # increment counters; the FET check only runs for categories that
        # already counted towards GET-or-higher
        total += obj.total
        if category_val in EDUCATION_GET_OR_HIGHER:
            get_or_higher += obj.total
            if category_val in EDUCATION_FET_OR_HIGHER:
                fet_or_higher += obj.total

        # add data points for category
        edu_dist_data[str(i)] = {
            "name": category_val,
            "numerators": {"this": obj.total},
        }

    edu_dist_data = collapse_categories(edu_dist_data,
                                        COLLAPSED_EDUCATION_CATEGORIES,
                                        key_order=EDUCATION_KEY_ORDER)

    edu_split_data = {
        'percent_get_or_higher': {
            "name": "Completed Grade 9 or higher",
            "numerators": {"this": get_or_higher},
        },
        'percent_fet_or_higher': {
            "name": "Completed Matric or higher",
            "numerators": {"this": fet_or_higher},
        }
    }

    # calculate percentages; this must happen before the 'metadata' entries
    # are added below, because every value in these dicts is treated as a stat
    for data in (edu_dist_data, edu_split_data):
        for fields in data.values():
            numerator = fields["numerators"]["this"]
            # a place with no rows leaves total at 0: report 0% instead of
            # raising ZeroDivisionError
            perc = 0 if total == 0 else numerator / total * 100
            fields["values"] = {"this": round(perc, 2)}

    edu_dist_data['metadata'] = {'universe': 'Individuals aged 20 and older'}
    edu_split_data['metadata'] = {'universe': 'Individuals aged 20 and older'}

    add_metadata(edu_dist_data, db_model)

    return {'educational_attainment_distribution': edu_dist_data,
            'educational_attainment': edu_split_data}
def get_education_profile(geo_code, geo_level, session):
    """
    Build the educational attainment profile for a place.

    Reads the 'highest educational level 20 and older' rows for the place,
    collapses the per-category counts into broader categories and works out
    how many people completed Grade 9 or higher and Matric or higher.

    :param str geo_code: the geographical code
    :param str geo_level: the geographical level
    :param dbsession session: sqlalchemy session
    :return: dict with 'educational_attainment_distribution' (the collapsed
             per-category distribution) and 'educational_attainment' (the two
             "or higher" summary figures). Every entry carries 'numerators'
             and percentage 'values'.
    """
    db_model = get_model_from_fields(
        ['highest educational level 20 and older'], geo_level)
    objects = get_objects_by_geo(db_model, geo_code, geo_level, session)

    edu_dist_data = {}
    get_or_higher = 0.0
    fet_or_higher = 0.0
    total = 0.0

    for i, obj in enumerate(objects):
        category_val = getattr(obj, 'highest educational level 20 and older')

        # increment counters; the FET check only runs for categories that
        # already counted towards GET-or-higher
        total += obj.total
        if category_val in EDUCATION_GET_OR_HIGHER:
            get_or_higher += obj.total
            if category_val in EDUCATION_FET_OR_HIGHER:
                fet_or_higher += obj.total

        # add data points for category
        edu_dist_data[str(i)] = {
            "name": category_val,
            "numerators": {"this": obj.total},
        }

    edu_dist_data = collapse_categories(edu_dist_data,
                                        COLLAPSED_EDUCATION_CATEGORIES,
                                        key_order=EDUCATION_KEY_ORDER)

    edu_split_data = {
        'percent_get_or_higher': {
            "name": "Completed Grade 9 or higher",
            "numerators": {"this": get_or_higher},
        },
        'percent_fet_or_higher': {
            "name": "Completed Matric or higher",
            "numerators": {"this": fet_or_higher},
        }
    }

    # calculate percentages; this must happen before the 'metadata' entries
    # are added below, because every value in these dicts is treated as a stat
    for data in (edu_dist_data, edu_split_data):
        for fields in data.values():
            numerator = fields["numerators"]["this"]
            # a place with no rows leaves total at 0: report 0% instead of
            # raising ZeroDivisionError
            perc = 0 if total == 0 else numerator / total * 100
            fields["values"] = {"this": round(perc, 2)}

    edu_dist_data['metadata'] = {'universe': 'Individuals aged 20 and older'}
    edu_split_data['metadata'] = {'universe': 'Individuals aged 20 and older'}

    add_metadata(edu_dist_data, db_model)

    return {'educational_attainment_distribution': edu_dist_data,
            'educational_attainment': edu_split_data}
def get_stat_data(self, geo_level, geo_code, fields=None, key_order=None,
                  percent=True, total=None, recode=None):
    """
    Get a data dictionary for a place from this table.

    This fetches the values for each column in this table and returns a data
    dictionary for those values, with appropriate names and metadata.

    :param str geo_level: the geographical level
    :param str geo_code: the geographical code
    :param str or list fields: the columns to fetch stats for. By default,
                               every column in ``self.columns`` is used.
    :param list key_order: explicit ordering of the columns in the result, or
                           None for the default order. Entries are read as
                           column names (before recoding). The default is the
                           order of +fields+.
    :param bool percent: should we calculate percentages, or just include
                         raw values?
    :param total: the total value to use for percentages, the name of a
                  column holding it, or None to use the sum of all retrieved
                  fields (default)
    :param recode: dict from column names to the key/name to report them
                   under, or a callable taking a column name and returning
                   that key. Many columns can be recoded to the same key,
                   their values will be summed.
    :return: (data-dictionary, total)
    :raises ValueError: if a requested field or a named total column is not
                        one of this table's columns
    """
    session = get_session()

    try:
        if fields is not None and not isinstance(fields, list):
            fields = [fields]

        if fields:
            for f in fields:
                if f not in self.columns:
                    raise ValueError("Invalid field/column '%s' for table '%s'. Valid columns are: %s" % (
                        f, self.id, ', '.join(self.columns.keys())))
        else:
            fields = self.columns.keys()

        recode = recode or {}
        if recode:
            # change lambda to dicts
            if not isinstance(recode, dict):
                recode = {f: recode(f) for f in fields}

        # is the total column valid?
        if isinstance(total, basestring) and total not in self.columns:
            raise ValueError("Total column '%s' isn't one of the columns for table '%s'. Valid columns are: %s" % (
                total, self.id, ', '.join(self.columns.keys())))

        # table columns to fetch
        cols = [self.model.columns[c] for c in fields]

        # A named total column has to be fetched as well. Compare it against
        # the field *names* (cols holds column objects, so a string is never
        # "in" it) and query the column object rather than its bare name.
        if isinstance(total, basestring) and total not in fields:
            cols.append(self.model.columns[total])

        # do the query. If this returns no data, row is None
        row = session\
            .query(*cols)\
            .filter(self.model.c.geo_level == geo_level,
                    self.model.c.geo_code == geo_code)\
            .first()

        if row is None:
            row = ZeroRow()

        # what's our denominator?
        if total is None:
            # sum of all columns
            total = sum(getattr(row, f) or 0 for f in fields)
        elif isinstance(total, basestring):
            total = getattr(row, total)

        # Now build a data dictionary based on the columns in +row+.
        # Multiple columns may be recoded into one, so we have to
        # accumulate values as we go.
        results = OrderedDict()

        key_order = key_order or fields  # default key order is just the list of fields

        for field in key_order:
            val = getattr(row, field) or 0

            # recode the key for this field, default is to keep it the same
            key = recode.get(field, field)

            # set the recoded field name, noting that the key may already
            # exist if another column recoded to it
            field_info = results.setdefault(
                key, {'name': recode.get(field, self.columns[field]['name'])})

            if percent:
                # sum up existing values, if any
                val = val + field_info.get('numerators', {}).get('this', 0)
                field_info['values'] = {'this': p(val, total)}
                field_info['numerators'] = {'this': val}
            else:
                # sum up existing values, if any
                val = val + field_info.get('values', {}).get('this', 0)
                field_info['values'] = {'this': val}

        add_metadata(results, self)
        return results, total
    finally:
        # always release the session, including when validation raises
        session.close()
def get_stat_data(fields, geo_level, geo_code, session, order_by=None, percent=True, total=None, table_fields=None, table_name=None, only=None, exclude=None, exclude_zero=False, recode=None, key_order=None, table_dataset=None):
    """
    This is our primary helper routine for building a dictionary suitable for
    a place's profile page, based on a statistic.

    It sums over the data for +fields+ in the database for the place
    identified by +geo_level+ and +geo_code+ and calculates numerators and
    values. If multiple fields are given, it creates nested result
    dictionaries.

    Control the rows that are included or ignored using +only+, +exclude+ and
    +exclude_zero+.

    The field values can be recoded using +recode+ and re-ordered using
    +key_order+.

    :param str or list fields: the census field to build stats for. Specify a
                               list of fields to build nested statistics. If
                               multiple fields are specified, then the values
                               of parameters such as +only+, +exclude+ and
                               +recode+ will change. These must be fields in
                               `api.models.census.census_fields`, e.g.
                               'highest educational level'
    :param str geo_level: the geographical level
    :param str geo_code: the geographical code
    :param dbsession session: sqlalchemy session
    :param str order_by: field to order by, or None for default, eg. '-total'.
                         The default is the first entry of +fields+.
    :param bool percent: should we calculate percentages, or just sum raw
                         values? When False, the raw sums are reported as the
                         values and the numerators are set to None.
    :param list table_fields: list of fields to use to find the table,
                              defaults to `fields`
    :param int total: the total value to use for percentages, or None to
                      total columns automatically. Cannot be combined with
                      multiple fields.
    :param str table_name: override the table name, otherwise it's calculated
                           from the fields and geo_level
    :param dict or list only: only include these field values. If +fields+
                              has many items, this must be a dict mapping
                              field names to a list of strings.
    :param dict or list exclude: ignore these field values. If +fields+ has
                                 many items, this must be a dict mapping field
                                 names to a list of strings. Field names are
                                 checked before any recoding.
    :param bool exclude_zero: ignore fields that have a zero total
    :param dict or lambda recode: function or dict to recode values of
                                  +key_field+. If +fields+ is a singleton,
                                  then the keys of this dict must be the
                                  values to recode from, otherwise they must
                                  be the field names and then the values. If
                                  this is a lambda, it is called with the
                                  field name and its value as arguments.
    :param dict or list key_order: ordering for keys in result dictionary. If
                                   +fields+ has many items, this must be a
                                   dict from field names to orderings. The
                                   default ordering is determined by
                                   +order_by+.
    :param str table_dataset: dataset used to help find the table if
                              +table_name+ isn't given.

    :return: (data-dictionary, total)
    :raises ValueError: if +only+, +exclude+ or +key_order+ is not a dict, or
                        +total+ is given, while many fields are requested
    """
    if not isinstance(fields, list):
        fields = [fields]

    n_fields = len(fields)
    many_fields = n_fields > 1

    if order_by is None:
        order_by = fields[0]

    # Normalise only/exclude/key_order to dicts keyed by field name. The
    # shorthand (non-dict) form is only unambiguous for a single field.
    if only is not None:
        if not isinstance(only, dict):
            if many_fields:
                raise ValueError("If many fields are given, then only must be a dict. I got %s instead" % only)
            else:
                only = {fields[0]: set(only)}

    if exclude is not None:
        if not isinstance(exclude, dict):
            if many_fields:
                raise ValueError("If many fields are given, then exclude must be a dict. I got %s instead" % exclude)
            else:
                exclude = {fields[0]: set(exclude)}

    if key_order:
        if not isinstance(key_order, dict):
            if many_fields:
                raise ValueError("If many fields are given, then key_order must be a dict. I got %s instead" % key_order)
            else:
                key_order = {fields[0]: key_order}
    else:
        key_order = {}

    if total is not None and many_fields:
        raise ValueError("Cannot specify a total if many fields are given")

    # A callable recoder, or any recoder for a single field, applies to every
    # field; only a dict given with many fields is already keyed by field name.
    if recode:
        if not isinstance(recode, dict) or not many_fields:
            recode = dict((f, recode) for f in fields)

    model = get_model_from_fields(table_fields or fields, geo_level, table_name, table_dataset)
    objects = get_objects_by_geo(model, geo_code, geo_level, session, fields=fields, order_by=order_by)

    # root_data is the (possibly nested) result; our_total accumulates the
    # total per final-level key
    root_data = OrderedDict()
    our_total = {}

    def get_data_object(obj):
        """ Recurse down the list of fields and return the
        final resting place for data for this stat.

        Returns (key, data) where data is the dict that accumulates the
        stat for this row, or (key, None) if the row is filtered out by
        +only+ or +exclude+. """
        data = root_data

        for i, field in enumerate(fields):
            key = getattr(obj, field)

            # filtering is done on the raw value, before recoding
            if only and field in only and key not in only.get(field, {}):
                return key, None

            if exclude and key in exclude.get(field, {}):
                return key, None

            if recode and field in recode:
                recoder = recode[field]
                if isinstance(recoder, dict):
                    key = recoder.get(key, key)
                else:
                    key = recoder(field, key)
            else:
                key = capitalize(key)

            # enforce key ordering: the first time this level is touched,
            # pre-create the ordered keys so they keep their positions
            if not data and field in key_order:
                for fld in key_order[field]:
                    data[fld] = OrderedDict()

            # ensure it's there
            if key not in data:
                data[key] = OrderedDict()

            data = data[key]

            # default values for intermediate fields
            if data is not None and i < n_fields - 1:
                data['metadata'] = {'name': key}

        # data is now the dict where the end value is going to go;
        # an empty dict means this leaf has not been initialised yet
        if not data:
            data['name'] = key
            data['numerators'] = {'this': 0.0}

        return key, data

    # run the stats for the objects
    for obj in objects:
        if obj.total == 0 and exclude_zero:
            continue

        # get the data dict where these values must go
        key, data = get_data_object(obj)
        if not data:
            continue

        our_total[key] = our_total.get(key, 0.0) + obj.total
        data['numerators']['this'] += obj.total

    if total is not None:
        grand_total = total
    else:
        grand_total = sum(our_total.values())

    # add in percentages
    def calc_percent(data):
        # walks the nested result; dicts with 'numerators' are leaves,
        # anything else (except 'metadata') is recursed into
        for key, data in data.iteritems():
            if not key == 'metadata':
                if 'numerators' in data:
                    if percent:
                        # with many fields the denominator is the total for
                        # this leaf key, otherwise the grand total
                        tot = our_total[key] if many_fields else grand_total
                        perc = 0 if tot == 0 else (data['numerators']['this'] / tot * 100)
                        data['values'] = {'this': round(perc, 2)}
                    else:
                        # raw mode: report the sum as the value and blank
                        # out the numerator
                        data['values'] = dict(data['numerators'])
                        data['numerators']['this'] = None
                else:
                    calc_percent(data)

    calc_percent(root_data)

    add_metadata(root_data, model)

    return root_data, grand_total
def get_service_delivery_profile(geo_code, geo_level, session):
    """
    Build the service delivery profile for a place.

    Covers water source, refuse disposal, electricity access and toilet
    facilities: a distribution for each, plus headline percentages derived
    from those distributions.

    :param str geo_code: the geographical code
    :param str geo_level: the geographical level
    :param dbsession session: sqlalchemy session
    :return: dict of distributions and headline figures
    """
    def headline(name, numerator, denominator):
        # a single summary figure: the raw count and its share of the total
        return {
            "name": name,
            "numerators": {"this": numerator},
            "values": {"this": percent(numerator, denominator)},
        }

    # water source
    water_src_data, total_wsrc = get_stat_data(
        ['source of water'], geo_level, geo_code, session,
        recode=SHORT_WATER_SOURCE_CATEGORIES,
        order_by='-total')
    total_water_sp = (
        water_src_data['Service provider']['numerators']['this']
        if 'Service provider' in water_src_data else 0.0)

    # refuse disposal
    db_model_ref = get_model_from_fields(['refuse disposal'], geo_level)
    refuse_rows = get_objects_by_geo(db_model_ref, geo_code, geo_level, session,
                                     order_by='-total')
    refuse_disp_data = OrderedDict()
    total_ref = 0.0
    total_ref_sp = 0.0
    for row in refuse_rows:
        raw_category = getattr(row, 'refuse disposal')
        short_category = SHORT_REFUSE_DISPOSAL_CATEGORIES[raw_category]
        refuse_disp_data[short_category] = {
            "name": short_category,
            "numerators": {"this": row.total},
        }
        total_ref += row.total
        if raw_category.startswith('Removed by local authority'):
            total_ref_sp += row.total

    # electricity
    elec_attrs = ['electricity for cooking',
                  'electricity for heating',
                  'electricity for lighting']
    db_model_elec = get_model_from_fields(elec_attrs, geo_level)
    elec_rows = get_objects_by_geo(db_model_elec, geo_code, geo_level, session)

    elec_labels = (
        ('total_all_elec', "Have electricity for everything"),
        ('total_some_not_all_elec', "Have electricity for some things"),
        ('total_no_elec', "No electricity"),
    )
    elec_access_data = {}
    for bucket, label in elec_labels:
        elec_access_data[bucket] = {"name": label, "numerators": {"this": 0.0}}

    total_elec = 0.0
    total_some_elec = 0.0
    for row in elec_rows:
        total_elec += row.total

        # one flag per use: True unless the value starts with 'no '
        has_power = [not getattr(row, attr).startswith('no ')
                     for attr in elec_attrs]

        if any(has_power):
            total_some_elec += row.total

        if all(has_power):
            bucket = 'total_all_elec'
        elif any(has_power):
            bucket = 'total_some_not_all_elec'
        else:
            bucket = 'total_no_elec'
        elec_access_data[bucket]['numerators']['this'] += row.total

    # percentages go in before add_metadata is called on these dicts
    for distribution, denominator in ((refuse_disp_data, total_ref),
                                      (elec_access_data, total_elec)):
        for entry in distribution.values():
            entry["values"] = {
                "this": percent(entry["numerators"]["this"], denominator)}

    add_metadata(refuse_disp_data, db_model_ref)
    add_metadata(elec_access_data, db_model_elec)

    # toilets
    toilet_data, total_toilet = get_stat_data(
        ['toilet facilities'], geo_level, geo_code, session,
        exclude_zero=True,
        recode=COLLAPSED_TOILET_CATEGORIES,
        order_by='-total')

    total_flush_toilet = 0.0
    total_no_toilet = 0.0
    for key, entry in toilet_data.iteritems():
        if key.startswith(('Flush', 'Chemical')):
            total_flush_toilet += entry['numerators']['this']
        if key == 'None':
            total_no_toilet += entry['numerators']['this']

    return {
        'water_source_distribution': water_src_data,
        'percentage_water_from_service_provider': headline(
            "Are getting water from a regional or local service provider",
            total_water_sp, total_wsrc),
        'refuse_disposal_distribution': refuse_disp_data,
        'percentage_ref_disp_from_service_provider': headline(
            "Are getting refuse disposal from a local authority or private company",
            total_ref_sp, total_ref),
        'percentage_electricity_access': headline(
            "Have electricity for at least one of cooking, heating or lighting",
            total_some_elec, total_elec),
        'electricity_access_distribution': elec_access_data,
        'percentage_flush_toilet_access': headline(
            "Have access to flush or chemical toilets",
            total_flush_toilet, total_toilet),
        'percentage_no_toilet_access': headline(
            "Have no access to any toilets",
            total_no_toilet, total_toilet),
        'toilet_facilities_distribution': toilet_data,
    }
def get_demographics_profile(geo_code, geo_level, session):
    """
    Build the demographics profile for a place.

    Collects the population group, language, age, sex, citizenship and
    place-of-birth statistics for the place, along with derived figures such
    as total population, population density and median age.

    :param str geo_code: the geographical code
    :param str geo_level: the geographical level
    :param dbsession session: sqlalchemy session
    :return: dict of distributions and summary figures; 'population_density'
             is only present when the geography has a non-zero area
    """
    # population group
    pop_dist_data, total_pop = get_stat_data(
        ['population group'], geo_level, geo_code, session)

    # language
    language_data, _ = get_stat_data(
        ['language'], geo_level, geo_code, session, order_by='-total')
    # ordered by descending total, so the first entry is the most spoken
    language_most_spoken = language_data[language_data.keys()[0]]

    # age groups
    age_dist_data, total_age = get_stat_data(
        ['age groups in 5 years'], geo_level, geo_code, session,
        recode=COLLAPSED_AGE_CATEGORIES,
        key_order=('0-9', '10-19',
                   '20-29', '30-39',
                   '40-49', '50-59',
                   '60-69', '70-79',
                   '80+'))

    # sex
    db_model_sex = get_model_from_fields(['gender'], geo_level,
                                         table_name='gender_%s' % geo_level)
    query = session.query(func.sum(db_model_sex.total)) \
                   .filter(db_model_sex.gender == 'Male')
    geo_attr = '%s_code' % geo_level
    query = query.filter(getattr(db_model_sex, geo_attr) == geo_code)
    # SUM over no matching rows comes back as None; treat that as zero so the
    # arithmetic below doesn't fail
    total_male = query.one()[0] or 0
    total_female = total_pop - total_male

    def share_of_population(count):
        # a place with no recorded population would otherwise divide by zero
        if not total_pop:
            return 0
        return round(count / total_pop * 100, 2)

    sex_data = OrderedDict((  # census data refers to sex as gender
        ('Female', {
            "name": "Female",
            "values": {"this": share_of_population(total_female)},
            "numerators": {"this": total_female},
        }),
        ('Male', {
            "name": "Male",
            "values": {"this": share_of_population(total_male)},
            "numerators": {"this": total_male},
        }),
    ))

    add_metadata(sex_data, db_model_sex)

    final_data = {
        'language_distribution': language_data,
        'language_most_spoken': language_most_spoken,
        'population_group_distribution': pop_dist_data,
        'age_group_distribution': age_dist_data,
        'sex_ratio': sex_data,
        'total_population': {
            "name": "People",
            "values": {"this": total_pop},
        }
    }

    geo = get_geography(geo_code, geo_level)
    if geo.square_kms:
        final_data['population_density'] = {
            'name': "people per square kilometre",
            'values': {"this": total_pop / geo.square_kms},
        }

    # median age/age category
    db_model_age = get_model_from_fields(
        ['age in completed years'], geo_level,
        table_name='ageincompletedyears_%s' % geo_level
    )
    # ages are compared numerically, not as strings
    objects = sorted(
        get_objects_by_geo(db_model_age, geo_code, geo_level, session),
        key=lambda x: int(getattr(x, 'age in completed years'))
    )

    # median age
    median = calculate_median(objects, 'age in completed years')
    final_data['median_age'] = {
        "name": "Median age",
        "values": {"this": median},
    }

    # age category
    age_dist, _ = get_stat_data(
        ['age in completed years'], geo_level, geo_code, session,
        table_name='ageincompletedyearssimplified_%s' % geo_level,
        key_order=['Under 18', '18 to 64', '65 and over'],
        recode={'< 18': 'Under 18', '>= 65': '65 and over'})
    final_data['age_category_distribution'] = age_dist

    # citizenship
    citizenship_dist, _ = get_stat_data(
        ['citizenship'], geo_level, geo_code, session,
        order_by='-total')

    # guard the lookup the same way as 'South Africa' below: the category
    # may be absent for a place
    if 'Yes' in citizenship_dist:
        sa_citizen = citizenship_dist['Yes']['numerators']['this']
    else:
        sa_citizen = 0

    final_data['citizenship_distribution'] = citizenship_dist
    final_data['citizenship_south_african'] = {
        'name': 'South African citizens',
        'values': {'this': percent(sa_citizen, total_pop)},
        'numerators': {'this': sa_citizen},
    }

    # migration
    province_of_birth_dist, _ = get_stat_data(
        ['province of birth'], geo_level, geo_code, session,
        exclude_zero=True, order_by='-total')

    final_data['province_of_birth_distribution'] = province_of_birth_dist

    def region_recode(field, key):
        if key == 'Born in South Africa':
            return 'South Africa'
        else:
            return key

    region_of_birth_dist, _ = get_stat_data(
        ['region of birth'], geo_level, geo_code, session,
        exclude_zero=True, order_by='-total',
        recode=region_recode)

    if 'South Africa' in region_of_birth_dist:
        born_in_sa = region_of_birth_dist['South Africa']['numerators']['this']
    else:
        born_in_sa = 0

    final_data['region_of_birth_distribution'] = region_of_birth_dist
    final_data['born_in_south_africa'] = {
        'name': 'Born in South Africa',
        'values': {'this': percent(born_in_sa, total_pop)},
        'numerators': {'this': born_in_sa},
    }

    return final_data
def get_service_delivery_profile(geo_code, geo_level, session):
    """
    Assemble service delivery statistics for a place.

    Produces distributions for water source, refuse disposal, electricity
    access and toilet facilities, together with the headline percentages
    computed from them.

    :param str geo_code: the geographical code
    :param str geo_level: the geographical level
    :param dbsession session: sqlalchemy session
    :return: dict of distributions and headline figures
    """
    def stat(name, count, out_of):
        # raw count plus its percentage of out_of
        return {
            "name": name,
            "numerators": {"this": count},
            "values": {"this": percent(count, out_of)},
        }

    # --- water source
    water_src_data, total_wsrc = get_stat_data(
        ['source of water'], geo_level, geo_code, session,
        recode=SHORT_WATER_SOURCE_CATEGORIES,
        order_by='-total')

    total_water_sp = 0.0
    if 'Service provider' in water_src_data:
        total_water_sp = water_src_data['Service provider']['numerators']['this']

    # --- refuse disposal
    db_model_ref = get_model_from_fields(['refuse disposal'], geo_level)
    refuse_disp_data = OrderedDict()
    total_ref = 0.0
    total_ref_sp = 0.0
    for record in get_objects_by_geo(db_model_ref, geo_code, geo_level, session,
                                     order_by='-total'):
        disposal = getattr(record, 'refuse disposal')
        label = SHORT_REFUSE_DISPOSAL_CATEGORIES[disposal]
        refuse_disp_data[label] = {
            "name": label,
            "numerators": {"this": record.total},
        }
        total_ref += record.total
        if disposal.startswith('Removed by local authority'):
            total_ref_sp += record.total

    # --- electricity
    elec_attrs = ['electricity for cooking',
                  'electricity for heating',
                  'electricity for lighting']
    db_model_elec = get_model_from_fields(elec_attrs, geo_level)
    elec_records = get_objects_by_geo(db_model_elec, geo_code, geo_level, session)

    elec_access_data = {
        'total_all_elec': {
            "name": "Have electricity for everything",
            "numerators": {"this": 0.0},
        },
        'total_some_not_all_elec': {
            "name": "Have electricity for some things",
            "numerators": {"this": 0.0},
        },
        'total_no_elec': {
            "name": "No electricity",
            "numerators": {"this": 0.0},
        },
    }

    total_elec = 0.0
    total_some_elec = 0.0
    for record in elec_records:
        total_elec += record.total

        # a use counts as electrified unless its value starts with 'no '
        electrified = [not getattr(record, attr).startswith('no ')
                       for attr in elec_attrs]
        some = any(electrified)

        if some:
            total_some_elec += record.total

        if all(electrified):
            target = elec_access_data['total_all_elec']
        elif some:
            target = elec_access_data['total_some_not_all_elec']
        else:
            target = elec_access_data['total_no_elec']
        target['numerators']['this'] += record.total

    # fill in percentages before add_metadata is called on these dicts
    for dist, denominator in zip((refuse_disp_data, elec_access_data),
                                 (total_ref, total_elec)):
        for item in dist.values():
            item["values"] = {
                "this": percent(item["numerators"]["this"], denominator)}

    add_metadata(refuse_disp_data, db_model_ref)
    add_metadata(elec_access_data, db_model_elec)

    # --- toilets
    toilet_data, total_toilet = get_stat_data(
        ['toilet facilities'], geo_level, geo_code, session,
        exclude_zero=True,
        recode=COLLAPSED_TOILET_CATEGORIES,
        order_by='-total')

    total_flush_toilet = 0.0
    total_no_toilet = 0.0
    for key, item in toilet_data.iteritems():
        if key.startswith(('Flush', 'Chemical')):
            total_flush_toilet += item['numerators']['this']
        if key == 'None':
            total_no_toilet += item['numerators']['this']

    profile = {}
    profile['water_source_distribution'] = water_src_data
    profile['percentage_water_from_service_provider'] = stat(
        "Are getting water from a regional or local service provider",
        total_water_sp, total_wsrc)
    profile['refuse_disposal_distribution'] = refuse_disp_data
    profile['percentage_ref_disp_from_service_provider'] = stat(
        "Are getting refuse disposal from a local authority or private company",
        total_ref_sp, total_ref)
    profile['percentage_electricity_access'] = stat(
        "Have electricity for at least one of cooking, heating or lighting",
        total_some_elec, total_elec)
    profile['electricity_access_distribution'] = elec_access_data
    profile['percentage_flush_toilet_access'] = stat(
        "Have access to flush or chemical toilets",
        total_flush_toilet, total_toilet)
    profile['percentage_no_toilet_access'] = stat(
        "Have no access to any toilets",
        total_no_toilet, total_toilet)
    profile['toilet_facilities_distribution'] = toilet_data
    return profile
def get_demographics_profile(geo_code, geo_level, session):
    """
    Build the demographics profile for a place.

    Collects the population group, language, age, sex, citizenship and
    place-of-birth statistics for the place, along with derived figures such
    as total population, population density and median age.

    :param str geo_code: the geographical code
    :param str geo_level: the geographical level
    :param dbsession session: sqlalchemy session
    :return: dict of distributions and summary figures; 'population_density'
             is only present when the geography has a non-zero area
    """
    # population group
    pop_dist_data, total_pop = get_stat_data(
        ['population group'], geo_level, geo_code, session)

    # language
    language_data, _ = get_stat_data(
        ['language'], geo_level, geo_code, session, order_by='-total')
    # ordered by descending total, so the first entry is the most spoken
    language_most_spoken = language_data[language_data.keys()[0]]

    # age groups
    age_dist_data, total_age = get_stat_data(
        ['age groups in 5 years'], geo_level, geo_code, session,
        recode=COLLAPSED_AGE_CATEGORIES,
        key_order=('0-9', '10-19',
                   '20-29', '30-39',
                   '40-49', '50-59',
                   '60-69', '70-79',
                   '80+'))

    # sex
    db_model_sex = get_model_from_fields(['gender'], geo_level,
                                         table_name='gender_%s' % geo_level)
    query = session.query(func.sum(db_model_sex.total)) \
                   .filter(db_model_sex.gender == 'Male')
    geo_attr = '%s_code' % geo_level
    query = query.filter(getattr(db_model_sex, geo_attr) == geo_code)
    # SUM over no matching rows comes back as None; treat that as zero so the
    # arithmetic below doesn't fail
    total_male = query.one()[0] or 0
    total_female = total_pop - total_male

    def share_of_population(count):
        # a place with no recorded population would otherwise divide by zero
        if not total_pop:
            return 0
        return round(count / total_pop * 100, 2)

    sex_data = OrderedDict((  # census data refers to sex as gender
        ('Female', {
            "name": "Female",
            "values": {"this": share_of_population(total_female)},
            "numerators": {"this": total_female},
        }),
        ('Male', {
            "name": "Male",
            "values": {"this": share_of_population(total_male)},
            "numerators": {"this": total_male},
        }),
    ))

    add_metadata(sex_data, db_model_sex)

    final_data = {
        'language_distribution': language_data,
        'language_most_spoken': language_most_spoken,
        'population_group_distribution': pop_dist_data,
        'age_group_distribution': age_dist_data,
        'sex_ratio': sex_data,
        'total_population': {
            "name": "People",
            "values": {"this": total_pop},
        }
    }

    geo = get_geography(geo_code, geo_level)
    if geo.square_kms:
        final_data['population_density'] = {
            'name': "people per square kilometre",
            'values': {"this": total_pop / geo.square_kms},
        }

    # median age/age category
    db_model_age = get_model_from_fields(
        ['age in completed years'], geo_level,
        table_name='ageincompletedyears_%s' % geo_level
    )
    # ages are compared numerically, not as strings
    objects = sorted(
        get_objects_by_geo(db_model_age, geo_code, geo_level, session),
        key=lambda x: int(getattr(x, 'age in completed years'))
    )

    # median age
    median = calculate_median(objects, 'age in completed years')
    final_data['median_age'] = {
        "name": "Median age",
        "values": {"this": median},
    }

    # age category
    age_dist, _ = get_stat_data(
        ['age in completed years'], geo_level, geo_code, session,
        table_name='ageincompletedyearssimplified_%s' % geo_level,
        key_order=['Under 18', '18 to 64', '65 and over'],
        recode={'< 18': 'Under 18', '>= 65': '65 and over'})
    final_data['age_category_distribution'] = age_dist

    # citizenship
    citizenship_dist, _ = get_stat_data(
        ['citizenship'], geo_level, geo_code, session,
        order_by='-total')

    # guard the lookup the same way as 'South Africa' below: the category
    # may be absent for a place
    if 'Yes' in citizenship_dist:
        sa_citizen = citizenship_dist['Yes']['numerators']['this']
    else:
        sa_citizen = 0

    final_data['citizenship_distribution'] = citizenship_dist
    final_data['citizenship_south_african'] = {
        'name': 'South African citizens',
        'values': {'this': percent(sa_citizen, total_pop)},
        'numerators': {'this': sa_citizen},
    }

    # migration
    province_of_birth_dist, _ = get_stat_data(
        ['province of birth'], geo_level, geo_code, session,
        exclude_zero=True, order_by='-total')

    final_data['province_of_birth_distribution'] = province_of_birth_dist

    def region_recode(field, key):
        if key == 'Born in South Africa':
            return 'South Africa'
        else:
            return key

    region_of_birth_dist, _ = get_stat_data(
        ['region of birth'], geo_level, geo_code, session,
        exclude_zero=True, order_by='-total',
        recode=region_recode)

    if 'South Africa' in region_of_birth_dist:
        born_in_sa = region_of_birth_dist['South Africa']['numerators']['this']
    else:
        born_in_sa = 0

    final_data['region_of_birth_distribution'] = region_of_birth_dist
    final_data['born_in_south_africa'] = {
        'name': 'Born in South Africa',
        'values': {'this': percent(born_in_sa, total_pop)},
        'numerators': {'this': born_in_sa},
    }

    return final_data
def get_stat_data(self, geo_level, geo_code, fields=None, key_order=None,
                  percent=True, total=None, recode=None):
    """
    Get a data dictionary for a place from this table.

    This fetches the values for each column in this table and returns a data
    dictionary for those values, with appropriate names and metadata.

    :param str geo_level: the geographical level
    :param str geo_code: the geographical code
    :param str or list fields: the columns to fetch stats for. By default,
                               every column in ``self.columns`` is used.
    :param list key_order: explicit ordering of the columns in the result, or
                           None for the default order. Entries are read as
                           column names (before recoding). The default is the
                           order of +fields+.
    :param bool percent: should we calculate percentages, or just include
                         raw values?
    :param total: the total value to use for percentages, the name of a
                  column holding it, or None to use the sum of all retrieved
                  fields (default)
    :param recode: dict from column names to the key/name to report them
                   under, or a callable taking a column name and returning
                   that key. Many columns can be recoded to the same key,
                   their values will be summed.
    :return: (data-dictionary, total)
    :raises ValueError: if a requested field or a named total column is not
                        one of this table's columns
    """
    session = get_session()

    try:
        if fields is not None and not isinstance(fields, list):
            fields = [fields]

        if fields:
            for f in fields:
                if f not in self.columns:
                    raise ValueError("Invalid field/column '%s' for table '%s'. Valid columns are: %s" % (
                        f, self.id, ', '.join(self.columns.keys())))
        else:
            fields = self.columns.keys()

        recode = recode or {}
        if recode:
            # change lambda to dicts
            if not isinstance(recode, dict):
                recode = {f: recode(f) for f in fields}

        # is the total column valid?
        if isinstance(total, basestring) and total not in self.columns:
            raise ValueError("Total column '%s' isn't one of the columns for table '%s'. Valid columns are: %s" % (
                total, self.id, ', '.join(self.columns.keys())))

        # table columns to fetch
        cols = [self.model.columns[c] for c in fields]

        # A named total column has to be fetched as well. Compare it against
        # the field *names* (cols holds column objects, so a string is never
        # "in" it) and query the column object rather than its bare name.
        if isinstance(total, basestring) and total not in fields:
            cols.append(self.model.columns[total])

        # do the query. If this returns no data, row is None
        row = session\
            .query(*cols)\
            .filter(self.model.c.geo_level == geo_level,
                    self.model.c.geo_code == geo_code)\
            .first()

        if row is None:
            row = ZeroRow()

        # what's our denominator?
        if total is None:
            # sum of all columns
            total = sum(getattr(row, f) or 0 for f in fields)
        elif isinstance(total, basestring):
            total = getattr(row, total)

        # Now build a data dictionary based on the columns in +row+.
        # Multiple columns may be recoded into one, so we have to
        # accumulate values as we go.
        results = OrderedDict()

        key_order = key_order or fields  # default key order is just the list of fields

        for field in key_order:
            val = getattr(row, field) or 0

            # recode the key for this field, default is to keep it the same
            key = recode.get(field, field)

            # set the recoded field name, noting that the key may already
            # exist if another column recoded to it
            field_info = results.setdefault(
                key, {'name': recode.get(field, self.columns[field]['name'])})

            if percent:
                # sum up existing values, if any
                val = val + field_info.get('numerators', {}).get('this', 0)
                field_info['values'] = {'this': p(val, total)}
                field_info['numerators'] = {'this': val}
            else:
                # sum up existing values, if any
                val = val + field_info.get('values', {}).get('this', 0)
                field_info['values'] = {'this': val}

        add_metadata(results, self)
        return results, total
    finally:
        # always release the session, including when validation raises
        session.close()