Beispiel #1
0
def get_education_profile(geo_code, geo_level, session):
    db_model = get_model_from_fields(
        ['highest educational level 20 and older'], geo_level)
    objects = get_objects_by_geo(db_model, geo_code, geo_level, session)

    edu_dist_data = {}
    get_or_higher = 0.0
    fet_or_higher = 0.0
    total = 0.0
    for i, obj in enumerate(objects):
        category_val = getattr(obj, 'highest educational level 20 and older')
        # increment counters
        total += obj.total
        if category_val in EDUCATION_GET_OR_HIGHER:
            get_or_higher += obj.total
            if category_val in EDUCATION_FET_OR_HIGHER:
                fet_or_higher += obj.total
        # add data points for category
        edu_dist_data[str(i)] = {
            "name": category_val,
            "numerators": {
                "this": obj.total
            },
        }
    edu_dist_data = collapse_categories(edu_dist_data,
                                        COLLAPSED_EDUCATION_CATEGORIES,
                                        key_order=EDUCATION_KEY_ORDER)
    edu_split_data = {
        'percent_get_or_higher': {
            "name": "Completed Grade 9 or higher",
            "numerators": {
                "this": get_or_higher
            },
        },
        'percent_fet_or_higher': {
            "name": "Completed Matric or higher",
            "numerators": {
                "this": fet_or_higher
            },
        }
    }
    # calculate percentages
    for data in (edu_dist_data, edu_split_data):
        for fields in data.values():
            fields["values"] = {
                "this": round(fields["numerators"]["this"] / total * 100, 2)
            }

    edu_dist_data['metadata'] = {'universe': 'Invididuals aged 20 and older'}
    edu_split_data['metadata'] = {'universe': 'Invididuals aged 20 and older'}

    add_metadata(edu_dist_data, db_model)

    return {
        'educational_attainment_distribution': edu_dist_data,
        'educational_attainment': edu_split_data
    }
Beispiel #2
0
def get_education_profile(geo_code, geo_level, session):
    db_model = get_model_from_fields(['highest educational level 20 and older'], geo_level)
    objects = get_objects_by_geo(db_model, geo_code, geo_level, session)

    edu_dist_data = {}
    get_or_higher = 0.0
    fet_or_higher = 0.0
    total = 0.0
    for i, obj in enumerate(objects):
        category_val = getattr(obj, 'highest educational level 20 and older')
        # increment counters
        total += obj.total
        if category_val in EDUCATION_GET_OR_HIGHER:
            get_or_higher += obj.total
            if category_val in EDUCATION_FET_OR_HIGHER:
                fet_or_higher += obj.total
        # add data points for category
        edu_dist_data[str(i)] = {
            "name": category_val,
            "numerators": {"this": obj.total},
        }
    edu_dist_data = collapse_categories(edu_dist_data,
                                        COLLAPSED_EDUCATION_CATEGORIES,
                                        key_order=EDUCATION_KEY_ORDER)
    edu_split_data = {
        'percent_get_or_higher': {
            "name": "Completed Grade 9 or higher",
            "numerators": {"this": get_or_higher},
        },
        'percent_fet_or_higher': {
            "name": "Completed Matric or higher",
            "numerators": {"this": fet_or_higher},
        }
    }
    # calculate percentages
    for data in (edu_dist_data, edu_split_data):
        for fields in data.values():
            fields["values"] = {"this": round(fields["numerators"]["this"]
                                              / total * 100, 2)}

    edu_dist_data['metadata'] = {'universe': 'Invididuals aged 20 and older'}
    edu_split_data['metadata'] = {'universe': 'Invididuals aged 20 and older'}

    add_metadata(edu_dist_data, db_model)

    return {'educational_attainment_distribution': edu_dist_data,
            'educational_attainment': edu_split_data}
Beispiel #3
0
    def get_stat_data(self,
                      geo_level,
                      geo_code,
                      fields=None,
                      key_order=None,
                      percent=True,
                      total=None,
                      recode=None):
        """ Get a data dictionary for a place from this table.

        This fetches the values for each column in this table and returns a data
        dictionary for those values, with appropriate names and metadata.

        :param str geo_level: the geographical level
        :param str geo_code: the geographical code
        :param str or list fields: the columns to fetch stats for. By default, all columns except
                                   geo-related and the total column (if any) are used.
        :param str key_order: explicit ordering of (recoded) keys, or None for the default order.
                              Default order is the order in +fields+ if given, otherwise
                              it's the natural column order from the DB.
        :param bool percent: should we calculate percentages, or just include raw values?
        :param int total: the total value to use for percentages, name of a
                          field, or None to use the sum of all retrieved fields (default)
        :param dict recode: map from field names to strings to recode column names. Many fields
                            can be recoded to the same thing, their values will be summed.

        :return: (data-dictionary, total)
        """

        session = get_session()
        try:
            if fields is not None and not isinstance(fields, list):
                fields = [fields]
            if fields:
                for f in fields:
                    if f not in self.columns:
                        raise ValueError(
                            "Invalid field/column '%s' for table '%s'. Valid columns are: %s"
                            % (f, self.id, ', '.join(self.columns.keys())))
            else:
                fields = self.columns.keys()

            recode = recode or {}
            if recode:
                # change lambda to dicts
                if not isinstance(recode, dict):
                    recode = {f: recode(f) for f in fields}

            # is the total column valid?
            if isinstance(total, basestring) and total not in self.columns:
                raise ValueError(
                    "Total column '%s' isn't one of the columns for table '%s'. Valid columns are: %s"
                    % (total, self.id, ', '.join(self.columns.keys())))

            # table columns to fetch
            cols = [self.model.columns[c] for c in fields]
            if total is not None and isinstance(
                    total, basestring) and total not in cols:
                cols.append(total)

            # do the query. If this returns no data, row is None
            row = session\
                .query(*cols)\
                .filter(self.model.c.geo_level == geo_level,
                        self.model.c.geo_code == geo_code)\
                .first()

            if row is None:
                row = ZeroRow()

            # what's our denominator?
            if total is None:
                # sum of all columns
                total = sum(getattr(row, f) or 0 for f in fields)
            elif isinstance(total, basestring):
                total = getattr(row, total)

            # Now build a data dictionary based on the columns in +row+.
            # Multiple columns may be recoded into one, so we have to
            # accumulate values as we go.
            results = OrderedDict()

            key_order = key_order or fields  # default key order is just the list of fields

            for field in key_order:
                val = getattr(row, field) or 0

                # recode the key for this field, default is to keep it the same
                key = recode.get(field, field)

                # set the recoded field name, noting that the key may already
                # exist if another column recoded to it
                field_info = results.setdefault(
                    key,
                    {'name': recode.get(field, self.columns[field]['name'])})

                if percent:
                    # sum up existing values, if any
                    val = val + field_info.get('numerators', {}).get('this', 0)
                    field_info['values'] = {'this': p(val, total)}
                    field_info['numerators'] = {'this': val}
                else:
                    # sum up existing values, if any
                    val = val + field_info.get('values', {}).get('this', 0)
                    field_info['values'] = {'this': val}

            add_metadata(results, self)
            return results, total
        finally:
            session.close()
Beispiel #4
0
def get_stat_data(fields, geo_level, geo_code, session, order_by=None,
                  percent=True, total=None, table_fields=None,
                  table_name=None, only=None, exclude=None, exclude_zero=False,
                  recode=None, key_order=None, table_dataset=None):
    """
    This is our primary helper routine for building a dictionary suitable for
    a place's profile page, based on a statistic.

    It sums over the data for +fields+ in the database for the place identified by
    +geo_level+ and +geo_code+ and calculates numerators and values. If multiple
    fields are given, it creates nested result dictionaries.

    Control the rows that are included or ignored using +only+, +exclude+ and +exclude_zero+.

    The field values can be recoded using +recode+ and and re-ordered using +key_order+.

    :param str or list fields: the census field to build stats for. Specify a list of fields to build
                               nested statistics. If multiple fields are specified, then the values
                               of parameters such as +only+, +exclude+ and +recode+ will change.
                               These must be fields in `api.models.census.census_fields`, e.g. 'highest educational level'
    :param str geo_level: the geographical level
    :param str geo_code: the geographical code
    :param dbsession session: sqlalchemy session
    :param str order_by: field to order by, or None for default, eg. '-total'
    :param bool percent: should we calculate percentages, or just sum raw values?
    :param list table_fields: list of fields to use to find the table, defaults to `fields`
    :param int total: the total value to use for percentages, or None to total columns automatically
    :param str table_name: override the table name, otherwise it's calculated from the fields and geo_level
    :param dict or list only: only include these field values. If +fields+ has many items, this must be a dict
                              mapping field names to a list of strings.
    :param dict or list exclude: ignore these field values. If +fields+ has many items, this must be a dict
                                 mapping field names to a list of strings. Field names are checked
                                 before any recoding.
    :param bool exclude_zero: ignore fields that have a zero total
    :param dict or lambda: function or dict to recode values of +key_field+. If +fields+ is a singleton,
                           then the keys of this dict must be the values to recode from, otherwise
                           they must be the field names and then the values. If this is a lambda,
                           it is called with the field name and its value as arguments.
    :param dict or list key_order: ordering for keys in result dictionary. If +fields+ has many items,
                                   this must be a dict from field names to orderings.
                                   The default ordering is determined by +order+.
    :param str table_dataset: dataset used to help find the table if +table_name+ isn't given.

    :return: (data-dictionary, total)
    """

    if not isinstance(fields, list):
        fields = [fields]

    n_fields = len(fields)
    many_fields = n_fields > 1

    if order_by is None:
        order_by = fields[0]

    if only is not None:
        if not isinstance(only, dict):
            if many_fields:
                raise ValueError("If many fields are given, then only must be a dict. I got %s instead" % only)
            else:
                only = {fields[0]: set(only)}

    if exclude is not None:
        if not isinstance(exclude, dict):
            if many_fields:
                raise ValueError("If many fields are given, then exclude must be a dict. I got %s instead" % exclude)
            else:
                exclude = {fields[0]: set(exclude)}

    if key_order:
        if not isinstance(key_order, dict):
            if many_fields:
                raise ValueError("If many fields are given, then key_order must be a dict. I got %s instead" % key_order)
            else:
                key_order = {fields[0]: key_order}
    else:
        key_order = {}

    if total is not None and many_fields:
        raise ValueError("Cannot specify a total if many fields are given")

    if recode:
        if not isinstance(recode, dict) or not many_fields:
            recode = dict((f, recode) for f in fields)

    model = get_model_from_fields(table_fields or fields, geo_level, table_name, table_dataset)
    objects = get_objects_by_geo(model, geo_code, geo_level, session, fields=fields, order_by=order_by)

    root_data = OrderedDict()
    our_total = {}

    def get_data_object(obj):
        """ Recurse down the list of fields and return the
        final resting place for data for this stat. """
        data = root_data

        for i, field in enumerate(fields):
            key = getattr(obj, field)

            if only and field in only and key not in only.get(field, {}):
                return key, None

            if exclude and key in exclude.get(field, {}):
                return key, None

            if recode and field in recode:
                recoder = recode[field]
                if isinstance(recoder, dict):
                    key = recoder.get(key, key)
                else:
                    key = recoder(field, key)
            else:
                key = capitalize(key)

            # enforce key ordering
            if not data and field in key_order:
                for fld in key_order[field]:
                    data[fld] = OrderedDict()

            # ensure it's there
            if key not in data:
                data[key] = OrderedDict()

            data = data[key]

            # default values for intermediate fields
            if data is not None and i < n_fields - 1:
                data['metadata'] = {'name': key}

        # data is now the dict where the end value is going to go
        if not data:
            data['name'] = key
            data['numerators'] = {'this': 0.0}

        return key, data

    # run the stats for the objects
    for obj in objects:
        if obj.total == 0 and exclude_zero:
            continue

        # get the data dict where these values must go
        key, data = get_data_object(obj)
        if not data:
            continue

        our_total[key] = our_total.get(key, 0.0) + obj.total
        data['numerators']['this'] += obj.total

    if total is not None:
        grand_total = total
    else:
        grand_total = sum(our_total.values())

    # add in percentages
    def calc_percent(data):
        for key, data in data.iteritems():
            if not key == 'metadata':
                if 'numerators' in data:
                    if percent:
                        tot = our_total[key] if many_fields else grand_total
                        perc = 0 if tot == 0 else (data['numerators']['this'] / tot * 100)
                        data['values'] = {'this': round(perc, 2)}
                    else:
                        data['values'] = dict(data['numerators'])
                        data['numerators']['this'] = None
                else:
                    calc_percent(data)

    calc_percent(root_data)

    add_metadata(root_data, model)

    return root_data, grand_total
Beispiel #5
0
def get_service_delivery_profile(geo_code, geo_level, session):
    # water source
    water_src_data, total_wsrc = get_stat_data(
        ['source of water'],
        geo_level,
        geo_code,
        session,
        recode=SHORT_WATER_SOURCE_CATEGORIES,
        order_by='-total')
    if 'Service provider' in water_src_data:
        total_water_sp = water_src_data['Service provider']['numerators'][
            'this']
    else:
        total_water_sp = 0.0

    # refuse disposal
    db_model_ref = get_model_from_fields(['refuse disposal'], geo_level)
    objects = get_objects_by_geo(db_model_ref,
                                 geo_code,
                                 geo_level,
                                 session,
                                 order_by='-total')
    refuse_disp_data = OrderedDict()
    total_ref = 0.0
    total_ref_sp = 0.0
    for obj in objects:
        attr = getattr(obj, 'refuse disposal')
        disp = SHORT_REFUSE_DISPOSAL_CATEGORIES[attr]
        refuse_disp_data[disp] = {
            "name": disp,
            "numerators": {
                "this": obj.total
            },
        }
        total_ref += obj.total
        if attr.startswith('Removed by local authority'):
            total_ref_sp += obj.total

    # electricity
    elec_attrs = [
        'electricity for cooking', 'electricity for heating',
        'electricity for lighting'
    ]
    db_model_elec = get_model_from_fields(elec_attrs, geo_level)
    objects = get_objects_by_geo(db_model_elec, geo_code, geo_level, session)
    total_elec = 0.0
    total_some_elec = 0.0
    elec_access_data = {
        'total_all_elec': {
            "name": "Have electricity for everything",
            "numerators": {
                "this": 0.0
            },
        },
        'total_some_not_all_elec': {
            "name": "Have electricity for some things",
            "numerators": {
                "this": 0.0
            },
        },
        'total_no_elec': {
            "name": "No electricity",
            "numerators": {
                "this": 0.0
            },
        }
    }
    for obj in objects:
        total_elec += obj.total
        has_some = False
        has_all = True
        for attr in elec_attrs:
            val = not getattr(obj, attr).startswith('no ')
            has_all = has_all and val
            has_some = has_some or val
        if has_some:
            total_some_elec += obj.total
        if has_all:
            elec_access_data['total_all_elec']['numerators'][
                'this'] += obj.total
        elif has_some:
            elec_access_data['total_some_not_all_elec']['numerators'][
                'this'] += obj.total
        else:
            elec_access_data['total_no_elec']['numerators'][
                'this'] += obj.total

    for data, total in zip((refuse_disp_data, elec_access_data),
                           (total_ref, total_elec)):
        for fields in data.values():
            fields["values"] = {
                "this": percent(fields["numerators"]["this"], total)
            }

    add_metadata(refuse_disp_data, db_model_ref)
    add_metadata(elec_access_data, db_model_elec)

    # toilets
    toilet_data, total_toilet = get_stat_data(
        ['toilet facilities'],
        geo_level,
        geo_code,
        session,
        exclude_zero=True,
        recode=COLLAPSED_TOILET_CATEGORIES,
        order_by='-total')

    total_flush_toilet = 0.0
    total_no_toilet = 0.0
    for key, data in toilet_data.iteritems():
        if key.startswith('Flush') or key.startswith('Chemical'):
            total_flush_toilet += data['numerators']['this']
        if key == 'None':
            total_no_toilet += data['numerators']['this']

    return {
        'water_source_distribution': water_src_data,
        'percentage_water_from_service_provider': {
            "name":
            "Are getting water from a regional or local service provider",
            "numerators": {
                "this": total_water_sp
            },
            "values": {
                "this": percent(total_water_sp, total_wsrc)
            },
        },
        'refuse_disposal_distribution': refuse_disp_data,
        'percentage_ref_disp_from_service_provider': {
            "name":
            "Are getting refuse disposal from a local authority or private company",
            "numerators": {
                "this": total_ref_sp
            },
            "values": {
                "this": percent(total_ref_sp, total_ref)
            },
        },
        'percentage_electricity_access': {
            "name":
            "Have electricity for at least one of cooking, heating or lighting",
            "numerators": {
                "this": total_some_elec
            },
            "values": {
                "this": percent(total_some_elec, total_elec)
            },
        },
        'electricity_access_distribution': elec_access_data,
        'percentage_flush_toilet_access': {
            "name": "Have access to flush or chemical toilets",
            "numerators": {
                "this": total_flush_toilet
            },
            "values": {
                "this": percent(total_flush_toilet, total_toilet)
            },
        },
        'percentage_no_toilet_access': {
            "name": "Have no access to any toilets",
            "numerators": {
                "this": total_no_toilet
            },
            "values": {
                "this": percent(total_no_toilet, total_toilet)
            },
        },
        'toilet_facilities_distribution': toilet_data,
    }
Beispiel #6
0
def get_demographics_profile(geo_code, geo_level, session):
    # population group
    pop_dist_data, total_pop = get_stat_data(['population group'], geo_level,
                                             geo_code, session)

    # language
    language_data, _ = get_stat_data(['language'],
                                     geo_level,
                                     geo_code,
                                     session,
                                     order_by='-total')
    language_most_spoken = language_data[language_data.keys()[0]]

    # age groups
    age_dist_data, total_age = get_stat_data(
        ['age groups in 5 years'],
        geo_level,
        geo_code,
        session,
        recode=COLLAPSED_AGE_CATEGORIES,
        key_order=('0-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69',
                   '70-79', '80+'))

    # sex
    db_model_sex = get_model_from_fields(['gender'],
                                         geo_level,
                                         table_name='gender_%s' % geo_level)
    query = session.query(func.sum(db_model_sex.total)) \
                   .filter(db_model_sex.gender == 'Male')
    geo_attr = '%s_code' % geo_level
    query = query.filter(getattr(db_model_sex, geo_attr) == geo_code)
    total_male = query.one()[0]

    sex_data = OrderedDict((  # census data refers to sex as gender
        ('Female', {
            "name": "Female",
            "values": {
                "this": round((total_pop - total_male) / total_pop * 100, 2)
            },
            "numerators": {
                "this": total_pop - total_male
            },
        }),
        ('Male', {
            "name": "Male",
            "values": {
                "this": round(total_male / total_pop * 100, 2)
            },
            "numerators": {
                "this": total_male
            },
        }),
    ))

    add_metadata(sex_data, db_model_sex)

    final_data = {
        'language_distribution': language_data,
        'language_most_spoken': language_most_spoken,
        'population_group_distribution': pop_dist_data,
        'age_group_distribution': age_dist_data,
        'sex_ratio': sex_data,
        'total_population': {
            "name": "People",
            "values": {
                "this": total_pop
            },
        }
    }

    geo = get_geography(geo_code, geo_level)
    if geo.square_kms:
        final_data['population_density'] = {
            'name': "people per square kilometre",
            'values': {
                "this": total_pop / geo.square_kms
            },
        }

    # median age/age category
    db_model_age = get_model_from_fields(['age in completed years'],
                                         geo_level,
                                         table_name='ageincompletedyears_%s' %
                                         geo_level)
    objects = sorted(get_objects_by_geo(db_model_age, geo_code, geo_level,
                                        session),
                     key=lambda x: int(getattr(x, 'age in completed years')))
    # median age
    median = calculate_median(objects, 'age in completed years')
    final_data['median_age'] = {
        "name": "Median age",
        "values": {
            "this": median
        },
    }

    # age category
    age_dist, _ = get_stat_data(
        ['age in completed years'],
        geo_level,
        geo_code,
        session,
        table_name='ageincompletedyearssimplified_%s' % geo_level,
        key_order=['Under 18', '18 to 64', '65 and over'],
        recode={
            '< 18': 'Under 18',
            '>= 65': '65 and over'
        })
    final_data['age_category_distribution'] = age_dist

    # citizenship
    citizenship_dist, _ = get_stat_data(['citizenship'],
                                        geo_level,
                                        geo_code,
                                        session,
                                        order_by='-total')

    sa_citizen = citizenship_dist['Yes']['numerators']['this']

    final_data['citizenship_distribution'] = citizenship_dist
    final_data['citizenship_south_african'] = {
        'name': 'South African citizens',
        'values': {
            'this': percent(sa_citizen, total_pop)
        },
        'numerators': {
            'this': sa_citizen
        },
    }

    # migration
    province_of_birth_dist, _ = get_stat_data(['province of birth'],
                                              geo_level,
                                              geo_code,
                                              session,
                                              exclude_zero=True,
                                              order_by='-total')

    final_data['province_of_birth_distribution'] = province_of_birth_dist

    def region_recode(field, key):
        if key == 'Born in South Africa':
            return 'South Africa'
        else:
            return key

    region_of_birth_dist, _ = get_stat_data(['region of birth'],
                                            geo_level,
                                            geo_code,
                                            session,
                                            exclude_zero=True,
                                            order_by='-total',
                                            recode=region_recode)

    if 'South Africa' in region_of_birth_dist:
        born_in_sa = region_of_birth_dist['South Africa']['numerators']['this']
    else:
        born_in_sa = 0

    final_data['region_of_birth_distribution'] = region_of_birth_dist
    final_data['born_in_south_africa'] = {
        'name': 'Born in South Africa',
        'values': {
            'this': percent(born_in_sa, total_pop)
        },
        'numerators': {
            'this': born_in_sa
        },
    }

    return final_data
Beispiel #7
0
def get_service_delivery_profile(geo_code, geo_level, session):
    # water source
    water_src_data, total_wsrc = get_stat_data(
            ['source of water'], geo_level, geo_code, session,
            recode=SHORT_WATER_SOURCE_CATEGORIES,
            order_by='-total')
    if 'Service provider' in water_src_data:
        total_water_sp = water_src_data['Service provider']['numerators']['this']
    else:
        total_water_sp = 0.0

    # refuse disposal
    db_model_ref = get_model_from_fields(['refuse disposal'], geo_level)
    objects = get_objects_by_geo(db_model_ref, geo_code, geo_level, session,
                                 order_by='-total')
    refuse_disp_data = OrderedDict()
    total_ref = 0.0
    total_ref_sp = 0.0
    for obj in objects:
        attr = getattr(obj, 'refuse disposal')
        disp = SHORT_REFUSE_DISPOSAL_CATEGORIES[attr]
        refuse_disp_data[disp] = {
            "name": disp,
            "numerators": {"this": obj.total},
        }
        total_ref += obj.total
        if attr.startswith('Removed by local authority'):
            total_ref_sp += obj.total

    # electricity
    elec_attrs = ['electricity for cooking',
                  'electricity for heating',
                  'electricity for lighting']
    db_model_elec = get_model_from_fields(elec_attrs, geo_level)
    objects = get_objects_by_geo(db_model_elec, geo_code, geo_level, session)
    total_elec = 0.0
    total_some_elec = 0.0
    elec_access_data = {
        'total_all_elec': {
            "name": "Have electricity for everything",
            "numerators": {"this": 0.0},
        },
        'total_some_not_all_elec': {
            "name": "Have electricity for some things",
            "numerators": {"this": 0.0},
        },
        'total_no_elec': {
            "name": "No electricity",
            "numerators": {"this": 0.0},
        }
    }
    for obj in objects:
        total_elec += obj.total
        has_some = False
        has_all = True
        for attr in elec_attrs:
            val = not getattr(obj, attr).startswith('no ')
            has_all = has_all and val
            has_some = has_some or val
        if has_some:
            total_some_elec += obj.total
        if has_all:
            elec_access_data['total_all_elec']['numerators']['this'] += obj.total
        elif has_some:
            elec_access_data['total_some_not_all_elec']['numerators']['this'] += obj.total
        else:
            elec_access_data['total_no_elec']['numerators']['this'] += obj.total

    for data, total in zip((refuse_disp_data, elec_access_data),
                           (total_ref, total_elec)):
        for fields in data.values():
            fields["values"] = {"this": percent(fields["numerators"]["this"], total)}

    add_metadata(refuse_disp_data, db_model_ref)
    add_metadata(elec_access_data, db_model_elec)

    # toilets
    toilet_data, total_toilet = get_stat_data(
            ['toilet facilities'], geo_level, geo_code, session,
            exclude_zero=True,
            recode=COLLAPSED_TOILET_CATEGORIES,
            order_by='-total')

    total_flush_toilet = 0.0
    total_no_toilet = 0.0
    for key, data in toilet_data.iteritems():
        if key.startswith('Flush') or key.startswith('Chemical'):
            total_flush_toilet += data['numerators']['this']
        if key == 'None':
            total_no_toilet += data['numerators']['this']

    return {'water_source_distribution': water_src_data,
            'percentage_water_from_service_provider': {
                "name": "Are getting water from a regional or local service provider",
                "numerators": {"this": total_water_sp},
                "values": {"this": percent(total_water_sp, total_wsrc)},
            },
            'refuse_disposal_distribution': refuse_disp_data,
            'percentage_ref_disp_from_service_provider': {
                "name": "Are getting refuse disposal from a local authority or private company",
                "numerators": {"this": total_ref_sp},
                "values": {"this": percent(total_ref_sp, total_ref)},
            },
            'percentage_electricity_access': {
                "name": "Have electricity for at least one of cooking, heating or lighting",
                "numerators": {"this": total_some_elec},
                "values": {"this": percent(total_some_elec, total_elec)},
            },
            'electricity_access_distribution': elec_access_data,
            'percentage_flush_toilet_access': {
                "name": "Have access to flush or chemical toilets",
                "numerators": {"this": total_flush_toilet},
                "values": {"this": percent(total_flush_toilet, total_toilet)},
            },
            'percentage_no_toilet_access': {
                "name": "Have no access to any toilets",
                "numerators": {"this": total_no_toilet},
                "values": {"this": percent(total_no_toilet, total_toilet)},
            },
            'toilet_facilities_distribution': toilet_data,
    }
Beispiel #8
0
def get_demographics_profile(geo_code, geo_level, session):
    # population group
    pop_dist_data, total_pop = get_stat_data(
            ['population group'], geo_level, geo_code, session)

    # language
    language_data, _ = get_stat_data(
            ['language'], geo_level, geo_code, session, order_by='-total')
    language_most_spoken = language_data[language_data.keys()[0]]

    # age groups
    age_dist_data, total_age = get_stat_data(
            ['age groups in 5 years'], geo_level, geo_code, session,
            recode=COLLAPSED_AGE_CATEGORIES,
            key_order=('0-9', '10-19',
                       '20-29', '30-39',
                       '40-49', '50-59',
                       '60-69', '70-79',
                       '80+'))

    # sex
    db_model_sex = get_model_from_fields(['gender'], geo_level, table_name='gender_%s' % geo_level)
    query = session.query(func.sum(db_model_sex.total)) \
                   .filter(db_model_sex.gender == 'Male')
    geo_attr = '%s_code' % geo_level
    query = query.filter(getattr(db_model_sex, geo_attr) == geo_code)
    total_male = query.one()[0]

    sex_data = OrderedDict((  # census data refers to sex as gender
            ('Female', {
                "name": "Female",
                "values": {"this": round((total_pop - total_male) / total_pop * 100, 2)},
                "numerators": {"this": total_pop - total_male},
            }),
            ('Male', {
                "name": "Male",
                "values": {"this": round(total_male / total_pop * 100, 2)},
                "numerators": {"this": total_male},
            }),
        ))

    add_metadata(sex_data, db_model_sex)

    final_data = {
        'language_distribution': language_data,
        'language_most_spoken': language_most_spoken,
        'population_group_distribution': pop_dist_data,
        'age_group_distribution': age_dist_data,
        'sex_ratio': sex_data,
        'total_population': {
            "name": "People",
            "values": {"this": total_pop},
        }
    }

    geo = get_geography(geo_code, geo_level)
    if geo.square_kms:
        final_data['population_density'] = {
            'name': "people per square kilometre",
            'values': {"this": total_pop / geo.square_kms},
        }

    # median age/age category
    db_model_age = get_model_from_fields(
        ['age in completed years'], geo_level,
        table_name='ageincompletedyears_%s' % geo_level
    )
    objects = sorted(
        get_objects_by_geo(db_model_age, geo_code, geo_level, session),
        key=lambda x: int(getattr(x, 'age in completed years'))
    )
    # median age
    median = calculate_median(objects, 'age in completed years')
    final_data['median_age'] = {
        "name": "Median age",
        "values": {"this": median},
    }

    # age category
    age_dist, _ = get_stat_data(
        ['age in completed years'], geo_level, geo_code, session,
        table_name='ageincompletedyearssimplified_%s' % geo_level,
        key_order=['Under 18', '18 to 64', '65 and over'],
        recode={'< 18': 'Under 18',
                '>= 65': '65 and over'})
    final_data['age_category_distribution'] = age_dist

    # citizenship
    citizenship_dist, _ = get_stat_data(
            ['citizenship'], geo_level, geo_code, session,
            order_by='-total')

    sa_citizen = citizenship_dist['Yes']['numerators']['this']

    final_data['citizenship_distribution'] = citizenship_dist
    final_data['citizenship_south_african'] = {
            'name': 'South African citizens',
            'values': {'this': percent(sa_citizen, total_pop)},
            'numerators': {'this': sa_citizen},
            }

    # migration
    province_of_birth_dist, _ = get_stat_data(
            ['province of birth'], geo_level, geo_code, session,
            exclude_zero=True, order_by='-total')

    final_data['province_of_birth_distribution'] = province_of_birth_dist

    def region_recode(field, key):
        if key == 'Born in South Africa':
            return 'South Africa'
        else:
            return key

    region_of_birth_dist, _ = get_stat_data(
            ['region of birth'], geo_level, geo_code, session,
            exclude_zero=True, order_by='-total',
            recode=region_recode)

    if 'South Africa' in region_of_birth_dist:
        born_in_sa = region_of_birth_dist['South Africa']['numerators']['this']
    else:
        born_in_sa = 0

    final_data['region_of_birth_distribution'] = region_of_birth_dist
    final_data['born_in_south_africa'] = {
            'name': 'Born in South Africa',
            'values': {'this': percent(born_in_sa, total_pop)},
            'numerators': {'this': born_in_sa},
            }

    return final_data
Beispiel #9
0
    def get_stat_data(self, geo_level, geo_code, fields=None, key_order=None,
                      percent=True, total=None, recode=None):
        """ Get a data dictionary for a place from this table.

        This fetches the values for each column in this table and returns a data
        dictionary for those values, with appropriate names and metadata.

        :param str geo_level: the geographical level
        :param str geo_code: the geographical code
        :param str or list fields: the columns to fetch stats for. By default, all columns except
                                   geo-related and the total column (if any) are used.
        :param str key_order: explicit ordering of (recoded) keys, or None for the default order.
                              Default order is the order in +fields+ if given, otherwise
                              it's the natural column order from the DB.
        :param bool percent: should we calculate percentages, or just include raw values?
        :param int total: the total value to use for percentages, name of a
                          field, or None to use the sum of all retrieved fields (default)
        :param dict recode: map from field names to strings to recode column names. Many fields
                            can be recoded to the same thing, their values will be summed.

        :return: (data-dictionary, total)
        """

        session = get_session()
        try:
            if fields is not None and not isinstance(fields, list):
                fields = [fields]
            if fields:
                for f in fields:
                    if f not in self.columns:
                        raise ValueError("Invalid field/column '%s' for table '%s'. Valid columns are: %s" % (
                            f, self.id, ', '.join(self.columns.keys())))
            else:
                fields = self.columns.keys()

            recode = recode or {}
            if recode:
                # change lambda to dicts
                if not isinstance(recode, dict):
                    recode = {f: recode(f) for f in fields}

            # is the total column valid?
            if isinstance(total, basestring) and total not in self.columns:
                raise ValueError("Total column '%s' isn't one of the columns for table '%s'. Valid columns are: %s" % (
                    total, self.id, ', '.join(self.columns.keys())))

            # table columns to fetch
            cols = [self.model.columns[c] for c in fields]
            if total is not None and isinstance(total, basestring) and total not in cols:
                cols.append(total)

            # do the query. If this returns no data, row is None
            row = session\
                .query(*cols)\
                .filter(self.model.c.geo_level == geo_level,
                        self.model.c.geo_code == geo_code)\
                .first()

            if row is None:
                row = ZeroRow()

            # what's our denominator?
            if total is None:
                # sum of all columns
                total = sum(getattr(row, f) or 0 for f in fields)
            elif isinstance(total, basestring):
                total = getattr(row, total)

            # Now build a data dictionary based on the columns in +row+.
            # Multiple columns may be recoded into one, so we have to
            # accumulate values as we go.
            results = OrderedDict()

            key_order = key_order or fields  # default key order is just the list of fields

            for field in key_order:
                val = getattr(row, field) or 0

                # recode the key for this field, default is to keep it the same
                key = recode.get(field, field)

                # set the recoded field name, noting that the key may already
                # exist if another column recoded to it
                field_info = results.setdefault(key, {'name': recode.get(field, self.columns[field]['name'])})

                if percent:
                    # sum up existing values, if any
                    val = val + field_info.get('numerators', {}).get('this', 0)
                    field_info['values'] = {'this': p(val, total)}
                    field_info['numerators'] = {'this': val}
                else:
                    # sum up existing values, if any
                    val = val + field_info.get('values', {}).get('this', 0)
                    field_info['values'] = {'this': val}

            add_metadata(results, self)
            return results, total
        finally:
            session.close()