Ejemplo n.º 1
0
def get_education_profile(geo_code, geo_level, session):
    """Build the education section of a place profile.

    Adds up the ``total`` of every 'highest educational level 20 and older'
    row returned for the geography, tracks how much of that falls in
    ``EDUCATION_GET_OR_HIGHER`` and ``EDUCATION_FET_OR_HIGHER``, and expresses
    each category and both split figures as a percentage of the overall total.

    :param str geo_code: the geographical code
    :param str geo_level: the geographical level
    :param session: database session passed through to ``get_objects_by_geo``
    :return: dict with 'educational_attainment_distribution' (per-category
             data after ``collapse_categories``) and 'educational_attainment'
             (the two "or higher" figures)
    """
    field = 'highest educational level 20 and older'
    db_model = get_model_from_fields([field], geo_level)
    objects = get_objects_by_geo(db_model, geo_code, geo_level, session)

    edu_dist_data = {}
    get_or_higher = 0.0
    fet_or_higher = 0.0
    total = 0.0
    for i, obj in enumerate(objects):
        category_val = getattr(obj, field)
        # increment counters
        total += obj.total
        if category_val in EDUCATION_GET_OR_HIGHER:
            get_or_higher += obj.total
            # FET is only counted for categories that are also GET-or-higher
            if category_val in EDUCATION_FET_OR_HIGHER:
                fet_or_higher += obj.total
        # add data points for category
        edu_dist_data[str(i)] = {
            "name": category_val,
            "numerators": {
                "this": obj.total
            },
        }
    edu_dist_data = collapse_categories(edu_dist_data,
                                        COLLAPSED_EDUCATION_CATEGORIES,
                                        key_order=EDUCATION_KEY_ORDER)
    edu_split_data = {
        'percent_get_or_higher': {
            "name": "Completed Grade 9 or higher",
            "numerators": {
                "this": get_or_higher
            },
        },
        'percent_fet_or_higher': {
            "name": "Completed Matric or higher",
            "numerators": {
                "this": fet_or_higher
            },
        }
    }
    # calculate percentages
    for data in (edu_dist_data, edu_split_data):
        for fields in data.values():
            numerator = fields["numerators"]["this"]
            # A geography without any rows leaves total at zero; report 0%
            # instead of raising ZeroDivisionError.
            share = numerator / total * 100 if total else 0.0
            fields["values"] = {"this": round(share, 2)}

    # NOTE(review): 'Invididuals' looks like a typo for 'Individuals'; the
    # text is emitted at runtime, so it is deliberately left unchanged here.
    edu_dist_data['metadata'] = {'universe': 'Invididuals aged 20 and older'}
    edu_split_data['metadata'] = {'universe': 'Invididuals aged 20 and older'}

    add_metadata(edu_dist_data, db_model)

    return {
        'educational_attainment_distribution': edu_dist_data,
        'educational_attainment': edu_split_data
    }
    def store_values(self):
        session = get_session()
        province_codes = dict(
            (p.name, p.code) for p in session.query(Province))
        session.commit()

        # cache of the db models for each geo level
        models = {}
        count = 0

        for geo_name, values in self.read_rows():
            count += 1
            geo_level = self.determine_level(geo_name)

            print geo_level, geo_name

            if geo_level == 'province':
                code = province_codes[geo_name]
            elif geo_level == 'country':
                code = 'ZA'
            else:
                code = geo_name.split(':')[0]
            base_kwargs = {'%s_code' % geo_level: code}

            # get db model and create table if necessary
            if geo_level in models:
                db_model = models[geo_level]
            else:
                if self.table_name:
                    table_name = self.table_name + '_' + geo_level
                else:
                    table_name = None

                models[geo_level] = db_model = get_model_from_fields(
                    self.fields, geo_level, table_name)
                Base.metadata.create_all(_engine, tables=[db_model.__table__])

            for category, value in zip(self.categories, values):
                # prepare the dict of args to pass to the db model for this row
                kwargs = base_kwargs.copy()
                if value.strip() == '-':
                    value = '0'

                kwargs.update(
                    dict((f, v) for f, v in zip(self.fields, category)))
                kwargs['total'] = int(value.replace(',', ''))

                # create and add the row
                session.add(db_model(**kwargs))

            if count % 100 == 0:
                session.flush()

        session.commit()
        session.close()
Ejemplo n.º 3
0
def get_education_profile(geo_code, geo_level, session):
    """Build the education section of a place profile (25 and over).

    Reads the 'highest educational level' rows from the
    'highesteducationallevel_<geo_level>_25andover' table, adds up their
    ``total`` values, tracks how much falls in ``EDUCATION_GET_OR_HIGHER``
    and ``EDUCATION_FET_OR_HIGHER``, and expresses every category and both
    split figures as a percentage of the overall total.

    :param str geo_code: the geographical code
    :param str geo_level: the geographical level
    :param session: database session passed through to ``get_objects_by_geo``
    :return: dict with 'educational_attainment_distribution' (per-category
             data after ``collapse_categories``) and 'educational_attainment'
             (the two "or higher" figures)
    """
    field = 'highest educational level'
    db_model = get_model_from_fields([field], geo_level,
                                     'highesteducationallevel_%s_25andover'
                                     % geo_level)
    objects = get_objects_by_geo(db_model, geo_code, geo_level, session)

    edu_dist_data = {}
    get_or_higher = 0.0
    fet_or_higher = 0.0
    total = 0.0
    for i, obj in enumerate(objects):
        category_val = getattr(obj, field)
        # increment counters
        total += obj.total
        if category_val in EDUCATION_GET_OR_HIGHER:
            get_or_higher += obj.total
            # FET is only counted for categories that are also GET-or-higher
            if category_val in EDUCATION_FET_OR_HIGHER:
                fet_or_higher += obj.total
        # add data points for category
        edu_dist_data[str(i)] = {
            "name": category_val,
            "numerators": {"this": obj.total},
        }
    edu_dist_data = collapse_categories(edu_dist_data,
                                        COLLAPSED_EDUCATION_CATEGORIES,
                                        key_order=('None', 'Other',
                                                   'Some primary', 'Primary',
                                                   'Grade 9', 'Some secondary',
                                                   'Grade 12 (Matric)',
                                                   'Undergrad',
                                                   'Post-grad'))
    edu_split_data = {
        'percent_get_or_higher': {
            "name": "Completed Grade 9 or higher",
            "numerators": {"this": get_or_higher},
        },
        'percent_fet_or_higher': {
            "name": "Completed Matric or higher",
            "numerators": {"this": fet_or_higher},
        }
    }
    # calculate percentages
    for data in (edu_dist_data, edu_split_data):
        for fields in data.values():
            numerator = fields["numerators"]["this"]
            # A geography without any rows leaves total at zero; report 0%
            # instead of raising ZeroDivisionError.
            share = numerator / total * 100 if total else 0.0
            fields["values"] = {"this": round(share, 2)}

    # NOTE(review): 'Invididuals' looks like a typo for 'Individuals'; the
    # text is emitted at runtime, so it is deliberately left unchanged here.
    edu_dist_data['metadata'] = {'universe': 'Invididuals 25 and over'}
    edu_split_data['metadata'] = {'universe': 'Invididuals 25 and over'}

    add_metadata(edu_dist_data, db_model)

    return {'educational_attainment_distribution': edu_dist_data,
            'educational_attainment': edu_split_data}
    def store_values(self):
        session = get_session()
        county_codes = dict((p.name.upper().replace('-', ' '), p.code) for p in session.query(County))
        session.commit()

        # cache of the db models for each geo level
        models = {}
        count = 0

        for geo_level, geo_name, category, total in self.read_rows():
            count += 1

            print geo_level, geo_name, category, total

            if geo_level == 'county':
                code = county_codes[geo_name.upper()]
            elif geo_level == 'country':
                code = 'KE'
            else:
                raise ValueError(geo_level)

            base_kwargs = {'%s_code' % geo_level: code} if code else {}

            # get db model and create table if necessary
            if geo_level in models:
                db_model = models[geo_level]
            else:
                if self.table_name:
                    table_name = self.table_name + '_' + geo_level
                else:
                    table_name = None

                try:
                    models[geo_level] = db_model = get_model_from_fields(self.fields, geo_level, table_name)
                except ValueError as e:
                    raise ValueError('%s. Have you declared this field in a table in censusreporter/api/models/tables.py?' % e.message)
                Base.metadata.create_all(_engine, tables=[db_model.__table__])
                self.table_names.append(db_model.__table__.name)

            # prepare the dict of args to pass to the db model for this row
            kwargs = base_kwargs.copy()
            kwargs.update(dict((f, c) for f, c in zip(self.fields, category)))
            kwargs['total'] = total

            # create and add the row
            session.add(db_model(**kwargs))

            if count % 100 == 0:
                session.flush()

        session.commit()
        session.close()
Ejemplo n.º 5
0
    def store_values(self):
        session = get_session()
        province_codes = dict((p.name, p.code) for p in session.query(Province))
        session.commit()

        # cache of the db models for each geo level
        models = {}
        count = 0

        for geo_name, values in self.read_rows():
            count += 1
            geo_level = self.determine_level(geo_name)

            print geo_level, geo_name

            if geo_level == 'province':
                code = province_codes[geo_name]
            elif geo_name == 'country':
                code = 'ZA'
            else:
                code = geo_name.split(':')[0]
            base_kwargs = {'%s_code' % geo_level: code}

            # get db model and create table if necessary
            if geo_level in models:
                db_model = models[geo_level]
            else:
                if self.table_name:
                    table_name = self.table_name + '_' + geo_level
                else:
                    table_name = None

                models[geo_level] = db_model = get_model_from_fields(self.fields, geo_level, table_name)
                Base.metadata.create_all(_engine, tables=[db_model.__table__])

            for category, value in zip(self.categories, values):
                # prepare the dict of args to pass to the db model for this row
                kwargs = base_kwargs.copy()
                if value.strip() == '-':
                    value = '0'

                kwargs.update(dict((f, v) for f, v in zip(self.fields, category)))
                kwargs['total'] = int(value.replace(',', ''))

                # create and add the row
                session.add(db_model(**kwargs))

            if count % 100 == 0:
                session.flush()

        session.commit()
        session.close()
Ejemplo n.º 6
0
def get_stat_data(fields, geo_level, geo_code, session, order_by=None,
                  percent=True, total=None, table_fields=None,
                  table_name=None, only=None, exclude=None, exclude_zero=False,
                  recode=None, key_order=None):
    """
    This is our primary helper routine for building a dictionary suitable for
    a place's profile page, based on a statistic.

    It sums over the data for +fields+ in the database for the place identified by
    +geo_level+ and +geo_code+ and calculates numerators and values. If multiple
    fields are given, it creates nested result dictionaries.

    Control the rows that are included or ignored using +only+, +exclude+ and +exclude_zero+.

    The field values can be recoded using +recode+ and re-ordered using +key_order+.

    :param str or list fields: the census field to build stats for. Specify a list of fields to build
                               nested statistics. If multiple fields are specified, then the values
                               of parameters such as +only+, +exclude+ and +recode+ will change.
                               These must be fields in `api.models.census.census_fields`, e.g. 'highest educational level'
    :param str geo_level: the geographical level
    :param str geo_code: the geographical code
    :param dbsession session: sqlalchemy session
    :param str order_by: field to order by, or None for default, eg. '-total'
    :param bool percent: should we calculate percentages, or just sum raw values?
    :param int total: the total value to use for percentages, or None to total columns automatically
    :param list table_fields: list of fields to use to find the table, defaults to `fields`
    :param str table_name: override the table name, otherwise it's calculated from the fields and geo_level
    :param dict or list only: only include these field values. If +fields+ has many items, this must be a dict
                              mapping field names to a list of strings. Fields that have no entry
                              in the dict are not restricted.
    :param dict or list exclude: ignore these field values. If +fields+ has many items, this must be a dict
                                 mapping field names to a list of strings. Field names are checked
                                 before any recoding.
    :param bool exclude_zero: ignore fields that have a zero total
    :param dict or lambda recode: function or dict to recode field values. If +fields+ is a singleton,
                                  then the keys of this dict must be the values to recode from, otherwise
                                  they must be the field names and then the values. If this is a lambda,
                                  it is called with the field name and its value as arguments.
    :param dict or list key_order: ordering for keys in result dictionary. If +fields+ has many items,
                                   this must be a dict from field names to orderings.
                                   The default ordering is determined by +order_by+.

    :return: (data-dictionary, total)
    :raises ValueError: if many fields are given and +only+, +exclude+ or +key_order+ is not a dict,
                        or if +total+ is given together with many fields
    """

    if not isinstance(fields, list):
        fields = [fields]

    n_fields = len(fields)
    many_fields = n_fields > 1

    if order_by is None:
        order_by = fields[0]

    if only is not None:
        if not isinstance(only, dict):
            if many_fields:
                raise ValueError("If many fields are given, then only must be a dict. I got %s instead" % only)
            else:
                only = {fields[0]: set(only)}

    if exclude is not None:
        if not isinstance(exclude, dict):
            if many_fields:
                raise ValueError("If many fields are given, then exclude must be a dict. I got %s instead" % exclude)
            else:
                exclude = {fields[0]: set(exclude)}

    if key_order:
        if not isinstance(key_order, dict):
            if many_fields:
                raise ValueError("If many fields are given, then key_order must be a dict. I got %s instead" % key_order)
            else:
                key_order = {fields[0]: key_order}
    else:
        key_order = {}

    if total is not None and many_fields:
        raise ValueError("Cannot specify a total if many fields are given")

    if recode:
        # a single recoder (callable, or a value dict for a lone field) is
        # applied to every field
        if not isinstance(recode, dict) or not many_fields:
            recode = dict((f, recode) for f in fields)

    model = get_model_from_fields(table_fields or fields, geo_level, table_name)
    objects = get_objects_by_geo(model, geo_code, geo_level, session, fields=fields, order_by=order_by)

    root_data = OrderedDict()
    # running totals keyed by the final (innermost) key of each row
    our_total = {}

    def get_data_object(obj):
        """ Walk down the list of fields and return the
        final resting place for data for this stat, or None
        if the row is filtered out. """
        data = root_data

        for i, field in enumerate(fields):
            key = getattr(obj, field)

            # Only fields that actually appear in +only+ are restricted;
            # without the membership test every row was rejected as soon as
            # one of several fields had no entry.
            if only and field in only and key not in only[field]:
                return key, None

            if exclude and key in exclude.get(field, {}):
                return key, None

            if recode and field in recode:
                recoder = recode[field]
                if isinstance(recoder, dict):
                    key = recoder.get(key, key)
                else:
                    key = recoder(field, key)
            else:
                key = capitalize(key)

            # enforce key ordering the first time a level is seen; a level
            # that holds nothing but its 'metadata' entry still counts as new
            if field in key_order and all(k == 'metadata' for k in data):
                for fld in key_order[field]:
                    data[fld] = OrderedDict()

            # ensure it's there
            if key not in data:
                data[key] = OrderedDict()

            data = data[key]

            # default values for intermediate fields. This is set on every
            # visit: a freshly created dict is empty (falsy), so testing its
            # truthiness skipped the metadata on the first row for a key.
            if i < n_fields - 1:
                data['metadata'] = {'name': key}

        # data is now the dict where the end value is going to go
        if not data:
            data['name'] = key
            data['numerators'] = {'this': 0.0}

        return key, data

    # run the stats for the objects
    for obj in objects:
        if obj.total == 0 and exclude_zero:
            continue

        # get the data dict where these values must go
        key, data = get_data_object(obj)
        if not data:
            continue

        our_total[key] = our_total.get(key, 0.0) + obj.total
        data['numerators']['this'] += obj.total

    if total is not None:
        grand_total = total
    else:
        grand_total = sum(our_total.values())

    # add in percentages
    if percent:
        def calc_percent(level):
            for key, entry in level.items():
                if key == 'metadata':
                    continue
                if 'numerators' in entry:
                    tot = our_total[key] if many_fields else grand_total
                    perc = 0 if tot == 0 else (entry['numerators']['this'] / tot * 100)
                    entry['values'] = {'this': round(perc, 2)}
                else:
                    # intermediate level: descend
                    calc_percent(entry)

        calc_percent(root_data)

    add_metadata(root_data, model)

    return root_data, grand_total
Ejemplo n.º 7
0
def get_stat_data(fields,
                  geo_level,
                  geo_code,
                  session,
                  order_by=None,
                  percent=True,
                  total=None,
                  table_fields=None,
                  table_name=None,
                  only=None,
                  exclude=None,
                  exclude_zero=False,
                  recode=None,
                  key_order=None):
    """
    This is our primary helper routine for building a dictionary suitable for
    a place's profile page, based on a statistic.

    It sums over the data for +fields+ in the database for the place identified by
    +geo_level+ and +geo_code+ and calculates numerators and values. If multiple
    fields are given, it creates nested result dictionaries.

    Control the rows that are included or ignored using +only+, +exclude+ and +exclude_zero+.

    The field values can be recoded using +recode+ and re-ordered using +key_order+.

    :param str or list fields: the census field to build stats for. Specify a list of fields to build
                               nested statistics. If multiple fields are specified, then the values
                               of parameters such as +only+, +exclude+ and +recode+ will change.
                               These must be fields in `api.models.census.census_fields`, e.g. 'highest educational level'
    :param str geo_level: the geographical level
    :param str geo_code: the geographical code
    :param dbsession session: sqlalchemy session
    :param str order_by: field to order by, or None for default, eg. '-total'
    :param bool percent: should we calculate percentages, or just sum raw values? When False, the
                         raw sums are copied to 'values' and the 'this' numerator is set to None.
    :param int total: the total value to use for percentages, or None to total columns automatically
    :param list table_fields: list of fields to use to find the table, defaults to `fields`
    :param str table_name: override the table name, otherwise it's calculated from the fields and geo_level
    :param dict or list only: only include these field values. If +fields+ has many items, this must be a dict
                              mapping field names to a list of strings. Fields that have no entry
                              in the dict are not restricted.
    :param dict or list exclude: ignore these field values. If +fields+ has many items, this must be a dict
                                 mapping field names to a list of strings. Field names are checked
                                 before any recoding.
    :param bool exclude_zero: ignore fields that have a zero total
    :param dict or lambda recode: function or dict to recode field values. If +fields+ is a singleton,
                                  then the keys of this dict must be the values to recode from, otherwise
                                  they must be the field names and then the values. If this is a lambda,
                                  it is called with the field name and its value as arguments.
    :param dict or list key_order: ordering for keys in result dictionary. If +fields+ has many items,
                                   this must be a dict from field names to orderings.
                                   The default ordering is determined by +order_by+.

    :return: (data-dictionary, total)
    :raises ValueError: if many fields are given and +only+, +exclude+ or +key_order+ is not a dict,
                        or if +total+ is given together with many fields
    """

    if not isinstance(fields, list):
        fields = [fields]

    n_fields = len(fields)
    many_fields = n_fields > 1

    if order_by is None:
        order_by = fields[0]

    if only is not None:
        if not isinstance(only, dict):
            if many_fields:
                raise ValueError(
                    "If many fields are given, then only must be a dict. I got %s instead"
                    % only)
            else:
                only = {fields[0]: set(only)}

    if exclude is not None:
        if not isinstance(exclude, dict):
            if many_fields:
                raise ValueError(
                    "If many fields are given, then exclude must be a dict. I got %s instead"
                    % exclude)
            else:
                exclude = {fields[0]: set(exclude)}

    if key_order:
        if not isinstance(key_order, dict):
            if many_fields:
                raise ValueError(
                    "If many fields are given, then key_order must be a dict. I got %s instead"
                    % key_order)
            else:
                key_order = {fields[0]: key_order}
    else:
        key_order = {}

    if total is not None and many_fields:
        raise ValueError("Cannot specify a total if many fields are given")

    if recode:
        # a single recoder (callable, or a value dict for a lone field) is
        # applied to every field
        if not isinstance(recode, dict) or not many_fields:
            recode = dict((f, recode) for f in fields)

    model = get_model_from_fields(table_fields or fields, geo_level,
                                  table_name)
    objects = get_objects_by_geo(model,
                                 geo_code,
                                 geo_level,
                                 session,
                                 fields=fields,
                                 order_by=order_by)

    root_data = OrderedDict()
    # running totals keyed by the final (innermost) key of each row
    our_total = {}

    def get_data_object(obj):
        """ Walk down the list of fields and return the
        final resting place for data for this stat, or None
        if the row is filtered out. """
        data = root_data

        for i, field in enumerate(fields):
            key = getattr(obj, field)

            if only and field in only and key not in only[field]:
                return key, None

            if exclude and key in exclude.get(field, {}):
                return key, None

            if recode and field in recode:
                recoder = recode[field]
                if isinstance(recoder, dict):
                    key = recoder.get(key, key)
                else:
                    key = recoder(field, key)
            else:
                key = capitalize(key)

            # enforce key ordering the first time a level is seen.
            # Intermediate dicts receive their 'metadata' entry before any
            # child key is added, so a plain emptiness test never fired for
            # nested fields; a level holding only 'metadata' counts as new.
            if field in key_order and all(k == 'metadata' for k in data):
                for fld in key_order[field]:
                    data[fld] = OrderedDict()

            # ensure it's there
            if key not in data:
                data[key] = OrderedDict()

            data = data[key]

            # default values for intermediate fields
            if i < n_fields - 1:
                data['metadata'] = {'name': key}

        # data is now the dict where the end value is going to go
        if not data:
            data['name'] = key
            data['numerators'] = {'this': 0.0}

        return key, data

    # run the stats for the objects
    for obj in objects:
        if obj.total == 0 and exclude_zero:
            continue

        # get the data dict where these values must go
        key, data = get_data_object(obj)
        if not data:
            continue

        our_total[key] = our_total.get(key, 0.0) + obj.total
        data['numerators']['this'] += obj.total

    if total is not None:
        grand_total = total
    else:
        grand_total = sum(our_total.values())

    # add in percentages, or expose the raw sums as values
    def calc_percent(level):
        for key, entry in level.items():
            if key == 'metadata':
                continue
            if 'numerators' not in entry:
                # intermediate level: descend
                calc_percent(entry)
            elif percent:
                tot = our_total[key] if many_fields else grand_total
                perc = 0 if tot == 0 else (entry['numerators']['this'] /
                                           tot * 100)
                entry['values'] = {'this': round(perc, 2)}
            else:
                entry['values'] = dict(entry['numerators'])
                entry['numerators']['this'] = None

    calc_percent(root_data)

    add_metadata(root_data, model)

    return root_data, grand_total
Ejemplo n.º 8
0
def get_demographics_profile(geo_code, geo_level, session):
    """Build the demographics section of a place profile.

    Every statistic is read from the table identified by the fields
    'age in completed years', 'sex' and 'rural or urban'.

    :param str geo_code: the geographical code
    :param str geo_level: the geographical level
    :param session: database session passed through to the query helpers
    :return: dict with the keys 'sex_ratio', 'urban_distribution',
             'urbanised', 'age_group_distribution',
             'age_category_distribution', 'median_age' and
             'total_population'
    """
    age_field = 'age in completed years'
    table_fields = [age_field, 'sex', 'rural or urban']

    def age_as_int(raw):
        # ages may carry a trailing '+' (open-ended top value)
        return int(raw.replace('+', ''))

    # sex
    sex_dist_data, total_pop = get_stat_data(
            'sex', geo_level, geo_code, session,
            table_fields=list(table_fields))

    # urban/rural by sex
    urban_dist_data, _ = get_stat_data(
            ['rural or urban', 'sex'], geo_level, geo_code, session,
            table_fields=list(table_fields))
    total_urbanised = 0.0
    # A geography with no urban rows has no 'Urban' key at all; treat that as
    # zero urban population instead of raising KeyError.
    for data in urban_dist_data.get('Urban', {}).values():
        # skips the level's 'metadata' entry, which has no numerators
        if 'numerators' in data:
            total_urbanised += data['numerators']['this']

    # median age
    db_model_age = get_model_from_fields(list(table_fields), geo_level)
    objects = get_objects_by_geo(db_model_age, geo_code, geo_level, session, [age_field])
    objects = sorted((o for o in objects if getattr(o, age_field) != 'unspecified'),
                     key=lambda x: age_as_int(getattr(x, age_field)))
    median = calculate_median(objects, age_field)

    # age in 10 year groups
    def age_recode(f, x):
        age = age_as_int(x)
        if age >= 80:
            return '80+'
        # floor division keeps the bucket an int on Python 2 and 3 alike
        bucket = 10 * (age // 10)
        return '%d-%d' % (bucket, bucket+9)

    age_dist_data, _ = get_stat_data(
            age_field, geo_level, geo_code, session,
            table_fields=list(table_fields),
            recode=age_recode, exclude=['unspecified'])

    # age category
    def age_cat_recode(f, x):
        age = age_as_int(x)
        if age < 18:
            return 'Under 18'
        elif age >= 65:
            return '65 and over'
        else:
            return '18 to 64'

    age_cats, _ = get_stat_data(
            age_field, geo_level, geo_code, session,
            table_fields=list(table_fields),
            recode=age_cat_recode,
            exclude=['unspecified'])

    # guard against an empty population so the share never divides by zero
    if total_pop:
        urbanised_share = round(total_urbanised / total_pop * 100, 2)
    else:
        urbanised_share = 0.0

    final_data = {
        'sex_ratio': sex_dist_data,
        'urban_distribution': urban_dist_data,
        'urbanised': {
            'name': 'In urban areas',
            'numerators': {'this': total_urbanised},
            'values': {'this': urbanised_share}
        },
        'age_group_distribution': age_dist_data,
        'age_category_distribution': age_cats,
        'median_age': {
            "name": "Median age",
            "values": {"this": median},
        },
        'total_population': {
            "name": "People",
            "values": {"this": total_pop}
        }}

    return final_data
Ejemplo n.º 9
0
def get_service_delivery_profile(geo_code, geo_level, session):
    """Build the service-delivery section of a place profile.

    Covers water source, refuse disposal, electricity access and toilet
    facilities. Each topic contributes a distribution plus one or more
    headline figures carrying a name, a numerator and a percentage.

    :param str geo_code: the geographical code
    :param str geo_level: the geographical level
    :param session: database session passed through to the query helpers
    :return: dict of distributions and headline figures keyed by topic
    """

    def headline(name, numerator, denominator):
        # one summary figure: label, raw count and its share of the total
        return {
            "name": name,
            "numerators": {"this": numerator},
            "values": {"this": percent(numerator, denominator)},
        }

    # water source
    water_src_data, total_wsrc = get_stat_data(
        ['source of water'], geo_level, geo_code, session,
        recode=SHORT_WATER_SOURCE_CATEGORIES, order_by='-total')
    total_water_sp = 0.0
    if 'Service provider' in water_src_data:
        provider = water_src_data['Service provider']
        total_water_sp = provider['numerators']['this']

    # refuse disposal
    db_model_ref = get_model_from_fields(['refuse disposal'], geo_level)
    refuse_rows = get_objects_by_geo(
        db_model_ref, geo_code, geo_level, session, order_by='-total')
    refuse_disp_data = OrderedDict()
    total_ref = 0.0
    total_ref_sp = 0.0
    for row in refuse_rows:
        raw_category = getattr(row, 'refuse disposal')
        label = SHORT_REFUSE_DISPOSAL_CATEGORIES[raw_category]
        refuse_disp_data[label] = {
            "name": label,
            "numerators": {"this": row.total},
        }
        total_ref += row.total
        if raw_category.startswith('Removed by local authority'):
            total_ref_sp += row.total

    # electricity
    elec_attrs = [
        'electricity for cooking',
        'electricity for heating',
        'electricity for lighting',
    ]
    db_model_elec = get_model_from_fields(elec_attrs, geo_level)
    elec_rows = get_objects_by_geo(db_model_elec, geo_code, geo_level, session)
    total_elec = 0.0
    total_some_elec = 0.0
    elec_access_data = {
        'total_all_elec': {
            "name": "Have electricity for everything",
            "numerators": {"this": 0.0},
        },
        'total_some_not_all_elec': {
            "name": "Have electricity for some things",
            "numerators": {"this": 0.0},
        },
        'total_no_elec': {
            "name": "No electricity",
            "numerators": {"this": 0.0},
        },
    }
    for row in elec_rows:
        total_elec += row.total
        # a use counts as electrified unless its value starts with 'no '
        electrified = [
            not getattr(row, attr).startswith('no ') for attr in elec_attrs
        ]
        has_some = any(electrified)
        if has_some:
            total_some_elec += row.total

        if all(electrified):
            bucket = 'total_all_elec'
        elif has_some:
            bucket = 'total_some_not_all_elec'
        else:
            bucket = 'total_no_elec'
        elec_access_data[bucket]['numerators']['this'] += row.total

    # percentages for the two hand-built distributions
    for dist, denominator in ((refuse_disp_data, total_ref),
                              (elec_access_data, total_elec)):
        for entry in dist.values():
            entry["values"] = {
                "this": percent(entry["numerators"]["this"], denominator)
            }

    add_metadata(refuse_disp_data, db_model_ref)
    add_metadata(elec_access_data, db_model_elec)

    # toilets
    toilet_data, total_toilet = get_stat_data(
        ['toilet facilities'], geo_level, geo_code, session,
        exclude_zero=True, recode=COLLAPSED_TOILET_CATEGORIES,
        order_by='-total')

    total_flush_toilet = 0.0
    total_no_toilet = 0.0
    for label, entry in toilet_data.iteritems():
        if label.startswith(('Flush', 'Chemical')):
            total_flush_toilet += entry['numerators']['this']
        if label == 'None':
            total_no_toilet += entry['numerators']['this']

    return {
        'water_source_distribution': water_src_data,
        'percentage_water_from_service_provider': headline(
            "Are getting water from a regional or local service provider",
            total_water_sp, total_wsrc),
        'refuse_disposal_distribution': refuse_disp_data,
        'percentage_ref_disp_from_service_provider': headline(
            "Are getting refuse disposal from a local authority or private company",
            total_ref_sp, total_ref),
        'percentage_electricity_access': headline(
            "Have electricity for at least one of cooking, heating or lighting",
            total_some_elec, total_elec),
        'electricity_access_distribution': elec_access_data,
        'percentage_flush_toilet_access': headline(
            "Have access to flush or chemical toilets",
            total_flush_toilet, total_toilet),
        'percentage_no_toilet_access': headline(
            "Have no access to any toilets",
            total_no_toilet, total_toilet),
        'toilet_facilities_distribution': toilet_data,
    }
Ejemplo n.º 10
0
def get_households_profile(geo_code, geo_level, session):
    """Build the households profile for a single geography.

    Four tables are read through ``get_model_from_fields`` and
    ``get_objects_by_geo`` (gender of household head, age of household
    head, tenure status and type of dwelling); household goods come from
    ``get_stat_data``.

    The number of households is the sum of ``total`` over all
    gender-of-head rows and is the denominator of every percentage
    computed here.  If that sum is zero the percentages are reported as
    0.0 rather than raising ``ZeroDivisionError``.

    Returns a dict with the keys ``total_households``, ``owned``,
    ``informal``, ``tenure_distribution``, ``household_goods`` and
    ``head_of_household``.
    """
    # head of household
    # gender
    db_model_gender = get_model_from_fields(['gender of head of household'],
                                            geo_level)
    objects = get_objects_by_geo(db_model_gender, geo_code, geo_level, session)
    total_households = 0.0
    female_heads = 0.0
    for obj in objects:
        # every row (including 'Unspecified') counts towards the total
        total_households += obj.total
        if getattr(obj, 'gender of head of household') == 'Female':
            female_heads += obj.total

    def share_of_households(count):
        # Percentage of all households, rounded to 2 places; an empty
        # geography has no households to take a share of.
        if not total_households:
            return 0.0
        return round(count / total_households * 100, 2)

    # age
    db_model_age = get_model_from_fields(['age of household head'],
                                         geo_level)
    objects = get_objects_by_geo(db_model_age, geo_code, geo_level, session)
    total_under_20 = 0.0
    for obj in objects:
        if getattr(obj, 'age of household head') in ('10 - 14', '15 - 19'):
            total_under_20 += obj.total

    # tenure
    db_model_tenure = get_model_from_fields(['tenure status'],
                                            geo_level)
    objects = get_objects_by_geo(db_model_tenure, geo_code, geo_level, session)
    tenure_data = {}
    owned = 0.0
    for obj in objects:
        tenure = getattr(obj, 'tenure status')
        if tenure.startswith('Owned'):
            owned += obj.total
        tenure_data[tenure] = {
            "name": tenure,
            "values": {"this": share_of_households(obj.total)},
            "numerators": {"this": obj.total},
        }

    add_metadata(tenure_data, db_model_tenure)

    # type of dwelling
    db_model_dwelling = get_model_from_fields(['type of dwelling'],
                                              geo_level)
    objects = get_objects_by_geo(db_model_dwelling, geo_code, geo_level,
                                 session)
    informal = 0.0
    for obj in objects:
        if getattr(obj, 'type of dwelling').startswith('Informal'):
            informal += obj.total

    # household goods
    household_goods, _ = get_stat_data(
        ['household goods'], geo_level, geo_code, session,
        total=total_households,
        recode=HOUSEHOLD_GOODS_RECODE,
        exclude=['total households'],
        key_order=sorted(HOUSEHOLD_GOODS_RECODE.values()))

    return {
        'total_households': {
            'name': 'Households',
            'values': {'this': total_households},
        },
        'owned': {
            'name': 'Households fully owned or being paid off',
            'values': {'this': share_of_households(owned)},
            'numerators': {'this': owned},
        },
        'informal': {
            'name': 'Households that are informal dwellings (shacks)',
            'values': {'this': share_of_households(informal)},
            'numerators': {'this': informal},
        },
        'tenure_distribution': tenure_data,
        'household_goods': household_goods,
        'head_of_household': {
            'female': {
                'name': 'Households with women as their head',
                'values': {'this': share_of_households(female_heads)},
                'numerators': {'this': female_heads},
            },
            'under_20': {
                'name': 'Households with heads under 20 years old',
                # NOTE: this is the absolute count, not a percentage
                'values': {'this': total_under_20},
            },
        },
    }
Ejemplo n.º 11
0
    elif len(geo_name.split(':')[0]) in (5, 6):
        geo_level = 'municipality'
    elif 'Ward' in geo_name:
        geo_level = 'ward'
    elif len(geo_name.split(':')[0]) >= 7:
        geo_level = 'province'
        session = get_session()
        province_codes = dict((p.name, p.code) for p in session.query(Province))
        session.close()
    elif geo_name.startswith('DC'):
        geo_level = 'district'
    else:
        raise ValueError("Cannot recognize the geo level of data")

    # get db model and create table if necessary
    db_model = get_model_from_fields(fields, geo_level, table_name)
    Base.metadata.create_all(_engine, tables=[db_model.__table__])

    # restart generator
    data = open_census_csv(filepath)
    next(data)  # skip field name and categories
    session = get_session()

    for geo_name, values in data:
        if geo_level == 'province':
            code = province_codes[geo_name]
        elif geo_name == 'country':
            code = None
        else:
            code = geo_name.split(':')[0]
        base_kwargs = {'%s_code' % geo_level: code} if code else {}
Ejemplo n.º 12
0
def get_service_delivery_profile(geo_code, geo_level, session):
    """Assemble the service delivery profile for one geography.

    Covers water source, refuse disposal, electricity access and toilet
    facilities.  Each topic contributes a distribution and one or two
    headline figures (a raw count plus its ``percent`` of the topic total);
    all of them are returned in a single dict.
    """

    def headline(name, numerator, denominator):
        # One headline figure: label, raw count and its share of the total.
        return {
            "name": name,
            "numerators": {"this": numerator},
            "values": {"this": percent(numerator, denominator)},
        }

    # ---- water source ----
    water_src_data, total_wsrc = get_stat_data(
        ['source of water'], geo_level, geo_code, session,
        recode=SHORT_WATER_SOURCE_CATEGORIES,
        order_by='-total')
    total_water_sp = 0.0
    if 'Service provider' in water_src_data:
        provider = water_src_data['Service provider']
        total_water_sp = provider['numerators']['this']

    # ---- refuse disposal ----
    db_model_ref = get_model_from_fields(['refuse disposal'], geo_level)
    refuse_rows = get_objects_by_geo(db_model_ref, geo_code, geo_level,
                                     session, order_by='-total')
    refuse_disp_data = OrderedDict()
    total_ref = 0.0
    total_ref_sp = 0.0
    for row in refuse_rows:
        category = getattr(row, 'refuse disposal')
        short_name = SHORT_REFUSE_DISPOSAL_CATEGORIES[category]
        refuse_disp_data[short_name] = {
            "name": short_name,
            "numerators": {"this": row.total},
        }
        total_ref += row.total
        if category.startswith('Removed by local authority'):
            total_ref_sp += row.total

    # ---- electricity ----
    elec_attrs = [
        'electricity for cooking',
        'electricity for heating',
        'electricity for lighting',
    ]
    db_model_elec = get_model_from_fields(elec_attrs, geo_level)
    elec_rows = get_objects_by_geo(db_model_elec, geo_code, geo_level,
                                   session)
    elec_access_data = {}
    for bucket, label in (
            ('total_all_elec', "Have electricity for everything"),
            ('total_some_not_all_elec', "Have electricity for some things"),
            ('total_no_elec', "No electricity")):
        elec_access_data[bucket] = {
            "name": label,
            "numerators": {"this": 0.0},
        }

    total_elec = 0.0
    total_some_elec = 0.0
    for row in elec_rows:
        total_elec += row.total
        # a use counts as electrified unless its value starts with 'no '
        uses = [not getattr(row, field).startswith('no ')
                for field in elec_attrs]
        if all(uses):
            bucket = 'total_all_elec'
        elif any(uses):
            bucket = 'total_some_not_all_elec'
        else:
            bucket = 'total_no_elec'
        if bucket != 'total_no_elec':
            total_some_elec += row.total
        elec_access_data[bucket]['numerators']['this'] += row.total

    # percentages are filled in before the metadata entries are attached
    for distribution, denominator in ((refuse_disp_data, total_ref),
                                      (elec_access_data, total_elec)):
        for entry in distribution.values():
            entry["values"] = {
                "this": percent(entry["numerators"]["this"], denominator),
            }

    add_metadata(refuse_disp_data, db_model_ref)
    add_metadata(elec_access_data, db_model_elec)

    # ---- toilets ----
    toilet_data, total_toilet = get_stat_data(
        ['toilet facilities'], geo_level, geo_code, session,
        exclude_zero=True,
        recode=COLLAPSED_TOILET_CATEGORIES,
        order_by='-total')

    total_flush_toilet = 0.0
    total_no_toilet = 0.0
    for key in toilet_data:
        if key.startswith(('Flush', 'Chemical')):
            total_flush_toilet += toilet_data[key]['numerators']['this']
        if key == 'None':
            total_no_toilet += toilet_data[key]['numerators']['this']

    return {
        'water_source_distribution': water_src_data,
        'percentage_water_from_service_provider': headline(
            "Are getting water from a regional or local service provider",
            total_water_sp, total_wsrc),
        'refuse_disposal_distribution': refuse_disp_data,
        'percentage_ref_disp_from_service_provider': headline(
            "Are getting refuse disposal from a local authority or private company",
            total_ref_sp, total_ref),
        'percentage_electricity_access': headline(
            "Have electricity for at least one of cooking, heating or lighting",
            total_some_elec, total_elec),
        'electricity_access_distribution': elec_access_data,
        'percentage_flush_toilet_access': headline(
            "Have access to flush or chemical toilets",
            total_flush_toilet, total_toilet),
        'percentage_no_toilet_access': headline(
            "Have no access to any toilets",
            total_no_toilet, total_toilet),
        'toilet_facilities_distribution': toilet_data,
    }
Ejemplo n.º 13
0
def get_households_profile(geo_code, geo_level, session):
    """Assemble the households profile for one geography.

    Pulls the head-of-household gender split, tenure status, annual
    household income, dwelling type and household goods distributions via
    ``get_stat_data``, and derives headline figures from them.  The
    household total returned with the gender distribution is the
    denominator for the headline percentages.

    Returns a dict of distributions and headline figures.
    """
    # ---- head of household: gender ----
    gender_field = 'gender of household head'
    head_gender_dist, total_households = get_stat_data(
        [gender_field], geo_level, geo_code, session,
        order_by=gender_field)
    female_heads = head_gender_dist['Female']['numerators']['this']

    # ---- head of household: age ----
    u18_table = 'genderofheadofhouseholdunder18_%s' % geo_level
    db_model_u18 = get_model_from_fields(
        ['gender of head of household'], geo_level, table_name=u18_table)
    u18_rows = get_objects_by_geo(db_model_u18, geo_code, geo_level, session)
    # the first element of every row is summed into the under-18 count
    total_under_18 = float(sum(row[0] for row in u18_rows))

    # ---- tenure ----
    tenure_field = 'tenure status'
    tenure_data, _ = get_stat_data(
        [tenure_field], geo_level, geo_code, session,
        order_by=tenure_field)
    owned = sum(tenure_data[key]['numerators']['this']
                for key in tenure_data
                if key.startswith('Owned'))

    # ---- annual household income ----
    income_table = 'annualhouseholdincome_genderofhouseholdhead_%s' % geo_level
    income_dist_data, _ = get_stat_data(
        ['annual household income'], geo_level, geo_code, session,
        exclude=['Unspecified'],
        recode=HOUSEHOLD_INCOME_RECODE,
        key_order=HOUSEHOLD_INCOME_RECODE.values(),
        table_name=income_table)

    # the median category is translated into an income estimate
    median_income = HOUSEHOLD_INCOME_ESTIMATE[
        calculate_median_stat(income_dist_data)]

    # ---- type of dwelling ----
    type_of_dwelling_dist, _ = get_stat_data(
        ['type of dwelling'], geo_level, geo_code, session,
        recode=TYPE_OF_DWELLING_RECODE,
        order_by='-total')
    informal = type_of_dwelling_dist['Shack']['numerators']['this']

    # ---- household goods ----
    household_goods, _ = get_stat_data(
        ['household goods'], geo_level, geo_code, session,
        total=total_households,
        recode=HOUSEHOLD_GOODS_RECODE,
        exclude=['total households'],
        key_order=sorted(HOUSEHOLD_GOODS_RECODE.values()))

    # ---- assemble the result ----
    profile = {}
    profile['total_households'] = {
        'name': 'Households',
        'values': {'this': total_households},
    }
    profile['owned'] = {
        'name': 'Households fully owned or being paid off',
        'values': {'this': percent(owned, total_households)},
        'numerators': {'this': owned},
    }
    profile['type_of_dwelling_distribution'] = type_of_dwelling_dist
    profile['informal'] = {
        'name': 'Households that are informal dwellings (shacks)',
        'values': {'this': percent(informal, total_households)},
        'numerators': {'this': informal},
    }
    profile['tenure_distribution'] = tenure_data
    profile['household_goods'] = household_goods
    profile['annual_income_distribution'] = income_dist_data
    profile['median_annual_income'] = {
        'name': 'Average annual household income',
        'values': {'this': median_income},
    }
    profile['head_of_household'] = {
        'gender_distribution': head_gender_dist,
        'female': {
            'name': 'Households with women as their head',
            'values': {'this': percent(female_heads, total_households)},
            'numerators': {'this': female_heads},
        },
        'under_18': {
            'name': 'Households with heads under 18 years old',
            # absolute count, not a percentage
            'values': {'this': total_under_18},
        },
    }
    return profile
Ejemplo n.º 14
0
def get_demographics_profile(geo_code, geo_level, session):
    """Build the demographics profile for one geography.

    Gathers population group, language, age, sex, citizenship and
    birthplace statistics, mostly through ``get_stat_data``, and returns
    them in a single dict.  ``population_density`` is only included when
    the geography reports a truthy ``square_kms``.

    The male count comes from a SQL ``SUM``; it is treated as zero when
    the query yields ``None`` (no matching rows) so that the female count
    can still be derived from the population total.  All percentages are
    computed with the ``percent`` helper.
    """
    # population group
    pop_dist_data, total_pop = get_stat_data(
            ['population group'], geo_level, geo_code, session)

    # language
    language_data, _ = get_stat_data(
            ['language'], geo_level, geo_code, session, order_by='-total')
    # the data is requested with order_by='-total'; its first key is used
    # as the most spoken language
    language_most_spoken = language_data[language_data.keys()[0]]

    # age groups
    age_dist_data, _ = get_stat_data(
            ['age groups in 5 years'], geo_level, geo_code, session,
            recode=COLLAPSED_AGE_CATEGORIES,
            key_order=('0-9', '10-19',
                       '20-29', '30-39',
                       '40-49', '50-59',
                       '60-69', '70-79',
                       '80+'))

    # sex
    db_model_sex = get_model_from_fields(
        ['gender'], geo_level, table_name='gender_%s' % geo_level)
    query = session.query(func.sum(db_model_sex.total)) \
                   .filter(db_model_sex.gender == 'Male')
    geo_attr = '%s_code' % geo_level
    query = query.filter(getattr(db_model_sex, geo_attr) == geo_code)
    total_male = query.one()[0]
    if total_male is None:
        # SUM over an empty set is NULL, not 0
        total_male = 0.0
    total_female = total_pop - total_male

    sex_data = OrderedDict((  # census data refers to sex as gender
            ('Female', {
                "name": "Female",
                "values": {"this": percent(total_female, total_pop)},
                "numerators": {"this": total_female},
            }),
            ('Male', {
                "name": "Male",
                "values": {"this": percent(total_male, total_pop)},
                "numerators": {"this": total_male},
            }),
        ))

    add_metadata(sex_data, db_model_sex)

    final_data = {
        'language_distribution': language_data,
        'language_most_spoken': language_most_spoken,
        'population_group_distribution': pop_dist_data,
        'age_group_distribution': age_dist_data,
        'sex_ratio': sex_data,
        'total_population': {
            "name": "People",
            "values": {"this": total_pop},
        }
    }

    geo = get_geography(geo_code, geo_level)
    if geo.square_kms:
        final_data['population_density'] = {
            'name': "people per square kilometre",
            'values': {"this": total_pop / geo.square_kms},
        }

    # median age/age category
    db_model_age = get_model_from_fields(
        ['age in completed years'], geo_level,
        table_name='ageincompletedyears_%s' % geo_level
    )
    # rows are sorted by numeric age before the median is calculated
    objects = sorted(
        get_objects_by_geo(db_model_age, geo_code, geo_level, session),
        key=lambda x: int(getattr(x, 'age in completed years'))
    )
    # median age
    median = calculate_median(objects, 'age in completed years')
    final_data['median_age'] = {
        "name": "Median age",
        "values": {"this": median},
    }

    # age category
    age_dist, _ = get_stat_data(
        ['age in completed years'], geo_level, geo_code, session,
        table_name='ageincompletedyearssimplified_%s' % geo_level,
        key_order=['Under 18', '18 to 64', '65 and over'],
        recode={'< 18': 'Under 18',
                '>= 65': '65 and over'})
    final_data['age_category_distribution'] = age_dist

    # citizenship
    citizenship_dist, _ = get_stat_data(
            ['citizenship'], geo_level, geo_code, session,
            order_by='-total')

    sa_citizen = citizenship_dist['Yes']['numerators']['this']

    final_data['citizenship_distribution'] = citizenship_dist
    final_data['citizenship_south_african'] = {
            'name': 'South African citizens',
            'values': {'this': percent(sa_citizen, total_pop)},
            'numerators': {'this': sa_citizen},
            }

    # migration
    province_of_birth_dist, _ = get_stat_data(
            ['province of birth'], geo_level, geo_code, session,
            exclude_zero=True, order_by='-total')

    final_data['province_of_birth_distribution'] = province_of_birth_dist

    def region_recode(field, key):
        # shorten the one verbose category; every other key passes through
        if key == 'Born in South Africa':
            return 'South Africa'
        else:
            return key

    region_of_birth_dist, _ = get_stat_data(
            ['region of birth'], geo_level, geo_code, session,
            exclude_zero=True, order_by='-total',
            recode=region_recode)

    if 'South Africa' in region_of_birth_dist:
        born_in_sa = region_of_birth_dist['South Africa']['numerators']['this']
    else:
        born_in_sa = 0

    final_data['region_of_birth_distribution'] = region_of_birth_dist
    final_data['born_in_south_africa'] = {
            'name': 'Born in South Africa',
            'values': {'this': percent(born_in_sa, total_pop)},
            'numerators': {'this': born_in_sa},
            }

    return final_data
Ejemplo n.º 15
0
def get_demographics_profile(geo_code, geo_level, session):
    """Build the demographics profile for one geography.

    Gathers population group, language, age, sex, citizenship and
    birthplace statistics and returns them in a single dict.

    The male count comes from a SQL ``SUM``; it is treated as zero when
    the query yields ``None`` (no matching rows) so that the female count
    can still be derived from the population total.  All percentages,
    including the sex ratio and the three age categories, are computed
    with the ``percent`` helper.
    """
    # population group
    pop_dist_data, total_pop = get_stat_data(
            ['population group'], geo_level, geo_code, session)

    # language
    language_data, _ = get_stat_data(
            ['language'], geo_level, geo_code, session, order_by='-total')
    # the data is requested with order_by='-total'; its first key is used
    # as the most spoken language
    language_most_spoken = language_data[language_data.keys()[0]]

    # age groups
    age_dist_data, _ = get_stat_data(
            ['age groups in 5 years'], geo_level, geo_code, session,
            recode=COLLAPSED_AGE_CATEGORIES,
            key_order=('0-9', '10-19',
                       '20-29', '30-39',
                       '40-49', '50-59',
                       '60-69', '70-79',
                       '80+'))

    # sex
    db_model_sex = get_model_from_fields(['gender'], geo_level)
    query = session.query(func.sum(db_model_sex.total)) \
                   .filter(db_model_sex.gender == 'Male')
    geo_attr = '%s_code' % geo_level
    query = query.filter(getattr(db_model_sex, geo_attr) == geo_code)
    total_male = query.one()[0]
    if total_male is None:
        # SUM over an empty set is NULL, not 0
        total_male = 0.0
    total_female = total_pop - total_male

    sex_data = OrderedDict((  # census data refers to sex as gender
            ('Female', {
                "name": "Female",
                "values": {"this": percent(total_female, total_pop)},
                "numerators": {"this": total_female},
            }),
            ('Male', {
                "name": "Male",
                "values": {"this": percent(total_male, total_pop)},
                "numerators": {"this": total_male},
            }),
        ))

    add_metadata(sex_data, db_model_sex)

    final_data = {
        'language_distribution': language_data,
        'language_most_spoken': language_most_spoken,
        'population_group_distribution': pop_dist_data,
        'age_group_distribution': age_dist_data,
        'sex_ratio': sex_data,
        'total_population': {
            "name": "People",
            "values": {"this": total_pop}
        }}

    # median age/age category
    db_model_age = get_model_from_fields(['age in completed years'], geo_level)
    # rows are sorted by numeric age before the median is calculated
    objects = sorted(
        get_objects_by_geo(db_model_age, geo_code, geo_level, session),
        key=lambda x: int(getattr(x, 'age in completed years'))
    )
    # median age
    median = calculate_median(objects, 'age in completed years')
    final_data['median_age'] = {
        "name": "Median age",
        "values": {"this": median},
    }
    # age category: bucket every single-year row into one of three bands
    under_18 = 0.0
    over_or_65 = 0.0
    between_18_64 = 0.0
    total = 0.0
    for obj in objects:
        age = int(getattr(obj, 'age in completed years'))
        total += obj.total
        if age < 18:
            under_18 += obj.total
        elif age >= 65:
            over_or_65 += obj.total
        else:
            between_18_64 += obj.total

    age_dist = OrderedDict((
        ("under_18", {
            "name": "Under 18",
            "values": {"this": percent(under_18, total)}
        }),
        ("18_to_64", {
            "name": "18 to 64",
            "values": {"this": percent(between_18_64, total)}
        }),
        ("65_and_over", {
            "name": "65 and over",
            "values": {"this": percent(over_or_65, total)}
        })))

    add_metadata(age_dist, db_model_age)

    final_data['age_category_distribution'] = age_dist

    # citizenship
    citizenship_dist, _ = get_stat_data(
            ['citizenship'], geo_level, geo_code, session,
            order_by='-total')

    sa_citizen = citizenship_dist['Yes']['numerators']['this']

    final_data['citizenship_distribution'] = citizenship_dist
    final_data['citizenship_south_african'] = {
            'name': 'South African citizens',
            'values': {'this': percent(sa_citizen, total_pop)},
            'numerators': {'this': sa_citizen},
            }

    # migration
    province_of_birth_dist, _ = get_stat_data(
            ['province of birth'], geo_level, geo_code, session,
            exclude_zero=True, order_by='-total')

    final_data['province_of_birth_distribution'] = province_of_birth_dist

    def region_recode(field, key):
        # shorten the one verbose category; every other key passes through
        if key == 'Born in South Africa':
            return 'South Africa'
        else:
            return key

    region_of_birth_dist, _ = get_stat_data(
            ['region of birth'], geo_level, geo_code, session,
            exclude_zero=True, order_by='-total',
            recode=region_recode)

    if 'South Africa' in region_of_birth_dist:
        born_in_sa = region_of_birth_dist['South Africa']['numerators']['this']
    else:
        born_in_sa = 0

    final_data['region_of_birth_distribution'] = region_of_birth_dist
    final_data['born_in_south_africa'] = {
            'name': 'Born in South Africa',
            'values': {'this': percent(born_in_sa, total_pop)},
            'numerators': {'this': born_in_sa},
            }

    return final_data
Ejemplo n.º 16
0
def get_demographics_profile(geo_code, geo_level, session):
    """Build the demographics profile for one geography.

    Gathers population group, language, age, sex, citizenship and
    birthplace statistics, mostly through ``get_stat_data``, and returns
    them in a single dict.  ``population_density`` is only included when
    the geography reports a truthy ``square_kms``.

    The male count comes from a SQL ``SUM``; it is treated as zero when
    the query yields ``None`` (no matching rows) so that the female count
    can still be derived from the population total.  All percentages are
    computed with the ``percent`` helper.
    """
    # population group
    pop_dist_data, total_pop = get_stat_data(['population group'], geo_level,
                                             geo_code, session)

    # language
    language_data, _ = get_stat_data(['language'],
                                     geo_level,
                                     geo_code,
                                     session,
                                     order_by='-total')
    # the data is requested with order_by='-total'; its first key is used
    # as the most spoken language
    language_most_spoken = language_data[language_data.keys()[0]]

    # age groups
    age_dist_data, _ = get_stat_data(
        ['age groups in 5 years'],
        geo_level,
        geo_code,
        session,
        recode=COLLAPSED_AGE_CATEGORIES,
        key_order=('0-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69',
                   '70-79', '80+'))

    # sex
    db_model_sex = get_model_from_fields(['gender'],
                                         geo_level,
                                         table_name='gender_%s' % geo_level)
    query = session.query(func.sum(db_model_sex.total)) \
                   .filter(db_model_sex.gender == 'Male')
    geo_attr = '%s_code' % geo_level
    query = query.filter(getattr(db_model_sex, geo_attr) == geo_code)
    total_male = query.one()[0]
    if total_male is None:
        # SUM over an empty set is NULL, not 0
        total_male = 0.0
    total_female = total_pop - total_male

    sex_data = OrderedDict((  # census data refers to sex as gender
        ('Female', {
            "name": "Female",
            "values": {
                "this": percent(total_female, total_pop)
            },
            "numerators": {
                "this": total_female
            },
        }),
        ('Male', {
            "name": "Male",
            "values": {
                "this": percent(total_male, total_pop)
            },
            "numerators": {
                "this": total_male
            },
        }),
    ))

    add_metadata(sex_data, db_model_sex)

    final_data = {
        'language_distribution': language_data,
        'language_most_spoken': language_most_spoken,
        'population_group_distribution': pop_dist_data,
        'age_group_distribution': age_dist_data,
        'sex_ratio': sex_data,
        'total_population': {
            "name": "People",
            "values": {
                "this": total_pop
            },
        }
    }

    geo = get_geography(geo_code, geo_level)
    if geo.square_kms:
        final_data['population_density'] = {
            'name': "people per square kilometre",
            'values': {
                "this": total_pop / geo.square_kms
            },
        }

    # median age/age category
    db_model_age = get_model_from_fields(['age in completed years'],
                                         geo_level,
                                         table_name='ageincompletedyears_%s' %
                                         geo_level)
    # rows are sorted by numeric age before the median is calculated
    objects = sorted(get_objects_by_geo(db_model_age, geo_code, geo_level,
                                        session),
                     key=lambda x: int(getattr(x, 'age in completed years')))
    # median age
    median = calculate_median(objects, 'age in completed years')
    final_data['median_age'] = {
        "name": "Median age",
        "values": {
            "this": median
        },
    }

    # age category
    age_dist, _ = get_stat_data(
        ['age in completed years'],
        geo_level,
        geo_code,
        session,
        table_name='ageincompletedyearssimplified_%s' % geo_level,
        key_order=['Under 18', '18 to 64', '65 and over'],
        recode={
            '< 18': 'Under 18',
            '>= 65': '65 and over'
        })
    final_data['age_category_distribution'] = age_dist

    # citizenship
    citizenship_dist, _ = get_stat_data(['citizenship'],
                                        geo_level,
                                        geo_code,
                                        session,
                                        order_by='-total')

    sa_citizen = citizenship_dist['Yes']['numerators']['this']

    final_data['citizenship_distribution'] = citizenship_dist
    final_data['citizenship_south_african'] = {
        'name': 'South African citizens',
        'values': {
            'this': percent(sa_citizen, total_pop)
        },
        'numerators': {
            'this': sa_citizen
        },
    }

    # migration
    province_of_birth_dist, _ = get_stat_data(['province of birth'],
                                              geo_level,
                                              geo_code,
                                              session,
                                              exclude_zero=True,
                                              order_by='-total')

    final_data['province_of_birth_distribution'] = province_of_birth_dist

    def region_recode(field, key):
        # shorten the one verbose category; every other key passes through
        if key == 'Born in South Africa':
            return 'South Africa'
        else:
            return key

    region_of_birth_dist, _ = get_stat_data(['region of birth'],
                                            geo_level,
                                            geo_code,
                                            session,
                                            exclude_zero=True,
                                            order_by='-total',
                                            recode=region_recode)

    if 'South Africa' in region_of_birth_dist:
        born_in_sa = region_of_birth_dist['South Africa']['numerators']['this']
    else:
        born_in_sa = 0

    final_data['region_of_birth_distribution'] = region_of_birth_dist
    final_data['born_in_south_africa'] = {
        'name': 'Born in South Africa',
        'values': {
            'this': percent(born_in_sa, total_pop)
        },
        'numerators': {
            'this': born_in_sa
        },
    }

    return final_data
Ejemplo n.º 17
0
def get_demographics_profile(geo_code, geo_level, session):
    """Build the demographics profile for one geography.

    Gathers population group, language, age-group, sex, median age and
    age-category statistics and returns them in a single dict.

    The male count comes from a SQL ``SUM``; it is treated as zero when
    the query yields ``None`` (no matching rows), and the female count is
    the population total minus the male count.  Each ``sex_ratio`` entry
    carries the numerator that matches its own percentage.  Percentages
    whose denominator is zero are reported as 0.0.
    """

    def share(count, whole):
        # Percentage rounded to 2 places; 0.0 when there is nothing to
        # take a share of.
        if not whole:
            return 0.0
        return round(count / whole * 100, 2)

    # population group
    pop_dist_data, total_pop = get_stat_data(
            ['population group'], geo_level, geo_code, session)

    # language
    language_data, _ = get_stat_data(
            ['language'], geo_level, geo_code, session, order_by='-total')
    # the data is requested with order_by='-total'; its first key is used
    # as the most spoken language
    language_most_spoken = language_data[language_data.keys()[0]]

    # age groups
    age_dist_data, _ = get_stat_data(
            ['age groups in 5 years'], geo_level, geo_code, session,
            recode=COLLAPSED_AGE_CATEGORIES,
            key_order=('0-9', '10-19',
                       '20-29', '30-39',
                       '40-49', '50-59',
                       '60-69', '70-79',
                       '80+'))

    # sex
    db_model_sex = get_model_from_fields(['gender'], geo_level)
    query = session.query(func.sum(db_model_sex.total)) \
                   .filter(db_model_sex.gender == 'Male')
    geo_attr = '%s_code' % geo_level
    query = query.filter(getattr(db_model_sex, geo_attr) == geo_code)
    total_male = query.one()[0]
    if total_male is None:
        # SUM over an empty set is NULL, not 0
        total_male = 0.0
    total_female = total_pop - total_male

    sex_data = OrderedDict((  # census data refers to sex as gender
            ('Female', {
                "name": "Female",
                "values": {"this": share(total_female, total_pop)},
                "numerators": {"this": total_female},
            }),
            ('Male', {
                "name": "Male",
                "values": {"this": share(total_male, total_pop)},
                "numerators": {"this": total_male},
            }),
        ))

    add_metadata(sex_data, db_model_sex)

    final_data = {
        'language_distribution': language_data,
        'language_most_spoken': language_most_spoken,
        'population_group_distribution': pop_dist_data,
        'age_group_distribution': age_dist_data,
        'sex_ratio': sex_data,
        'total_population': {
            "name": "People",
            "values": {"this": total_pop}
        }}

    # median age/age category
    db_model_age = get_model_from_fields(['age in completed years'], geo_level)
    # rows are sorted by numeric age before the median is calculated
    objects = sorted(
        get_objects_by_geo(db_model_age, geo_code, geo_level, session),
        key=lambda x: int(getattr(x, 'age in completed years'))
    )
    # median age
    median = calculate_median(objects, 'age in completed years')
    final_data['median_age'] = {
        "name": "Median age",
        "values": {"this": median},
    }
    # age category: bucket every single-year row into one of three bands
    under_18 = 0.0
    over_or_65 = 0.0
    between_18_64 = 0.0
    total = 0.0
    for obj in objects:
        age = int(getattr(obj, 'age in completed years'))
        total += obj.total
        if age < 18:
            under_18 += obj.total
        elif age >= 65:
            over_or_65 += obj.total
        else:
            between_18_64 += obj.total

    age_dist = OrderedDict((
        ("under_18", {
            "name": "Under 18",
            "values": {"this": share(under_18, total)}
        }),
        ("18_to_64", {
            "name": "18 to 64",
            "values": {"this": share(between_18_64, total)}
        }),
        ("65_and_over", {
            "name": "65 and over",
            "values": {"this": share(over_or_65, total)}
        })
    ))

    add_metadata(age_dist, db_model_age)

    final_data['age_category_distribution'] = age_dist

    return final_data
Ejemplo n.º 18
0
def get_households_profile(geo_code, geo_level, session):
    """Assemble the household statistics for one geography.

    Fetches the head-of-household gender, tenure status, annual household
    income, type of dwelling and household goods distributions through
    ``get_stat_data`` and derives a handful of headline figures from them.

    Args:
        geo_code: code of the geography; passed through to the data helpers.
        geo_level: level of the geography; passed through to the data
            helpers and interpolated into the explicit table names.
        session: passed through to the data helpers (presumably a database
            session -- confirm against the helpers).

    Returns:
        A dict with the keys ``total_households``, ``owned``,
        ``type_of_dwelling_distribution``, ``informal``,
        ``tenure_distribution``, ``household_goods``,
        ``annual_income_distribution``, ``median_annual_income`` and
        ``head_of_household`` (itself a dict holding
        ``gender_distribution``, ``female`` and ``under_18``).

    Raises:
        KeyError: if the gender distribution has no ``'Female'`` entry, the
            dwelling distribution has no ``'Shack'`` entry, or the median
            income bracket is missing from ``HOUSEHOLD_INCOME_ESTIMATE``.
    """
    # head of household
    # gender
    head_gender_dist, total_households = get_stat_data(
        ['gender of household head'],
        geo_level,
        geo_code,
        session,
        order_by='gender of household head')
    female_heads = head_gender_dist['Female']['numerators']['this']

    # age
    db_model_u18 = get_model_from_fields(
        ['gender of head of household'],
        geo_level,
        table_name='genderofheadofhouseholdunder18_%s' % geo_level)
    objects = get_objects_by_geo(db_model_u18, geo_code, geo_level, session)
    # Rows are indexed positionally here; element 0 is presumably the
    # household count for the row -- TODO confirm against the model.
    total_under_18 = float(sum(o[0] for o in objects))

    # tenure
    tenure_data, _ = get_stat_data(['tenure status'],
                                   geo_level,
                                   geo_code,
                                   session,
                                   order_by='tenure status')
    # .items() rather than .iteritems(): the latter only exists on
    # Python 2, while .items() works on both major versions.
    owned = sum(data['numerators']['this']
                for key, data in tenure_data.items()
                if key.startswith('Owned'))

    # annual household income
    income_dist_data, _ = get_stat_data(
        ['annual household income'],
        geo_level,
        geo_code,
        session,
        exclude=['Unspecified'],
        recode=HOUSEHOLD_INCOME_RECODE,
        # Materialise the values so the helper receives a real list on
        # Python 3 as well (where .values() is only a view).
        key_order=list(HOUSEHOLD_INCOME_RECODE.values()),
        table_name='annualhouseholdincome_genderofhouseholdhead_%s' %
        geo_level)

    # median income: the median bracket is translated into a single figure
    # through the HOUSEHOLD_INCOME_ESTIMATE lookup.
    median = calculate_median_stat(income_dist_data)
    median_income = HOUSEHOLD_INCOME_ESTIMATE[median]

    # type of dwelling
    type_of_dwelling_dist, _ = get_stat_data(['type of dwelling'],
                                             geo_level,
                                             geo_code,
                                             session,
                                             recode=TYPE_OF_DWELLING_RECODE,
                                             order_by='-total')
    informal = type_of_dwelling_dist['Shack']['numerators']['this']

    # household goods
    household_goods, _ = get_stat_data(['household goods'],
                                       geo_level,
                                       geo_code,
                                       session,
                                       total=total_households,
                                       recode=HOUSEHOLD_GOODS_RECODE,
                                       exclude=['total households'],
                                       key_order=sorted(
                                           HOUSEHOLD_GOODS_RECODE.values()))

    return {
        'total_households': {
            'name': 'Households',
            'values': {
                'this': total_households
            },
        },
        'owned': {
            'name': 'Households fully owned or being paid off',
            'values': {
                'this': percent(owned, total_households)
            },
            'numerators': {
                'this': owned
            },
        },
        'type_of_dwelling_distribution': type_of_dwelling_dist,
        'informal': {
            'name': 'Households that are informal dwellings (shacks)',
            'values': {
                'this': percent(informal, total_households)
            },
            'numerators': {
                'this': informal
            },
        },
        'tenure_distribution': tenure_data,
        'household_goods': household_goods,
        'annual_income_distribution': income_dist_data,
        # NOTE(review): the label says "Average" while the figure is derived
        # from the median bracket; left untouched because it is user-facing.
        'median_annual_income': {
            'name': 'Average annual household income',
            'values': {
                'this': median_income
            },
        },
        'head_of_household': {
            'gender_distribution': head_gender_dist,
            'female': {
                'name': 'Households with women as their head',
                'values': {
                    'this': percent(female_heads, total_households)
                },
                'numerators': {
                    'this': female_heads
                },
            },
            # Unlike its siblings this entry carries the raw sum, not a
            # percentage of total_households.
            'under_18': {
                'name': 'Households with heads under 18 years old',
                'values': {
                    'this': total_under_18
                },
            }
        },
    }
Ejemplo n.º 19
0
def get_service_delivery_profile(geo_code, geo_level, session):
    """Assemble the service delivery statistics for one geography.

    Covers four topics -- source of water, refuse disposal, electricity
    access and toilet facilities. For each one the matching rows are loaded,
    their ``total`` attributes are accumulated per category, and every
    category receives a percentage of that topic's overall total.

    Args:
        geo_code: code of the geography; passed through to the data helpers.
        geo_level: level of the geography; passed through to the data
            helpers.
        session: passed through to the data helpers (presumably a database
            session -- confirm against the helpers).

    Returns:
        A dict with the four distributions (``water_source_distribution``,
        ``refuse_disposal_distribution``,
        ``electricity_access_distribution``,
        ``toilet_facilities_distribution``) plus the headline entries
        ``percentage_water_from_service_provider``,
        ``percentage_ref_disp_from_service_provider``,
        ``percentage_electricity_access``,
        ``percentage_flush_toilet_access`` and
        ``percentage_no_toilet_access``.

    Raises:
        KeyError: if a water source or refuse disposal value is missing from
            its short-name lookup, or if no row has the toilet facility
            ``'None'``.
    """

    def _pct(numerator, denominator):
        # Share of ``denominator`` as a percentage rounded to two places.
        # When every row of a topic carries a zero total the denominator is
        # 0.0; report 0.0 rather than raising ZeroDivisionError.
        if not denominator:
            return 0.0
        return round(numerator / denominator * 100, 2)

    # water source
    db_model_wsrc = get_model_from_fields(['source of water'], geo_level)
    objects = get_objects_by_geo(db_model_wsrc, geo_code, geo_level, session,
                                 order_by='-total')
    water_src_data = OrderedDict()
    total_wsrc = 0.0
    total_water_sp = 0.0
    for obj in objects:
        attr = getattr(obj, 'source of water')
        src = SHORT_WATER_SOURCE_CATEGORIES[attr]
        water_src_data[src] = {
            "name": src,
            "numerators": {"this": obj.total},
        }
        total_wsrc += obj.total
        if attr.startswith('Regional/local water scheme'):
            total_water_sp += obj.total

    # refuse disposal
    db_model_ref = get_model_from_fields(['refuse disposal'], geo_level)
    objects = get_objects_by_geo(db_model_ref, geo_code, geo_level, session,
                                 order_by='-total')
    refuse_disp_data = OrderedDict()
    total_ref = 0.0
    total_ref_sp = 0.0
    for obj in objects:
        attr = getattr(obj, 'refuse disposal')
        disp = SHORT_REFUSE_DISPOSAL_CATEGORIES[attr]
        refuse_disp_data[disp] = {
            "name": disp,
            "numerators": {"this": obj.total},
        }
        total_ref += obj.total
        if attr.startswith('Removed by local authority'):
            total_ref_sp += obj.total

    # electricity
    elec_attrs = ['electricity for cooking',
                  'electricity for heating',
                  'electricity for lighting']
    db_model_elec = get_model_from_fields(elec_attrs, geo_level)
    objects = get_objects_by_geo(db_model_elec, geo_code, geo_level, session)
    total_elec = 0.0
    total_some_elec = 0.0
    elec_access_data = {
        'total_all_elec': {
            "name": "Have electricity for everything",
            "numerators": {"this": 0.0},
        },
        'total_some_not_all_elec': {
            "name": "Have electricity for some things",
            "numerators": {"this": 0.0},
        },
        'total_no_elec': {
            "name": "No electricity",
            "numerators": {"this": 0.0},
        }
    }
    for obj in objects:
        total_elec += obj.total
        # A use counts as electrified unless its value starts with 'no '.
        uses = [not getattr(obj, attr).startswith('no ')
                for attr in elec_attrs]
        has_all = all(uses)
        has_some = any(uses)
        if has_some:
            total_some_elec += obj.total
        if has_all:
            bucket = 'total_all_elec'
        elif has_some:
            bucket = 'total_some_not_all_elec'
        else:
            bucket = 'total_no_elec'
        elec_access_data[bucket]['numerators']['this'] += obj.total

    # toilets
    db_model_toilet = get_model_from_fields(['toilet facilities'], geo_level)
    objects = get_objects_by_geo(db_model_toilet, geo_code, geo_level, session,
                                 order_by='-total')
    toilet_data = OrderedDict()
    total_toilet = 0.0
    total_flush_toilet = 0.0
    for obj in objects:
        name = getattr(obj, 'toilet facilities')
        toilet_data[name] = {
            "name": name,
            "numerators": {"this": obj.total},
        }
        total_toilet += obj.total
        if name.startswith(('Flush', 'Chemical')):
            total_flush_toilet += obj.total

    # Read the 'None' figure before the categories are collapsed below.
    total_no_toilet = toilet_data['None']['numerators']['this']
    toilet_data = collapse_categories(toilet_data,
                                      COLLAPSED_TOILET_CATEGORIES,
                                      key_order=(
                                        'Flush toilet', 'Chemical toilet',
                                        'Pit toilet', 'Bucket toilet',
                                        'Other', 'None', 'Unspecified', 'N/A'))

    # Percentages are filled in before add_metadata() runs, so only category
    # entries are visited here.
    for data, total in zip((water_src_data, refuse_disp_data, elec_access_data, toilet_data),
                           (total_wsrc, total_ref, total_elec, total_toilet)):
        for fields in data.values():
            fields["values"] = {
                "this": _pct(fields["numerators"]["this"], total)
            }

    add_metadata(water_src_data, db_model_wsrc)
    add_metadata(refuse_disp_data, db_model_ref)
    add_metadata(elec_access_data, db_model_elec)
    add_metadata(toilet_data, db_model_toilet)

    return {'water_source_distribution': water_src_data,
            'percentage_water_from_service_provider': {
                "name": "Are getting water from a regional or local service provider",
                "numerators": {"this": total_water_sp},
                "values": {"this": _pct(total_water_sp, total_wsrc)},
            },
            'refuse_disposal_distribution': refuse_disp_data,
            'percentage_ref_disp_from_service_provider': {
                "name": "Are getting refuse disposal from a local authority or private company",
                "numerators": {"this": total_ref_sp},
                "values": {"this": _pct(total_ref_sp, total_ref)},
            },
            'percentage_electricity_access': {
                "name": "Have electricity for at least one of cooking, heating or lighting",
                "numerators": {"this": total_some_elec},
                "values": {"this": _pct(total_some_elec, total_elec)}
            },
            'electricity_access_distribution': elec_access_data,
            'percentage_flush_toilet_access': {
                "name": "Have access to flush or chemical toilets",
                "numerators": {"this": total_flush_toilet},
                "values": {"this": _pct(total_flush_toilet, total_toilet)}
            },
            'percentage_no_toilet_access': {
                "name": "Have no access to any toilets",
                "numerators": {"this": total_no_toilet},
                "values": {"this": _pct(total_no_toilet, total_toilet)}
            },
            'toilet_facilities_distribution': toilet_data,
    }
Ejemplo n.º 20
0
def get_economics_profile(geo_code, geo_level, session):
    """Assemble the economics statistics for one geography.

    Builds three distributions -- individual monthly income, official
    employment status and type of sector -- by accumulating the ``total``
    attribute of the matching rows per category, then expresses each
    category as a percentage of its distribution's overall total.

    Args:
        geo_code: code of the geography; passed through to the data helpers.
        geo_level: level of the geography; passed through to the data
            helpers and interpolated into the income table name.
        session: passed through to the data helpers (presumably a database
            session -- confirm against the helpers).

    Returns:
        A dict with the keys ``individual_income_distribution``,
        ``employment_status`` and ``sector_type_distribution``.

    Raises:
        ValueError: if ``'N/A'`` is not among the values of
            ``COLLAPSED_INCOME_CATEGORIES``.
    """

    def _pct(numerator, denominator):
        # Share of ``denominator`` as a percentage rounded to two places.
        # Categories can be present while every one of them has a zero
        # count; report 0.0 then instead of raising ZeroDivisionError.
        if not denominator:
            return 0.0
        return round(numerator / denominator * 100, 2)

    # income
    db_model_income = get_model_from_fields(['individual monthly income'],
                                            geo_level,
                                            'individualmonthlyincome_%s_employedonly'
                                            % geo_level)
    objects = get_objects_by_geo(db_model_income, geo_code, geo_level, session)
    income_dist_data = {}
    total_income = 0.0
    for obj in objects:
        income_group = getattr(obj, 'individual monthly income')
        if income_group == 'Not applicable':
            continue
        total_income += obj.total
        income_dist_data[income_group] = {
            "name": income_group,
            "numerators": {"this": obj.total},
        }
    # Copy the values into a list first: on Python 3 .values() is a view
    # without .remove(), and copying also guarantees the shared constant is
    # never mutated.
    key_order = list(COLLAPSED_INCOME_CATEGORIES.values())
    key_order.remove('N/A')
    income_dist_data = collapse_categories(income_dist_data,
                                           COLLAPSED_INCOME_CATEGORIES,
                                           key_order=key_order)

    # employment status
    db_model_employ = get_model_from_fields(['official employment status'],
                                            geo_level)
    objects = get_objects_by_geo(db_model_employ, geo_code, geo_level, session)
    employ_status = {}
    total_workers = 0.0
    for obj in objects:
        employ_st = getattr(obj, 'official employment status')
        if employ_st in ('Age less than 15 years', 'Not applicable'):
            continue
        total_workers += obj.total
        employ_status[employ_st] = {
            "name": employ_st,
            "numerators": {"this": obj.total},
        }

    # sector
    db_model_sector = get_model_from_fields(['type of sector'], geo_level)
    objects = get_objects_by_geo(db_model_sector, geo_code, geo_level,
                                 session, order_by='type of sector')
    sector_dist_data = OrderedDict()
    total_sector = 0.0
    for obj in objects:
        sector = getattr(obj, 'type of sector')
        # Empty sectors are left out of the distribution altogether.
        if sector == 'Not applicable' or obj.total == 0:
            continue
        total_sector += obj.total
        sector_dist_data[sector] = {
            "name": sector,
            "numerators": {"this": obj.total},
        }

    # Percentages are filled in before any 'metadata' entry is attached, so
    # only category entries are visited here.
    for data, total in zip((income_dist_data, sector_dist_data, employ_status),
                           (total_income, total_sector, total_workers)):
        for fields in data.values():
            fields["values"] = {
                "this": _pct(fields["numerators"]["this"], total)
            }

    income_dist_data['metadata'] = {'universe': 'Officially employed individuals'}
    employ_status['metadata'] = {'universe': 'Workers 15 and over'}

    add_metadata(income_dist_data, db_model_income)
    add_metadata(employ_status, db_model_employ)
    add_metadata(sector_dist_data, db_model_sector)

    return {'individual_income_distribution': income_dist_data,
            'employment_status': employ_status,
            'sector_type_distribution': sector_dist_data}