Ejemplo n.º 1
0
def get_education_profile(geo_code, geo_level, session):
    """Build the educational-attainment section of a place profile.

    Reads the rows of the ``highesteducationallevel20`` table for the given
    geography and produces two data dictionaries:

    * a distribution over the (collapsed) education categories, and
    * two headline figures: people in a category listed in
      ``EDUCATION_GET_OR_HIGHER`` and, of those, people in a category listed
      in ``EDUCATION_FET_OR_HIGHER``.

    Every entry carries a raw ``numerators`` count and a ``values``
    percentage of the overall total, rounded to two decimals.

    :param geo_code: code of the geography being profiled
    :param geo_level: level of the geography being profiled
    :param session: database session handed to ``get_objects_by_geo``
    :return: dict with the keys ``educational_attainment_distribution`` and
             ``educational_attainment``
    """
    db_model = get_model_from_fields(['highest educational level'], geo_level, table_name='highesteducationallevel20')
    objects = get_objects_by_geo(db_model, geo_code, geo_level, session)

    edu_dist_data = {}
    get_or_higher = 0.0
    fet_or_higher = 0.0
    total = 0.0
    for i, obj in enumerate(objects):
        category_val = getattr(obj, 'highest educational level')
        # increment counters
        total += obj.total
        if category_val in EDUCATION_GET_OR_HIGHER:
            get_or_higher += obj.total
            # FET is only counted for rows that already count towards GET
            if category_val in EDUCATION_FET_OR_HIGHER:
                fet_or_higher += obj.total
        # add data points for category
        edu_dist_data[str(i)] = {
            "name": category_val,
            "numerators": {"this": obj.total},
        }
    edu_dist_data = collapse_categories(edu_dist_data,
                                        COLLAPSED_EDUCATION_CATEGORIES,
                                        key_order=EDUCATION_KEY_ORDER)
    edu_split_data = {
        'percent_get_or_higher': {
            "name": "Completed Grade 9 or higher",
            "numerators": {"this": get_or_higher},
        },
        'percent_fet_or_higher': {
            "name": "Completed Matric or higher",
            "numerators": {"this": fet_or_higher},
        }
    }
    # calculate percentages
    for data in (edu_dist_data, edu_split_data):
        for fields in data.values():
            numerator = fields["numerators"]["this"]
            # A geography without any rows sums to zero; report 0% rather
            # than failing with a ZeroDivisionError.
            share = numerator / total * 100 if total else 0.0
            fields["values"] = {"this": round(share, 2)}

    # metadata is attached only after the percentage pass so that it is not
    # mistaken for a category entry
    edu_dist_data['metadata'] = {'universe': 'Individuals aged 20 and older'}
    edu_split_data['metadata'] = {'universe': 'Individuals aged 20 and older'}

    add_metadata(edu_dist_data, db_model)

    return {'educational_attainment_distribution': edu_dist_data,
            'educational_attainment': edu_split_data}
Ejemplo n.º 2
0
def get_education_profile(geo, session):
    """Build the educational-attainment section of a place profile.

    Reads the rows of the ``highesteducationallevel20`` table for ``geo``
    and produces two data dictionaries:

    * a distribution over the (collapsed) education categories, and
    * two headline figures: people in a category listed in
      ``EDUCATION_GET_OR_HIGHER`` and, of those, people in a category listed
      in ``EDUCATION_FET_OR_HIGHER``.

    Every entry carries a raw ``numerators`` count and a ``values``
    percentage of the overall total, rounded to two decimals.

    :param geo: geography object; its ``geo_level`` selects the data model
    :param session: database session handed to ``get_objects_by_geo``
    :return: dict with the keys ``educational_attainment_distribution`` and
             ``educational_attainment``
    """
    db_model = get_model_from_fields(['highest educational level'], geo.geo_level, table_name='highesteducationallevel20')
    objects = get_objects_by_geo(db_model, geo, session)

    edu_dist_data = {}
    get_or_higher = 0.0
    fet_or_higher = 0.0
    total = 0.0
    for i, obj in enumerate(objects):
        category_val = getattr(obj, 'highest educational level')
        # increment counters
        total += obj.total
        if category_val in EDUCATION_GET_OR_HIGHER:
            get_or_higher += obj.total
            # FET is only counted for rows that already count towards GET
            if category_val in EDUCATION_FET_OR_HIGHER:
                fet_or_higher += obj.total
        # add data points for category
        edu_dist_data[str(i)] = {
            "name": category_val,
            "numerators": {"this": obj.total},
        }
    edu_dist_data = collapse_categories(edu_dist_data,
                                        COLLAPSED_EDUCATION_CATEGORIES,
                                        key_order=EDUCATION_KEY_ORDER)
    edu_split_data = {
        'percent_get_or_higher': {
            "name": "Completed Grade 9 or higher",
            "numerators": {"this": get_or_higher},
        },
        'percent_fet_or_higher': {
            "name": "Completed Matric or higher",
            "numerators": {"this": fet_or_higher},
        }
    }
    # calculate percentages
    for data in (edu_dist_data, edu_split_data):
        for fields in data.values():
            numerator = fields["numerators"]["this"]
            # A geography without any rows sums to zero; report 0% rather
            # than failing with a ZeroDivisionError.
            share = numerator / total * 100 if total else 0.0
            fields["values"] = {"this": round(share, 2)}

    # metadata is attached only after the percentage pass so that it is not
    # mistaken for a category entry
    edu_dist_data['metadata'] = {'universe': 'Individuals aged 20 and older'}
    edu_split_data['metadata'] = {'universe': 'Individuals aged 20 and older'}

    add_metadata(edu_dist_data, db_model)

    return {'educational_attainment_distribution': edu_dist_data,
            'educational_attainment': edu_split_data}
Ejemplo n.º 3
0
def get_education_profile(geo_code, geo_level, session):
    """Assemble the youth education figures for a place profile.

    Combines grade 9 completion statistics (overall and split by gender)
    from the 16-17 age table with education-level statistics from the
    20-24 age table.

    :param geo_code: code of the geography being profiled
    :param geo_level: level of the geography being profiled
    :param session: database session passed on to ``get_stat_data``
    :return: dict with the keys ``youth_completed_grade9``,
             ``youth_perc_completed_grade9``,
             ``youth_gender_completed_grade9``, ``youth_perc_matric`` and
             ``youth_education_level``
    """
    grade9_table = 'youth_age_16_to_17_gender_completed_grade9'
    education_table = 'youth_age_20_to_24_gender_education_level'

    # grade 9 completion, overall
    grade9_stats, _ = get_stat_data(
        ['completed grade9'], geo_level, geo_code, session,
        table_name=grade9_table)

    # grade 9 completion, nested by gender
    grade9_by_gender, _ = get_stat_data(
        ['gender', 'completed grade9'], geo_level, geo_code, session,
        table_name=grade9_table)
    gender_model = get_model_from_fields(
        ['gender'], geo_level, table_name=grade9_table)

    # census data refers to sex as gender
    grade9_gender_data = OrderedDict()
    for gender in ('Female', 'Male'):
        # only the 'Yes' answers are reported per gender
        completed = grade9_by_gender[gender]['Yes']
        grade9_gender_data[gender] = {
            "name": gender,
            "values": {"this": completed['values']['this']},
            "numerators": {"this": completed['numerators']['this']},
        }
    add_metadata(grade9_gender_data, gender_model)

    # education level of 20-24 year olds; the second value is the total
    education_stats, pop_20_to_24 = get_stat_data(
        ['education level'], geo_level, geo_code, session,
        table_name=education_table)

    def numerator(level):
        return education_stats[level]['numerators']['this']

    # NOTE(review): 'Some secondary' is counted towards "matric or matric
    # equivalent" here -- confirm this is intended.
    matric_or_equiv = (
        numerator('Matric') + numerator('Tertiary') + numerator('Some secondary'))

    return {
        'youth_completed_grade9': grade9_stats,
        'youth_perc_completed_grade9': {
            "name": "Of youth aged 16-17 have completed grade 9",
            "values": {"this": grade9_stats['Yes']['values']['this']},
        },
        'youth_gender_completed_grade9': grade9_gender_data,
        'youth_perc_matric': {
            "name": "Of youth aged 20-24 have completed matric or matric equivalent",
            "values": {"this": percent(matric_or_equiv, pop_20_to_24)},
        },
        'youth_education_level': education_stats,
    }
Ejemplo n.º 4
0
def get_service_delivery_profile(geo, session):
    """Build the service-delivery section of a place profile.

    Covers three services for ``geo``:

    * water: distribution by source, plus the share supplied by a
      'Service provider';
    * electricity: distribution over "all / some / none" of cooking,
      heating and lighting, plus the share with electricity for at least
      one of them;
    * toilets: distribution by facility type, plus the shares with flush or
      chemical toilets and with no toilet at all.

    :param geo: geography object; its ``geo_level`` selects the data model
    :param session: database session passed on to the data helpers
    :return: dict of data dictionaries keyed by statistic name
    """
    # water source
    water_src_data, total_wsrc = get_stat_data(
        ['source of water'],
        geo,
        session,
        recode=SHORT_WATER_SOURCE_CATEGORIES,
        order_by='-total')
    if 'Service provider' in water_src_data:
        total_water_sp = water_src_data['Service provider']['numerators'][
            'this']
    else:
        total_water_sp = 0.0

    # electricity
    elec_attrs = [
        'electricity for cooking', 'electricity for heating',
        'electricity for lighting'
    ]
    db_model_elec = get_model_from_fields(elec_attrs, geo.geo_level)
    objects = get_objects_by_geo(db_model_elec, geo, session)
    total_elec = 0.0
    total_some_elec = 0.0
    elec_access_data = {
        'total_all_elec': {
            "name": "Have electricity for everything",
            "numerators": {
                "this": 0.0
            },
        },
        'total_some_not_all_elec': {
            "name": "Have electricity for some things",
            "numerators": {
                "this": 0.0
            },
        },
        'total_no_elec': {
            "name": "No electricity",
            "numerators": {
                "this": 0.0
            },
        }
    }
    for obj in objects:
        total_elec += obj.total
        # a use counts as electrified unless its value starts with 'no '
        uses_elec = [
            not getattr(obj, attr).startswith('no ') for attr in elec_attrs
        ]
        has_some = any(uses_elec)
        has_all = all(uses_elec)
        if has_some:
            total_some_elec += obj.total
        if has_all:
            bucket = 'total_all_elec'
        elif has_some:
            bucket = 'total_some_not_all_elec'
        else:
            bucket = 'total_no_elec'
        elec_access_data[bucket]['numerators']['this'] += obj.total

    for fields in elec_access_data.values():
        fields["values"] = {
            "this": percent(fields["numerators"]["this"], total_elec)
        }

    add_metadata(elec_access_data, db_model_elec)

    # toilets
    toilet_data, total_toilet = get_stat_data(
        ['toilet facilities'],
        geo,
        session,
        exclude_zero=True,
        recode=COLLAPSED_TOILET_CATEGORIES,
        order_by='-total')

    total_flush_toilet = 0.0
    total_no_toilet = 0.0
    # items() rather than iteritems(): the latter only exists on Python 2
    for key, data in toilet_data.items():
        if key.startswith(('Flush', 'Chemical')):
            total_flush_toilet += data['numerators']['this']
        if key == 'None':
            total_no_toilet += data['numerators']['this']

    return {
        'water_source_distribution': water_src_data,
        'percentage_water_from_service_provider': {
            "name":
            "Are getting water from a regional or local service provider",
            "numerators": {
                "this": total_water_sp
            },
            "values": {
                "this": percent(total_water_sp, total_wsrc)
            },
        },
        'percentage_electricity_access': {
            "name":
            "Have electricity for at least one of cooking, heating or lighting",
            "numerators": {
                "this": total_some_elec
            },
            "values": {
                "this": percent(total_some_elec, total_elec)
            },
        },
        'electricity_access_distribution': elec_access_data,
        'percentage_flush_toilet_access': {
            "name": "Have access to flush or chemical toilets",
            "numerators": {
                "this": total_flush_toilet
            },
            "values": {
                "this": percent(total_flush_toilet, total_toilet)
            },
        },
        'percentage_no_toilet_access': {
            "name": "Have no access to any toilets",
            "numerators": {
                "this": total_no_toilet
            },
            "values": {
                "this": percent(total_no_toilet, total_toilet)
            },
        },
        'toilet_facilities_distribution': toilet_data,
    }
Ejemplo n.º 5
0
    def get_stat_data(
        self,
        fields,
        geo,
        session,
        order_by=None,
        percent=True,
        total=None,
        only=None,
        exclude=None,
        exclude_zero=False,
        recode=None,
        key_order=None,
        percent_grouping=None,
        slices=None,
        year=None,
        db_table=None,
    ):
        """
        This is our primary helper routine for building a dictionary suitable for
        a place's profile page, based on a statistic.

        It sums over the data for ``fields`` in the database for the place identified by
        ``geo`` and calculates numerators and values. If multiple fields are given,
        it creates nested result dictionaries.

        Control the rows that are included or ignored using ``only``, ``exclude`` and ``exclude_zero``.

        The field values can be recoded using ``recode`` and re-ordered using ``key_order``.

        :param fields: the census field to build stats for. Specify a list of fields to build
                       nested statistics. If multiple fields are specified, then the values
                       of parameters such as ``only``, ``exclude`` and ``recode`` will change.
                       These must be fields in `api.models.census.census_fields`, e.g. 'highest educational level'
        :type fields: str or list
        :param geo: the geography object
        :param dbsession session: sqlalchemy session
        :param str order_by: field to order by, or None to order by the first field, eg. '-total'
        :param bool percent: should we calculate percentages, or just sum raw values?
        :param list percent_grouping: when calculating percentages, which fields should rows be grouped by?
                                      Default: none of them -- calculate each entry as a percentage of the
                                      whole dataset. Ignored unless ``percent`` is ``True``.
        :param int total: the total value to use for percentages, or None to total columns automatically
        :param list only: only include these field values. If ``fields`` has many items, this must be a dict
                          mapping field names to a list of strings.
        :type only: dict or list
        :param exclude: ignore these field values. If ``fields`` has many items, this must be a dict
                        mapping field names to a list of strings. Field names are checked
                        before any recoding.
        :type exclude: dict or list
        :param bool exclude_zero: ignore fields that have a zero or null total
        :param recode: function or dict to recode the field values. If ``fields`` is a singleton,
                       then the keys of this dict must be the values to recode from, otherwise
                       they must be the field names and then the values. If this is a lambda,
                       it is called with the field name and its value as arguments.
        :type recode: dict or lambda
        :param key_order: ordering for keys in result dictionary. If ``fields`` has many items,
                          this must be a dict from field names to orderings.
                          The default ordering is determined by ``order_by``.
        :type key_order: dict or list
        :param list slices: return only a slice of the final data, by choosing a single value for each
                           field in the field list, as specified in the slice list.
        :param str year: release year to use. None will try to use the current dataset context, and 'latest'
                         will use the latest release.
        :param str db_table: database table and release to use. None will try
                             to use `year` if given, and the current dataset context.

        :return: (data-dictionary, total)
        :raises ValueError: if ``only``, ``exclude`` or ``key_order`` is not a dict while many
                            fields are given; if ``total`` is combined with many fields or with
                            ``percent_grouping``; if percentages are requested on a table without
                            a total column and no ``total`` is supplied; or if ``percent_grouping``
                            names a field that is not in ``fields``.
        """
        if not isinstance(fields, list):
            fields = [fields]

        n_fields = len(fields)
        many_fields = n_fields > 1

        if order_by is None:
            order_by = fields[0]

        if only is not None:
            if not isinstance(only, dict):
                if many_fields:
                    raise ValueError(
                        "If many fields are given, then only must be a dict. I got %s instead"
                        % only
                    )
                else:
                    only = {fields[0]: set(only)}

        if exclude is not None:
            if not isinstance(exclude, dict):
                if many_fields:
                    raise ValueError(
                        "If many fields are given, then exclude must be a dict. I got %s instead"
                        % exclude
                    )
                else:
                    exclude = {fields[0]: set(exclude)}

        if key_order:
            if not isinstance(key_order, dict):
                if many_fields:
                    raise ValueError(
                        "If many fields are given, then key_order must be a dict. I got %s instead"
                        % key_order
                    )
                else:
                    key_order = {fields[0]: key_order}
        else:
            key_order = {}

        if recode:
            # normalise to a dict keyed by field name; a single-field dict or
            # a callable applies to every field
            if not isinstance(recode, dict) or not many_fields:
                recode = dict((f, recode) for f in fields)

        # get the release and underlying database table
        db_table = db_table or self.get_db_table(year=year)
        objects = self.get_rows_for_geo(
            geo,
            session,
            fields=fields,
            order_by=order_by,
            only=only,
            exclude=exclude,
            db_table=db_table,
        )

        if total is not None and many_fields:
            raise ValueError("Cannot specify a total if many fields are given")

        if total and percent_grouping:
            raise ValueError("Cannot specify a total if percent_grouping is given")

        if total is None and percent and self.total_column is None:
            # The table doesn't support calculating percentages, but the caller
            # has asked for a percentage without providing a total value to use.
            # Either specify a total, or specify percent=False
            raise ValueError(
                "Asking for a percent on table %s that doesn't support totals and no total parameter specified."
                % self.name
            )

        # sanity check the percent grouping
        if percent:
            if percent_grouping:
                for field in percent_grouping:
                    if field not in fields:
                        raise ValueError(
                            "Field '%s' specified in percent_grouping must be in the fields list."
                            % field
                        )
                # re-order percent grouping to be same order as in the field list
                percent_grouping = [f for f in fields if f in percent_grouping]
        else:
            percent_grouping = None

        root_data = OrderedDict()
        running_total = 0
        group_totals = {}
        # -1 is a sentinel for "no denominator row seen"
        grand_total = -1

        def get_recoded_key(recode, field, key):
            recoder = recode[field]
            if isinstance(recoder, dict):
                return recoder.get(key, key)
            else:
                return recoder(field, key)

        def get_data_object(obj):
            """ Recurse down the list of fields and return the
            final resting place for data for this stat. """
            data = root_data

            for i, field in enumerate(fields):
                key = getattr(obj, field)

                if recode and field in recode:
                    key = get_recoded_key(recode, field, key)
                else:
                    key = capitalize(key)

                # enforce key ordering the first time we see this field.
                # list(data) rather than data.keys(): on Python 3 keys() is
                # a view and never compares equal to a list, which would
                # silently skip the ordering for nested fields.
                if (not data or list(data) == ["metadata"]) and field in key_order:
                    for fld in key_order[field]:
                        data[fld] = OrderedDict()

                # ensure it's there
                if key not in data:
                    data[key] = OrderedDict()

                data = data[key]

                # default values for intermediate fields
                if data is not None and i < n_fields - 1:
                    data["metadata"] = {"name": key}

            # data is now the dict where the end value is going to go
            if not data:
                data["name"] = key
                data["numerators"] = {"this": 0.0}

            return data

        # run the stats for the objects
        for obj in objects:
            if not obj.total and exclude_zero:
                continue

            if (
                self.denominator_key
                and getattr(obj, self.fields[-1]) == self.denominator_key
            ):
                grand_total = obj.total
                # don't include the denominator key in the output
                continue

            # get the data dict where these values must go
            data = get_data_object(obj)
            if not data:
                continue

            if obj.total is not None:
                data["numerators"]["this"] += obj.total
                running_total += obj.total
            else:
                # TODO: sanity check this is the right thing to do for multiple fields with
                # nested nulls -- does aggregating over nulls treat them as zero, or should we
                # treat them as null?
                data["numerators"]["this"] = None

            if percent_grouping:
                if obj.total is not None:
                    group_key = tuple()
                    for field in percent_grouping:
                        key = getattr(obj, field)
                        if recode and field in recode:
                            # Group by recoded keys
                            key = get_recoded_key(recode, field, key)
                        group_key = group_key + (key,)

                    data["_group_key"] = group_key
                    group_totals[group_key] = group_totals.get(group_key, 0) + obj.total

        if grand_total == -1:
            grand_total = running_total if total is None else total

        # add in percentages
        def calc_percent(node):
            """ Walk the nested result dicts and fill in ``values`` on
            every leaf (a dict that has ``numerators``). """
            for key, entry in node.items():
                if key == "metadata":
                    continue

                if "numerators" not in entry:
                    # intermediate level, keep descending
                    calc_percent(entry)
                    continue

                if not percent:
                    # raw values requested: move the sums into ``values``
                    entry["values"] = dict(entry["numerators"])
                    entry["numerators"]["this"] = None
                    continue

                # the temporary grouping marker is removed from the output
                if "_group_key" in entry:
                    denominator = group_totals[entry.pop("_group_key")]
                else:
                    denominator = grand_total

                if (
                    denominator is not None
                    and entry["numerators"]["this"] is not None
                ):
                    perc = (
                        0
                        if denominator == 0
                        else (entry["numerators"]["this"] / denominator * 100)
                    )
                    entry["values"] = {"this": round(perc, 2)}
                else:
                    entry["values"] = {"this": None}

        calc_percent(root_data)

        if slices:
            for v in slices:
                root_data = root_data[v]

        add_metadata(root_data, self, db_table.active_release)

        return root_data, grand_total
Ejemplo n.º 6
0
    def get_stat_data(
        self,
        geo,
        fields=None,
        key_order=None,
        percent=True,
        total=None,
        recode=None,
        year=None,
    ):
        """ Get a data dictionary for a place from this table.

        This fetches the values for each column in this table and returns a data
        dictionary for those values, with appropriate names and metadata.

        :param geo: the geography
        :param str or list fields: the columns to fetch stats for. By default, all columns except
                                   the total column (if any) are used.
        :param str key_order: explicit ordering of keys, or None for the default order.
                              Default order is the order in +fields+ if given, otherwise
                              it's the natural column order from the DB.
                              NOTE(review): each entry is looked up as a column name on the
                              fetched row, so entries must be fetched field names.
        :param bool percent: should we calculate percentages, or just include raw values?
        :param int total: the total value to use for percentages, name of a
                          field, or None to use the sum of all retrieved fields (default)
        :param dict recode: map from field names to strings to recode column names. Many fields
                            can be recoded to the same thing, their values will be summed.
                            A callable is also accepted; it is called with each field name.
        :param str year: release year to use. None will try to use the current dataset context, and 'latest'
                         will use the latest release.

        :return: (data-dictionary, total)
        :raises ValueError: if a requested field, or a ``total`` given by name, is not a
                            column of this table.
        """
        db_table = self.get_db_table(year=year or current_context().get("year"))
        model = db_table.model
        columns = self.columns(db_table)

        session = get_session()
        try:
            if fields is not None and not isinstance(fields, list):
                fields = [fields]
            if fields:
                for f in fields:
                    if f not in columns:
                        raise ValueError(
                            "Invalid field/column '%s' for table '%s'. Valid columns are: %s"
                            % (f, self.id, ", ".join(columns.keys()))
                        )
            else:
                # copy into a list so the total column can be dropped; a
                # keys view (Python 3) has no remove()
                fields = list(columns.keys())
                if self.total_column:
                    fields.remove(self.total_column)

            recode = recode or {}
            if recode:
                # change lambda to dicts
                if not isinstance(recode, dict):
                    recode = {f: recode(f) for f in fields}

            # is the total column valid?
            if isinstance(total, basestring) and total not in columns:
                raise ValueError(
                    "Total column '%s' isn't one of the columns for table '%s'. Valid columns are: %s"
                    % (total, self.id, ", ".join(columns.keys()))
                )

            # table columns to fetch
            cols = [model.__table__.columns[c] for c in fields]

            # If the denominator is a named column that isn't already being
            # fetched, fetch it too. Compare against the field names (a
            # string never matches a Column object) and add the real Column
            # rather than a bare string.
            if isinstance(total, basestring) and total not in fields:
                cols.append(model.__table__.columns[total])

            # do the query. If this returns no data, row is None
            row = (
                session.query(*cols)
                .filter(
                    model.geo_level == geo.geo_level,
                    model.geo_code == geo.geo_code,
                    model.geo_version == geo.version,
                )
                .first()
            )

            if row is None:
                row = ZeroRow()

            # what's our denominator?
            if total is None:
                # sum of all columns
                total = sum(getattr(row, f) or 0 for f in fields)
            elif isinstance(total, basestring):
                total = getattr(row, total)

            # Now build a data dictionary based on the columns in +row+.
            # Multiple columns may be recoded into one, so we have to
            # accumulate values as we go.
            results = OrderedDict()

            key_order = (
                key_order or fields
            )  # default key order is just the list of fields

            for field in key_order:
                val = getattr(row, field) or 0

                # recode the key for this field, default is to keep it the same
                key = recode.get(field, field)

                # set the recoded field name, noting that the key may already
                # exist if another column recoded to it
                field_info = results.setdefault(
                    key, {"name": recode.get(field, columns[field]["name"])}
                )

                if percent:
                    # sum up existing values, if any
                    val = val + field_info.get("numerators", {}).get("this", 0)
                    field_info["values"] = {"this": p(val, total)}
                    field_info["numerators"] = {"this": val}
                else:
                    # sum up existing values, if any
                    val = val + field_info.get("values", {}).get("this", 0)
                    field_info["values"] = {"this": val}

            add_metadata(results, self, db_table.active_release)
            return results, total
        finally:
            session.close()
Ejemplo n.º 7
0
    def get_stat_data(self, geo_level, geo_code, fields=None, key_order=None,
                      percent=True, total=None, recode=None):
        """ Get a data dictionary for a place from this table.

        This fetches the values for each column in this table and returns a data
        dictionary for those values, with appropriate names and metadata.

        :param str geo_level: the geographical level
        :param str geo_code: the geographical code
        :param str or list fields: the columns to fetch stats for. By default, all columns except
                                   the total column (if any) are used.
        :param str key_order: explicit ordering of keys, or None for the default order.
                              Default order is the order in +fields+ if given, otherwise
                              it's the natural column order from the DB.
                              NOTE(review): each entry is looked up as a column name on the
                              fetched row, so entries must be fetched field names.
        :param bool percent: should we calculate percentages, or just include raw values?
        :param int total: the total value to use for percentages, name of a
                          field, or None to use the sum of all retrieved fields (default)
        :param dict recode: map from field names to strings to recode column names. Many fields
                            can be recoded to the same thing, their values will be summed.
                            A callable is also accepted; it is called with each field name.

        :return: (data-dictionary, total)
        :raises ValueError: if a requested field, or a ``total`` given by name, is not a
                            column of this table.
        """

        session = get_session()
        try:
            if fields is not None and not isinstance(fields, list):
                fields = [fields]
            if fields:
                for f in fields:
                    if f not in self.columns:
                        raise ValueError("Invalid field/column '%s' for table '%s'. Valid columns are: %s" % (
                            f, self.id, ', '.join(self.columns.keys())))
            else:
                # copy into a list so the total column can be dropped; a
                # keys view (Python 3) has no remove()
                fields = list(self.columns.keys())
                if self.total_column:
                    fields.remove(self.total_column)

            recode = recode or {}
            if recode:
                # change lambda to dicts
                if not isinstance(recode, dict):
                    recode = {f: recode(f) for f in fields}

            # is the total column valid?
            if isinstance(total, basestring) and total not in self.columns:
                raise ValueError("Total column '%s' isn't one of the columns for table '%s'. Valid columns are: %s" % (
                    total, self.id, ', '.join(self.columns.keys())))

            # table columns to fetch
            cols = [self.model.columns[c] for c in fields]

            # If the denominator is a named column that isn't already being
            # fetched, fetch it too. Compare against the field names (a
            # string never matches a Column object) and add the real Column
            # rather than a bare string.
            if isinstance(total, basestring) and total not in fields:
                cols.append(self.model.columns[total])

            # do the query. If this returns no data, row is None
            row = session\
                .query(*cols)\
                .filter(self.model.c.geo_level == geo_level,
                        self.model.c.geo_code == geo_code)\
                .first()

            if row is None:
                row = ZeroRow()

            # what's our denominator?
            if total is None:
                # sum of all columns
                total = sum(getattr(row, f) or 0 for f in fields)
            elif isinstance(total, basestring):
                total = getattr(row, total)

            # Now build a data dictionary based on the columns in +row+.
            # Multiple columns may be recoded into one, so we have to
            # accumulate values as we go.
            results = OrderedDict()

            key_order = key_order or fields  # default key order is just the list of fields

            for field in key_order:
                val = getattr(row, field) or 0

                # recode the key for this field, default is to keep it the same
                key = recode.get(field, field)

                # set the recoded field name, noting that the key may already
                # exist if another column recoded to it
                field_info = results.setdefault(key, {'name': recode.get(field, self.columns[field]['name'])})

                if percent:
                    # sum up existing values, if any
                    val = val + field_info.get('numerators', {}).get('this', 0)
                    field_info['values'] = {'this': p(val, total)}
                    field_info['numerators'] = {'this': val}
                else:
                    # sum up existing values, if any
                    val = val + field_info.get('values', {}).get('this', 0)
                    field_info['values'] = {'this': val}

            add_metadata(results, self)
            return results, total
        finally:
            session.close()
Ejemplo n.º 8
0
def get_service_delivery_profile(geo, session):
    """
    Assemble the service delivery statistics (water source, electricity and
    toilet facilities) for the place identified by ``geo``.

    :param geo: the geography object to build the profile for
    :param session: database session handed through to the data helpers
    :return: dict of distributions and headline figures; each headline entry
             carries ``name``, ``numerators`` and ``values``
    """
    # --- water source ---
    water_src_data, total_wsrc = get_stat_data(
        ['source of water'], geo, session,
        recode=SHORT_WATER_SOURCE_CATEGORIES,
        order_by='-total')
    total_water_sp = (
        water_src_data['Service provider']['numerators']['this']
        if 'Service provider' in water_src_data else 0.0)

    # --- electricity ---
    elec_attrs = ['electricity for cooking',
                  'electricity for heating',
                  'electricity for lighting']
    db_model_elec = get_model_from_fields(elec_attrs, geo.geo_level)
    rows = get_objects_by_geo(db_model_elec, geo, session)
    total_elec = 0.0
    total_some_elec = 0.0
    elec_access_data = {
        'total_all_elec': {
            "name": "Have electricity for everything",
            "numerators": {"this": 0.0},
        },
        'total_some_not_all_elec': {
            "name": "Have electricity for some things",
            "numerators": {"this": 0.0},
        },
        'total_no_elec': {
            "name": "No electricity",
            "numerators": {"this": 0.0},
        }
    }
    for row in rows:
        count = row.total
        total_elec += count

        # a use counts as electrified unless its value starts with 'no '
        electrified = [not getattr(row, attr).startswith('no ')
                       for attr in elec_attrs]
        has_some = any(electrified)

        if has_some:
            total_some_elec += count

        # each row lands in exactly one of the three buckets
        if all(electrified):
            bucket = 'total_all_elec'
        elif has_some:
            bucket = 'total_some_not_all_elec'
        else:
            bucket = 'total_no_elec'
        elec_access_data[bucket]['numerators']['this'] += count

    # express every electricity bucket as a share of all rows counted
    for entry in elec_access_data.values():
        entry["values"] = {"this": percent(entry["numerators"]["this"], total_elec)}

    add_metadata(elec_access_data, db_model_elec)

    # --- toilets ---
    toilet_data, total_toilet = get_stat_data(
        ['toilet facilities'], geo, session,
        exclude_zero=True,
        recode=COLLAPSED_TOILET_CATEGORIES,
        order_by='-total')

    total_flush_toilet = 0.0
    total_no_toilet = 0.0
    for category, stats in toilet_data.iteritems():
        if category.startswith(('Flush', 'Chemical')):
            total_flush_toilet += stats['numerators']['this']
        if category == 'None':
            total_no_toilet += stats['numerators']['this']

    profile = {
        'water_source_distribution': water_src_data,
        'percentage_water_from_service_provider': {
            "name": "Are getting water from a regional or local service provider",
            "numerators": {"this": total_water_sp},
            "values": {"this": percent(total_water_sp, total_wsrc)},
        },
        'percentage_electricity_access': {
            "name": "Have electricity for at least one of cooking, heating or lighting",
            "numerators": {"this": total_some_elec},
            "values": {"this": percent(total_some_elec, total_elec)},
        },
        'electricity_access_distribution': elec_access_data,
        'percentage_flush_toilet_access': {
            "name": "Have access to flush or chemical toilets",
            "numerators": {"this": total_flush_toilet},
            "values": {"this": percent(total_flush_toilet, total_toilet)},
        },
        'percentage_no_toilet_access': {
            "name": "Have no access to any toilets",
            "numerators": {"this": total_no_toilet},
            "values": {"this": percent(total_no_toilet, total_toilet)},
        },
        'toilet_facilities_distribution': toilet_data,
    }
    return profile
Ejemplo n.º 9
0
def get_service_delivery_profile(geo, session):
    """
    Assemble the service delivery statistics (water source, refuse disposal,
    toilets and -- for 2011 geographies only -- electricity) for ``geo``.

    :param geo: the geography object to build the profile for
    :param session: database session handed through to the data helpers
    :return: dict of distributions and headline figures; each headline entry
             carries ``name``, ``numerators`` and ``values``
    """
    # the electricity breakdown is only produced for version '2011' geographies
    include_elec = geo.version == '2011'

    # --- water source ---
    water_src_data, total_wsrc = get_stat_data(
            ['source of water'], geo, session,
            recode=SHORT_WATER_SOURCE_CATEGORIES,
            order_by='-total')
    total_water_sp = (
        water_src_data['Service provider']['numerators']['this']
        if 'Service provider' in water_src_data else 0.0)

    # --- refuse disposal ---
    db_model_ref = get_model_from_fields(['refuse disposal'], geo.geo_level)
    refuse_rows = get_objects_by_geo(db_model_ref, geo, session, order_by='-total')
    refuse_disp_data = OrderedDict()
    total_ref = 0.0
    total_ref_sp = 0.0
    for row in refuse_rows:
        raw_category = getattr(row, 'refuse disposal')
        short_name = SHORT_REFUSE_DISPOSAL_CATEGORIES[raw_category]
        count = row.total
        refuse_disp_data[short_name] = {
            "name": short_name,
            "numerators": {"this": count},
        }
        total_ref += count
        # the service-provider test looks at the raw (un-shortened) category
        if raw_category.startswith('Removed by local authority'):
            total_ref_sp += count
    set_percent_values(refuse_disp_data, total_ref)
    add_metadata(refuse_disp_data, db_model_ref)

    # --- electricity ---
    if include_elec:
        elec_attrs = ['electricity for cooking',
                      'electricity for heating',
                      'electricity for lighting']
        db_model_elec = get_model_from_fields(elec_attrs, geo.geo_level)
        elec_rows = get_objects_by_geo(db_model_elec, geo, session)
        total_elec = 0.0
        total_some_elec = 0.0
        elec_access_data = {
            'total_all_elec': {
                "name": "Have electricity for everything",
                "numerators": {"this": 0.0},
            },
            'total_some_not_all_elec': {
                "name": "Have electricity for some things",
                "numerators": {"this": 0.0},
            },
            'total_no_elec': {
                "name": "No electricity",
                "numerators": {"this": 0.0},
            }
        }
        for row in elec_rows:
            count = row.total
            total_elec += count

            # a use counts as electrified unless its value starts with 'no '
            electrified = [not getattr(row, attr).startswith('no ')
                           for attr in elec_attrs]
            has_some = any(electrified)

            if has_some:
                total_some_elec += count

            # each row lands in exactly one of the three buckets
            if all(electrified):
                bucket = 'total_all_elec'
            elif has_some:
                bucket = 'total_some_not_all_elec'
            else:
                bucket = 'total_no_elec'
            elec_access_data[bucket]['numerators']['this'] += count
        set_percent_values(elec_access_data, total_elec)
        add_metadata(elec_access_data, db_model_elec)

    # --- toilets ---
    toilet_data, total_toilet = get_stat_data(
            ['toilet facilities'], geo, session,
            exclude_zero=True,
            recode=COLLAPSED_TOILET_CATEGORIES,
            order_by='-total')

    total_flush_toilet = 0.0
    total_no_toilet = 0.0
    for category, stats in toilet_data.iteritems():
        if category.startswith(('Flush', 'Chemical')):
            total_flush_toilet += stats['numerators']['this']
        if category == 'None':
            total_no_toilet += stats['numerators']['this']

    profile = {
        'water_source_distribution': water_src_data,
        'percentage_water_from_service_provider': {
            "name": "Are getting water from a regional or local service provider",
            "numerators": {"this": total_water_sp},
            "values": {"this": percent(total_water_sp, total_wsrc)},
        },
        'refuse_disposal_distribution': refuse_disp_data,
        'percentage_ref_disp_from_service_provider': {
            "name": "Are getting refuse disposal from a local authority or private company",
            "numerators": {"this": total_ref_sp},
            "values": {"this": percent(total_ref_sp, total_ref)},
        },
        'percentage_flush_toilet_access': {
            "name": "Have access to flush or chemical toilets",
            "numerators": {"this": total_flush_toilet},
            "values": {"this": percent(total_flush_toilet, total_toilet)},
        },
        'percentage_no_toilet_access': {
            "name": "Have no access to any toilets",
            "numerators": {"this": total_no_toilet},
            "values": {"this": percent(total_no_toilet, total_toilet)},
        },
        'toilet_facilities_distribution': toilet_data,
    }
    if include_elec:
        profile.update({
            'percentage_electricity_access': {
                "name": "Have electricity for at least one of cooking, heating or lighting",
                "numerators": {"this": total_some_elec},
                "values": {"this": percent(total_some_elec, total_elec)},
            },
            'electricity_access_distribution': elec_access_data,
        })
    return profile
Ejemplo n.º 10
0
def get_demographics_profile(geo_code, geo_level, session):
    """
    Build the demographics section of the profile for one geography.

    Covers population group, language, age, sex, citizenship and
    place-of-birth statistics, plus population density when the geography
    has a known area.

    Missing data is tolerated: a geography with no male rows, no population
    or no 'Yes' citizenship row yields zero figures rather than an exception.

    :param geo_code: code of the geography to profile
    :param geo_level: level of the geography
    :param session: database session used for all queries
    :return: dict mapping statistic names to data dictionaries
    """
    # population group
    pop_dist_data, total_pop = get_stat_data(
            ['population group'], geo_level, geo_code, session)

    def population_share(count):
        # percentage of the total population, or 0 for an empty geography
        # (avoids dividing by zero)
        if not total_pop:
            return 0
        return round(count / total_pop * 100, 2)

    # language
    language_data, _ = get_stat_data(
            ['language'], geo_level, geo_code, session, order_by='-total')
    # rows are ordered by descending total, so the first key is the most spoken
    language_most_spoken = language_data[language_data.keys()[0]]

    # age groups
    age_dist_data, total_age = get_stat_data(
            ['age groups in 5 years'], geo_level, geo_code, session,
            table_name='agegroupsin5years',
            recode=COLLAPSED_AGE_CATEGORIES,
            key_order=('0-9', '10-19',
                       '20-29', '30-39',
                       '40-49', '50-59',
                       '60-69', '70-79',
                       '80+'))

    # sex
    db_model_sex = get_model_from_fields(['gender'], geo_level, table_name='gender')
    query = session.query(func.sum(db_model_sex.total)) \
                   .filter(db_model_sex.gender == 'Male')
    query = query.filter(db_model_sex.geo_code == geo_code)
    total_male = query.one()[0]
    if total_male is None:
        # SUM() over zero matching rows yields NULL/None
        total_male = 0
    # females are derived as everyone who is not counted as male
    total_female = total_pop - total_male

    sex_data = OrderedDict((  # census data refers to sex as gender
            ('Female', {
                "name": "Female",
                "values": {"this": population_share(total_female)},
                "numerators": {"this": total_female},
            }),
            ('Male', {
                "name": "Male",
                "values": {"this": population_share(total_male)},
                "numerators": {"this": total_male},
            }),
        ))

    add_metadata(sex_data, db_model_sex)

    final_data = {
        'language_distribution': language_data,
        'language_most_spoken': language_most_spoken,
        'population_group_distribution': pop_dist_data,
        'age_group_distribution': age_dist_data,
        'sex_ratio': sex_data,
        'total_population': {
            "name": "People",
            "values": {"this": total_pop},
        }
    }

    # density is only reported when the geography has a non-zero area
    geo = geo_data.get_geography(geo_code, geo_level)
    if geo.square_kms:
        final_data['population_density'] = {
            'name': "people per square kilometre",
            'values': {"this": total_pop / geo.square_kms},
        }

    # median age/age category
    db_model_age = get_model_from_fields(
        ['age in completed years'], geo_level,
        table_name='ageincompletedyears'
    )
    # the age values are sorted numerically (via int()) before the median
    # is calculated
    objects = sorted(
        get_objects_by_geo(db_model_age, geo_code, geo_level, session),
        key=lambda x: int(getattr(x, 'age in completed years'))
    )
    # median age
    median = calculate_median(objects, 'age in completed years')
    final_data['median_age'] = {
        "name": "Median age",
        "values": {"this": median},
    }

    # age category
    age_dist, _ = get_stat_data(
        ['age in completed years'], geo_level, geo_code, session,
        table_name='ageincompletedyearssimplified',
        key_order=['Under 18', '18 to 64', '65 and over'],
        recode={'< 18': 'Under 18',
                '>= 65': '65 and over'})
    final_data['age_category_distribution'] = age_dist

    # citizenship
    citizenship_dist, _ = get_stat_data(
            ['citizenship'], geo_level, geo_code, session,
            order_by='-total')

    # mirror the born-in-SA handling below: a missing category counts as zero
    if 'Yes' in citizenship_dist:
        sa_citizen = citizenship_dist['Yes']['numerators']['this']
    else:
        sa_citizen = 0

    final_data['citizenship_distribution'] = citizenship_dist
    final_data['citizenship_south_african'] = {
            'name': 'South African citizens',
            'values': {'this': percent(sa_citizen, total_pop)},
            'numerators': {'this': sa_citizen},
            }

    # migration
    province_of_birth_dist, _ = get_stat_data(
            ['province of birth'], geo_level, geo_code, session,
            exclude_zero=True, order_by='-total')

    final_data['province_of_birth_distribution'] = province_of_birth_dist

    def region_recode(field, key):
        # shorten the one verbose category; every other key passes through
        if key == 'Born in South Africa':
            return 'South Africa'
        else:
            return key

    region_of_birth_dist, _ = get_stat_data(
            ['region of birth'], geo_level, geo_code, session,
            exclude_zero=True, order_by='-total',
            recode=region_recode)

    if 'South Africa' in region_of_birth_dist:
        born_in_sa = region_of_birth_dist['South Africa']['numerators']['this']
    else:
        born_in_sa = 0

    final_data['region_of_birth_distribution'] = region_of_birth_dist
    final_data['born_in_south_africa'] = {
            'name': 'Born in South Africa',
            'values': {'this': percent(born_in_sa, total_pop)},
            'numerators': {'this': born_in_sa},
            }

    return final_data
Ejemplo n.º 11
0
    def get_stat_data(self, fields, geo, session, order_by=None,
                      percent=True, total=None, only=None, exclude=None, exclude_zero=False,
                      recode=None, key_order=None, percent_grouping=None, slices=None, year=None,
                      db_table=None):
        """
        This is our primary helper routine for building a dictionary suitable for
        a place's profile page, based on a statistic.

        It sums over the data for ``fields`` in the database for the place identified by
        ``geo`` and calculates numerators and values. If multiple fields are given,
        it creates nested result dictionaries.

        Control the rows that are included or ignored using ``only``, ``exclude`` and ``exclude_zero``.

        The field values can be recoded using ``recode`` and re-ordered using ``key_order``.

        :param fields: the census field to build stats for. Specify a list of fields to build
                       nested statistics. If multiple fields are specified, then the values
                       of parameters such as ``only``, ``exclude`` and ``recode`` will change.
                       These must be fields in `api.models.census.census_fields`, e.g. 'highest educational level'
        :type fields: str or list
        :param geo: the geography object
        :param dbsession session: sqlalchemy session
        :param str order_by: field to order by, or None for default (the first field), eg. '-total'
        :param bool percent: should we calculate percentages, or just sum raw values?
        :param list percent_grouping: when calculating percentages, which fields should rows be grouped by?
                                      Default: none of them -- calculate each entry as a percentage of the
                                      whole dataset. Ignored unless ``percent`` is ``True``.
        :param int total: the total value to use for percentages, or None to total columns automatically
        :param list only: only include these field values. If ``fields`` has many items, this must be a dict
                          mapping field names to a list of strings.
        :type only: dict or list
        :param exclude: ignore these field values. If ``fields`` has many items, this must be a dict
                        mapping field names to a list of strings. Field names are checked
                        before any recoding.
        :type exclude: dict or list
        :param bool exclude_zero: ignore fields that have a zero or null total
        :param recode: function or dict to recode values of ``key_field``. If ``fields`` is a singleton,
                       then the keys of this dict must be the values to recode from, otherwise
                       they must be the field names and then the values. If this is a lambda,
                       it is called with the field name and its value as arguments.
        :type recode: dict or lambda
        :param key_order: ordering for keys in result dictionary. If ``fields`` has many items,
                          this must be a dict from field names to orderings.
                          The default ordering is determined by ``order_by``.
        :type key_order: dict or list
        :param list slices: return only a slice of the final data, by choosing a single value for each
                           field in the field list, as specified in the slice list.
        :param str year: release year to use. None will try to use the current dataset context, and 'latest'
                         will use the latest release.
        :param str db_table: database table and release to use. None will try
                             to use `year` if given, and the current dataset context.

        :raises ValueError: if ``only``, ``exclude`` or ``key_order`` is not a dict while many fields
                            are given; if ``total`` is combined with many fields or with
                            ``percent_grouping``; if a percentage is requested on a table without a
                            total column and no ``total`` is supplied; or if ``percent_grouping``
                            names a field that is not in ``fields``.

        :return: (data-dictionary, total)
        """
        # accept a single field name as shorthand for a one-item list
        if not isinstance(fields, list):
            fields = [fields]

        n_fields = len(fields)
        many_fields = n_fields > 1

        # default ordering is by the first field
        if order_by is None:
            order_by = fields[0]

        # Normalise ``only``, ``exclude`` and ``key_order`` into dicts keyed by
        # field name. A bare list is only unambiguous when there is one field.
        if only is not None:
            if not isinstance(only, dict):
                if many_fields:
                    raise ValueError("If many fields are given, then only must be a dict. I got %s instead" % only)
                else:
                    only = {fields[0]: set(only)}

        if exclude is not None:
            if not isinstance(exclude, dict):
                if many_fields:
                    raise ValueError("If many fields are given, then exclude must be a dict. I got %s instead" % exclude)
                else:
                    exclude = {fields[0]: set(exclude)}

        if key_order:
            if not isinstance(key_order, dict):
                if many_fields:
                    raise ValueError("If many fields are given, then key_order must be a dict. I got %s instead" % key_order)
                else:
                    key_order = {fields[0]: key_order}
        else:
            key_order = {}

        if recode:
            # a callable recoder, or any recoder for a single field, applies to
            # every field; only a dict given with many fields is already keyed
            # by field name
            if not isinstance(recode, dict) or not many_fields:
                recode = dict((f, recode) for f in fields)

        # get the release and underlying database table
        db_table = db_table or self.get_db_table(year=year)
        objects = self.get_rows_for_geo(geo, session, fields=fields, order_by=order_by, only=only, exclude=exclude, db_table=db_table)

        if total is not None and many_fields:
            raise ValueError("Cannot specify a total if many fields are given")

        if total and percent_grouping:
            raise ValueError("Cannot specify a total if percent_grouping is given")

        if total is None and percent and self.total_column is None:
            # The table doesn't support calculating percentages, but the caller
            # has asked for a percentage without providing a total value to use.
            # Either specify a total, or specify percent=False
            raise ValueError("Asking for a percent on table %s that doesn't support totals and no total parameter specified." % self.name)

        # sanity check the percent grouping
        if percent:
            if percent_grouping:
                for field in percent_grouping:
                    if field not in fields:
                        raise ValueError("Field '%s' specified in percent_grouping must be in the fields list." % field)
                # re-order percent grouping to be same order as in the field list
                percent_grouping = [f for f in fields if f in percent_grouping]
        else:
            # grouping is meaningless when only raw values are summed
            percent_grouping = None

        root_data = OrderedDict()
        running_total = 0
        # maps a tuple of (possibly recoded) grouping keys to the summed total
        # of the rows in that group
        group_totals = {}
        # -1 is a sentinel meaning "no denominator row was seen"; it is
        # replaced after the row loop below
        grand_total = -1

        def get_recoded_key(recode, field, key):
            # a recoder is either a lookup dict (unknown keys pass through
            # unchanged) or a callable taking (field, key)
            recoder = recode[field]
            if isinstance(recoder, dict):
                return recoder.get(key, key)
            else:
                return recoder(field, key)

        def get_data_object(obj):
            """ Recurse down the list of fields and return the
            final resting place for data for this stat. """
            data = root_data

            for i, field in enumerate(fields):
                key = getattr(obj, field)

                # recoded keys are used as-is; all other keys are capitalized
                if recode and field in recode:
                    key = get_recoded_key(recode, field, key)
                else:
                    key = capitalize(key)

                # enforce key ordering the first time we see this field
                if (not data or data.keys() == ['metadata']) and field in key_order:
                    for fld in key_order[field]:
                        data[fld] = OrderedDict()

                # ensure it's there
                if key not in data:
                    data[key] = OrderedDict()

                data = data[key]

                # default values for intermediate fields
                if data is not None and i < n_fields - 1:
                    data['metadata'] = {'name': key}

            # data is now the dict where the end value is going to go
            if not data:
                # first time this leaf is reached: start its numerator at zero
                data['name'] = key
                data['numerators'] = {'this': 0.0}

            return data

        # run the stats for the objects
        for obj in objects:
            if not obj.total and exclude_zero:
                continue

            # a row whose last-field value equals the table's denominator key
            # supplies the grand total directly
            if self.denominator_key and getattr(obj, self.fields[-1]) == self.denominator_key:
                grand_total = obj.total
                # don't include the denominator key in the output
                continue

            # get the data dict where these values must go
            data = get_data_object(obj)
            # NOTE(review): get_data_object() fills in empty leaves before
            # returning, so this looks like a purely defensive guard -- confirm
            if not data:
                continue

            if obj.total is not None:
                # NOTE(review): if an earlier null row for the same key set the
                # numerator to None (see the else branch), this += would fail
                data['numerators']['this'] += obj.total
                running_total += obj.total
            else:
                # TODO: sanity check this is the right thing to do for multiple fields with
                # nested nulls -- does aggregating over nulls treat them as zero, or should we
                # treat them as null?
                data['numerators']['this'] = None

            if percent_grouping:
                if obj.total is not None:
                    # build the tuple identifying which group this row
                    # contributes to
                    group_key = tuple()
                    for field in percent_grouping:
                        key = getattr(obj, field)
                        if recode and field in recode:
                            # Group by recoded keys
                            key = get_recoded_key(recode, field, key)
                        group_key = group_key + (key,)

                    # remembered on the leaf so calc_percent can find the
                    # matching group total; it is popped again there
                    data['_group_key'] = group_key
                    group_totals[group_key] = group_totals.get(group_key, 0) + obj.total

        # no denominator row was seen: use the caller's total if given,
        # otherwise the sum of all counted rows
        if grand_total == -1:
            grand_total = running_total if total is None else total

        # add in percentages
        def calc_percent(data):
            # Walks the nested result dicts; entries holding 'numerators' are
            # leaves, everything else (except 'metadata') is recursed into.
            # Note that the loop variable rebinds ``data``, shadowing the
            # parameter.
            for key, data in data.items():
                if not key == 'metadata':
                    if 'numerators' in data:
                        if percent:
                            # grouped leaves are a share of their group,
                            # ungrouped leaves a share of the grand total
                            if '_group_key' in data:
                                total = group_totals[data.pop('_group_key')]
                            else:
                                total = grand_total

                            if total is not None and data['numerators']['this'] is not None:
                                # a zero total yields 0% rather than a division error
                                perc = 0 if total == 0 else (data['numerators']['this'] / total * 100)
                                data['values'] = {'this': round(perc, 2)}
                            else:
                                data['values'] = {'this': None}
                        else:
                            # raw mode: the summed value moves into 'values'
                            # and the numerator is blanked out
                            data['values'] = dict(data['numerators'])
                            data['numerators']['this'] = None
                    else:
                        calc_percent(data)

        calc_percent(root_data)

        # drill down one level of nesting per slice value
        if slices:
            for v in slices:
                root_data = root_data[v]

        add_metadata(root_data, self, db_table.active_release)

        return root_data, grand_total
Ejemplo n.º 12
0
def get_service_delivery_profile(geo, session):
    """
    Build the service delivery section (water, refuse disposal, electricity
    and toilets) of the profile for ``geo``.

    Which statistics are included depends on the dataset year in the current
    context and on ``geo.version``:

    * year ``"latest"``: adds the water supplier distribution and the
      access-to-electricity figures, and derives the service-provider water
      figure from the water supplier data;
    * year 2011 on a version ``"2011"`` geography: adds the breakdown of
      electricity for cooking, heating and lighting.

    The electricity breakdown is only added to the result when it was
    actually computed, so a 2011 geography viewed under another year no
    longer fails on unassigned locals.

    :param geo: the geography object
    :param session: database session handed through to the data helpers
    :return: dict mapping statistic names to data dictionaries
    """
    # read the dataset year once so every branch below agrees on it
    year = current_context().get("year")
    is_latest = year == "latest"
    is_2011 = str(year) == "2011"
    # single flag used both to compute and to publish the 2011 breakdown
    has_elec_breakdown = geo.version == "2011" and is_2011

    # water source
    water_src_data, total_wsrc = get_stat_data(
        ["source of water"],
        geo,
        session,
        recode=SHORT_WATER_SOURCE_CATEGORIES,
        order_by="-total",
    )

    # water from a service provider
    total_water_sp = 0.0
    perc_water_sp = 0.0

    if is_latest:
        water_supplier_data, total_wspl = get_stat_data(
            ["supplier of water"],
            geo,
            session,
            recode=SHORT_WATER_SUPPLIER_CATEGORIES,
            order_by="-total",
        )

        # both of these supplier categories count as a service provider
        for key in ("Service provider", "Water scheme"):
            if key in water_supplier_data:
                total_water_sp += water_supplier_data[key]["numerators"]["this"]

        perc_water_sp = percent(total_water_sp, total_wspl)

    elif "Service provider" in water_src_data:
        total_water_sp = water_src_data["Service provider"]["numerators"]["this"]
        perc_water_sp = percent(total_water_sp, total_wsrc)

    percentage_water_from_service_provider = {
        "name": "Are getting water from a regional or local service provider",
        "numerators": {"this": total_water_sp},
        "values": {"this": perc_water_sp},
    }

    # refuse disposal
    refuse_disp_data, total_ref = get_stat_data(
        ["refuse disposal"],
        geo,
        session,
        recode=SHORT_REFUSE_DISPOSAL_CATEGORIES,
        order_by="-total",
    )

    total_ref_sp = 0.0
    for k, v in refuse_disp_data.items():
        if k.startswith("Service provider"):
            total_ref_sp += v["numerators"]["this"]

    sp_name_2011 = (
        "Are getting refuse disposal from a local authority or private company"
    )
    sp_name_2016 = "Are getting refuse disposal from a local authority, private company or community members"

    percentage_ref_disp_from_service_provider = {
        "name": sp_name_2011 if is_2011 else sp_name_2016,
        "numerators": {"this": total_ref_sp},
        "values": {"this": percent(total_ref_sp, total_ref)},
    }

    # electricity
    if has_elec_breakdown:
        elec_attrs = [
            "electricity for cooking",
            "electricity for heating",
            "electricity for lighting",
        ]

        elec_table = get_datatable("electricityforcooking_electricityforheating_electr")
        objects = elec_table.get_rows_for_geo(geo, session)

        total_elec = 0.0
        total_some_elec = 0.0
        elec_access_data = {
            "total_all_elec": {
                "name": "Have electricity for everything",
                "numerators": {"this": 0.0},
            },
            "total_some_not_all_elec": {
                "name": "Have electricity for some things",
                "numerators": {"this": 0.0},
            },
            "total_no_elec": {"name": "No electricity", "numerators": {"this": 0.0}},
        }
        for obj in objects:
            total_elec += obj.total
            # a use counts as electrified unless its value starts with "no "
            electrified = [
                not getattr(obj, attr).startswith("no ") for attr in elec_attrs
            ]
            has_some = any(electrified)
            if has_some:
                total_some_elec += obj.total

            # each row lands in exactly one of the three buckets
            if all(electrified):
                bucket = "total_all_elec"
            elif has_some:
                bucket = "total_some_not_all_elec"
            else:
                bucket = "total_no_elec"
            elec_access_data[bucket]["numerators"]["this"] += obj.total
        set_percent_values(elec_access_data, total_elec)
        add_metadata(
            elec_access_data,
            elec_table,
            elec_table.get_release(year),
        )

    if is_latest:
        # We don't have this data for 2011
        elec_access, _ = get_stat_data(
            ["access to electricity"],
            geo,
            session,
            table_universe="Population",
            recode=ELECTRICITY_ACCESS_RECODE,
            order_by="-total",
        )

    # toilets
    toilet_data, total_toilet = get_stat_data(
        ["toilet facilities"],
        geo,
        session,
        exclude_zero=True,
        recode=COLLAPSED_TOILET_CATEGORIES,
        order_by="-total",
    )

    total_flush_toilet = 0.0
    total_no_toilet = 0.0
    for key, data in toilet_data.items():
        if key.startswith(("Flush", "Chemical")):
            total_flush_toilet += data["numerators"]["this"]
        if key == "None":
            total_no_toilet += data["numerators"]["this"]

    profile = {
        "water_source_distribution": water_src_data,
        "percentage_water_from_service_provider": percentage_water_from_service_provider,
        "refuse_disposal_distribution": refuse_disp_data,
        "percentage_ref_disp_from_service_provider": percentage_ref_disp_from_service_provider,
        "percentage_flush_toilet_access": {
            "name": "Have access to flush or chemical toilets",
            "numerators": {"this": total_flush_toilet},
            "values": {"this": percent(total_flush_toilet, total_toilet)},
        },
        "percentage_no_toilet_access": {
            "name": "Have no access to any toilets",
            "numerators": {"this": total_no_toilet},
            "values": {"this": percent(total_no_toilet, total_toilet)},
        },
        "toilet_facilities_distribution": toilet_data,
    }

    if is_latest:
        profile.update(
            {
                "water_supplier_distribution": water_supplier_data,
                "electricity_access": elec_access,
                "percentage_no_electricity_access": {
                    "name": "Have no access to electricity",
                    "numerators": elec_access["No access to electricity"]["numerators"],
                    "values": elec_access["No access to electricity"]["values"],
                },
            }
        )

    # publish the breakdown only when it was computed above; gating this on
    # geo.version alone would reference locals that were never assigned
    if has_elec_breakdown:
        profile.update(
            {
                "percentage_electricity_access": {
                    "name": "Have electricity for at least one of cooking, heating or lighting",
                    "numerators": {"this": total_some_elec},
                    "values": {"this": percent(total_some_elec, total_elec)},
                },
                "electricity_access_distribution": elec_access_data,
            }
        )
    return profile