Exemple #1
0
def get_education_profile(geo, session):
    """Build the education section of a geography's profile.

    Fetches the "highest educational level" distribution for the
    "Individuals 20 and older" universe, plus the totals for the two
    category sets reported as "Completed Grade 9 or higher" and
    "Completed Matric or higher". Which category sets are used depends on
    whether the current context year is 2011.

    :param geo: the geography to build the profile for
    :param session: database session passed through to ``get_stat_data``
    :return: dict with the keys ``educational_attainment_distribution``
             and ``educational_attainment``
    """
    edu_dist_data, total_over_20 = get_stat_data(
        ["highest educational level"],
        geo,
        session,
        recode=COLLAPSED_EDUCATION_CATEGORIES,
        table_universe="Individuals 20 and older",
        key_order=EDUCATION_KEY_ORDER,
    )

    # The 2011 release uses different category sets from the other releases.
    is_2011 = str(current_context().get("year")) == "2011"
    general_categories = (
        EDUCATION_GET_OR_HIGHER if is_2011 else EDUCATION_GET_OR_HIGHER_2016
    )
    further_categories = (
        EDUCATION_FET_OR_HIGHER if is_2011 else EDUCATION_FET_OR_HIGHER_2016
    )

    general_edu, total_general_edu = get_stat_data(
        ["highest educational level"],
        geo,
        session,
        table_universe="Individuals 20 and older",
        only=general_categories,
    )

    further_edu, total_further_edu = get_stat_data(
        ["highest educational level"],
        geo,
        session,
        table_universe="Individuals 20 and older",
        only=further_categories,
    )

    def percent_of_over_20(count):
        """Return ``count`` as a percentage of the over-20 total."""
        # A geography with nobody in the universe would otherwise raise
        # ZeroDivisionError; report the value as missing instead.
        if not total_over_20:
            return None
        return round(count / total_over_20 * 100, 2)

    edu_split_data = {
        "percent_general_edu": {
            "name": "Completed Grade 9 or higher",
            "numerators": {"this": total_general_edu},
            "values": {"this": percent_of_over_20(total_general_edu)},
        },
        "percent_further_edu": {
            "name": "Completed Matric or higher",
            "numerators": {"this": total_further_edu},
            "values": {"this": percent_of_over_20(total_further_edu)},
        },
        "metadata": general_edu["metadata"],
    }

    return {
        "educational_attainment_distribution": edu_dist_data,
        "educational_attainment": edu_split_data,
    }
def get_demographics_profile(geo, session):
    """Assemble the demographics section of a geography's profile.

    All statistics are fetched inside a ``dataset_context`` for the year of
    the current context.

    :param geo: the geography to build the profile for
    :param session: database session passed through to ``get_stat_data``
    :return: dict with gender, age group and rural/urban distributions, the
             number of people under 15 and the total population
    """
    release_year = current_context().get('year')

    with dataset_context(year=release_year):
        # Gender split; its total is reported as the total population.
        by_gender, population = get_stat_data(
            'gender', geo, session,
            table_fields=['gender', 'age group'])

        # Age groups, from which the under-15 numerator is read.
        by_age_group, _ = get_stat_data(
            'age group', geo, session,
            table_fields=['gender', 'age group'])
        under_15_count = by_age_group['0-14 Years']['numerators']['this']

        # Rural versus urban, broken down by gender.
        by_settlement, _ = get_stat_data(
            ['rural or urban','gender'], geo, session,
            table_fields=['gender', 'rural or urban'])

    profile = {}
    profile['gender_ratio'] = by_gender
    profile['age_group_distribution'] = by_age_group
    profile['under_15'] = {
        'name': 'Under 15 years',
        'values': {'this': under_15_count},
    }
    profile['rural_distribution'] = by_settlement
    profile['total_population'] = {
        "name": "People",
        "values": {"this": population},
    }
    return profile
Exemple #3
0
def get_profile(geo, profile_name, request):
    """Build the profile data for ``geo``.

    For every name in ``PROFILE_SECTIONS`` a module-level function called
    ``get_<section>_profile`` is looked up and, if it exists, called with
    ``(geo, session)``; its result is stored under the section name.

    :param geo: the geography to build the profile for
    :param profile_name: unused in this function
    :param request: unused in this function
    :return: dict with ``primary_release_year`` plus one entry per section
             that has a builder function
    """
    session = get_session()

    try:
        # NOTE: the comparative geographies are looked up but not used.
        # Merging their profiles into each section (previously done for the
        # "indicator" section via merge_dicts) is currently disabled.
        comparative_geos = geo_data.get_comparative_geos(geo)
        data = {"primary_release_year": current_context().get("year")}

        module_globals = globals()
        for section in list(PROFILE_SECTIONS):
            function_name = "get_%s_profile" % section
            if function_name not in module_globals:
                # No builder defined for this section; skip it.
                continue
            builder = module_globals[function_name]
            data[section] = builder(geo, session)
    finally:
        session.close()

    return data
Exemple #4
0
    def get_db_table(self, release=None, year=None):
        """ Get a DBTable instance for a particular year or release.

        If neither ``release`` nor ``year`` is given, the year of the
        current dataset context is used. When a year is available it takes
        precedence: the release is looked up with ``self.get_release(year)``.

        :param release: the release to fetch the table for
        :param year: the release year to fetch the table for
        :return: the matching DB table, with ``active_release`` set and its
                 model set up via ``self.setup_model``
        :raises ValueError: if no release can be determined, or if there is
                            no DB table for the release
        """
        if year is None and release is None:
            from wazimap.data.utils import current_context

            # use the current context
            year = current_context().get("year")

        if year:
            release = self.get_release(year)

        if not release:
            raise ValueError(
                "Unclear which release year to use. Specify a release or a year, or use dataset_context(year=...)"
            )

        # get the db_table
        fieldname = self.release_class.__name__.lower() + "__release"
        query = self.db_table_releases.filter(**{fieldname: release})

        db_table = query.first()
        if db_table is None:
            # Without this check the attribute assignment below fails with
            # an AttributeError on None that hides the real problem.
            raise ValueError("No DB table found for release %r" % (release,))

        db_table.active_release = release
        self.setup_model(db_table)

        return db_table
def get_census_profile(geo, profile_name, request):
    """Build the census profile for ``geo``.

    Collects the profile names of every category in ``SECTIONS``, and for
    each one calls the module-level ``get_<name>_profile`` function (name
    lower-cased, spaces replaced by underscores) if it exists.

    Note that ``geo.version`` is converted to a string in place.

    :param geo: the geography to build the profile for
    :param profile_name: unused in this function
    :param request: unused in this function
    :return: dict of section data plus ``all_sections``,
             ``primary_release_year``, ``raw_selected_sections``,
             ``selected_sections`` and ``afrobarometer``
    """
    geo.version = str(geo.version)
    session = get_session()
    year = current_context().get('year')
    try:
        # Flatten the profile names of all categories into one list.
        sections = [
            name
            for cat in SECTIONS
            for name in SECTIONS[cat]['profiles']
        ]

        data = {}
        module_globals = globals()
        for name in sections:
            key = name.lower().replace(' ', '_')
            function_name = 'get_%s_profile' % key
            if function_name in module_globals:
                data[key] = module_globals[function_name](geo, session)

        # tweaks to make the data nicer
        # show X largest groups on their own and group the rest as 'Other'
        # NOTE(review): this tests the raw section names, while ``data`` is
        # keyed by the normalised names - confirm this is intended.
        if 'households' in sections:
            households = data['households']
            group_remainder(households['roofing_material_distribution'], 5)
            group_remainder(households['wall_material_distribution'], 5)

        # No explicit selection is made here, so every section is selected.
        selected_sections = []
        if not selected_sections:
            selected_sections = sections

        data['all_sections'] = SECTIONS
        data['primary_release_year'] = year
        data['raw_selected_sections'] = selected_sections
        data['selected_sections'] = [
            raw.replace(' ','_').lower() for raw in selected_sections]
        data['afrobarometer'] = get_afrobarometer_profile(geo, session)
        return data

    finally:
        session.close()
Exemple #6
0
def get_profile(geo, profile_name, request):
    """Build the profile for ``geo``: population and afrobarometer data.

    :param geo: the geography to build the profile for
    :param profile_name: unused in this function
    :param request: unused in this function
    :return: dict with ``demographics``, ``primary_release_year`` and
             ``afrobarometer``
    """
    year = current_context().get('year')
    session = get_session()
    try:
        return {
            'demographics': get_population(geo, session),
            'primary_release_year': year,
            'afrobarometer': get_afrobarometer_profile(geo, session),
        }
    finally:
        # Always release the session, even if a section fails to build.
        session.close()
Exemple #7
0
def get_profile(geo, profile_name, request):
    """Build the profile for ``geo``, merged with its comparative geos.

    For every name in ``PROFILE_SECTIONS`` that has a module-level
    ``get_<section>_profile`` function, that function is called for ``geo``
    and then for each comparative geography (with ``comparative=True``),
    whose result is merged into the section via ``merge_dicts``.

    :param geo: the geography to build the profile for
    :param profile_name: unused in this function
    :param request: unused in this function
    :return: dict with ``display_profile``, ``primary_release_year`` and
             one entry per section
    """
    session = get_session()

    try:
        comp_geos = geo_data.get_comparative_geos(geo)
        sections = list(PROFILE_SECTIONS)

        supported_levels = ['country', 'province', 'district', 'municipality']
        if geo.geo_level not in supported_levels:
            # Placeholder: an error should be raised here as we don't have
            # this data.
            pass

        # The following is temporary and enables us to determine what to
        # display for geos:
        #
        #   Within WC: all indicators, with WC as root comparison geo
        #   Outside WC: some indicators, with ZA as root comparison geo
        #
        # This is because Wazimap expects data for all geos. It will be
        # removed once we have imported all the data.
        #
        # There are datasets with only WC information
        if geo.geo_code == 'WC' or 'WC' in [cg.geo_code for cg in comp_geos]:
            display_profile = 'WC'
        else:
            display_profile = 'ZA'

        data = {
            'display_profile': display_profile,
            'primary_release_year': current_context().get('year'),
        }

        for section in sections:
            function_name = 'get_%s_profile' % section
            if function_name not in globals():
                continue

            build_section = globals()[function_name]
            section_data = build_section(geo, session, display_profile)
            data[section] = section_data

            # merge the summary profile of each province and/or country
            # into the current geo's profile
            for comp_geo in comp_geos:
                comp_data = build_section(
                    comp_geo, session, display_profile, comparative=True)
                merge_dicts(section_data, comp_data, comp_geo.geo_level)

        # Make data look nicer on profile page
        languages = data['demographics']['youth_population_by_language']
        group_remainder(languages, 11)

        return data

    finally:
        session.close()
Exemple #8
0
def get_land_audit_profile(geo, session):
    """Build the land audit section of a geography's profile.

    Four distributions are fetched inside a ``dataset_context`` for the
    year of the current context. Fetching is best-effort: if a lookup
    fails, the failure is logged and ``LOCATIONNOTFOUND`` is used for that
    distribution instead.

    :param geo: the geography to build the profile for
    :param session: database session passed through to ``get_stat_data``
    :return: dict with the four distributions and an ``is_missing`` entry
             that is truthy only when every distribution reports
             ``is_missing``
    """
    import logging

    logger = logging.getLogger(__name__)

    def fetch_distribution(field, table_name):
        """Return the stat data for ``field``, or LOCATIONNOTFOUND on error."""
        try:
            distribution, _ = get_stat_data(field, geo, session,
                                            table_name=table_name,
                                            table_fields=[field])
        except Exception:
            # Deliberately best-effort, but leave a trace so that genuine
            # errors are not silently hidden.
            logger.warning("Could not load land audit data '%s' from "
                           "table '%s'", field, table_name, exc_info=True)
            return LOCATIONNOTFOUND
        return distribution

    year = current_context().get('year')
    with dataset_context(year=year):
        land_use_dist = fetch_distribution('land_use', 'landuse')
        land_user_dist = fetch_distribution('land_user', 'landuser')
        land_distribution_gender = fetch_distribution(
            'land_ownership_by_gender', 'privatelanddistributionbygender')
        land_ownership = fetch_distribution(
            'private_vs_state_ownership', 'landownership')

        is_missing = land_user_dist.get('is_missing') and \
                     land_use_dist.get('is_missing') and \
                     land_distribution_gender.get('is_missing') and \
                     land_ownership.get('is_missing')

    return {
        'is_missing': is_missing,
        'land_user_dist': land_user_dist,
        'land_use_dist': land_use_dist,
        'land_distribution_gender': land_distribution_gender,
        'land_ownership': land_ownership,
    }
Exemple #9
0
def get_profile(geo, profile_name, request):
    """Build the profile for ``geo`` from a fixed set of sections.

    :param geo: the geography to build the profile for
    :param profile_name: unused in this function
    :param request: unused in this function
    :return: dict with ``primary_release_year``, ``demographics``,
             ``households``, ``disability``, ``elections2016`` and
             ``afrobarometer``
    """
    session = get_session()
    year = current_context().get('year')

    try:
        return {
            'primary_release_year': year,
            'demographics': get_demographics_profile(geo, session, year),
            'households': get_households_profile(geo, session, year),
            'disability': get_disabilities_profile(geo, session, year),
            'elections2016': get_elections2016_profile(geo, session),
            'afrobarometer': get_afrobarometer_profile(geo, session),
        }
    finally:
        # Always release the session, even if a section fails to build.
        session.close()
Exemple #10
0
def get_profile(geo, profile_name, request):
    """Build the profile for ``geo``, merged with its comparative geos.

    Each name in ``PROFILE_SECTIONS`` with a module-level
    ``get_<section>_profile`` function is built for ``geo``; the same
    function is then called for every comparative geography (with
    ``comparative=True``) and merged in with ``merge_dicts``.

    :param geo: the geography to build the profile for
    :param profile_name: unused in this function
    :param request: unused in this function
    :return: dict with ``display_profile``, ``primary_release_year`` and
             one entry per section
    """
    session = get_session()

    try:
        comp_geos = geo_data.get_comparative_geos(geo)
        data = {}
        sections = list(PROFILE_SECTIONS)

        if geo.geo_level not in ['country', 'province', 'district', 'municipality']:
            # Placeholder: an error should be raised as we don't have this data
            pass

        # The following is temporary and enables us to determine what to
        # display for geos:
        #
        #   Within WC: all indicators, with WC as root comparison geo
        #   Outside WC: some indicators, with ZA as root comparison geo
        #
        # This is because Wazimap expects data for all geos. It will be
        # removed once we have imported all the data.
        #
        # There are datasets with only WC information
        within_wc = geo.geo_code == 'WC' or any(
            cg.geo_code == 'WC' for cg in comp_geos)
        display_profile = 'WC' if within_wc else 'ZA'

        data['display_profile'] = display_profile
        data['primary_release_year'] = current_context().get('year')

        module_globals = globals()
        for section in sections:
            function_name = 'get_%s_profile' % section
            builder = module_globals[function_name] if function_name in module_globals else None
            if builder is None:
                continue

            data[section] = builder(geo, session, display_profile)

            # get profiles for province and/or country and merge each
            # summary profile into the current geo profile
            for comp_geo in comp_geos:
                merge_dicts(
                    data[section],
                    builder(comp_geo, session, display_profile, comparative=True),
                    comp_geo.geo_level)

        # Make data look nicer on profile page
        group_remainder(data['demographics']['youth_population_by_language'], 11)

        return data

    finally:
        session.close()
Exemple #11
0
def get_sectionaltitleland_profile(geo, session):
    """Build the sectional title land section of a geography's profile.

    One distribution is fetched for every profile listed under
    ``SECTIONS['sectionaltitleland']['profiles']``, inside a
    ``dataset_context`` for the year of the current context. Fetching is
    best-effort: if a lookup fails, the failure is logged and the entry
    stays ``LOCATIONNOTFOUND``.

    :param geo: the geography to build the profile for
    :param session: database session passed through to ``get_stat_data``
    :return: dict keyed by profile name (lower-cased, spaces replaced by
             underscores), plus an ``is_missing`` entry that stays truthy
             only while every profile reports ``is_missing``
    """
    import logging

    logger = logging.getLogger(__name__)

    year = current_context().get('year')
    with dataset_context(year=year):
        topic_profiles = SECTIONS['sectionaltitleland']['profiles']
        profiles_data = {'is_missing': True}

        for profile in topic_profiles:
            # Compute the names outside the try block so a failure can never
            # leave ``profile_name`` pointing at the previous iteration.
            profile_table = profile.lower()
            profile_name = profile_table.replace(' ', '_')
            profiles_data[profile_name] = LOCATIONNOTFOUND

            try:
                profiles_data[profile_name], _ = get_stat_data([profile_table],
                                                               geo, session)
            except Exception:
                # Deliberately best-effort, but leave a trace so that
                # genuine errors are not silently hidden.
                logger.warning("Could not load sectional title land data "
                               "'%s'", profile_table, exc_info=True)

            profiles_data['is_missing'] = profiles_data.get('is_missing') and \
                                          profiles_data[profile_name].get(
                                              'is_missing')

    return profiles_data
Exemple #12
0
def get_profile(geo, profile_name, request):
    """Build the profile for ``geo``, merged with its comparative geos.

    For every name in ``PROFILE_SECTIONS`` a module-level function called
    ``get_<section>_profile`` is looked up and, if it exists, called for
    ``geo``. It is then called for each comparative geography, and that
    result is merged into the section with ``merge_dicts``.

    :param geo: the geography to build the profile for
    :param profile_name: unused in this function
    :param request: unused in this function
    :return: dict with ``primary_release_year`` plus one entry per section
             that has a builder function
    :raises ValueError: if merging a comparative geography's data fails
                        with a ``KeyError``
    """
    session = get_session()

    try:
        comparative_geos = geo_data.get_comparative_geos(geo)
        data = {}
        data["primary_release_year"] = current_context().get("year")

        sections = list(PROFILE_SECTIONS)

        for section in sections:
            function_name = "get_%s_profile" % section
            if function_name in globals():
                func = globals()[function_name]
                data[section] = func(geo, session)

                # get profiles for comparative geometries
                for comp_geo in comparative_geos:
                    try:
                        merge_dicts(
                            data[section], func(comp_geo, session), comp_geo.geo_level
                        )
                    except KeyError as e:
                        msg = (
                            "Error merging data into %s for section '%s' from %s: KeyError: %s"
                            % (geo.geoid, section, comp_geo.geoid, e)
                        )
                        log.fatal(msg, exc_info=e)
                        raise ValueError(msg)
    finally:
        session.close()

    # NOTE: a leftover debugging dump of ``data`` to "example.json" used to
    # run here on every call. It wrote into the working directory and could
    # fail the whole request on non-serialisable values, so it was removed.
    return data
Exemple #13
0
    def get_db_table(self, release=None, year=None):
        """ Get a DBTable instance for a particular year or release.

        When neither ``release`` nor ``year`` is supplied, the year is taken
        from the current dataset context. A year, when available, wins over
        ``release``: the release is resolved through
        ``self.get_release(year)``.

        :param release: the release to fetch the table for
        :param year: the release year to fetch the table for
        :return: the matching DB table, with ``active_release`` set and its
                 model set up via ``self.setup_model``
        :raises ValueError: if no release can be determined, or if the
                            release has no DB table
        """
        if year is None and release is None:
            from wazimap.data.utils import current_context
            # use the current context
            year = current_context().get('year')

        if year:
            release = self.get_release(year)

        if not release:
            raise ValueError("Unclear which release year to use. Specify a release or a year, or use dataset_context(year=...)")

        # get the db_table
        fieldname = self.release_class.__name__.lower() + '__release'
        query = self.db_table_releases.filter(**{fieldname: release})

        db_table = query.first()
        if db_table is None:
            # Fail with a clear message rather than an AttributeError on
            # None when the release has no table.
            raise ValueError('No DB table found for release %r' % (release,))

        db_table.active_release = release
        self.setup_model(db_table)

        return db_table
Exemple #14
0
def get_living_environment_profile(geo,
                                   session,
                                   display_profile,
                                   comparative=False):
    """Build the living environment section of a geography's profile.

    Fetches a series of distributions for youth living in households
    (electricity, toilet and water access, type of dwelling, type of area,
    crowding, internet access and living with parents) and derives a few
    headline figures from them. The internet access indicator differs
    depending on whether the current context year is 2011.

    :param geo: the geography to build the profile for
    :param session: database session passed through to ``get_stat_data``
    :param display_profile: unused in this function
    :param comparative: unused in this function
    :return: dict of distributions and headline figures
    """
    # Most of the tables below share this universe and dataset.
    household_tables = {
        'table_universe': 'Youth living in households',
        'table_dataset': 'Census and Community Survey',
    }
    final_data = {}

    youth_electricity_access, _ = get_stat_data(
        ['electricity access'], geo, session,
        key_order=ELECTRICITY_ACCESS_KEY_ORDER, **household_tables)

    youth_toilet_access, _ = get_stat_data(
        ['toilet access'], geo, session,
        key_order=TOILET_ACCESS_KEY_ORDER, **household_tables)

    youth_water_access, _ = get_stat_data(
        ['water access'], geo, session,
        key_order=WATER_ACCESS_KEY_ORDER, **household_tables)

    youth_type_of_dwelling, _ = get_stat_data(
        ['type of dwelling'], geo, session,
        key_order=TYPE_OF_DWELLING_ORDER, **household_tables)

    # Informal dwellings are the sum of the two informal categories. The
    # figure stays None unless at least one of them has a truthy value.
    informal_values = [
        youth_type_of_dwelling.get(category, {}).get('values', {}).get(
            'this', None)
        for category in ('Informal not in backyard', 'Informal in backyard')
    ]
    if any(informal_values):
        youth_dwelling_informal = sum(
            value or 0 for value in informal_values)
    else:
        youth_dwelling_informal = None

    if current_context().get('year') == 'latest':
        type_of_area_order = TYPE_OF_AREA_ORDER_2016
    else:
        type_of_area_order = TYPE_OF_AREA_ORDER

    youth_type_of_area, _ = get_stat_data(
        ['type of area'], geo, session,
        key_order=type_of_area_order, **household_tables)

    youth_household_crowded, _ = get_stat_data(
        ['household crowded'], geo, session,
        key_order=HH_CROWDED_KEY_ORDER, **household_tables)

    # The releases have different indicators for internet access
    if str(current_context().get('year')) == '2011':
        youth_access_to_internet, _ = get_stat_data(
            ['access to internet'], geo, session,
            table_name='youth_access_to_internet_gender',
            key_order=INTERNET_ACCESS_ORDER, **household_tables)

        no_access = youth_access_to_internet['No access to internet']
        final_data['youth_no_access_to_internet'] = {
            "name": "Of youth live in households with no access to internet",
            "values": {"this": no_access['values']['this']},
        }
    else:
        youth_access_to_internet, _ = get_stat_data(
            ['access to internet'], geo, session,
            order_by='-total', **household_tables)

        cell_phone = youth_access_to_internet['Cell phone']
        final_data['youth_cell_phone_access_internet'] = {
            "name": "Of youth access the internet through a cell phone",
            "values": {"this": cell_phone['values']['this']},
        }

    youth_by_living_with_parents_status, _ = get_stat_data(
        ['living with parents'], geo, session,
        table_universe='Youth aged 15-19 living in households',
        table_dataset='Census and Community Survey',
        key_order=LIVING_WITH_PARENTS_KEY_ORDER)

    # Add up the categories that involve at least one parent.
    living_with_parent_keys = ('Both parents', 'Mother only', 'Father only')
    living_with_parents_stat = 0
    for status, stat in youth_by_living_with_parents_status.iteritems():
        if status in living_with_parent_keys:
            living_with_parents_stat += stat['values']['this'] or 0

    overcrowded = youth_household_crowded['Overcrowded']['values']['this']

    final_data['youth_electricity_access'] = youth_electricity_access
    final_data['youth_toilet_access'] = youth_toilet_access
    final_data['youth_water_access'] = youth_water_access
    final_data['youth_dwelling_informal'] = {
        "name":
        "Of youth live in households that are informal dwellings (shacks)",
        "values": {"this": youth_dwelling_informal},
    }
    final_data['youth_type_of_dwelling'] = youth_type_of_dwelling
    final_data['youth_type_of_area'] = youth_type_of_area
    final_data['youth_households_overcrowded'] = {
        "name": "Of youth live in households that are overcrowded *",
        "values": {"this": overcrowded},
    }
    final_data['youth_household_crowded'] = youth_household_crowded
    final_data['youth_access_to_internet'] = youth_access_to_internet
    final_data['youth_living_with_parents'] = {
        "name":
        "Of youth aged 15-19 live with at least one biological parent",
        "values": {"this": living_with_parents_stat},
    }
    final_data['youth_by_living_with_parents_status'] = (
        youth_by_living_with_parents_status)

    return final_data
Exemple #15
0
def get_demographics_profile(geo, session, display_profile, comparative=False):
    """Build the youth demographics section of a geography's profile.

    Fetches the youth population figures from the ``youth_population``
    data table, plus distributions by age group, gender, population group,
    language, province and region of birth, and citizenship.

    :param geo: the geography to build the profile for
    :param session: database session passed through to ``get_stat_data``
    :param display_profile: unused in this function
    :param comparative: unused in this function
    :return: dict of distributions and headline figures; includes
             ``population_density`` only when ``geo.square_kms`` is truthy
    """
    youth_pop_table = get_datatable('youth_population')
    # This used to pass percent='False': a non-empty string, which is truthy
    # and so read as the opposite of what it did. Percentages are needed
    # (see 'youth_population_perc' below), so pass True explicitly.
    # NOTE(review): assumes the table's get_stat_data only tests the
    # truthiness of ``percent`` - confirm against its implementation.
    youth_pop, pop_total = youth_pop_table.get_stat_data(geo,
                                                         total='total_pop',
                                                         percent=True)

    youth_age_group_data, _ = get_stat_data(
        ['age groups in 10 years'],
        geo,
        session,
        table_universe='Population',
        table_dataset='Census and Community Survey')

    youth_gender_data, _ = get_stat_data(
        ['gender'],
        geo,
        session,
        table_fields=['gender', 'population group'],
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        key_order=GENDER_ORDER)

    population_group_order = (POPULATION_GROUP_ORDER_2016
                              if current_context().get('year') == 'latest' else
                              POPULATION_GROUP_ORDER)

    youth_pop_group_data, _ = get_stat_data(
        ['population group'],
        geo,
        session,
        table_fields=['population group', 'gender'],
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        table_name='youth_population_group_gender',
        key_order=population_group_order)

    youth_language_data, _ = get_stat_data(
        ['language'],
        geo,
        session,
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        order_by='-total')
    # The data is ordered by '-total', so the first entry is taken as the
    # most spoken language. ``list(...)[0]`` works on both Python 2 and 3,
    # whereas ``.keys()[0]`` fails on Python 3.
    youth_language_most_spoken = list(youth_language_data.values())[0]

    youth_province_birth_data, _ = get_stat_data(
        ['province of birth'],
        geo,
        session,
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        key_order=PROVINCE_ORDER)

    youth_region_birth_data, _ = get_stat_data(
        ['region of birth'],
        geo,
        session,
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        key_order=REGION_ORDER)

    # Relabel the SADC entry with a trailing asterisk for display.
    youth_region_birth_data['SADC']['name'] = 'SADC*'

    youth_citizenship_data, _ = get_stat_data(
        ['citizenship'],
        geo,
        session,
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        key_order=CITIZENSHIP_ORDER)

    youth_total = youth_pop['youth_pop']['numerators']['this']

    final_data = {
        'total_population': {
            "name": "People",
            "values": {
                "this": pop_total
            }
        },
        'youth_population_total': {
            "name": "Youth aged 15-24",
            "values": {
                "this": youth_total
            }
        },
        'youth_population_perc': {
            "name": "Of the population are youth aged 15-24",
            "values": {
                "this": youth_pop['youth_pop']['values']['this']
            },
        },
        'youth_population_by_age_group': youth_age_group_data,
        'youth_population_by_gender': youth_gender_data,
        'youth_population_by_pop_group': youth_pop_group_data,
        'youth_language_most_spoken': youth_language_most_spoken,
        'youth_population_by_language': youth_language_data,
        'youth_born_in_sa': {
            "name": "Of the youth population were born in South Africa",
            "values": {
                "this":
                youth_region_birth_data['South Africa']['values']['this']
            },
        },
        'youth_by_province_of_birth': youth_province_birth_data,
        'youth_by_region_of_birth': youth_region_birth_data,
        'youth_sa_citizenship': {
            'name': 'of the youth population are South African citizens',
            'values': {
                'this': youth_citizenship_data['Yes']['values']['this']
            }
        },
        'youth_by_citizenship': youth_citizenship_data,
    }

    # The following info is displayed in the block over the map
    if geo.square_kms:
        final_data['population_density'] = {
            'name': "youth per square kilometre",
            'values': {
                "this": youth_total / geo.square_kms
            }
        }

    return final_data
Exemple #16
0
def get_living_environment_profile(geo, session, display_profile, comparative=False):
    """Build the living environment section of a geography's profile.

    Fetches distributions for youth living in households (electricity,
    toilet and water access, type of dwelling, type of area, crowding,
    internet access and living with parents) and derives headline figures
    from them. Which internet access indicator is reported depends on
    whether the current context year is 2011.

    :param geo: the geography to build the profile for
    :param session: database session passed through to ``get_stat_data``
    :param display_profile: unused in this function
    :param comparative: unused in this function
    :return: dict of distributions and headline figures
    """
    def household_stat(field, **options):
        """Fetch ``field`` for the 'Youth living in households' universe."""
        distribution, _ = get_stat_data(
            [field], geo, session,
            table_universe='Youth living in households',
            table_dataset='Census and Community Survey',
            **options)
        return distribution

    def headline(name, value):
        """Wrap a single value in the name/values structure."""
        return {"name": name, "values": {"this": value}}

    def this_value_or_none(distribution, key):
        """Read ``values.this`` for ``key``, or None if any level is absent."""
        return distribution.get(key, {}).get('values', {}).get('this', None)

    final_data = {}

    youth_electricity_access = household_stat(
        'electricity access', key_order=ELECTRICITY_ACCESS_KEY_ORDER)
    youth_toilet_access = household_stat(
        'toilet access', key_order=TOILET_ACCESS_KEY_ORDER)
    youth_water_access = household_stat(
        'water access', key_order=WATER_ACCESS_KEY_ORDER)
    youth_type_of_dwelling = household_stat(
        'type of dwelling', key_order=TYPE_OF_DWELLING_ORDER)

    # Combine the two informal categories. The figure stays None unless at
    # least one of them has a truthy value.
    informal_not_in_backyard = this_value_or_none(
        youth_type_of_dwelling, 'Informal not in backyard')
    informal_in_backyard = this_value_or_none(
        youth_type_of_dwelling, 'Informal in backyard')

    youth_dwelling_informal = None
    if not (not informal_not_in_backyard and not informal_in_backyard):
        youth_dwelling_informal = (informal_not_in_backyard or 0) + (informal_in_backyard or 0)

    uses_latest = current_context().get('year') == 'latest'
    type_of_area_order = TYPE_OF_AREA_ORDER_2016 if uses_latest else TYPE_OF_AREA_ORDER

    youth_type_of_area = household_stat(
        'type of area', key_order=type_of_area_order)
    youth_household_crowded = household_stat(
        'household crowded', key_order=HH_CROWDED_KEY_ORDER)

    # The releases have different indicators for internet access
    if str(current_context().get('year')) == '2011':
        youth_access_to_internet = household_stat(
            'access to internet',
            table_name='youth_access_to_internet_gender',
            key_order=INTERNET_ACCESS_ORDER)

        final_data['youth_no_access_to_internet'] = headline(
            "Of youth live in households with no access to internet",
            youth_access_to_internet['No access to internet']['values']['this'])
    else:
        youth_access_to_internet = household_stat(
            'access to internet', order_by='-total')

        final_data['youth_cell_phone_access_internet'] = headline(
            "Of youth access the internet through a cell phone",
            youth_access_to_internet['Cell phone']['values']['this'])

    youth_by_living_with_parents_status, _ = get_stat_data(
        ['living with parents'], geo, session,
        table_universe='Youth aged 15-19 living in households',
        table_dataset='Census and Community Survey',
        key_order=LIVING_WITH_PARENTS_KEY_ORDER)

    # Total of the categories that involve at least one parent.
    living_with_parent_keys = ('Both parents', 'Mother only', 'Father only')
    living_with_parents_stat = sum(
        entry['values']['this'] or 0
        for status, entry in youth_by_living_with_parents_status.iteritems()
        if status in living_with_parent_keys)

    final_data.update([
        ('youth_electricity_access', youth_electricity_access),
        ('youth_toilet_access', youth_toilet_access),
        ('youth_water_access', youth_water_access),
        ('youth_dwelling_informal', headline(
            "Of youth live in households that are informal dwellings (shacks)",
            youth_dwelling_informal)),
        ('youth_type_of_dwelling', youth_type_of_dwelling),
        ('youth_type_of_area', youth_type_of_area),
        ('youth_households_overcrowded', headline(
            "Of youth live in households that are overcrowded *",
            youth_household_crowded['Overcrowded']['values']['this'])),
        ('youth_household_crowded', youth_household_crowded),
        ('youth_access_to_internet', youth_access_to_internet),
        ('youth_living_with_parents', headline(
            "Of youth aged 15-19 live with at least one biological parent",
            living_with_parents_stat)),
        ('youth_by_living_with_parents_status',
         youth_by_living_with_parents_status),
    ])

    return final_data
Exemple #17
0
    def get_stat_data(self, geo, fields=None, key_order=None, percent=True, total=None, recode=None, year=None):
        """ Get a data dictionary for a place from this table.

        This fetches the values for each column in this table and returns a data
        dictionary for those values, with appropriate names and metadata.

        :param geo: the geography
        :param str or list fields: the columns to fetch stats for. By default, all columns except
                                   geo-related and the total column (if any) are used.
        :param list key_order: explicit ordering of the output, or None for the default order.
                               Default order is the order in +fields+ if given, otherwise
                               it's the natural column order from the DB. Every entry is read
                               from the fetched row by name, so entries must be column names
                               that are part of +fields+.
        :param bool percent: should we calculate percentages, or just include raw values?
        :param int total: the total value to use for percentages, name of a
                          field, or None to use the sum of all retrieved fields (default)
        :param dict recode: map from field names to strings to recode column names. Many fields
                            can be recoded to the same thing, their values will be summed.
                            A callable is also accepted; it is applied to every field name.
        :param str year: release year to use. None will try to use the current dataset context, and 'latest'
                         will use the latest release.

        :return: (data-dictionary, total)
        :raises ValueError: if a requested field, or a named total column, is not a column of this table
        """
        db_table = self.get_db_table(year=year or current_context().get('year'))
        model = db_table.model
        columns = self.columns(db_table)

        session = get_session()
        try:
            if fields is not None and not isinstance(fields, list):
                fields = [fields]
            if fields:
                for f in fields:
                    if f not in columns:
                        raise ValueError("Invalid field/column '%s' for table '%s'. Valid columns are: %s" % (
                            f, self.id, ', '.join(columns.keys())))
            else:
                # take a list copy so that removing the total column works on
                # our own list, whatever +columns.keys()+ returns
                fields = list(columns.keys())
                if self.total_column:
                    fields.remove(self.total_column)

            recode = recode or {}
            # change lambda to dicts
            if recode and not isinstance(recode, dict):
                recode = {f: recode(f) for f in fields}

            # is the total column valid?
            if isinstance(total, basestring) and total not in columns:
                raise ValueError("Total column '%s' isn't one of the columns for table '%s'. Valid columns are: %s" % (
                    total, self.id, ', '.join(columns.keys())))

            # table columns to fetch
            cols = [model.__table__.columns[c] for c in fields]

            # A named total column has to be fetched as well. Membership is
            # checked by name against +fields+ (+cols+ holds column objects, so
            # a string never matches one of them), and the real column object
            # is queried rather than a bare string.
            if isinstance(total, basestring) and total not in fields:
                cols.append(model.__table__.columns[total])

            # do the query. If this returns no data, row is None
            row = session\
                .query(*cols)\
                .filter(model.geo_level == geo.geo_level,
                        model.geo_code == geo.geo_code,
                        model.geo_version == geo.version)\
                .first()

            if row is None:
                row = ZeroRow()

            # what's our denominator?
            if total is None:
                # sum of all columns
                total = sum(getattr(row, f) or 0 for f in fields)
            elif isinstance(total, basestring):
                total = getattr(row, total)

            # Now build a data dictionary based on the columns in +row+.
            # Multiple columns may be recoded into one, so we have to
            # accumulate values as we go.
            results = OrderedDict()

            key_order = key_order or fields  # default key order is just the list of fields

            for field in key_order:
                val = getattr(row, field) or 0

                # recode the key for this field, default is to keep it the same
                key = recode.get(field, field)

                # set the recoded field name, noting that the key may already
                # exist if another column recoded to it
                field_info = results.setdefault(key, {'name': recode.get(field, columns[field]['name'])})

                if percent:
                    # sum up existing values, if any
                    val = val + field_info.get('numerators', {}).get('this', 0)
                    field_info['values'] = {'this': p(val, total)}
                    field_info['numerators'] = {'this': val}
                else:
                    # sum up existing values, if any
                    val = val + field_info.get('values', {}).get('this', 0)
                    field_info['values'] = {'this': val}

            add_metadata(results, self, db_table.active_release)
            return results, total
        finally:
            session.close()
Exemple #18
0
def get_service_delivery_profile(geo, session):
    """Build the service delivery section of the profile for a geography.

    Covers water source (plus water supplier for the latest release), refuse
    disposal, electricity and toilet facilities.

    :param geo: the geography to profile; ``geo.version`` decides whether the
                2011 electricity breakdown applies
    :param session: database session handed through to the data helpers
    :return: dict mapping profile keys to stat dictionaries
    """
    year = current_context().get("year")
    is_latest = year == "latest"
    is_2011_release = str(year) == "2011"
    # The per-use electricity breakdown is only computed for 2011 geographies
    # viewed in the 2011 release.
    has_elec_breakdown = geo.version == "2011" and is_2011_release

    # water source
    water_src_data, total_wsrc = get_stat_data(
        ["source of water"],
        geo,
        session,
        recode=SHORT_WATER_SOURCE_CATEGORIES,
        order_by="-total",
    )

    # water from a service provider
    total_water_sp = 0.0
    perc_water_sp = 0.0

    if is_latest:
        water_supplier_data, total_wspl = get_stat_data(
            ["supplier of water"],
            geo,
            session,
            recode=SHORT_WATER_SUPPLIER_CATEGORIES,
            order_by="-total",
        )

        for key in ("Service provider", "Water scheme"):
            if key in water_supplier_data:
                total_water_sp += water_supplier_data[key]["numerators"]["this"]

        perc_water_sp = percent(total_water_sp, total_wspl)
    elif "Service provider" in water_src_data:
        total_water_sp = water_src_data["Service provider"]["numerators"]["this"]
        perc_water_sp = percent(total_water_sp, total_wsrc)

    percentage_water_from_service_provider = {
        "name": "Are getting water from a regional or local service provider",
        "numerators": {"this": total_water_sp},
        "values": {"this": perc_water_sp},
    }

    # refuse disposal
    refuse_disp_data, total_ref = get_stat_data(
        ["refuse disposal"],
        geo,
        session,
        recode=SHORT_REFUSE_DISPOSAL_CATEGORIES,
        order_by="-total",
    )

    total_ref_sp = 0.0
    for k, v in refuse_disp_data.iteritems():
        if k.startswith("Service provider"):
            total_ref_sp += v["numerators"]["this"]

    sp_name_2011 = (
        "Are getting refuse disposal from a local authority or private company"
    )
    sp_name_2016 = "Are getting refuse disposal from a local authority, private company or community members"

    percentage_ref_disp_from_service_provider = {
        "name": sp_name_2011 if is_2011_release else sp_name_2016,
        "numerators": {"this": total_ref_sp},
        "values": {"this": percent(total_ref_sp, total_ref)},
    }

    # electricity
    if has_elec_breakdown:
        elec_attrs = [
            "electricity for cooking",
            "electricity for heating",
            "electricity for lighting",
        ]

        elec_table = get_datatable("electricityforcooking_electricityforheating_electr")
        objects = elec_table.get_rows_for_geo(geo, session)

        total_elec = 0.0
        total_some_elec = 0.0
        elec_access_data = {
            "total_all_elec": {
                "name": "Have electricity for everything",
                "numerators": {"this": 0.0},
            },
            "total_some_not_all_elec": {
                "name": "Have electricity for some things",
                "numerators": {"this": 0.0},
            },
            "total_no_elec": {"name": "No electricity", "numerators": {"this": 0.0}},
        }
        for obj in objects:
            total_elec += obj.total
            # a use counts as electrified unless its value starts with "no "
            uses = [not getattr(obj, attr).startswith("no ") for attr in elec_attrs]
            has_all = all(uses)
            has_some = any(uses)

            if has_some:
                total_some_elec += obj.total

            if has_all:
                bucket = "total_all_elec"
            elif has_some:
                bucket = "total_some_not_all_elec"
            else:
                bucket = "total_no_elec"
            elec_access_data[bucket]["numerators"]["this"] += obj.total

        set_percent_values(elec_access_data, total_elec)
        add_metadata(
            elec_access_data,
            elec_table,
            elec_table.get_release(year),
        )

    if is_latest:
        # We don't have this data for 2011
        elec_access, _ = get_stat_data(
            ["access to electricity"],
            geo,
            session,
            table_universe="Population",
            recode=ELECTRICITY_ACCESS_RECODE,
            order_by="-total",
        )

    # toilets
    toilet_data, total_toilet = get_stat_data(
        ["toilet facilities"],
        geo,
        session,
        exclude_zero=True,
        recode=COLLAPSED_TOILET_CATEGORIES,
        order_by="-total",
    )

    total_flush_toilet = 0.0
    total_no_toilet = 0.0
    for key, data in toilet_data.iteritems():
        if key.startswith("Flush") or key.startswith("Chemical"):
            total_flush_toilet += data["numerators"]["this"]
        if key == "None":
            total_no_toilet += data["numerators"]["this"]

    profile = {
        "water_source_distribution": water_src_data,
        "percentage_water_from_service_provider": percentage_water_from_service_provider,
        "refuse_disposal_distribution": refuse_disp_data,
        "percentage_ref_disp_from_service_provider": percentage_ref_disp_from_service_provider,
        "percentage_flush_toilet_access": {
            "name": "Have access to flush or chemical toilets",
            "numerators": {"this": total_flush_toilet},
            "values": {"this": percent(total_flush_toilet, total_toilet)},
        },
        "percentage_no_toilet_access": {
            "name": "Have no access to any toilets",
            "numerators": {"this": total_no_toilet},
            "values": {"this": percent(total_no_toilet, total_toilet)},
        },
        "toilet_facilities_distribution": toilet_data,
    }

    if is_latest:
        profile.update(
            {
                "water_supplier_distribution": water_supplier_data,
                "electricity_access": elec_access,
                "percentage_no_electricity_access": {
                    "name": "Have no access to electricity",
                    "numerators": elec_access["No access to electricity"]["numerators"],
                    "values": elec_access["No access to electricity"]["values"],
                },
            }
        )

    # Use the same guard that computed the electricity totals. Testing only
    # geo.version here raised a NameError for a 2011 geography viewed in any
    # other release, because the totals had never been assigned.
    if has_elec_breakdown:
        profile.update(
            {
                "percentage_electricity_access": {
                    "name": "Have electricity for at least one of cooking, heating or lighting",
                    "numerators": {"this": total_some_elec},
                    "values": {"this": percent(total_some_elec, total_elec)},
                },
                "electricity_access_distribution": elec_access_data,
            }
        )
    return profile
Exemple #19
0
def get_economics_profile(geo, session):
    """Assemble the economics section of the profile for a geography.

    The result holds the income distribution with its median bracket estimate
    (monthly for 2011 geographies, annual otherwise), employment status,
    sector type and internet access figures.

    :param geo: the geography to profile
    :param session: database session handed through to the data helpers
    :return: dict mapping profile keys to stat dictionaries
    """
    # income: pick the monthly or annual flavour, then run one shared query
    if geo.version == "2011":
        income_recode = COLLAPSED_MONTHLY_INCOME_CATEGORIES
        income_fields = ["employed individual monthly income"]
        income_estimates = ESTIMATED_MONTHLY_INCOME_CATEGORIES
        dist_key = "individual_income_distribution"
        median_key = "median_individual_income"
        median_label = "Average monthly income"
    else:
        income_recode = COLLAPSED_ANNUAL_INCOME_CATEGORIES
        income_fields = ["employed individual annual income"]
        income_estimates = ESTIMATED_ANNUAL_INCOME_CATEGORIES
        dist_key = "individual_annual_income_distribution"
        median_key = "median_annual_individual_income"
        median_label = "Average annual income"

    # distribution
    income_dist_data, _ = get_stat_data(
        income_fields,
        geo,
        session,
        exclude=["Not applicable"],
        recode=income_recode,
        key_order=income_recode.values(),
    )

    # median income: the median bracket is mapped to its estimated value
    median_bracket = calculate_median_stat(income_dist_data)
    profile = {
        dist_key: income_dist_data,
        median_key: {
            "name": median_label,
            "values": {"this": income_estimates[median_bracket]},
        },
    }

    # employment status
    employ_status, _ = get_stat_data(
        ["official employment status"],
        geo,
        session,
        exclude=["Age less than 15 years", "Not applicable"],
        order_by="official employment status",
        table_name="officialemploymentstatus",
    )

    # sector
    sector_dist_data, _ = get_stat_data(
        ["type of sector"],
        geo,
        session,
        exclude=["Not applicable"],
        order_by="type of sector",
    )

    profile["employment_status"] = employ_status
    profile["sector_type_distribution"] = sector_dist_data

    # access to internet
    if current_context().get("year") == "latest":
        internet_access_dist, _ = get_stat_data(
            ["access to internet"],
            geo,
            session,
            recode=INTERNET_ACCESS_RECODE,
            table_name="accesstointernet_2016",
        )
        profile["internet_access_distribution"] = internet_access_dist
        return profile

    # Other releases: the households with and without access are fetched
    # separately and added together to form the denominator.
    internet_access_dist, total_with_access = get_stat_data(
        ["access to internet"],
        geo,
        session,
        exclude=["No access to internet"],
        order_by="access to internet",
    )
    _, total_without_access = get_stat_data(
        ["access to internet"], geo, session, only=["No access to internet"]
    )
    total_households = total_with_access + total_without_access

    profile["internet_access_distribution"] = internet_access_dist
    profile["internet_access"] = {
        "name": "Households with internet access",
        "values": {"this": percent(total_with_access, total_households)},
        "numerators": {"this": total_with_access},
    }

    return profile
Exemple #20
0
    def get_stat_data(
        self,
        geo,
        fields=None,
        key_order=None,
        percent=True,
        total=None,
        recode=None,
        year=None,
    ):
        """ Get a data dictionary for a place from this table.

        This fetches the values for each column in this table and returns a data
        dictionary for those values, with appropriate names and metadata.

        :param geo: the geography
        :param str or list fields: the columns to fetch stats for. By default, all columns except
                                   geo-related and the total column (if any) are used.
        :param list key_order: explicit ordering of the output, or None for the default order.
                               Default order is the order in +fields+ if given, otherwise
                               it's the natural column order from the DB. Every entry is read
                               from the fetched row by name, so entries must be column names
                               that are part of +fields+.
        :param bool percent: should we calculate percentages, or just include raw values?
        :param int total: the total value to use for percentages, name of a
                          field, or None to use the sum of all retrieved fields (default)
        :param dict recode: map from field names to strings to recode column names. Many fields
                            can be recoded to the same thing, their values will be summed.
                            A callable is also accepted; it is applied to every field name.
        :param str year: release year to use. None will try to use the current dataset context, and 'latest'
                         will use the latest release.

        :return: (data-dictionary, total)
        :raises ValueError: if a requested field, or a named total column, is not a column of this table
        """
        db_table = self.get_db_table(year=year or current_context().get("year"))
        model = db_table.model
        columns = self.columns(db_table)

        session = get_session()
        try:
            if fields is not None and not isinstance(fields, list):
                fields = [fields]
            if fields:
                for f in fields:
                    if f not in columns:
                        raise ValueError(
                            "Invalid field/column '%s' for table '%s'. Valid columns are: %s"
                            % (f, self.id, ", ".join(columns.keys()))
                        )
            else:
                # take a list copy so that removing the total column works on
                # our own list, whatever +columns.keys()+ returns
                fields = list(columns.keys())
                if self.total_column:
                    fields.remove(self.total_column)

            recode = recode or {}
            # change lambda to dicts
            if recode and not isinstance(recode, dict):
                recode = {f: recode(f) for f in fields}

            # is the total column valid?
            if isinstance(total, basestring) and total not in columns:
                raise ValueError(
                    "Total column '%s' isn't one of the columns for table '%s'. Valid columns are: %s"
                    % (total, self.id, ", ".join(columns.keys()))
                )

            # table columns to fetch
            cols = [model.__table__.columns[c] for c in fields]

            # A named total column has to be fetched as well. Membership is
            # checked by name against +fields+ (+cols+ holds column objects, so
            # a string never matches one of them), and the real column object
            # is queried rather than a bare string.
            if isinstance(total, basestring) and total not in fields:
                cols.append(model.__table__.columns[total])

            # do the query. If this returns no data, row is None
            row = (
                session.query(*cols)
                .filter(
                    model.geo_level == geo.geo_level,
                    model.geo_code == geo.geo_code,
                    model.geo_version == geo.version,
                )
                .first()
            )

            if row is None:
                row = ZeroRow()

            # what's our denominator?
            if total is None:
                # sum of all columns
                total = sum(getattr(row, f) or 0 for f in fields)
            elif isinstance(total, basestring):
                total = getattr(row, total)

            # Now build a data dictionary based on the columns in +row+.
            # Multiple columns may be recoded into one, so we have to
            # accumulate values as we go.
            results = OrderedDict()

            key_order = (
                key_order or fields
            )  # default key order is just the list of fields

            for field in key_order:
                val = getattr(row, field) or 0

                # recode the key for this field, default is to keep it the same
                key = recode.get(field, field)

                # set the recoded field name, noting that the key may already
                # exist if another column recoded to it
                field_info = results.setdefault(
                    key, {"name": recode.get(field, columns[field]["name"])}
                )

                if percent:
                    # sum up existing values, if any
                    val = val + field_info.get("numerators", {}).get("this", 0)
                    field_info["values"] = {"this": p(val, total)}
                    field_info["numerators"] = {"this": val}
                else:
                    # sum up existing values, if any
                    val = val + field_info.get("values", {}).get("this", 0)
                    field_info["values"] = {"this": val}

            add_metadata(results, self, db_table.active_release)
            return results, total
        finally:
            session.close()
Exemple #21
0
def get_demographics_profile(geo, session, display_profile, comparative=False):
    """Build the youth demographics section of the profile for a geography.

    :param geo: the geography to profile; ``geo.square_kms`` is used for the
                population density figure when it is set
    :param session: database session handed through to ``get_stat_data``
    :param display_profile: not used by this function
    :param comparative: not used by this function
    :return: dict mapping profile keys to stat dictionaries
    """
    youth_pop_table = get_datatable('youth_population')
    # Percentages are required here: the 'numerators' and 'values' entries of
    # youth_pop are both read below. This used to pass the string 'False',
    # which is truthy and therefore enabled percentages by accident; say so
    # explicitly instead.
    youth_pop, pop_total = youth_pop_table.get_stat_data(
        geo, total='total_pop', percent=True)

    youth_age_group_data, _ = get_stat_data(
        ['age groups in 10 years'], geo, session,
        table_universe='Population',
        table_dataset='Census and Community Survey')

    youth_gender_data, _ = get_stat_data(
        ['gender'], geo, session,
        table_fields=['gender', 'population group'],
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        key_order=GENDER_ORDER)

    population_group_order = (POPULATION_GROUP_ORDER_2016
        if current_context().get('year') == 'latest'
        else POPULATION_GROUP_ORDER)

    youth_pop_group_data, _ = get_stat_data(
        ['population group'], geo, session,
        table_fields=['population group', 'gender'],
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        table_name='youth_population_group_gender',
        key_order=population_group_order)

    youth_language_data, _ = get_stat_data(
        ['language'], geo, session,
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        order_by='-total'
    )
    # first entry of the ordered result; presumably the largest group given
    # order_by='-total' -- TODO confirm against get_stat_data
    youth_language_most_spoken = youth_language_data[youth_language_data.keys()[0]]

    youth_province_birth_data, _ = get_stat_data(
        ['province of birth'], geo, session,
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        key_order=PROVINCE_ORDER)

    youth_region_birth_data, _ = get_stat_data(
        ['region of birth'], geo, session,
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        key_order=REGION_ORDER)

    # relabel the SADC entry with a trailing asterisk
    youth_region_birth_data['SADC']['name'] = 'SADC*'

    youth_citizenship_data, _ = get_stat_data(
        ['citizenship'], geo, session,
        table_universe='Youth',
        table_dataset='Census and Community Survey',
        key_order=CITIZENSHIP_ORDER)

    youth_pop_count = youth_pop['youth_pop']['numerators']['this']

    final_data = {
        'total_population': {
            "name": "People",
            "values": {"this": pop_total}
        },
        'youth_population_total': {
            "name": "Youth aged 15-24",
            "values": {"this": youth_pop_count}
        },
        'youth_population_perc': {
            "name": "Of the population are youth aged 15-24",
            "values": {"this": youth_pop['youth_pop']['values']['this']},
        },
        'youth_population_by_age_group': youth_age_group_data,
        'youth_population_by_gender': youth_gender_data,
        'youth_population_by_pop_group': youth_pop_group_data,
        'youth_language_most_spoken': youth_language_most_spoken,
        'youth_population_by_language': youth_language_data,
        'youth_born_in_sa': {
            "name": "Of the youth population were born in South Africa",
            "values": {"this": youth_region_birth_data['South Africa']['values']['this']},
        },
        'youth_by_province_of_birth': youth_province_birth_data,
        'youth_by_region_of_birth': youth_region_birth_data,
        'youth_sa_citizenship': {
            'name': 'of the youth population are South African citizens',
            'values': {'this': youth_citizenship_data['Yes']['values']['this']}
        },
        'youth_by_citizenship': youth_citizenship_data,
    }

    # The following info is displayed in the block over the map
    if geo.square_kms:
        final_data['population_density'] = {
            'name': "youth per square kilometre",
            'values': {"this": youth_pop_count / geo.square_kms}
        }

    return final_data