예제 #1
0
    def raw_data_for_geos(self, geos):
        """
        Load this table's values for a collection of geographies.

        Returns a dict keyed by '<geo_level>-<geo_code>'. Each value holds an
        'estimate' dict mapping column name to the value stored on the row,
        and an 'error' dict mapping every column name to 0. Geographies
        without a matching row keep both dicts empty.
        """
        def level_of(geo):
            return geo.level

        results = {}

        # groupby only merges adjacent items, so order by level first
        for level, members in groupby(sorted(geos, key=level_of), level_of):
            codes = [member.code for member in members]

            # every requested geography gets an entry, even without a row
            for code in codes:
                results['%s-%s' % (level, code)] = {'estimate': {}, 'error': {}}

            session = get_session()
            try:
                query = session.query(self.model)
                query = query.filter(self.model.c.geo_level == level)
                query = query.filter(self.model.c.geo_code.in_(codes))

                for row in query.all():
                    entry = results['%s-%s' % (level, row.geo_code)]
                    for column in self.columns.iterkeys():
                        entry['estimate'][column] = getattr(row, column)
                        entry['error'][column] = 0
            finally:
                session.close()

        return results
예제 #2
0
def get_census_profile(geo_code, geo_level):
    """
    Assemble the census profile for one geography.

    For every name in PROFILE_SECTIONS that has a matching module-level
    ``get_<section>_profile`` function, the section is built for the
    requested geography and then merged with the same section for each
    geography returned by get_summary_geo_info(). Selected distributions
    are finally condensed with group_remainder().
    """
    session = get_session()

    try:
        summary_levels = get_summary_geo_info(geo_code, geo_level, session)
        profile = {}
        module_scope = globals()

        for section in PROFILE_SECTIONS:
            builder_name = 'get_%s_profile' % section
            if builder_name not in module_scope:
                # no builder for this section in this module; leave it out
                continue

            build_section = module_scope[builder_name]
            section_data = build_section(geo_code, geo_level, session)
            profile[section] = section_data

            # fold the province and/or country figures into this profile
            for level, code in summary_levels:
                merge_dicts(section_data,
                            build_section(code, level, session),
                            level)

        # cosmetic: keep the largest groups and lump the rest together
        service_delivery = profile['service_delivery']
        group_remainder(service_delivery['water_source_distribution'])
        group_remainder(service_delivery['refuse_disposal_distribution'])
        group_remainder(service_delivery['toilet_facilities_distribution'], 5)
        group_remainder(profile['demographics']['language_distribution'], 7)

        return profile

    finally:
        session.close()
예제 #3
0
    def raw_data_for_geos(self, geos):
        """
        Load this table's values for a collection of geographies.

        Returns a dict keyed by '<geo_level>-<geo_code>'. Each value holds an
        'estimate' dict mapping column name to the value stored on the row,
        and an 'error' dict mapping every column name to 0. Geographies
        without a matching row keep both dicts empty.
        """
        def level_of(geo):
            return geo.level

        results = {}

        # groupby only merges adjacent items, so order by level first
        for level, members in groupby(sorted(geos, key=level_of), level_of):
            codes = [member.code for member in members]

            # every requested geography gets an entry, even without a row
            for code in codes:
                results['%s-%s' % (level, code)] = {'estimate': {}, 'error': {}}

            session = get_session()
            try:
                query = session.query(self.table)
                query = query.filter(self.table.c.geo_level == level)
                query = query.filter(self.table.c.geo_code.in_(codes))

                for row in query.all():
                    entry = results['%s-%s' % (level, row.geo_code)]
                    for column in self.columns.iterkeys():
                        entry['estimate'][column] = getattr(row, column)
                        entry['error'][column] = 0
            finally:
                session.close()

        return results
예제 #4
0
def get_locations_from_coords(longitude, latitude):
    '''
    Look up the ward containing a coordinate via the Wards API.

    Returns the serialized ward followed by whichever of its municipality,
    province and country are set, or an empty list when the API returns
    nothing or the ward is not in the database.
    '''
    matches = ward_search_api.search("%s,%s" % (latitude, longitude))
    if len(matches) == 0:
        return []

    # wards don't overlap, so at most one match is expected
    match = matches[0]

    session = get_session()
    try:
        ward = session.query(Ward).get(match.ward_code)
        if ward is None:
            return []

        # narrowest match first -- the reverse of a normal search;
        # filter(None, ...) drops any parent that is not set
        hierarchy = [ward, ward.municipality, ward.province, ward.country]
        return serialize_demarcations(filter(None, hierarchy))

    finally:
        session.close()
예제 #5
0
def main():
    """
    Import election votes from a CSV file into the Votes table.

    Each mapped row is given the district code of its municipality and has
    its section 24A and special vote counts set to None before being added.
    Everything is committed in a single transaction at the end; if anything
    fails the transaction is rolled back, and the session is always closed.
    """
    # NOTE: the input path is hard-coded; an earlier version required it as
    # the single command-line argument.
    filepath = '/Users/kooshag/Downloads/sina_1.csv'
    if not os.path.isabs(filepath):
        filepath = os.path.join(os.getcwd(), filepath)

    # create table if necessary
    Base.metadata.create_all(_engine, tables=[Votes.__table__])
    session = get_session()

    # only used for the progress display (presumably the expected row count)
    total = 474395
    try:
        for i, values in enumerate(open_elections_csv(filepath)):
            municipality = session.query(Municipality) \
                                  .get(values['municipality_code'])
            values['district_code'] = municipality.district_code
            values['section_24a_votes'] = None
            values['special_votes'] = None
            session.add(Votes(**values))
            if i % 1000 == 0:
                # push pending rows to the database and report progress
                session.flush()
                sys.stdout.write('\r%s of %s' % (i + 1, total))
                sys.stdout.flush()

        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()

    # only report success once the data has actually been committed
    print('\nDone')
예제 #6
0
def main():
    """
    Import election votes from a CSV file into the Votes table.

    Each mapped row is given the district code of its municipality and has
    its section 24A and special vote counts set to None before being added.
    Everything is committed in a single transaction at the end; if anything
    fails the transaction is rolled back, and the session is always closed.
    """
    # NOTE: the input path is hard-coded; an earlier version required it as
    # the single command-line argument.
    filepath = '/Users/kooshag/Downloads/sina_1.csv'
    if not os.path.isabs(filepath):
        filepath = os.path.join(os.getcwd(), filepath)

    # create table if necessary
    Base.metadata.create_all(_engine, tables=[Votes.__table__])
    session = get_session()

    # only used for the progress display (presumably the expected row count)
    total = 474395
    try:
        for i, values in enumerate(open_elections_csv(filepath)):
            municipality = session.query(Municipality) \
                                  .get(values['municipality_code'])
            values['district_code'] = municipality.district_code
            values['section_24a_votes'] = None
            values['special_votes'] = None
            session.add(Votes(**values))
            if i % 1000 == 0:
                # push pending rows to the database and report progress
                session.flush()
                sys.stdout.write('\r%s of %s' % (i + 1, total))
                sys.stdout.flush()

        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()

    # only report success once the data has actually been committed
    print('\nDone')
예제 #7
0
    def import_crimes(self):
        """
        Load rows from a CSV file into this table's model for self.geo_level.

        Column names are lower-cased. The 'geo_code' column is stored under
        '<geo_level>_code', 'total' is converted to int, and every other
        column is passed through unchanged. All rows are committed together;
        on failure the transaction is rolled back, and the session is always
        closed.
        """
        session = get_session()
        try:
            table = get_datatable(self.table_id)
            model = table.get_model(self.geo_level)
            geo_code_attr = '%s_code' % self.geo_level

            # NOTE(review): `filepath` is not defined in this method; it is
            # presumably a module-level name -- confirm.
            with open(filepath) as f:
                reader = csv.DictReader(f, delimiter=",")

                for row in reader:
                    args = {}
                    for key, val in row.iteritems():
                        key = key.lower()
                        if key == 'geo_code':
                            args[geo_code_attr] = val
                        elif key == 'total':
                            args['total'] = int(val)
                        else:
                            args[key] = val

                    session.add(model(**args))

            session.commit()
        except Exception:
            session.rollback()
            raise
        finally:
            session.close()
예제 #8
0
    def do(self):
        """
        Enrich the contacts of every project whose status is 'running bots'.

        Each worksheet record whose 'Status' is not 'Completed' is passed to
        DiscoverOrgClient.enrich(); every key/value of the returned record is
        written back to the record's row, in the column whose cell matches
        the key. The project is then marked 'in progress'.
        """
        print('Enriching projects...')

        client = DiscoverOrgClient()

        projects = Project.objects.filter(status='running bots')
        for project in projects:
            sheet, worksheet = get_session(project.url, project.worksheet)

            contacts = worksheet.get_all_records()
            # Track the sheet row by position rather than looking the record
            # up again after enrichment: a lookup fails when enrich() returns
            # a new record and picks the wrong row for duplicate records.
            # The offset of 2 is kept from the original (presumably 1-based
            # rows plus a header row).
            for row, contact in enumerate(contacts, start=2):
                if contact['Status'] == 'Completed':
                    continue

                enriched = client.enrich(contact)
                # a string result carries nothing to write back
                # (presumably an error message -- TODO confirm)
                if isinstance(enriched, str):
                    continue

                for k, v in enriched.items():
                    col = worksheet.find(k).col

                    worksheet.update_cell(row, col, v)

                    # pause between writes (presumably API rate limiting)
                    time.sleep(1)

                time.sleep(3)

            project.status = 'in progress'
            project.save()
예제 #9
0
def get_locations_from_coords(longitude, latitude):
    '''
    Look up the ward containing a coordinate via the Wards API.

    Returns the serialized ward followed by whichever of its municipality,
    province and country are set, or an empty list when the API returns
    nothing or the ward is not in the database.
    '''
    matches = ward_search_api.search("%s,%s" % (latitude, longitude))
    if len(matches) == 0:
        return []

    # wards don't overlap, so at most one match is expected
    match = matches[0]

    session = get_session()
    try:
        ward = session.query(Ward).get(match.ward_code)
        if ward is None:
            return []

        # narrowest match first -- the reverse of a normal search;
        # filter(None, ...) drops any parent that is not set
        hierarchy = [ward, ward.municipality, ward.province, ward.country]
        return serialize_demarcations(filter(None, hierarchy))

    finally:
        session.close()
예제 #10
0
    def import_crimes(self):
        """
        Load rows from a CSV file into this table's model for self.geo_level.

        Column names are lower-cased. The 'geo_code' column is stored under
        '<geo_level>_code', 'total' is converted to int, and every other
        column is passed through unchanged. All rows are committed together;
        on failure the transaction is rolled back, and the session is always
        closed.
        """
        session = get_session()
        try:
            table = get_datatable(self.table_id)
            model = table.get_model(self.geo_level)
            geo_code_attr = '%s_code' % self.geo_level

            # NOTE(review): `filepath` is not defined in this method; it is
            # presumably a module-level name -- confirm.
            with open(filepath) as f:
                reader = csv.DictReader(f, delimiter=",")

                for row in reader:
                    args = {}
                    for key, val in row.iteritems():
                        key = key.lower()
                        if key == 'geo_code':
                            args[geo_code_attr] = val
                        elif key == 'total':
                            args['total'] = int(val)
                        else:
                            args[key] = val

                    session.add(model(**args))

            session.commit()
        except Exception:
            session.rollback()
            raise
        finally:
            session.close()
예제 #11
0
def get_elections_profile(geo_code, geo_level):
    """
    Build the election results profile for one geography.

    The result has one entry per election in AVAILABLE_ELECTIONS, keyed by
    the election name lower-cased with spaces replaced by underscores. Each
    entry is merged with the same election's data for every geography
    returned by get_summary_geo_info(). Media coverage is added when the
    geography is a country.
    """
    profile = {}
    session = get_session()
    try:
        summary_levels = get_summary_geo_info(geo_code, geo_level, session)

        for election in AVAILABLE_ELECTIONS:
            key = election['name'].lower().replace(' ', '_')
            results = get_election_data(geo_code, geo_level, election,
                                        session)
            profile[key] = results

            # fold the province and/or country results into this profile
            for level, code in summary_levels:
                summary = get_election_data(code, level, election, session)
                merge_dicts(results, summary, level)

            # keep the 8 largest parties and lump the rest under 'Other'
            group_remainder(results['party_distribution'], 9)

        if geo_level == 'country':
            add_elections_media_coverage(profile)

        return profile
    finally:
        session.close()
예제 #12
0
def get_census_profile(geo_code, geo_level):
    """
    Assemble the census profile for one geography.

    For every name in PROFILE_SECTIONS that has a matching module-level
    ``get_<section>_profile`` function, the section is built for the
    requested geography and then merged with the same section for each
    geography returned by get_summary_geo_info(). The household roofing
    and wall material distributions are then condensed with
    group_remainder().
    """
    session = get_session()

    try:
        summary_levels = get_summary_geo_info(geo_code, geo_level, session)
        profile = {}
        module_scope = globals()

        for section in PROFILE_SECTIONS:
            builder_name = 'get_%s_profile' % section
            if builder_name not in module_scope:
                # no builder for this section in this module; leave it out
                continue

            build_section = module_scope[builder_name]
            section_data = build_section(geo_code, geo_level, session)
            profile[section] = section_data

            # fold the province and/or country figures into this profile
            for level, code in summary_levels:
                merge_dicts(section_data,
                            build_section(code, level, session),
                            level)

        # cosmetic: keep the largest groups and lump the rest together
        for distribution in ('roofing_material_distribution',
                             'wall_material_distribution'):
            group_remainder(profile['households'][distribution], 5)

        return profile

    finally:
        session.close()
예제 #13
0
def get_geography(geo_code, geo_level):
    """
    Fetch the geography model instance (Ward, Province, etc.) with the
    given code at the given level.

    Raises LocationNotFound when the level is not recognised or when no
    geography with that code exists.
    """
    session = get_session()

    try:
        models_by_level = {
            'ward': Ward,
            'district': District,
            'municipality': Municipality,
            'province': Province,
            'country': Country,
        }
        if geo_level not in models_by_level:
            raise LocationNotFound(geo_code)

        geo = session.query(models_by_level[geo_level]).get(geo_code)
        if geo:
            return geo

        raise LocationNotFound(geo_code)
    finally:
        session.close()
def get_elections_profile(geo_code, geo_level):
    """
    Build the election results profile for one geography.

    The result is an OrderedDict with one entry per election in ELECTIONS,
    keyed by the election name lower-cased with spaces replaced by
    underscores. Each entry is merged with the same election's data for
    every geography returned by get_summary_geo_info(). Media coverage is
    added when the geography is a country.
    """
    profile = OrderedDict()
    session = get_session()
    try:
        summary_levels = get_summary_geo_info(geo_code, geo_level, session)

        for election in ELECTIONS:
            key = election['name'].lower().replace(' ', '_')
            results = get_election_data(geo_code, geo_level, election, session)
            profile[key] = results

            # fold the province and/or country results into this profile
            for level, code in summary_levels:
                summary = get_election_data(code, level, election, session)
                merge_dicts(results, summary, level)

            # keep the 8 largest parties and lump the rest under 'Other'
            group_remainder(results['party_distribution'], 9)

        if geo_level == 'country':
            add_elections_media_coverage(profile)

        return profile
    finally:
        session.close()
예제 #15
0
    def children(self):
        """
        Return the geographies one level below this one.

        Queries the model for self.child_level for rows whose
        '<self.level>_code' attribute equals self.code. Returns an empty
        list when this geography has no child level.
        """
        if not self.child_level:
            return []

        session = get_session()
        try:
            child_model = get_geo_model(self.child_level)
            parent_code = getattr(child_model, '%s_code' % self.level)
            query = session.query(child_model).filter(parent_code == self.code)
            return query.all()
        finally:
            session.close()
예제 #16
0
    def children(self):
        """
        Return the geographies one level below this one.

        Queries the model for self.child_level for rows whose
        '<self.level>_code' attribute equals self.code. Returns an empty
        list when this geography has no child level.
        """
        if not self.child_level:
            return []

        session = get_session()
        try:
            child_model = get_geo_model(self.child_level)
            parent_code = getattr(child_model, '%s_code' % self.level)
            query = session.query(child_model).filter(parent_code == self.code)
            return query.all()
        finally:
            session.close()
    def store_values(self):
        session = get_session()
        province_codes = dict(
            (p.name, p.code) for p in session.query(Province))
        session.commit()

        # cache of the db models for each geo level
        models = {}
        count = 0

        for geo_name, values in self.read_rows():
            count += 1
            geo_level = self.determine_level(geo_name)

            print geo_level, geo_name

            if geo_level == 'province':
                code = province_codes[geo_name]
            elif geo_level == 'country':
                code = 'ZA'
            else:
                code = geo_name.split(':')[0]
            base_kwargs = {'%s_code' % geo_level: code}

            # get db model and create table if necessary
            if geo_level in models:
                db_model = models[geo_level]
            else:
                if self.table_name:
                    table_name = self.table_name + '_' + geo_level
                else:
                    table_name = None

                models[geo_level] = db_model = get_model_from_fields(
                    self.fields, geo_level, table_name)
                Base.metadata.create_all(_engine, tables=[db_model.__table__])

            for category, value in zip(self.categories, values):
                # prepare the dict of args to pass to the db model for this row
                kwargs = base_kwargs.copy()
                if value.strip() == '-':
                    value = '0'

                kwargs.update(
                    dict((f, v) for f, v in zip(self.fields, category)))
                kwargs['total'] = int(value.replace(',', ''))

                # create and add the row
                session.add(db_model(**kwargs))

            if count % 100 == 0:
                session.flush()

        session.commit()
        session.close()
예제 #18
0
def get_locations(search_term, geo_level=None, year="2011"):
    """
    Search geographies by name or code and return the serialized matches.

    A geography matches when its name starts with the search term or with
    "City of <search term>" (case-insensitive), or when its code equals the
    search term. Subplaces are matched on subplace and mainplace names and
    contribute their Ward. Only `geo_level` is searched when it is given,
    otherwise country, province, municipality and subplace are searched.
    At most 10 matches per level are collected and the first 10 after
    sorting are returned.

    Raises ValueError when `geo_level` is given but not in geo_levels.
    """
    if geo_level is not None and geo_level not in geo_levels:
        raise ValueError("Invalid geo_level: %s" % geo_level)

    session = get_session()
    try:
        levels = (
            [geo_level]
            if geo_level
            else ["country", "province", "municipality", "subplace"]
        )

        starts_with = search_term + "%"
        city_starts_with = "City of %s" % search_term + "%"

        found = set()

        for level in levels:
            # geo_level was validated above
            model = get_geo_model(level)

            if level == "subplace":
                # subplaces are reported through the ward they join to
                query = session.query(Ward).join(model)
                matches_term = or_(
                    model.subplace_name.ilike(starts_with),
                    model.subplace_name.ilike(city_starts_with),
                    model.mainplace_name.ilike(starts_with),
                    model.code == search_term,
                )
            else:
                query = session.query(model)
                matches_term = or_(
                    model.name.ilike(starts_with),
                    model.name.ilike(city_starts_with),
                    model.code == search_term.upper(),
                )

            query = query.filter(model.year == year).filter(matches_term)
            found.update(query.limit(10))

        rank = {Country: 4, Ward: 3, Municipality: 2, Province: 1}

        def sort_key(obj):
            # lowest rank first, then by name (or code when there is no name)
            return [rank[obj.__class__], getattr(obj, "name", getattr(obj, "code"))]

        ordered = sorted(found, key=sort_key)
        return serialize_demarcations(ordered[0:10])
    finally:
        session.close()
예제 #19
0
    def _build_model_from_fields(self, fields, table_name, geo_level=None):
        '''
        Build (or fetch from the cache) an ORM model for a set of census
        fields, and make sure its table exists in the database.

        :param list fields: the census fields in `api.models.tables.FIELD_TABLE_FIELDS`, e.g. ['highest educational level', 'type of sector']
        :param str table_name: the name of the database table
        :param str geo_level: one of the geographics levels defined in `api.base.geo_levels`, e.g. 'province', or None if the table doesn't use them
        :return: ORM model class with one String(128) column per field, an
        Integer 'total' column and, when `geo_level` is given, a
        '%(geo_level)s_code' column with ForeignKey('%(geo_level)s.code');
        otherwise 'geo_level' and 'geo_code' columns
        :rtype: Model
        '''
        if table_name in _census_table_models:
            return _census_table_models[table_name]

        # Column order matters: the geo columns come first so that the
        # compound primary key is created as
        #
        #  geo_level, geo_code, field, [field, field, ...]
        #
        # which lets postgresql use the leading key columns when looking up
        # values for a particular geography, without a secondary index.
        if geo_level:
            # primary/foreign key onto the geography table
            geo_columns = [
                Column('%s_code' % geo_level, String(10),
                       ForeignKey('%s.code' % geo_level),
                       primary_key=True, index=True),
            ]
        else:
            # generic geo id, part of the compound primary key
            geo_columns = [
                Column('geo_level', String(15), nullable=False, primary_key=True),
                Column('geo_code', String(10), nullable=False, primary_key=True),
            ]

        field_columns = [
            Column(field, String(128), primary_key=True) for field in fields
        ]
        value_column = Column('total', Integer, nullable=False)
        columns = geo_columns + field_columns + [value_column]

        # create the table model and remember it for later calls
        class Model(Base):
            __table__ = Table(table_name, Base.metadata, *columns)
        _census_table_models[table_name] = Model

        # ensure it exists in the DB
        session = get_session()
        try:
            Model.__table__.create(session.get_bind(), checkfirst=True)
        finally:
            session.close()

        return Model
예제 #20
0
    def counties_for_coordinates(self, lat, lng):
        """
        Return the County rows for the places found at a coordinate.

        Only places of type 'O04' are considered; their ids are matched
        against County.osm_area_id. Returns an empty list when there are
        no such places.
        """
        area_codes = []
        for place in self.places_for_coordinates(lat, lng):
            if place['type'] == 'O04':
                area_codes.append(place['id'])

        if not area_codes:
            return []

        session = get_session()
        try:
            query = session.query(County)
            query = query.filter(County.osm_area_id.in_(area_codes))
            return query.all()
        finally:
            session.close()
예제 #21
0
def get_locations(search_term, geo_level=None, year='2011'):
    """
    Search geographies by name or code and return the serialized matches.

    A geography matches when its name starts with the search term or with
    'City of <search term>' (case-insensitive), or when its code equals the
    search term. Subplaces are matched on subplace and mainplace names and
    contribute their Ward. Only `geo_level` is searched when it is given,
    otherwise country, province, municipality and subplace are searched.
    At most 10 matches per level are collected and the first 10 after
    sorting are returned.

    Raises ValueError when `geo_level` is given but not in geo_levels.
    """
    if geo_level is not None and geo_level not in geo_levels:
        raise ValueError('Invalid geo_level: %s' % geo_level)

    session = get_session()
    try:
        if geo_level:
            levels = [geo_level]
        else:
            levels = ['country', 'province', 'municipality', 'subplace']

        models_by_level = {
            'municipality': Municipality,
            'province': Province,
            'subplace': Subplace,
            'country': Country,
        }
        starts_with = search_term + '%'
        city_starts_with = 'City of %s' % search_term + '%'

        found = set()

        for level in levels:
            # geo_level was validated above
            model = models_by_level[level]

            if level == 'subplace':
                # subplaces are reported through the ward they join to
                query = session.query(Ward).join(model)
                matches_term = or_(
                    model.subplace_name.ilike(starts_with),
                    model.subplace_name.ilike(city_starts_with),
                    model.mainplace_name.ilike(starts_with),
                    model.code == search_term)
            else:
                query = session.query(model)
                matches_term = or_(
                    model.name.ilike(starts_with),
                    model.name.ilike(city_starts_with),
                    model.code == search_term.upper())

            query = query.filter(model.year == year).filter(matches_term)
            found.update(query.limit(10))

        rank = {Country: 4, Ward: 3, Municipality: 2, Province: 1}

        def sort_key(obj):
            # lowest rank first, then by name (or code when there is no name)
            return [rank[obj.__class__], getattr(obj, 'name', getattr(obj, 'code'))]

        ordered = sorted(found, key=sort_key)
        return serialize_demarcations(ordered[0:10])
    finally:
        session.close()
예제 #22
0
    def do(self):
        """
        Refresh the stored size and progress of every project from the
        status of its worksheet.
        """
        print('Syncing projects...')

        for project in Project.objects.all():
            # only the worksheet is needed here; the sheet itself is unused
            _sheet, worksheet = get_session(project.url, project.worksheet)
            project.size, project.progress = get_status(worksheet)
            project.save()
예제 #23
0
    def store_values(self):
        session = get_session()
        province_codes = dict((p.name, p.code) for p in session.query(Province))
        session.commit()

        # cache of the db models for each geo level
        models = {}
        count = 0

        for geo_name, values in self.read_rows():
            count += 1
            geo_level = self.determine_level(geo_name)

            print geo_level, geo_name

            if geo_level == 'province':
                code = province_codes[geo_name]
            elif geo_name == 'country':
                code = 'ZA'
            else:
                code = geo_name.split(':')[0]
            base_kwargs = {'%s_code' % geo_level: code}

            # get db model and create table if necessary
            if geo_level in models:
                db_model = models[geo_level]
            else:
                if self.table_name:
                    table_name = self.table_name + '_' + geo_level
                else:
                    table_name = None

                models[geo_level] = db_model = get_model_from_fields(self.fields, geo_level, table_name)
                Base.metadata.create_all(_engine, tables=[db_model.__table__])

            for category, value in zip(self.categories, values):
                # prepare the dict of args to pass to the db model for this row
                kwargs = base_kwargs.copy()
                if value.strip() == '-':
                    value = '0'

                kwargs.update(dict((f, v) for f, v in zip(self.fields, category)))
                kwargs['total'] = int(value.replace(',', ''))

                # create and add the row
                session.add(db_model(**kwargs))

            if count % 100 == 0:
                session.flush()

        session.commit()
        session.close()
    def store_values(self):
        session = get_session()
        county_codes = dict((p.name.upper().replace('-', ' '), p.code) for p in session.query(County))
        session.commit()

        # cache of the db models for each geo level
        models = {}
        count = 0

        for geo_level, geo_name, category, total in self.read_rows():
            count += 1

            print geo_level, geo_name, category, total

            if geo_level == 'county':
                code = county_codes[geo_name.upper()]
            elif geo_level == 'country':
                code = 'KE'
            else:
                raise ValueError(geo_level)

            base_kwargs = {'%s_code' % geo_level: code} if code else {}

            # get db model and create table if necessary
            if geo_level in models:
                db_model = models[geo_level]
            else:
                if self.table_name:
                    table_name = self.table_name + '_' + geo_level
                else:
                    table_name = None

                try:
                    models[geo_level] = db_model = get_model_from_fields(self.fields, geo_level, table_name)
                except ValueError as e:
                    raise ValueError('%s. Have you declared this field in a table in censusreporter/api/models/tables.py?' % e.message)
                Base.metadata.create_all(_engine, tables=[db_model.__table__])
                self.table_names.append(db_model.__table__.name)

            # prepare the dict of args to pass to the db model for this row
            kwargs = base_kwargs.copy()
            kwargs.update(dict((f, c) for f, c in zip(self.fields, category)))
            kwargs['total'] = total

            # create and add the row
            session.add(db_model(**kwargs))

            if count % 100 == 0:
                session.flush()

        session.commit()
        session.close()
예제 #25
0
    def create_project(self, user, url, worksheet):
        """
        Create a 'requested' Project for the given spreadsheet worksheet.

        The project's name and gid come from the sheet, its worksheet title
        from the opened worksheet, and its size and progress from
        get_status(). Returns the new project.
        """
        sheet, opened_worksheet = get_session(url, worksheet)
        size, progress = get_status(opened_worksheet)

        attributes = dict(
            user=user,
            name=sheet.title,
            status='requested',
            url=url,
            gid=sheet.id,
            worksheet=opened_worksheet.title,
            size=size,
            progress=progress,
        )
        return Project.objects.create(**attributes)
def open_elections_csv(filepath):
    """
    Yield one mapped dict per row of the elections CSV file.

    Each CSV column is looked up in field_mapper: element 0 of the entry is
    the output key and element 1, when not None, is a converter called with
    the raw value and a database session.

    The file and session are released when the generator finishes, is
    closed early, or fails part-way through, not only when it is exhausted.
    """
    with open(filepath) as f:
        session = get_session()
        try:
            for values in csv.DictReader(f):
                yield dict(
                    (field_mapper[k][0],
                     field_mapper[k][1](v, session)
                     if field_mapper[k][1] is not None else v)
                    for k, v in values.iteritems()
                )
        finally:
            session.close()
예제 #27
0
def open_elections_csv(filepath):
    """
    Yield one mapped dict per row of the elections CSV file.

    Each CSV column is looked up in field_mapper: element 0 of the entry is
    the output key and element 1, when not None, is a converter called with
    the raw value and a database session.

    The file and session are released when the generator finishes, is
    closed early, or fails part-way through, not only when it is exhausted.
    """
    # 'rU' (read with universal newlines); the previous '-rU' is not a
    # valid open() mode
    with open(filepath, 'rU') as f:
        session = get_session()
        try:
            for values in csv.DictReader(f):
                yield dict(
                    (field_mapper[k][0],
                     field_mapper[k][1](v, session)
                     if field_mapper[k][1] is not None else v)
                    for k, v in values.iteritems())
        finally:
            session.close()
예제 #28
0
def _hook_release_created(**kwargs):
    """
    Log a newly created release and notify every configured deploy hook.

    Does nothing unless ``created`` is truthy. For each URL in
    settings.DRYCC_DEPLOY_HOOK_URLS a POST is sent with the release details
    in the query string. When settings.DRYCC_DEPLOY_HOOK_SECRET_KEY is set,
    the Authorization header carries the HMAC-SHA1 hex digest of the full
    URL. Request failures are logged on the app and not raised.
    """
    if not kwargs.get('created'):
        return

    release = kwargs['instance']
    # append release lifecycle logs to the app
    release.app.log(release.summary)

    for deploy_hook in settings.DRYCC_DEPLOY_HOOK_URLS:
        params = {
            'app': release.app,
            'release': 'v{}'.format(release.version),
            'release_summary': release.summary,
            'sha': '',
            'user': release.owner,
        }
        if release.build is not None:
            params['sha'] = release.build.sha

        # the arguments are sorted because their order matters when
        # computing the HMAC auth secret
        query_string = urllib.parse.urlencode(sorted(params.items()))
        url = '{}?{}'.format(deploy_hook, query_string)

        headers = {}
        if settings.DRYCC_DEPLOY_HOOK_SECRET_KEY is not None:
            signature = hmac.new(
                settings.DRYCC_DEPLOY_HOOK_SECRET_KEY.encode('utf-8'),
                url.encode('utf-8'),
                hashlib.sha1)
            headers['Authorization'] = signature.hexdigest()

        try:
            get_session().post(url, headers=headers)
        except requests.RequestException as e:
            release.app.log(
                'An error occurred while sending the deploy hook to {}: {}'
                .format(deploy_hook, e), logging.ERROR)
        else:
            # report only the base URL, without the added query string
            release.app.log('Deploy hook sent to {}'.format(deploy_hook))
예제 #29
0
def get_crime_profile(geo_code, geo_level):
    """
    Build the crime profile for one geography.

    Returns a dict whose 'crime' entry is the crime breakdown for the
    geography, merged with the breakdown for every geography returned by
    get_summary_geo_info().
    """
    session = get_session()

    try:
        summary_levels = get_summary_geo_info(geo_code, geo_level, session)
        crime = get_crime_breakdown_profile(geo_code, geo_level, session)
        profile = {'crime': crime}

        # fold the province and/or country breakdowns into this profile
        for level, code in summary_levels:
            summary = get_crime_breakdown_profile(code, level, session)
            merge_dicts(crime, summary, level)

        return profile

    finally:
        session.close()
예제 #30
0
def get_elections_profile(geo_code, geo_level):
    """
    Build the election results profile for one geography.

    The result has one entry per election in AVAILABLE_ELECTIONS, stored
    under the 'key' value of that election's data. Media coverage is added
    when the geography is a country.

    Any KeyError raised while building the profile is reported as a
    ValueError naming the geo level.
    """
    profile = {}
    session = get_session()
    try:
        for election in AVAILABLE_ELECTIONS:
            results = get_election_data(geo_code, geo_level, election, session)
            profile[results['key']] = results

        if geo_level == 'country':
            add_elections_media_coverage(profile)

        return profile

    except KeyError:
        raise ValueError('Invalid geo_level: %s' % geo_level)
    finally:
        session.close()
예제 #31
0
def get_census_profile(geo_code, geo_level):
    """
    Build the full census profile for a geography.

    Every section in +PROFILE_SECTIONS+ (plus 'crime' for country and
    province levels) is built by the module-level function named
    ``get_<section>_profile``, if one exists, and then merged with the same
    section computed for each summary level.

    :param geo_code: code of the geography
    :param geo_level: level of the geography
    :return: dict mapping section name to that section's profile data
    """
    session = get_session()

    try:
        summary_levels = get_summary_geo_info(geo_code, geo_level, session)
        profile = {}

        section_names = list(PROFILE_SECTIONS)
        if geo_level in ('country', 'province'):
            section_names.append('crime')

        for name in section_names:
            builder_name = 'get_%s_profile' % name
            if builder_name not in globals():
                # no builder defined for this section; leave it out
                continue

            builder = globals()[builder_name]
            section = builder(geo_code, geo_level, session)
            profile[name] = section

            # merge the province and/or country profile into this one
            for level, code in summary_levels:
                merge_dicts(section, builder(code, level, session), level)

        # Tweaks to make the data nicer: each distribution below is handed
        # to group_remainder together with a count (5 or 7), applied in this
        # order.
        remainder_groups = (
            ('service_delivery', 'water_source_distribution', 5),
            ('service_delivery', 'refuse_disposal_distribution', 5),
            ('service_delivery', 'toilet_facilities_distribution', 5),
            ('demographics', 'language_distribution', 7),
            ('demographics', 'province_of_birth_distribution', 7),
            ('demographics', 'region_of_birth_distribution', 5),
            ('households', 'type_of_dwelling_distribution', 5),
            ('child_households', 'type_of_dwelling_distribution', 5),
        )
        for section_name, distribution, count in remainder_groups:
            group_remainder(profile[section_name][distribution], count)

        return profile

    finally:
        session.close()
예제 #32
0
def build_model_from_fields(fields, geo_level, table_name=None):
    '''
    Create (or fetch from cache) an ORM model for a set of census fields at
    one geographic level.

    :param list fields: the census fields in `api.models.tables.FIELD_TABLE_FIELDS`, e.g. ['highest educational level', 'type of sector']
    :param str geo_level: one of the geographics levels defined in `api.base.geo_levels`, e.g. 'province'
    :param str table_name: the name of the database table, if different from the default table
    :return: ORM model class with a non-null Integer 'total' column, one
    String(128) primary-key column per field, and a String(5)
    '%(geo_level)s_code' primary-key column that is a ForeignKey to
    '%(geo_level)s.code'
    :rtype: Model
    '''
    if table_name is None:
        table_name = get_table_name(fields, geo_level)
    if table_name in _census_table_models:
        # already built during this process; reuse it
        return _census_table_models[table_name]

    # column order in the table: total, the fields, then the geo code
    columns = [Column('total', Integer, nullable=False)]
    columns.extend(
        Column(field, String(128), primary_key=True) for field in fields)

    # foreign key to the geography table
    geo_column = Column('%s_code' % geo_level,
                        String(5),
                        ForeignKey('%s.code' % geo_level),
                        primary_key=True,
                        index=True)
    columns.append(geo_column)

    class Model(Base):
        __table__ = Table(table_name, Base.metadata, *columns)

    _census_table_models[table_name] = Model

    # make sure the backing table exists in the database
    session = get_session()
    try:
        bind = session.get_bind()
        Model.__table__.create(bind, checkfirst=True)
    finally:
        session.close()

    return Model
예제 #33
0
def get_geography(geo_code, geo_level):
    """
    Look up the geography model instance (Ward, Province, etc.) identified
    by +geo_level+ and +geo_code+.

    :raises LocationNotFound: if the level is unknown or no geography with
                              that code exists at that level
    """
    session = get_session()

    try:
        try:
            geo_model = get_geo_model(geo_level)
        except KeyError:
            raise LocationNotFound('Invalid level: %s' % geo_level)

        found = session.query(geo_model).get(geo_code)
        if found:
            return found

        raise LocationNotFound('Invalid level and code: %s-%s' % (geo_level, geo_code))
    finally:
        session.close()
def get_locations(search_term, levels=None, year='2009'):
    """
    Search for geographies whose name starts with +search_term+ or whose
    code equals it (upper-cased), optionally restricted to +levels+.

    :param str search_term: text to search for; surrounding whitespace is ignored
    :param str levels: comma-separated geo levels, or None for all levels
    :param str year: only geographies with this year are considered
    :return: list of at most 10 dicts (from +as_dict+), ordered by model
             class, then name, then code
    :raises ValueError: if +levels+ names an unknown geo level
    """
    if not levels:
        levels = geo_levels
    else:
        levels = levels.split(',')
        for level in levels:
            if level not in geo_levels:
                raise ValueError('Invalid geolevel: %s' % level)

    search_term = search_term.strip()
    session = get_session()
    try:
        matches = set()

        # run one query per level; every level was validated above
        for level in levels:
            model = get_geo_model(level)

            name_prefix = model.name.ilike(search_term + '%')
            exact_code = model.code == search_term.upper()
            query = session\
                .query(model)\
                .filter(model.year == year)\
                .filter(or_(name_prefix, exact_code))\
                .limit(10)

            matches.update(query)

        rank = {County: 0, Country: 1}
        ordered = sorted(matches, key=lambda o: [rank[o.__class__], o.name, o.code])

        return [o.as_dict() for o in ordered[0:10]]
    finally:
        session.close()
예제 #35
0
def get_census_profile(geo_code, geo_level):
    """
    Build the census profile, section by section, and merge in the same
    sections for each summary level.

    NOTE(review): the first statement below overwrites both arguments with
    'EC' / 'province', so whatever the caller passes is ignored and the
    profile is always built for that one geography. This looks like a
    debugging override; confirm whether it is still wanted.

    :param geo_code: code of the geography (currently overridden, see note)
    :param geo_level: level of the geography (currently overridden, see note)
    :return: dict mapping section name to that section's profile data
    """
    # NOTE(review): hard-coded override of the caller's arguments
    geo_code, geo_level = 'EC', 'province'
    session = get_session()

    try:
        geo_summary_levels = get_summary_geo_info(geo_code, geo_level, session)
        data = {}

        # 'crime' is only added for country and province levels
        sections = list(PROFILE_SECTIONS)
        if geo_level in ['country', 'province']:
            sections.append('crime')

        for section in sections:
            # each section is built by a module-level get_<section>_profile
            # function; sections without one are silently skipped
            function_name = 'get_%s_profile' % section
            if function_name in globals():
                func = globals()[function_name]
                data[section] = func(geo_code, geo_level, session)

                # get profiles for province and/or country
                for level, code in geo_summary_levels:
                    # merge summary profile into current geo profile
                    merge_dicts(data[section], func(code, level, session), level)

        # tweaks to make the data nicer
        # group_remainder is given each distribution and a count (5 or 7).
        # NOTE(review): the service_delivery, households and child_households
        # calls are commented out; only the demographics ones still run.
        #group_remainder(data['service_delivery']['water_source_distribution'], 5)
        #group_remainder(data['service_delivery']['refuse_disposal_distribution'], 5)
        #group_remainder(data['service_delivery']['toilet_facilities_distribution'], 5)
        group_remainder(data['demographics']['language_distribution'], 7)
        group_remainder(data['demographics']['province_of_birth_distribution'], 7)
        group_remainder(data['demographics']['region_of_birth_distribution'], 5)
        #group_remainder(data['households']['type_of_dwelling_distribution'], 5)
        #group_remainder(data['child_households']['type_of_dwelling_distribution'], 5)

        return data

    finally:
        # release the session whether or not the profile was built
        session.close()
예제 #36
0
def build_model_from_fields(fields, geo_level, table_name=None):
    '''
    Create (or fetch from cache) an ORM model for a set of census fields at
    one geographic level.

    :param list fields: the census fields in `api.models.tables.FIELD_TABLE_FIELDS`, e.g. ['highest educational level', 'type of sector']
    :param str geo_level: one of the geographics levels defined in `api.base.geo_levels`, e.g. 'province'
    :param str table_name: the name of the database table, if different from the default table
    :return: ORM model class with a non-null Integer 'total' column, one
    String(128) primary-key column per field, and a String(8)
    '%(geo_level)s_code' primary-key column that is a ForeignKey to
    '%(geo_level)s.code'
    :rtype: Model
    '''
    if table_name is None:
        table_name = get_table_name(fields, geo_level)
    if table_name in _census_table_models:
        # already built during this process; reuse it
        return _census_table_models[table_name]

    # column order in the table: total, the fields, then the geo code
    columns = [Column('total', Integer, nullable=False)]
    for field in fields:
        columns.append(Column(field, String(128), primary_key=True))

    # foreign key to the geography table
    code_column_name = '%s_code' % geo_level
    code_target = ForeignKey('%s.code' % geo_level)
    columns.append(
        Column(code_column_name, String(8), code_target,
               primary_key=True, index=True))

    class Model(Base):
        __table__ = Table(table_name, Base.metadata, *columns)

    _census_table_models[table_name] = Model

    # make sure the backing table exists in the database
    session = get_session()
    try:
        bind = session.get_bind()
        Model.__table__.create(bind, checkfirst=True)
    finally:
        session.close()

    return Model
예제 #37
0
    def import_districts(self):
        session = get_session()

        with open(filepath) as f:
            reader = csv.reader(f, delimiter=",")
            # skip header
            next(reader)

            for line in reader:
                province_code, name = line[0:2]
                code = self.district_name_to_code(name.lower())

                print name, code, province_code

                geo = PoliceDistrict()
                geo.name = name
                geo.code = code
                geo.year = '2005'
                geo.province_code = province_code

                session.add(geo)

        session.commit()
예제 #38
0
    def import_districts(self):
        session = get_session()

        with open(filepath) as f:
            reader = csv.reader(f, delimiter=",")
            # skip header
            next(reader)

            for line in reader:
                province_code, name = line[0:2]
                code = self.district_name_to_code(name.lower())

                print name, code, province_code

                geo = PoliceDistrict()
                geo.name = name
                geo.code = code
                geo.year = '2005'
                geo.province_code = province_code

                session.add(geo)

        session.commit()
예제 #39
0
    def get_stat_data(self,
                      geo_level,
                      geo_code,
                      fields=None,
                      key_order=None,
                      percent=True,
                      total=None,
                      recode=None):
        """ Build a data dictionary for one place from this table.

        The requested columns are read for the given geography and returned
        keyed by (optionally recoded) column name, together with the
        denominator that was used.

        :param str geo_level: the geographical level
        :param str geo_code: the geographical code
        :param str or list fields: the column(s) to fetch stats for. When empty or
                                   None, every column in +self.columns+ is used.
        :param key_order: explicit ordering of the fields to emit, or None to
                          use the order of +fields+.
        :param bool percent: when true, store percentages under 'values' and the
                             raw sums under 'numerators'; otherwise store raw
                             sums under 'values'.
        :param total: the denominator for percentages: a number, the name of a
                      column, or None to use the sum of all fetched fields.
        :param recode: dict mapping field names to new keys, or a callable
                       applied to each field name. Fields recoded to the same
                       key have their values summed.

        :return: (data-dictionary, total)
        :raises ValueError: if a field or the total column is not a column
                            of this table
        """

        session = get_session()
        try:
            # a single field name is wrapped into a list
            if not (fields is None or isinstance(fields, list)):
                fields = [fields]

            if not fields:
                fields = self.columns.keys()
            else:
                for name in fields:
                    if name not in self.columns:
                        raise ValueError(
                            "Invalid field/column '%s' for table '%s'. Valid columns are: %s"
                            % (name, self.id, ', '.join(self.columns.keys())))

            # a callable recode is expanded into an explicit mapping
            recode = recode or {}
            if recode and not isinstance(recode, dict):
                recode = {f: recode(f) for f in fields}

            # when the total is named by column, that column must exist
            total_is_column = isinstance(total, basestring)
            if total_is_column and total not in self.columns:
                raise ValueError(
                    "Total column '%s' isn't one of the columns for table '%s'. Valid columns are: %s"
                    % (total, self.id, ', '.join(self.columns.keys())))

            # table columns to fetch
            cols = [self.model.columns[c] for c in fields]
            if total_is_column and total not in cols:
                cols.append(total)

            # fetch the single row for this geography; None when there is no data
            query = session.query(*cols).filter(
                self.model.c.geo_level == geo_level,
                self.model.c.geo_code == geo_code)
            row = query.first()

            if row is None:
                row = ZeroRow()

            # work out the denominator
            if total is None:
                total = sum(getattr(row, f) or 0 for f in fields)
            elif total_is_column:
                total = getattr(row, total)

            # Several columns can be recoded to the same key, so values are
            # accumulated per key while walking the fields in order.
            results = OrderedDict()
            running_key = 'numerators' if percent else 'values'

            for field in (key_order or fields):
                val = getattr(row, field) or 0

                # recoded key and display name; both default to the field's own
                key = recode.get(field, field)
                label = recode.get(field, self.columns[field]['name'])

                # the entry may already exist if another column recoded to it
                info = results.setdefault(key, {'name': label})

                # add whatever has been accumulated for this key so far
                val = val + info.get(running_key, {}).get('this', 0)

                if percent:
                    info['values'] = {'this': p(val, total)}
                    info['numerators'] = {'this': val}
                else:
                    info['values'] = {'this': val}

            add_metadata(results, self)
            return results, total
        finally:
            session.close()
예제 #40
0
    "SPECIAL VOTES": ('special_votes', parse_integer),
    "VALID VOTES": ('valid_votes', parse_integer),
    "SPOILT VOTES": ('spoilt_votes', parse_integer),
    "% VOTER TURNOUT": ('voter_turnout', lambda val: float(val.rstrip('%'))),
}

# Script entry point: read vote rows from the CSV file named on the command
# line and map each row's columns through +field_mapper+.
if __name__ == '__main__':
    # exactly one argument is expected: the path of the file to import
    if len(sys.argv) != 2:
        raise ValueError("Requires 1 file path argument")
    filepath = sys.argv[1]
    # relative paths are resolved against the current working directory
    if not os.path.isabs(filepath):
        filepath = os.path.join(os.getcwd(), filepath)

    # create table if necessary
    Base.metadata.create_all(_engine, tables=[Votes.__table__])
    session = get_session()

    with open(filepath) as f:
        # NOTE(review): the standard library csv.DictReader takes no
        # `encoding` argument; presumably `csv` is a unicode-aware
        # replacement here -- confirm against the imports.
        reader = csv.DictReader(f, encoding='CP949')

        # NOTE(review): `total` and `i` are assigned but not read in the
        # lines visible here.
        total = 1064463
        i = 0
        for values in reader:
            # field_mapper[k] is a (target_name, converter) pair; the
            # converter is applied to the value unless it is None
            mapped_values = dict(
                (field_mapper[k][0], field_mapper[k][1]
                 (val) if field_mapper[k][1] is not None else val)
                for k, val in values.iteritems())
            # these three keys are always set to None for every row
            district_code = None
            mapped_values['district_code'] = district_code
            mapped_values['mec7_votes'] = None
            mapped_values['ballot_type'] = None
예제 #41
0
    def get_stat_data(self, geo_level, geo_code, fields=None, key_order=None,
                      percent=True, total=None, recode=None):
        """ Build a data dictionary for one place from this table.

        The requested columns are read for the given geography and returned
        keyed by (optionally recoded) column name, together with the
        denominator that was used.

        :param str geo_level: the geographical level
        :param str geo_code: the geographical code
        :param str or list fields: the column(s) to fetch stats for. When empty or
                                   None, every column in +self.columns+ is used.
        :param key_order: explicit ordering of the fields to emit, or None to
                          use the order of +fields+.
        :param bool percent: when true, store percentages under 'values' and the
                             raw sums under 'numerators'; otherwise store raw
                             sums under 'values'.
        :param total: the denominator for percentages: a number, the name of a
                      column, or None to use the sum of all fetched fields.
        :param recode: dict mapping field names to new keys, or a callable
                       applied to each field name. Fields recoded to the same
                       key have their values summed.

        :return: (data-dictionary, total)
        :raises ValueError: if a field or the total column is not a column
                            of this table
        """

        session = get_session()
        try:
            # a single field name is wrapped into a list
            if not (fields is None or isinstance(fields, list)):
                fields = [fields]

            if not fields:
                fields = self.columns.keys()
            else:
                for name in fields:
                    if name not in self.columns:
                        raise ValueError("Invalid field/column '%s' for table '%s'. Valid columns are: %s" % (
                            name, self.id, ', '.join(self.columns.keys())))

            # a callable recode is expanded into an explicit mapping
            recode = recode or {}
            if recode and not isinstance(recode, dict):
                recode = {f: recode(f) for f in fields}

            # when the total is named by column, that column must exist
            total_is_column = isinstance(total, basestring)
            if total_is_column and total not in self.columns:
                raise ValueError("Total column '%s' isn't one of the columns for table '%s'. Valid columns are: %s" % (
                    total, self.id, ', '.join(self.columns.keys())))

            # table columns to fetch
            cols = [self.model.columns[c] for c in fields]
            if total_is_column and total not in cols:
                cols.append(total)

            # fetch the single row for this geography; None when there is no data
            query = session.query(*cols).filter(
                self.model.c.geo_level == geo_level,
                self.model.c.geo_code == geo_code)
            row = query.first()

            if row is None:
                row = ZeroRow()

            # work out the denominator
            if total is None:
                total = sum(getattr(row, f) or 0 for f in fields)
            elif total_is_column:
                total = getattr(row, total)

            # Several columns can be recoded to the same key, so values are
            # accumulated per key while walking the fields in order.
            results = OrderedDict()
            running_key = 'numerators' if percent else 'values'

            for field in (key_order or fields):
                val = getattr(row, field) or 0

                # recoded key and display name; both default to the field's own
                key = recode.get(field, field)
                label = recode.get(field, self.columns[field]['name'])

                # the entry may already exist if another column recoded to it
                info = results.setdefault(key, {'name': label})

                # add whatever has been accumulated for this key so far
                val = val + info.get(running_key, {}).get('this', 0)

                if percent:
                    info['values'] = {'this': p(val, total)}
                    info['numerators'] = {'this': val}
                else:
                    info['values'] = {'this': val}

            add_metadata(results, self)
            return results, total
        finally:
            session.close()

def clean_str(var):
    """
    Return +var+ as a string, or the two-character marker backslash-N when
    +var+ is falsy.

    NOTE(review): every falsy value -- None, the empty string and also 0 --
    becomes the marker; confirm that a count of 0 is meant to be treated the
    same as a missing value.

    :param var: the value to render
    :return: ``str(var)`` for truthy values, otherwise a backslash followed by 'N'
    :rtype: str
    """
    if var:
        return str(var)
    # The backslash is escaped explicitly: a bare '\N' only works as an
    # accident of Python 2's lenient escapes and is a SyntaxError on Python 3.
    return '\\N'

if __name__ == '__main__':
    if len(sys.argv) != 2:
        raise ValueError("Requires 'electoral event' argument")
    election = sys.argv[1]

    Base.metadata.create_all(_engine, tables=[VoteSummary.__table__])

    session = get_session()

    try:
        total = (1 + session.query(Province).count() + session.query(District).count() + \
            session.query(Municipality).count() + session.query(Ward).count()) * 3
        counter = 0.0

        sys.stdout.write('COPY votesummary (geo_level, geo_code, electoral_event, party, '
                         'ballot_type, registered_voters, total_votes, mec7_votes, section_24a_votes, special_votes, '
                         'valid_votes, spoilt_votes, average_voter_turnout) FROM stdin;\n')

        for geo_model in (None, Province, District, Municipality, Ward):
            if geo_model is None:
                level = 'country'
                codes = [('ZA', )]
            else:
예제 #43
0
def get_locations(search_term, levels=None, year='2011'):
    """
    Search for geographies matching +search_term+, optionally restricted to
    +levels+.

    How a level is searched depends on the level:

    * 'subplace': wards joined to the subplace model, matched on subplace
      name, mainplace name or exact code
    * 'ward': matched on code prefix, or on ward number when the term (with
      the word "ward" removed) is an integer
    * any other level: matched on name prefix (also with a 'City of '
      prefix) or exact upper-cased code

    :param str search_term: text to search for; surrounding whitespace is ignored
    :param str levels: comma-separated geo levels, or None for the default
                       country, province, municipality, ward and subplace
    :param str year: only geographies with this year are considered
    :return: +serialize_demarcations+ applied to at most 10 matches, ordered
             by model class and then name (or code when there is no name)
    :raises ValueError: if +levels+ names an unknown geo level
    """
    if levels:
        levels = levels.split(',')
        for level in levels:
            if level not in geo_levels:
                raise ValueError('Invalid geolevel: %s' % level)
    else:
        levels = ['country', 'province', 'municipality', 'ward', 'subplace']

    search_term = search_term.strip()
    session = get_session()
    try:
        objects = set()

        # search at each level
        for level in levels:
            # already checked that geo_level is valid
            model = get_geo_model(level)

            if level == 'subplace':
                # check mainplace and subplace names
                objects.update(session
                    .query(Ward)
                    .join(model)
                    .filter(model.year == year)
                    .filter(or_(model.subplace_name.ilike(search_term + '%'),
                                model.subplace_name.ilike('City of %s' % search_term + '%'),
                                model.mainplace_name.ilike(search_term + '%'),
                                model.code == search_term))
                    .limit(10)
                )
            elif level == 'ward':
                # Remove the literal word "ward" from either end of the term.
                # str.strip('ward') must not be used for this: it strips any
                # of the characters w, a, r, d, so "dar" would collapse to ""
                # (matching every ward) and "ward 12a" would lose its "a".
                st = search_term.lower()
                if st.startswith('ward'):
                    st = st[len('ward'):]
                if st.endswith('ward'):
                    st = st[:-len('ward')]
                st = st.strip()

                filters = [model.code.like(st + '%')]
                try:
                    filters.append(model.ward_no == int(st))
                except ValueError:
                    # not a ward number; search on the code prefix only
                    pass

                objects.update(session
                    .query(model)
                    .filter(model.year == year)
                    .filter(or_(*filters))
                    .limit(10)
                )
            else:
                objects.update(session
                    .query(model)
                    .filter(model.year == year)
                    .filter(or_(model.name.ilike(search_term + '%'),
                                model.name.ilike('City of %s' % search_term + '%'),
                                model.code == search_term.upper()))
                    .limit(10)
                )

        # lower rank sorts first; ties are broken by name, or by code for
        # models without a name attribute
        order_map = {Country: 4, Ward: 3, Municipality: 2, Province: 1}
        objects = sorted(objects, key=lambda o: [order_map[o.__class__], getattr(o, 'name', getattr(o, 'code'))])

        return serialize_demarcations(objects[0:10])
    finally:
        session.close()
예제 #44
0
    def setup_columns(self):
        """
        Populate +self.total_column+ and +self.columns+ for use by +as_dict+
        and the data API.

        Each 'column' is actually a unique value for each of this table's +fields+.
        """
        self.build_models()

        # Columns are hierarchical but stored "flat", so insertion order
        # matters. The distinct field-value combinations are read in field
        # order and grouped one field at a time: every distinct value of the
        # first field gets a column, immediately followed by the columns for
        # the values of the next field seen with it, and so on.
        #
        # For example, a table with fields (age, sex) and the rows
        #
        #     5 years, male
        #     5 years, female
        #     10 years, male
        #     10 years, female
        #
        # gets an "<age>:" column at indent 1 for each age, each followed by
        # its sex columns at indent 2 (assuming column_id gives every path
        # its own id; equal ids overwrite one another).

        # map from column id to column info, starting with the total
        self.total_column = self.column_id([self.denominator_key or 'total'])
        self.columns = OrderedDict()
        self.columns[self.total_column] = {'name': 'Total', 'indent': 0}

        session = get_session()
        try:
            country_model = self.get_model('country')
            attrs = [getattr(country_model, name) for name in self.fields]

            # distinct combinations of all fields, ordered by those fields
            query = session.query(*attrs).order_by(*attrs).distinct()
            distinct_rows = query.all()

            def permute(indent, field_values, rows):
                # +indent+ is 1-based: it selects the field to group on
                field = self.fields[indent - 1]
                last = indent == len(self.fields)
                suffix = '' if last else ':'

                for val, group in groupby(rows, lambda r: getattr(r, field)):
                    # the path of values so far determines the column id
                    path = field_values + [val]
                    col_id = self.column_id(path)

                    if col_id == self.total_column:
                        col_indent = 0
                    else:
                        col_indent = indent

                    self.columns[col_id] = {
                        'name': capitalize(val) + suffix,
                        'indent': col_indent,
                    }

                    if not last:
                        # descend into the rows sharing this value
                        permute(indent + 1, path, group)

            permute(1, [], distinct_rows)
        finally:
            session.close()
예제 #45
0
    def raw_data_for_geos(self, geos):
        """
        Pull raw data for a list of geo models.

        Returns a dict mapping the geo ids to table data. Each id has the
        form '<level>-<code>' and maps to a dict with 'estimate' and 'error'
        entries, both keyed by column id. Every error value is set to 0.
        Geos with no rows keep empty 'estimate' and 'error' dicts.
        """
        data = {}

        # group by geo level
        # (sorted first because groupby only groups consecutive items)
        geos = sorted(geos, key=lambda g: g.level)
        for geo_level, geos in groupby(geos, lambda g: g.level):
            model = self.get_model(geo_level)
            geo_codes = [g.code for g in geos]

            # name of the column holding the geo code: per-level tables use
            # '<level>_code', shared tables use 'geo_code'
            if self.table_per_level:
                code = '%s_code' % geo_level
            else:
                code = 'geo_code'
            code_attr = getattr(model, code)

            # initial values
            for geo_code in geo_codes:
                data['%s-%s' % (geo_level, geo_code)] = {
                    'estimate': {},
                    'error': {}
                }

            session = get_session()
            try:
                # rebound for each geo further down; permute() writes into
                # whatever it refers to at call time
                geo_values = None
                fields = [getattr(model, f) for f in self.fields]
                # one summed row per (geo code, field values) combination,
                # ordered so that the groupby calls below see each group as
                # one consecutive run
                rows = session\
                    .query(code_attr,
                           func.sum(model.total).label('total'),
                           *fields)\
                    .group_by(code_attr, *fields)\
                    .order_by(code_attr, *fields)\
                    .filter(code_attr.in_(geo_codes))

                # shared tables hold several levels, so restrict to this one
                if not self.table_per_level:
                    rows = rows.filter(model.geo_level == geo_level)

                rows = rows.all()

                def permute(level, field_keys, rows):
                    # Recursively walk the fields, starting at index +level+,
                    # recording a count for every column id along the way.
                    # Returns the total for +rows+, or the denominator row's
                    # count when self.denominator_key is set.
                    field = self.fields[level]
                    total = 0
                    denominator = 0

                    for key, rows in groupby(rows,
                                             lambda r: getattr(r, field)):
                        new_keys = field_keys + [key]
                        col_id = self.column_id(new_keys)

                        if level + 1 < len(self.fields):
                            # more fields to go: the count is the nested total
                            count = permute(level + 1, new_keys, rows)
                        else:
                            # we've bottomed out
                            count = sum(row.total for row in rows)

                            if self.denominator_key and self.denominator_key == key:
                                # this row must be used as the denominator total,
                                # rather than as an entry in the table
                                denominator = count
                                continue

                        total += count
                        geo_values['estimate'][col_id] = count
                        geo_values['error'][col_id] = 0

                    # with a denominator key, the denominator replaces the sum
                    if self.denominator_key:
                        total = denominator

                    return total

                # rows for each geo
                for geo_code, geo_rows in groupby(rows,
                                                  lambda r: getattr(r, code)):
                    # point permute() at this geo's result dict
                    geo_values = data['%s-%s' % (geo_level, geo_code)]
                    total = permute(0, [], geo_rows)

                    # total
                    geo_values['estimate'][self.total_column] = total
                    geo_values['error'][self.total_column] = 0

            finally:
                session.close()

        return data
예제 #46
0
    def _build_model_from_fields(self, fields, table_name, geo_level=None):
        '''
        Create (or fetch from cache) an ORM model for arbitrary census
        fields, and make sure its table exists in the database.

        :param list fields: the census fields in `api.models.tables.FIELD_TABLE_FIELDS`, e.g. ['highest educational level', 'type of sector']
        :param str table_name: the name of the database table
        :param str geo_level: one of the geographics levels defined in `api.base.geo_levels`, e.g. 'province', or None if the table doesn't use them
        :return: ORM model class with one String(128) primary-key column per
        field, a non-null Integer 'total' column, and either a
        '%(geo_level)s_code' ForeignKey('%(geo_level)s.code') column or, when
        geo_level is not given, 'geo_level' and 'geo_code' columns
        :rtype: Model
        '''
        if table_name in _census_table_models:
            return _census_table_models[table_name]

        # The geo-related columns deliberately come first so that SQLAlchemy
        # creates the compound primary key in this order:
        #
        #  geo_level, geo_code, field, [field, field, ...]
        #
        # That lets postgresql use the leading elements of the primary key
        # when looking up values for a particular geography, which saves
        # having to create a secondary index.
        if geo_level:
            # primary/foreign key onto the geography table
            geo_columns = [
                Column('%s_code' % geo_level,
                       String(10),
                       ForeignKey('%s.code' % geo_level),
                       primary_key=True,
                       index=True),
            ]
        else:
            # compound primary key on the geo id and the fields
            geo_columns = [
                Column('geo_level',
                       String(15),
                       nullable=False,
                       primary_key=True),
                Column('geo_code',
                       String(10),
                       nullable=False,
                       primary_key=True),
            ]

        # one key column per census field, then the value column last
        field_columns = [
            Column(field, String(128), primary_key=True) for field in fields
        ]
        value_column = Column('total', Integer, nullable=False)
        table_args = geo_columns + field_columns + [value_column]

        # create the table model
        class Model(Base):
            __table__ = Table(table_name, Base.metadata, *table_args)

        _census_table_models[table_name] = Model

        # ensure it exists in the DB
        session = get_session()
        try:
            bind = session.get_bind()
            Model.__table__.create(bind, checkfirst=True)
        finally:
            session.close()

        return Model
예제 #47
0
    def setup_columns(self):
        """
        Build the ordered column index used by +as_dict+ and the data API.

        A 'column' is one distinct combination of values of this table's
        +fields+, plus one entry for every prefix of such a combination
        (the rollups). The result is stored on +self.columns+ as an
        OrderedDict mapping column id -> {'name': ..., 'indent': ...}, and
        the id of the overall total column is stored on +self.total_column+.

        The hierarchy is flattened depth-first, so insertion order matters:
        each parent value appears once, directly followed by its children.
        For a table with fields (age, sex) whose distinct rows come back as

            10 years, female
            10 years, male
            5 years, female
            5 years, male

        the columns are created in this order (indent shown by nesting):

            Total
            10 years:
              female
              male
            5 years:
              female
              male

        Display names are passed through capitalize(); names of non-leaf
        columns get a trailing ':'.
        """
        self.build_models()

        # The total column always comes first. Its id is derived from the
        # denominator key when the table has one, otherwise from 'total'.
        self.total_column = self.column_id([self.denominator_key or 'total'])
        self.columns = OrderedDict(
            [(self.total_column, {'name': 'Total', 'indent': 0})])

        session = get_session()
        try:
            country_model = self.get_model('country')
            field_attrs = [getattr(country_model, name) for name in self.fields]

            # Every distinct combination of field values, read from the
            # 'country' level model and sorted so groupby() below sees equal
            # values next to each other.
            distinct_rows = (
                session.query(*field_attrs)
                .order_by(*field_attrs)
                .distinct()
                .all())

            def add_level(depth, prefix, level_rows):
                # +depth+ is 1-based: it is both the indent of the columns
                # created here and (minus one) the index of the field that
                # is grouped on.
                field_name = self.fields[depth - 1]
                is_leaf = depth == len(self.fields)
                suffix = '' if is_leaf else ':'

                def field_value(row):
                    return getattr(row, field_name)

                for value, children in groupby(level_rows, field_value):
                    # the column id is computed from the full path of values
                    path = prefix + [value]
                    column_id = self.column_id(path)

                    # An entry whose id equals the total column's id replaces
                    # the 'Total' entry and is kept at indent 0.
                    self.columns[column_id] = {
                        'name': capitalize(value) + suffix,
                        'indent': 0 if column_id == self.total_column else depth,
                    }

                    if is_leaf:
                        continue
                    add_level(depth + 1, path, children)

            add_level(1, [], distinct_rows)
        finally:
            session.close()
예제 #48
0
    def raw_data_for_geos(self, geos):
        """
        Fetch this table's raw values for a list of geo models.

        Each geo must expose +level+ and +code+. The geos are processed one
        geo level at a time, with one query per level.

        Returns a dict keyed by '<level>-<code>'. Every requested geo gets an
        entry of the form

            {'estimate': {column_id: count, ...},
             'error': {column_id: 0, ...}}

        Errors are always 0. A geo with no rows in the table keeps two empty
        dicts (not even the total column is set).

        When +self.denominator_key+ is set, the leaf rows whose key equals it
        are not stored as a column of their own; their summed value is used
        as the total instead of the sum of the other columns.
        """
        def level_of(geo):
            return geo.level

        data = {}

        # groupby() only merges adjacent items, so sort by level first
        for geo_level, level_geos in groupby(sorted(geos, key=level_of), level_of):
            model = self.get_model(geo_level)
            geo_codes = [geo.code for geo in level_geos]

            # name of the column holding the geo code; it depends on whether
            # each level has its own table
            code = ('%s_code' % geo_level) if self.table_per_level else 'geo_code'
            code_attr = getattr(model, code)

            # start every requested geo off with empty values
            data.update(
                ('%s-%s' % (geo_level, geo_code), {'estimate': {}, 'error': {}})
                for geo_code in geo_codes)

            session = get_session()
            try:
                field_attrs = [getattr(model, name) for name in self.fields]

                # Sum the totals per (geo code, field values...) and order the
                # result the same way, which is what the nested groupby()
                # calls below rely on.
                query = session.query(
                    code_attr,
                    func.sum(model.total).label('total'),
                    *field_attrs)
                query = query.group_by(code_attr, *field_attrs)
                query = query.order_by(code_attr, *field_attrs)
                query = query.filter(code_attr.in_(geo_codes))

                if not self.table_per_level:
                    # shared table: restrict to the level being processed
                    query = query.filter(model.geo_level == geo_level)

                rows = query.all()

                def tally(depth, key_path, group_rows, values):
                    # Record the counts for field number +depth+ (0-based) of
                    # one geo into +values+ and return the total for the rows.
                    field_name = self.fields[depth]
                    at_leaf = depth + 1 >= len(self.fields)
                    running = 0
                    denominator = 0

                    def field_value(row):
                        return getattr(row, field_name)

                    for key, matching in groupby(group_rows, field_value):
                        path = key_path + [key]
                        col_id = self.column_id(path)

                        if at_leaf:
                            # we've bottomed out
                            count = sum(row.total for row in matching)

                            if self.denominator_key and self.denominator_key == key:
                                # this row supplies the denominator total and
                                # is not an entry in the table itself
                                denominator = count
                                continue
                        else:
                            count = tally(depth + 1, path, matching, values)

                        running += count
                        values['estimate'][col_id] = count
                        values['error'][col_id] = 0

                    return denominator if self.denominator_key else running

                def geo_code_of(row):
                    return getattr(row, code)

                # one run of rows per geo
                for geo_code, geo_rows in groupby(rows, geo_code_of):
                    geo_values = data['%s-%s' % (geo_level, geo_code)]
                    grand_total = tally(0, [], geo_rows, geo_values)

                    # overall total for this geo
                    geo_values['estimate'][self.total_column] = grand_total
                    geo_values['error'][self.total_column] = 0

            finally:
                session.close()

        return data