def raw_data_for_geos(self, geos): data = {} # group by geo level geos = sorted(geos, key=lambda g: g.level) for geo_level, geos in groupby(geos, lambda g: g.level): geo_codes = [g.code for g in geos] # initial values for geo_code in geo_codes: data['%s-%s' % (geo_level, geo_code)] = { 'estimate': {}, 'error': {} } session = get_session() try: geo_values = None rows = session\ .query(self.model)\ .filter(self.model.c.geo_level == geo_level)\ .filter(self.model.c.geo_code.in_(geo_codes))\ .all() for row in rows: geo_values = data['%s-%s' % (geo_level, row.geo_code)] for col in self.columns.iterkeys(): geo_values['estimate'][col] = getattr(row, col) geo_values['error'][col] = 0 finally: session.close() return data
def get_census_profile(geo_code, geo_level):
    """
    Build the full census profile dict for a geography, one key per
    section in PROFILE_SECTIONS, with comparative values for the
    geography's parents (province/country) merged in.
    """
    session = get_session()
    try:
        geo_summary_levels = get_summary_geo_info(geo_code, geo_level, session)
        data = {}

        for section in PROFILE_SECTIONS:
            # each section is produced by a get_<section>_profile function
            # looked up dynamically in this module's globals
            function_name = 'get_%s_profile' % section
            if function_name in globals():
                func = globals()[function_name]
                data[section] = func(geo_code, geo_level, session)

                # get profiles for province and/or country
                for level, code in geo_summary_levels:
                    # merge summary profile into current geo profile
                    merge_dicts(data[section], func(code, level, session), level)

        # tweaks to make the data nicer
        # show 3 largest groups on their own and group the rest as 'Other'
        group_remainder(data['service_delivery']['water_source_distribution'])
        group_remainder(data['service_delivery']['refuse_disposal_distribution'])
        group_remainder(data['service_delivery']['toilet_facilities_distribution'], 5)
        group_remainder(data['demographics']['language_distribution'], 7)

        return data
    finally:
        session.close()
def raw_data_for_geos(self, geos):
    """
    Fetch raw column values for the given geographies.

    Returns a dict keyed on '<geo_level>-<geo_code>', each value being
    {'estimate': {column: value}, 'error': {column: 0}}.
    """
    data = {}

    # groupby() needs its input sorted on the grouping key
    ordered = sorted(geos, key=lambda g: g.level)
    for level, group in groupby(ordered, lambda g: g.level):
        codes = [g.code for g in group]

        # seed empty entries so geos absent from the DB still appear
        for code in codes:
            data['%s-%s' % (level, code)] = {'estimate': {}, 'error': {}}

        session = get_session()
        try:
            query = session.query(self.table) \
                .filter(self.table.c.geo_level == level) \
                .filter(self.table.c.geo_code.in_(codes))
            for row in query.all():
                entry = data['%s-%s' % (level, row.geo_code)]
                for col in self.columns.iterkeys():
                    entry['estimate'][col] = getattr(row, col)
                    # no margin-of-error data for this table
                    entry['error'][col] = 0
        finally:
            session.close()

    return data
def get_locations_from_coords(longitude, latitude):
    '''
    Calls the Wards API to get the single ward containing the coordinates
    and returns the serialized ward, municipality, province and country,
    narrowest match first. Returns [] when nothing matches.
    '''
    matches = ward_search_api.search("%s,%s" % (latitude, longitude))
    if not matches:
        return []

    # wards don't overlap, so the first (only) hit is the one we want
    hit = matches[0]

    session = get_session()
    try:
        ward = session.query(Ward).get(hit.ward_code)
        if ward is None:
            return []

        # reverse of a normal search: narrowest location first
        chain = [ward, ward.municipality, ward.province, ward.country]
        chain = [obj for obj in chain if obj]  # drop missing parents
        return serialize_demarcations(chain)
    finally:
        session.close()
def main():
    """
    Load election result rows from a CSV into the Votes table.

    NOTE(review): the command-line argument check has been disabled and the
    input path is hard-coded to a developer's local file -- this looks like a
    temporary debugging hack; confirm before shipping. Original check was:
    if len(sys.argv) != 2: raise ValueError("Requires 1 file path argument")
    """
    filepath = '/Users/kooshag/Downloads/sina_1.csv'
    if not os.path.isabs(filepath):
        filepath = os.path.join(os.getcwd(), filepath)

    # create table if necessary
    Base.metadata.create_all(_engine, tables=[Votes.__table__])

    session = get_session()
    # hard-coded row count, used only for the progress display -- TODO confirm
    total = 474395

    for i, values in enumerate(open_elections_csv(filepath)):
        # derive the district from the row's municipality
        values['district_code'] = session.query(Municipality) \
            .get(values['municipality_code']) \
            .district_code
        # columns not present in this data set
        values['section_24a_votes'] = None
        values['special_votes'] = None
        session.add(Votes(**values))

        # flush periodically and show progress
        if i % 1000 == 0:
            session.flush()
            sys.stdout.write('\r%s of %s' % (i + 1, total))
            sys.stdout.flush()

    print '\nDone'
    session.commit()
    session.close()
def import_crimes(self):
    """
    Import crime rows from a CSV into the datatable model for this
    importer's geo level.

    Each CSV 'geo_code' column is mapped to '<geo_level>_code', 'total'
    is coerced to int, and all other columns are passed through as-is
    (with lower-cased keys).
    """
    session = get_session()
    try:
        table = get_datatable(self.table_id)
        model = table.get_model(self.geo_level)
        geo_code_attr = '%s_code' % self.geo_level

        # NOTE(review): `filepath` is not defined in this method; presumably a
        # module-level constant or should be an attribute -- confirm.
        with open(filepath) as f:
            reader = csv.DictReader(f, delimiter=",")
            for row in reader:
                args = {}
                for key, val in row.iteritems():
                    key = key.lower()
                    if key == 'geo_code':
                        args[geo_code_attr] = val
                    elif key == 'total':
                        args['total'] = int(val)
                    else:
                        args[key] = val

                session.add(model(**args))

        session.commit()
    finally:
        # bug fix: the session was previously never closed, leaking the
        # DB connection (especially on exceptions)
        session.close()
def do(self):
    """
    Enrich contacts of all 'running bots' projects via DiscoverOrg and
    write the enriched fields back into each project's worksheet.
    """
    print('Enriching projects...')
    # NOTE(review): local `do` shadows the method name -- works, but confusing
    do = DiscoverOrgClient()
    projects = Project.objects.filter(status='running bots')
    for project in projects:
        sheet, worksheet = get_session(project.url, project.worksheet)
        contacts = worksheet.get_all_records()
        for contact in contacts:
            if contact['Status'] != 'Completed':
                contact = do.enrich(contact)
                # enrich() returns a str on failure -- skip those rows
                if type(contact) is str:
                    continue
            else:
                # already completed: nothing to write back
                continue
            for k, v in contact.items():
                # +2: 1 for the header row, 1 because sheets are 1-indexed
                row = contacts.index(contact) + 2
                col = worksheet.find(k).col
                worksheet.update_cell(row, col, v)
                time.sleep(1)  # stay under the sheets API rate limit
            time.sleep(3)
        project.status = 'in progress'
        project.save()
def get_elections_profile(geo_code, geo_level):
    """
    Build the elections profile for a geography: one section per election
    in AVAILABLE_ELECTIONS, with comparative parent-geo values merged in,
    plus media coverage at country level.
    """
    data = {}
    session = get_session()
    try:
        geo_summary_levels = get_summary_geo_info(geo_code, geo_level, session)
        for election in AVAILABLE_ELECTIONS:
            # e.g. 'National 2014' -> 'national_2014'
            section = election['name'].lower().replace(' ', '_')
            data[section] = get_election_data(geo_code, geo_level, election, session)

            # get profiles for province and/or country
            for level, code in geo_summary_levels:
                # merge summary profile into current geo profile
                merge_dicts(data[section], get_election_data(code, level, election, session), level)

            # tweaks to make the data nicer
            # show 8 largest parties on their own and group the rest as 'Other'
            group_remainder(data[section]['party_distribution'], 9)

        if geo_level == 'country':
            add_elections_media_coverage(data)

        return data
    finally:
        session.close()
def get_census_profile(geo_code, geo_level):
    """
    Build the census profile dict for a geography, one key per section in
    PROFILE_SECTIONS, with comparative parent-geo (province/country)
    values merged in.
    """
    session = get_session()
    try:
        geo_summary_levels = get_summary_geo_info(geo_code, geo_level, session)
        data = {}

        for section in PROFILE_SECTIONS:
            # sections are produced by get_<section>_profile functions
            # resolved dynamically from this module's globals
            function_name = 'get_%s_profile' % section
            if function_name in globals():
                func = globals()[function_name]
                data[section] = func(geo_code, geo_level, session)

                # get profiles for province and/or country
                for level, code in geo_summary_levels:
                    # merge summary profile into current geo profile
                    merge_dicts(data[section], func(code, level, session), level)

        # tweaks to make the data nicer
        # show X largest groups on their own and group the rest as 'Other'
        group_remainder(data['households']['roofing_material_distribution'], 5)
        group_remainder(data['households']['wall_material_distribution'], 5)

        return data
    finally:
        session.close()
def get_geography(geo_code, geo_level):
    """
    Get a geography model (Ward, Province, etc.) for this geography, or
    raise LocationNotFound if it doesn't exist.
    """
    level_models = {
        'ward': Ward,
        'district': District,
        'municipality': Municipality,
        'province': Province,
        'country': Country,
    }

    session = get_session()
    try:
        model = level_models.get(geo_level)
        if model is None:
            # unknown level is reported the same way as an unknown code
            raise LocationNotFound(geo_code)

        geo = session.query(model).get(geo_code)
        if not geo:
            raise LocationNotFound(geo_code)
        return geo
    finally:
        session.close()
def get_elections_profile(geo_code, geo_level):
    """
    Build the elections profile for a geography: one ordered section per
    election in ELECTIONS, with comparative parent-geo values merged in,
    plus media coverage at country level.
    """
    results = OrderedDict()
    session = get_session()
    try:
        summary_levels = get_summary_geo_info(geo_code, geo_level, session)

        for election in ELECTIONS:
            # e.g. 'National 2014' -> 'national_2014'
            key = election['name'].lower().replace(' ', '_')
            results[key] = get_election_data(geo_code, geo_level, election, session)

            # fold in the comparative province/country figures
            for level, code in summary_levels:
                merge_dicts(results[key],
                            get_election_data(code, level, election, session),
                            level)

            # keep the 8 biggest parties, lump the rest under 'Other'
            group_remainder(results[key]['party_distribution'], 9)

        if geo_level == 'country':
            add_elections_media_coverage(results)

        return results
    finally:
        session.close()
def children(self):
    """Return the geographies one level below this one, or [] if none."""
    if not self.child_level:
        return []

    session = get_session()
    try:
        child_model = get_geo_model(self.child_level)
        # children point back at us via their '<our_level>_code' column
        parent_code_col = getattr(child_model, '%s_code' % self.level)
        return session.query(child_model).filter(parent_code_col == self.code).all()
    finally:
        session.close()
def store_values(self):
    """
    Persist the parsed rows into per-geo-level census tables, creating
    the table models (and DB tables) lazily as new levels are seen.
    """
    session = get_session()
    # map province name -> code so rows can reference provinces by name
    province_codes = dict(
        (p.name, p.code) for p in session.query(Province))
    session.commit()

    # cache of the db models for each geo level
    models = {}
    count = 0
    for geo_name, values in self.read_rows():
        count += 1
        geo_level = self.determine_level(geo_name)
        print geo_level, geo_name

        if geo_level == 'province':
            code = province_codes[geo_name]
        elif geo_level == 'country':
            code = 'ZA'
        else:
            # other levels embed the code as '<code>: <name>'
            code = geo_name.split(':')[0]

        base_kwargs = {'%s_code' % geo_level: code}

        # get db model and create table if necessary
        if geo_level in models:
            db_model = models[geo_level]
        else:
            if self.table_name:
                table_name = self.table_name + '_' + geo_level
            else:
                table_name = None
            models[geo_level] = db_model = get_model_from_fields(
                self.fields, geo_level, table_name)
            Base.metadata.create_all(_engine, tables=[db_model.__table__])

        for category, value in zip(self.categories, values):
            # prepare the dict of args to pass to the db model for this row
            kwargs = base_kwargs.copy()
            # '-' marks a missing/zero cell in the source data
            if value.strip() == '-':
                value = '0'
            kwargs.update(
                dict((f, v) for f, v in zip(self.fields, category)))
            # strip thousands separators before converting
            kwargs['total'] = int(value.replace(',', ''))

            # create and add the row
            session.add(db_model(**kwargs))

        if count % 100 == 0:
            session.flush()

    session.commit()
    session.close()
def get_locations(search_term, geo_level=None, year="2011"):
    """
    Search for geographies matching search_term, optionally limited to one
    geo level, and return up to 10 serialized matches ordered country,
    ward, municipality, province.
    """
    if geo_level is not None and geo_level not in geo_levels:
        raise ValueError("Invalid geo_level: %s" % geo_level)

    session = get_session()
    try:
        if geo_level:
            levels = [geo_level]
        else:
            levels = ["country", "province", "municipality", "subplace"]

        objects = set()

        # search at each level
        for level in levels:
            # already checked that geo_level is valid
            model = get_geo_model(level)

            if level == "subplace":
                # check mainplace and subplace names; matches resolve to Wards
                objects.update(
                    session.query(Ward)
                    .join(model)
                    .filter(model.year == year)
                    .filter(
                        or_(
                            model.subplace_name.ilike(search_term + "%"),
                            model.subplace_name.ilike("City of %s" % search_term + "%"),
                            model.mainplace_name.ilike(search_term + "%"),
                            model.code == search_term,
                        )
                    )
                    .limit(10)
                )
            else:
                objects.update(
                    session.query(model)
                    .filter(model.year == year)
                    .filter(
                        or_(
                            model.name.ilike(search_term + "%"),
                            model.name.ilike("City of %s" % search_term + "%"),
                            model.code == search_term.upper(),
                        )
                    )
                    .limit(10)
                )

        # order by model priority, then name (falling back to code for
        # models without a name attribute)
        order_map = {Country: 4, Ward: 3, Municipality: 2, Province: 1}
        objects = sorted(objects, key=lambda o: [order_map[o.__class__], getattr(o, "name", getattr(o, "code"))])

        return serialize_demarcations(objects[0:10])
    finally:
        session.close()
def _build_model_from_fields(self, fields, table_name, geo_level=None):
    '''
    Generates an ORM model for arbitrary census fields by geography.

    :param list fields: the census fields in
        `api.models.tables.FIELD_TABLE_FIELDS`, e.g.
        ['highest educational level', 'type of sector']
    :param str table_name: the name of the database table
    :param str geo_level: one of the geographics levels defined in
        `api.base.geo_levels`, e.g. 'province', or None if the table
        doesn't use them
    :return: ORM model class containing the given fields with type
        String(128), a 'total' field with type Integer and
        '%(geo_level)s_code' with type ForeignKey('%(geo_level)s.code')
    :rtype: Model
    '''
    # models are cached per table name; return the existing one if built
    if table_name in _census_table_models:
        return _census_table_models[table_name]

    # We build this array in a particular order, with the geo-related fields
    # first, to ensure that SQLAlchemy creates the underlying table with the
    # compound primary key columns in the correct order:
    #
    #  geo_level, geo_code, field, [field, field, ...]
    #
    # This means postgresql will use the first two elements of the compound
    # primary key -- geo_level and geo_code -- when looking up values for a
    # particular geograhy. This saves us from having to create a secondary
    # index.
    table_args = []

    if geo_level:
        # primary/foreign keys
        table_args.append(Column('%s_code' % geo_level, String(10),
                                 ForeignKey('%s.code' % geo_level),
                                 primary_key=True, index=True))
    else:
        # will form a compound primary key on the fields, and the geo id
        table_args.append(Column('geo_level', String(15), nullable=False,
                                 primary_key=True))
        table_args.append(Column('geo_code', String(10), nullable=False,
                                 primary_key=True))

    # Now add the columns
    table_args.extend(Column(field, String(128), primary_key=True)
                      for field in fields)

    # and the value column
    table_args.append(Column('total', Integer, nullable=False))

    # create the table model
    class Model(Base):
        __table__ = Table(table_name, Base.metadata, *table_args)

    _census_table_models[table_name] = Model

    # ensure it exists in the DB
    session = get_session()
    try:
        Model.__table__.create(session.get_bind(), checkfirst=True)
    finally:
        session.close()

    return Model
def counties_for_coordinates(self, lat, lng):
    """Return the County rows whose admin area contains (lat, lng), or []."""
    places = self.places_for_coordinates(lat, lng)
    # 'O04' appears to be the place type for county-level admin areas
    # -- TODO confirm against the places API docs
    ids = [place['id'] for place in places if place['type'] == 'O04']
    if not ids:
        return []

    session = get_session()
    try:
        query = session.query(County).filter(County.osm_area_id.in_(ids))
        return query.all()
    finally:
        session.close()
def get_locations(search_term, geo_level=None, year='2011'):
    """
    Search for geographies matching search_term, optionally limited to one
    geo level, and return up to 10 serialized matches ordered country,
    ward, municipality, province.
    """
    if geo_level is not None and geo_level not in geo_levels:
        raise ValueError('Invalid geo_level: %s' % geo_level)

    session = get_session()
    try:
        if geo_level:
            levels = [geo_level]
        else:
            levels = ['country', 'province', 'municipality', 'subplace']

        objects = set()

        # search at each level
        for level in levels:
            # already checked that geo_level is valid
            model = {
                'municipality': Municipality,
                'province': Province,
                'subplace': Subplace,
                'country': Country,
            }[level]

            if level == 'subplace':
                # check mainplace and subplace names; matches resolve to Wards
                objects.update(session
                               .query(Ward)
                               .join(model)
                               .filter(model.year == year)
                               .filter(or_(model.subplace_name.ilike(search_term + '%'),
                                           model.subplace_name.ilike('City of %s' % search_term + '%'),
                                           model.mainplace_name.ilike(search_term + '%'),
                                           model.code == search_term))
                               .limit(10)
                               )
            else:
                objects.update(session
                               .query(model)
                               .filter(model.year == year)
                               .filter(or_(model.name.ilike(search_term + '%'),
                                           model.name.ilike('City of %s' % search_term + '%'),
                                           model.code == search_term.upper()))
                               .limit(10)
                               )

        # order by model priority, then name (falling back to code for
        # models without a name attribute)
        order_map = {Country: 4, Ward: 3, Municipality: 2, Province: 1}
        objects = sorted(objects, key=lambda o: [order_map[o.__class__], getattr(o, 'name', getattr(o, 'code'))])

        return serialize_demarcations(objects[0:10])
    finally:
        session.close()
def do(self):
    """Refresh size and progress for every project from its worksheet."""
    print('Syncing projects...')
    for project in Project.objects.all():
        sheet, worksheet = get_session(project.url, project.worksheet)
        project.size, project.progress = get_status(worksheet)
        project.save()
def store_values(self): session = get_session() province_codes = dict((p.name, p.code) for p in session.query(Province)) session.commit() # cache of the db models for each geo level models = {} count = 0 for geo_name, values in self.read_rows(): count += 1 geo_level = self.determine_level(geo_name) print geo_level, geo_name if geo_level == 'province': code = province_codes[geo_name] elif geo_name == 'country': code = 'ZA' else: code = geo_name.split(':')[0] base_kwargs = {'%s_code' % geo_level: code} # get db model and create table if necessary if geo_level in models: db_model = models[geo_level] else: if self.table_name: table_name = self.table_name + '_' + geo_level else: table_name = None models[geo_level] = db_model = get_model_from_fields(self.fields, geo_level, table_name) Base.metadata.create_all(_engine, tables=[db_model.__table__]) for category, value in zip(self.categories, values): # prepare the dict of args to pass to the db model for this row kwargs = base_kwargs.copy() if value.strip() == '-': value = '0' kwargs.update(dict((f, v) for f, v in zip(self.fields, category))) kwargs['total'] = int(value.replace(',', '')) # create and add the row session.add(db_model(**kwargs)) if count % 100 == 0: session.flush() session.commit() session.close()
def store_values(self):
    """
    Persist the parsed Kenya census rows into per-geo-level tables,
    creating the table models (and DB tables) lazily per level.
    Only 'county' and 'country' levels are supported.
    """
    session = get_session()
    # county names in the source use spaces; normalise dashes when mapping
    # name -> code
    county_codes = dict((p.name.upper().replace('-', ' '), p.code)
                        for p in session.query(County))
    session.commit()

    # cache of the db models for each geo level
    models = {}
    count = 0
    for geo_level, geo_name, category, total in self.read_rows():
        count += 1
        print geo_level, geo_name, category, total

        if geo_level == 'county':
            code = county_codes[geo_name.upper()]
        elif geo_level == 'country':
            code = 'KE'
        else:
            raise ValueError(geo_level)

        base_kwargs = {'%s_code' % geo_level: code} if code else {}

        # get db model and create table if necessary
        if geo_level in models:
            db_model = models[geo_level]
        else:
            if self.table_name:
                table_name = self.table_name + '_' + geo_level
            else:
                table_name = None
            try:
                models[geo_level] = db_model = get_model_from_fields(self.fields, geo_level, table_name)
            except ValueError as e:
                raise ValueError('%s. Have you declared this field in a table in censusreporter/api/models/tables.py?' % e.message)
            Base.metadata.create_all(_engine, tables=[db_model.__table__])
            self.table_names.append(db_model.__table__.name)

        # prepare the dict of args to pass to the db model for this row
        kwargs = base_kwargs.copy()
        kwargs.update(dict((f, c) for f, c in zip(self.fields, category)))
        kwargs['total'] = total

        # create and add the row
        session.add(db_model(**kwargs))

        if count % 100 == 0:
            session.flush()

    session.commit()
    session.close()
def create_project(self, user, url, worksheet):
    """Create and return a Project row describing the given spreadsheet."""
    sheet, worksheet = get_session(url, worksheet)
    size, progress = get_status(worksheet)
    return Project.objects.create(
        user=user,
        name=sheet.title,
        status='requested',
        url=url,
        gid=sheet.id,
        worksheet=worksheet.title,
        size=size,
        progress=progress,
    )
def open_elections_csv(filepath):
    """
    Generator yielding one dict per CSV row, with keys/values mapped
    through field_mapper (key renamed; value converted when a converter
    is declared).

    Resources are now released in a finally block, so the file and DB
    session are closed even if the consumer abandons iteration early
    (previously cleanup only ran after full exhaustion).
    """
    f = open(filepath)
    session = get_session()
    try:
        reader = csv.DictReader(f)
        for values in reader:
            mapped_values = dict(
                (field_mapper[k][0],
                 field_mapper[k][1](v, session) if field_mapper[k][1] is not None else v)
                for k, v in values.iteritems()
            )
            yield mapped_values
    finally:
        session.close()
        f.close()
def open_elections_csv(filepath):
    """
    Generator yielding one dict per CSV row, with keys/values mapped
    through field_mapper (key renamed; value converted when a converter
    is declared).
    """
    # bug fix: mode was '-rU', which is not a valid mode string and makes
    # open() raise ValueError; universal-newline read mode is 'rU'
    f = open(filepath, 'rU')
    session = get_session()
    try:
        reader = csv.DictReader(f)
        for values in reader:
            mapped_values = dict(
                (field_mapper[k][0],
                 field_mapper[k][1](v, session) if field_mapper[k][1] is not None else v)
                for k, v in values.iteritems())
            yield mapped_values
    finally:
        # also close in a finally so early abandonment doesn't leak
        session.close()
        f.close()
def _hook_release_created(**kwargs):
    """
    Signal handler: when a release is created, log its summary to the app
    and POST a notification to each configured deploy-hook URL, optionally
    signed with an HMAC-SHA1 Authorization header.
    """
    if kwargs.get('created'):
        release = kwargs['instance']
        # append release lifecycle logs to the app
        release.app.log(release.summary)

        for deploy_hook in settings.DRYCC_DEPLOY_HOOK_URLS:
            url = deploy_hook
            params = {
                'app': release.app,
                'release': 'v{}'.format(release.version),
                'release_summary': release.summary,
                'sha': '',
                'user': release.owner,
            }
            if release.build is not None:
                params['sha'] = release.build.sha

            # order of the query arguments is important when computing
            # the HMAC auth secret
            params = sorted(params.items())
            url += '?{}'.format(urllib.parse.urlencode(params))

            headers = {}
            if settings.DRYCC_DEPLOY_HOOK_SECRET_KEY is not None:
                # sign the full URL (including the sorted query string)
                headers['Authorization'] = hmac.new(
                    settings.DRYCC_DEPLOY_HOOK_SECRET_KEY.encode('utf-8'),
                    url.encode('utf-8'),
                    hashlib.sha1).hexdigest()

            try:
                get_session().post(url, headers=headers)
                # just notify with the base URL, disregard the added URL query
                release.app.log('Deploy hook sent to {}'.format(deploy_hook))
            except requests.RequestException as e:
                # best-effort: a failed hook is logged, never fatal
                release.app.log(
                    'An error occurred while sending the deploy hook to {}: {}'
                    .format(deploy_hook, e), logging.ERROR)
def get_crime_profile(geo_code, geo_level):
    """
    Build the crime profile for a geography, with comparative values for
    its parent geographies (province/country) merged in.
    """
    session = get_session()
    try:
        summary_levels = get_summary_geo_info(geo_code, geo_level, session)
        crime = get_crime_breakdown_profile(geo_code, geo_level, session)

        # overlay the comparative figures for each parent geography
        for level, code in summary_levels:
            merge_dicts(crime,
                        get_crime_breakdown_profile(code, level, session),
                        level)

        return {'crime': crime}
    finally:
        session.close()
def get_elections_profile(geo_code, geo_level):
    """
    Build the elections profile for a geography, keyed on each election's
    own 'key', plus media coverage at country level.

    Raises ValueError for an unknown geo_level (surfaced as a KeyError
    from the data lookup).
    """
    session = get_session()
    results = {}
    try:
        for election in AVAILABLE_ELECTIONS:
            details = get_election_data(geo_code, geo_level, election, session)
            results[details['key']] = details

        if geo_level == 'country':
            add_elections_media_coverage(results)

        return results
    except KeyError:
        # an unknown level surfaces as a KeyError deep in the lookup
        raise ValueError('Invalid geo_level: %s' % geo_level)
    finally:
        session.close()
def get_census_profile(geo_code, geo_level):
    """
    Build the census profile dict for a geography, one key per section
    (PROFILE_SECTIONS plus 'crime' for country/province), with
    comparative parent-geo values merged in.
    """
    session = get_session()
    try:
        geo_summary_levels = get_summary_geo_info(geo_code, geo_level, session)
        data = {}

        sections = list(PROFILE_SECTIONS)
        # crime stats only exist at country and province level
        if geo_level in ['country', 'province']:
            sections.append('crime')

        for section in sections:
            # sections are produced by get_<section>_profile functions
            # resolved dynamically from this module's globals
            function_name = 'get_%s_profile' % section
            if function_name in globals():
                func = globals()[function_name]
                data[section] = func(geo_code, geo_level, session)

                # get profiles for province and/or country
                for level, code in geo_summary_levels:
                    # merge summary profile into current geo profile
                    merge_dicts(data[section], func(code, level, session), level)

        # tweaks to make the data nicer
        # show 3 largest groups on their own and group the rest as 'Other'
        group_remainder(data['service_delivery']['water_source_distribution'], 5)
        group_remainder(
            data['service_delivery']['refuse_disposal_distribution'], 5)
        group_remainder(
            data['service_delivery']['toilet_facilities_distribution'], 5)
        group_remainder(data['demographics']['language_distribution'], 7)
        group_remainder(data['demographics']['province_of_birth_distribution'], 7)
        group_remainder(data['demographics']['region_of_birth_distribution'], 5)
        group_remainder(data['households']['type_of_dwelling_distribution'], 5)
        group_remainder(
            data['child_households']['type_of_dwelling_distribution'], 5)

        return data
    finally:
        session.close()
def build_model_from_fields(fields, geo_level, table_name=None):
    '''
    Generates an ORM model for arbitrary census fields by geography.

    :param list fields: the census fields in
        `api.models.tables.FIELD_TABLE_FIELDS`, e.g.
        ['highest educational level', 'type of sector']
    :param str geo_level: one of the geographics levels defined in
        `api.base.geo_levels`, e.g. 'province'
    :param str table_name: the name of the database table, if different
        from the default table
    :return: ORM model class containing the given fields with type
        String(128), a 'total' field with type Integer and
        '%(geo_level)s_code' with type ForeignKey('%(geo_level)s.code')
    :rtype: Model
    '''
    if table_name is None:
        table_name = get_table_name(fields, geo_level)

    # models are cached per table name; reuse if already built
    if table_name in _census_table_models:
        return _census_table_models[table_name]

    field_columns = [
        Column(field, String(128), primary_key=True) for field in fields
    ]

    # foreign keys
    field_columns.append(
        Column('%s_code' % geo_level, String(5),
               ForeignKey('%s.code' % geo_level),
               primary_key=True, index=True))

    class Model(Base):
        __table__ = Table(table_name, Base.metadata,
                          Column('total', Integer, nullable=False),
                          *field_columns)

    _census_table_models[table_name] = Model

    # ensure the table exists in the DB
    session = get_session()
    try:
        Model.__table__.create(session.get_bind(), checkfirst=True)
    finally:
        session.close()

    return Model
def get_geography(geo_code, geo_level):
    """
    Get a geography model (Ward, Province, etc.) for this geography, or
    raise LocationNotFound if it doesn't exist.
    """
    session = get_session()
    try:
        try:
            model = get_geo_model(geo_level)
        except KeyError:
            raise LocationNotFound('Invalid level: %s' % geo_level)

        geo = session.query(model).get(geo_code)
        if geo:
            return geo
        raise LocationNotFound('Invalid level and code: %s-%s'
                               % (geo_level, geo_code))
    finally:
        session.close()
def get_locations(search_term, levels=None, year='2009'):
    """
    Try to find locations based on a search term, possibly limited to
    +levels+ (a comma-separated string). Returns an ordered list of geo
    dicts, counties before the country, at most 10.
    """
    if levels:
        levels = levels.split(',')
        for level in levels:
            if level not in geo_levels:
                raise ValueError('Invalid geolevel: %s' % level)
    else:
        levels = geo_levels

    term = search_term.strip()

    session = get_session()
    try:
        found = set()

        # run the prefix/code search at every requested level
        for level in levels:
            # level validity was checked above
            model = get_geo_model(level)
            matches = session.query(model) \
                .filter(model.year == year) \
                .filter(or_(model.name.ilike(term + '%'),
                            model.code == term.upper())) \
                .limit(10)
            found.update(matches)

        # counties sort before the country
        rank = {County: 0, Country: 1}
        ordered = sorted(found,
                         key=lambda o: [rank[o.__class__], o.name, o.code])

        return [o.as_dict() for o in ordered[0:10]]
    finally:
        session.close()
def get_census_profile(geo_code, geo_level):
    """
    Build the census profile dict for a geography, one key per section
    (PROFILE_SECTIONS plus 'crime' for country/province), with
    comparative parent-geo values merged in.
    """
    # bug fix: removed debug leftover `geo_code, geo_level = 'EC', 'province'`
    # which overrode the arguments and made every call return the Eastern
    # Cape province profile regardless of the requested geography
    session = get_session()
    try:
        geo_summary_levels = get_summary_geo_info(geo_code, geo_level, session)
        data = {}

        sections = list(PROFILE_SECTIONS)
        # crime stats only exist at country and province level
        if geo_level in ['country', 'province']:
            sections.append('crime')

        for section in sections:
            # sections are produced by get_<section>_profile functions
            # resolved dynamically from this module's globals
            function_name = 'get_%s_profile' % section
            if function_name in globals():
                func = globals()[function_name]
                data[section] = func(geo_code, geo_level, session)

                # merge the province/country summary profiles into this one
                for level, code in geo_summary_levels:
                    merge_dicts(data[section], func(code, level, session), level)

        # tweaks to make the data nicer:
        # show the largest groups on their own, group the rest as 'Other'.
        # NOTE(review): the service_delivery and dwelling tweaks were
        # commented out alongside the debug override; left disabled -- confirm
        # whether this data set has those sections before re-enabling.
        group_remainder(data['demographics']['language_distribution'], 7)
        group_remainder(data['demographics']['province_of_birth_distribution'], 7)
        group_remainder(data['demographics']['region_of_birth_distribution'], 5)

        return data
    finally:
        session.close()
def build_model_from_fields(fields, geo_level, table_name=None):
    '''
    Generates an ORM model for arbitrary census fields by geography.

    :param list fields: the census fields in
        `api.models.tables.FIELD_TABLE_FIELDS`, e.g.
        ['highest educational level', 'type of sector']
    :param str geo_level: one of the geographics levels defined in
        `api.base.geo_levels`, e.g. 'province'
    :param str table_name: the name of the database table, if different
        from the default table
    :return: ORM model class containing the given fields with type
        String(128), a 'total' field with type Integer and
        '%(geo_level)s_code' with type ForeignKey('%(geo_level)s.code')
    :rtype: Model
    '''
    if table_name is None:
        table_name = get_table_name(fields, geo_level)

    # model classes are cached per table name
    cached = _census_table_models.get(table_name)
    if cached is not None:
        return cached

    # one String column per census field, all part of the primary key
    columns = [Column(f, String(128), primary_key=True) for f in fields]

    # plus the geography foreign key
    columns.append(Column('%s_code' % geo_level, String(8),
                          ForeignKey('%s.code' % geo_level),
                          primary_key=True, index=True))

    class Model(Base):
        __table__ = Table(table_name, Base.metadata,
                          Column('total', Integer, nullable=False),
                          *columns)

    _census_table_models[table_name] = Model

    # make sure the backing table exists
    session = get_session()
    try:
        Model.__table__.create(session.get_bind(), checkfirst=True)
    finally:
        session.close()

    return Model
def import_districts(self): session = get_session() with open(filepath) as f: reader = csv.reader(f, delimiter=",") # skip header next(reader) for line in reader: province_code, name = line[0:2] code = self.district_name_to_code(name.lower()) print name, code, province_code geo = PoliceDistrict() geo.name = name geo.code = code geo.year = '2005' geo.province_code = province_code session.add(geo) session.commit()
def get_stat_data(self, geo_level, geo_code, fields=None, key_order=None,
                  percent=True, total=None, recode=None):
    """
    Get a data dictionary for a place from this table.

    This fetches the values for each column in this table and returns a
    data dictionary for those values, with appropriate names and metadata.

    :param str geo_level: the geographical level
    :param str geo_code: the geographical code
    :param str or list fields: the columns to fetch stats for. By default,
                               all columns except geo-related and the total
                               column (if any) are used.
    :param str key_order: explicit ordering of (recoded) keys, or None for
                          the default order. Default order is the order in
                          +fields+ if given, otherwise it's the natural
                          column order from the DB.
    :param bool percent: should we calculate percentages, or just include
                         raw values?
    :param int total: the total value to use for percentages, name of a
                      field, or None to use the sum of all retrieved fields
                      (default)
    :param dict recode: map from field names to strings to recode column
                        names. Many fields can be recoded to the same thing,
                        their values will be summed.

    :return: (data-dictionary, total)
    """
    session = get_session()
    try:
        # normalise +fields+ to a list of valid column names
        if fields is not None and not isinstance(fields, list):
            fields = [fields]
        if fields:
            for f in fields:
                if f not in self.columns:
                    raise ValueError(
                        "Invalid field/column '%s' for table '%s'. Valid columns are: %s" % (f, self.id, ', '.join(self.columns.keys())))
        else:
            fields = self.columns.keys()

        recode = recode or {}
        if recode:
            # change lambda to dicts
            if not isinstance(recode, dict):
                recode = {f: recode(f) for f in fields}

        # is the total column valid?
        if isinstance(total, basestring) and total not in self.columns:
            raise ValueError(
                "Total column '%s' isn't one of the columns for table '%s'. Valid columns are: %s" % (total, self.id, ', '.join(self.columns.keys())))

        # table columns to fetch
        cols = [self.model.columns[c] for c in fields]
        # NOTE(review): `total` here is a column *name* (string) while `cols`
        # holds Column objects, so `total not in cols` is always true and the
        # raw string is appended to the query -- looks suspicious; confirm
        # whether this should be self.model.columns[total].
        if total is not None and isinstance(
                total, basestring) and total not in cols:
            cols.append(total)

        # do the query. If this returns no data, row is None
        row = session\
            .query(*cols)\
            .filter(self.model.c.geo_level == geo_level,
                    self.model.c.geo_code == geo_code)\
            .first()

        if row is None:
            row = ZeroRow()

        # what's our denominator?
        if total is None:
            # sum of all columns
            total = sum(getattr(row, f) or 0 for f in fields)
        elif isinstance(total, basestring):
            total = getattr(row, total)

        # Now build a data dictionary based on the columns in +row+.
        # Multiple columns may be recoded into one, so we have to
        # accumulate values as we go.
        results = OrderedDict()

        key_order = key_order or fields  # default key order is just the list of fields

        for field in key_order:
            val = getattr(row, field) or 0

            # recode the key for this field, default is to keep it the same
            key = recode.get(field, field)

            # set the recoded field name, noting that the key may already
            # exist if another column recoded to it
            field_info = results.setdefault(
                key, {'name': recode.get(field, self.columns[field]['name'])})

            if percent:
                # sum up existing values, if any
                val = val + field_info.get('numerators', {}).get('this', 0)
                field_info['values'] = {'this': p(val, total)}
                field_info['numerators'] = {'this': val}
            else:
                # sum up existing values, if any
                val = val + field_info.get('values', {}).get('this', 0)
                field_info['values'] = {'this': val}

        add_metadata(results, self)
        return results, total
    finally:
        session.close()
"SPECIAL VOTES": ('special_votes', parse_integer), "VALID VOTES": ('valid_votes', parse_integer), "SPOILT VOTES": ('spoilt_votes', parse_integer), "% VOTER TURNOUT": ('voter_turnout', lambda val: float(val.rstrip('%'))), } if __name__ == '__main__': if len(sys.argv) != 2: raise ValueError("Requires 1 file path argument") filepath = sys.argv[1] if not os.path.isabs(filepath): filepath = os.path.join(os.getcwd(), filepath) # create table if necessary Base.metadata.create_all(_engine, tables=[Votes.__table__]) session = get_session() with open(filepath) as f: reader = csv.DictReader(f, encoding='CP949') total = 1064463 i = 0 for values in reader: mapped_values = dict( (field_mapper[k][0], field_mapper[k][1] (val) if field_mapper[k][1] is not None else val) for k, val in values.iteritems()) district_code = None mapped_values['district_code'] = district_code mapped_values['mec7_votes'] = None mapped_values['ballot_type'] = None
def get_stat_data(self, geo_level, geo_code, fields=None, key_order=None,
                  percent=True, total=None, recode=None):
    """ Get a data dictionary for a place from this table.

    This fetches the values for each column in this table and returns a data
    dictionary for those values, with appropriate names and metadata.

    :param str geo_level: the geographical level
    :param str geo_code: the geographical code
    :param str or list fields: the columns to fetch stats for. By default,
                               all columns except geo-related and the total
                               column (if any) are used.
    :param str key_order: explicit ordering of (recoded) keys, or None for
                          the default order. Default order is the order in
                          +fields+ if given, otherwise it's the natural
                          column order from the DB.
    :param bool percent: should we calculate percentages, or just include
                         raw values?
    :param int total: the total value to use for percentages, name of a
                      field, or None to use the sum of all retrieved fields
                      (default)
    :param dict recode: map from field names to strings to recode column
                        names. Many fields can be recoded to the same thing,
                        their values will be summed.

    :return: (data-dictionary, total)
    :raises ValueError: if a field or the total column isn't a column of
                        this table
    """
    session = get_session()
    try:
        # normalise a single field name into a list
        if fields is not None and not isinstance(fields, list):
            fields = [fields]
        if fields:
            for f in fields:
                if f not in self.columns:
                    raise ValueError("Invalid field/column '%s' for table '%s'. Valid columns are: %s" % (
                        f, self.id, ', '.join(self.columns.keys())))
        else:
            fields = self.columns.keys()

        recode = recode or {}
        if recode:
            # change lambda to dicts
            if not isinstance(recode, dict):
                recode = {f: recode(f) for f in fields}

        # is the total column valid?
        if isinstance(total, basestring) and total not in self.columns:
            raise ValueError("Total column '%s' isn't one of the columns for table '%s'. Valid columns are: %s" % (
                total, self.id, ', '.join(self.columns.keys())))

        # table columns to fetch
        cols = [self.model.columns[c] for c in fields]
        # NOTE(review): `cols` holds SQLAlchemy Column objects while `total`
        # is a string here, so `total not in cols` compares a str against
        # Columns -- verify this membership test behaves as intended with
        # this SQLAlchemy version.
        if total is not None and isinstance(total, basestring) and total not in cols:
            cols.append(total)

        # do the query. If this returns no data, row is None
        row = session\
            .query(*cols)\
            .filter(self.model.c.geo_level == geo_level,
                    self.model.c.geo_code == geo_code)\
            .first()

        if row is None:
            # no data for this geo; ZeroRow presumably yields 0/None for
            # every attribute so the loop below still works
            row = ZeroRow()

        # what's our denominator?
        if total is None:
            # sum of all columns
            total = sum(getattr(row, f) or 0 for f in fields)
        elif isinstance(total, basestring):
            total = getattr(row, total)

        # Now build a data dictionary based on the columns in +row+.
        # Multiple columns may be recoded into one, so we have to
        # accumulate values as we go.
        results = OrderedDict()

        key_order = key_order or fields  # default key order is just the list of fields

        for field in key_order:
            val = getattr(row, field) or 0

            # recode the key for this field, default is to keep it the same
            key = recode.get(field, field)

            # set the recoded field name, noting that the key may already
            # exist if another column recoded to it
            field_info = results.setdefault(key, {'name': recode.get(field, self.columns[field]['name'])})

            if percent:
                # sum up existing values, if any
                val = val + field_info.get('numerators', {}).get('this', 0)
                field_info['values'] = {'this': p(val, total)}
                field_info['numerators'] = {'this': val}
            else:
                # sum up existing values, if any
                val = val + field_info.get('values', {}).get('this', 0)
                field_info['values'] = {'this': val}

        add_metadata(results, self)

        return results, total
    finally:
        session.close()
def clean_str(var):
    """Return str(var), or the literal '\N' marker (Postgres COPY NULL) when var is falsy."""
    out = '\N'
    if var:
        out = str(var)
    return out


if __name__ == '__main__':
    if len(sys.argv) != 2:
        raise ValueError("Requires 'electoral event' argument")
    election = sys.argv[1]

    # create the summary table if necessary before writing to it
    Base.metadata.create_all(_engine, tables=[VoteSummary.__table__])
    session = get_session()

    try:
        # progress denominator: one pass per geo at each level, plus country.
        # NOTE(review): the * 3 multiplier presumably reflects three ballot
        # types per geography -- confirm against the loop body below.
        total = (1 + session.query(Province).count() + session.query(District).count() + \
                 session.query(Municipality).count() + session.query(Ward).count()) * 3
        counter = 0.0

        # emit a Postgres COPY header so output can be piped straight into psql
        sys.stdout.write('COPY votesummary (geo_level, geo_code, electoral_event, party, '
                         'ballot_type, registered_voters, total_votes, mec7_votes, section_24a_votes, special_votes, '
                         'valid_votes, spoilt_votes, average_voter_turnout) FROM stdin;\n')

        # walk every geo level; None stands in for the country level
        for geo_model in (None, Province, District, Municipality, Ward):
            if geo_model is None:
                level = 'country'
                codes = [('ZA', )]
            else:
                # NOTE(review): script continues beyond this visible chunk
def get_locations(search_term, levels=None, year='2011'):
    """
    Search for geographies matching +search_term+.

    :param str search_term: free-text search string; matched against geo
                            names (prefix, optionally with a 'City of '
                            prefix) and geo codes
    :param str levels: comma-separated geo levels to search, or None to
                       search the default set (country, province,
                       municipality, ward, subplace)
    :param str year: census year to search within
    :return: serialized demarcations for at most 10 matches
    :raises ValueError: if any requested level isn't a valid geo level
    """
    if levels:
        levels = levels.split(',')

        for level in levels:
            # idiom: `not in` rather than `not level in`
            if level not in geo_levels:
                raise ValueError('Invalid geolevel: %s' % level)
    else:
        levels = ['country', 'province', 'municipality', 'ward', 'subplace']

    search_term = search_term.strip()

    session = get_session()
    try:
        objects = set()

        # search at each level
        for level in levels:
            # already checked that geo_level is valid
            model = get_geo_model(level)

            if level == 'subplace':
                # check mainplace and subplace names; results are the
                # containing wards, not the subplaces themselves
                objects.update(session
                               .query(Ward)
                               .join(model)
                               .filter(model.year == year)
                               .filter(or_(model.subplace_name.ilike(search_term + '%'),
                                           model.subplace_name.ilike('City of %s' % search_term + '%'),
                                           model.mainplace_name.ilike(search_term + '%'),
                                           model.code == search_term))
                               .limit(10)
                               )

            elif level == 'ward':
                # NOTE(review): str.strip('ward') strips the *characters*
                # w/a/r/d from both ends, not the word 'ward' (e.g. 'Award'
                # becomes ''). Preserved as-is; confirm intent.
                st = search_term.lower().strip('ward').strip()

                filters = [model.code.like(st + '%')]

                try:
                    filters.append(model.ward_no == int(st))
                except ValueError:
                    # search term isn't a ward number; match on code only
                    pass

                objects.update(session
                               .query(model)
                               .filter(model.year == year)
                               .filter(or_(*filters))
                               .limit(10)
                               )

            else:
                objects.update(session
                               .query(model)
                               .filter(model.year == year)
                               .filter(or_(model.name.ilike(search_term + '%'),
                                           model.name.ilike('City of %s' % search_term + '%'),
                                           model.code == search_term.upper()))
                               .limit(10)
                               )

        # Provinces sort first, then municipalities, wards, and countries
        # last; ties broken by name (or code when there is no name).
        # NOTE(review): District has no entry here -- a district result
        # would raise KeyError. Confirm districts can never appear.
        order_map = {Country: 4, Ward: 3, Municipality: 2, Province: 1}
        objects = sorted(objects, key=lambda o: [order_map[o.__class__],
                                                 getattr(o, 'name', getattr(o, 'code'))])

        return serialize_demarcations(objects[:10])
    finally:
        session.close()
def setup_columns(self):
    """
    Prepare our columns for use by +as_dict+ and the data API.

    Each 'column' is actually a unique value for each of this table's
    +fields+.
    """
    self.build_models()

    # Each "column" is a unique permutation of the values
    # of this table's fields, including rollups. The ordering of the
    # columns is important since columns are hierarchical, but are returned
    # "flat".
    #
    # Here's an example. Suppose our table has the following values:
    #
    #     5 years, male, 129
    #     5 years, female, 131
    #     10 years, male, 221
    #     10 years, female, 334
    #
    # This would produce the following columns (indented to show nesting):
    #
    #     5 years:
    #       male
    #       female
    #     10 years:
    #       male
    #       female

    # map from column id to column info.
    self.total_column = self.column_id([self.denominator_key or 'total'])
    self.columns = OrderedDict()
    self.columns[self.total_column] = {'name': 'Total', 'indent': 0}

    session = get_session()
    try:
        # the country-level table has every permutation, so it's enough
        # to enumerate distinct values from it alone
        model = self.get_model('country')
        fields = [getattr(model, f) for f in self.fields]

        # get distinct permutations for all fields; order_by matters so
        # that groupby() below sees each value's rows contiguously
        rows = session\
            .query(*fields)\
            .order_by(*fields)\
            .distinct()\
            .all()

        def permute(indent, field_values, rows):
            # Recurse one level per field. `indent` is 1-based, so the
            # field at this level is fields[indent - 1].
            field = self.fields[indent - 1]
            last = indent == len(self.fields)

            for val, rows in groupby(rows, lambda r: getattr(r, field)):
                # this is used to calculate the column id
                new_values = field_values + [val]
                col_id = self.column_id(new_values)

                self.columns[col_id] = {
                    # non-leaf values get a trailing ':' to show nesting
                    'name': capitalize(val) + ('' if last else ':'),
                    'indent': 0 if col_id == self.total_column else indent,
                }

                if not last:
                    # note: `rows` here is the lazy groupby sub-iterator
                    permute(indent + 1, new_values, rows)

        permute(1, [], rows)
    finally:
        session.close()
def raw_data_for_geos(self, geos):
    """
    Pull raw data for a list of geo models.

    Returns a dict mapping the geo ids to table data.
    """
    data = {}

    # group by geo level; groupby() requires its input sorted by the
    # same key
    geos = sorted(geos, key=lambda g: g.level)
    for geo_level, geos in groupby(geos, lambda g: g.level):
        model = self.get_model(geo_level)
        geo_codes = [g.code for g in geos]

        # per-level tables key rows by '<level>_code'; the combined table
        # uses a generic 'geo_code' plus a geo_level column
        if self.table_per_level:
            code = '%s_code' % geo_level
        else:
            code = 'geo_code'

        code_attr = getattr(model, code)

        # initial values
        for geo_code in geo_codes:
            data['%s-%s' % (geo_level, geo_code)] = {
                'estimate': {},
                'error': {}
            }

        session = get_session()
        try:
            geo_values = None
            fields = [getattr(model, f) for f in self.fields]

            # one row per (geo, field-permutation) with summed totals;
            # ordered by geo code then fields so the nested groupby()
            # calls below see contiguous runs
            rows = session\
                .query(code_attr,
                       func.sum(model.total).label('total'),
                       *fields)\
                .group_by(code_attr, *fields)\
                .order_by(code_attr, *fields)\
                .filter(code_attr.in_(geo_codes))

            if not self.table_per_level:
                rows = rows.filter(model.geo_level == geo_level)

            rows = rows.all()

            def permute(level, field_keys, rows):
                # Recursively walk the field hierarchy, filling
                # geo_values (bound by the caller loop below) and
                # returning the subtree total.
                field = self.fields[level]
                total = 0
                denominator = 0

                for key, rows in groupby(rows, lambda r: getattr(r, field)):
                    new_keys = field_keys + [key]
                    col_id = self.column_id(new_keys)

                    if level + 1 < len(self.fields):
                        count = permute(level + 1, new_keys, rows)
                    else:
                        # we've bottomed out
                        count = sum(row.total for row in rows)

                        if self.denominator_key and self.denominator_key == key:
                            # this row must be used as the denominator total,
                            # rather than as an entry in the table
                            denominator = count
                            continue

                    total += count
                    geo_values['estimate'][col_id] = count
                    geo_values['error'][col_id] = 0

                if self.denominator_key:
                    # the explicit denominator row replaces the summed total
                    total = denominator

                return total

            # rows for each geo
            for geo_code, geo_rows in groupby(rows, lambda r: getattr(r, code)):
                geo_values = data['%s-%s' % (geo_level, geo_code)]
                total = permute(0, [], geo_rows)

                # total
                geo_values['estimate'][self.total_column] = total
                geo_values['error'][self.total_column] = 0
        finally:
            session.close()

    return data
def _build_model_from_fields(self, fields, table_name, geo_level=None):
    '''
    Generates an ORM model for arbitrary census fields by geography.

    Models are memoized in the module-level `_census_table_models` cache,
    and the underlying table is created in the database on first build.

    :param list fields: the census fields in `api.models.tables.FIELD_TABLE_FIELDS`, e.g.
           ['highest educational level', 'type of sector']
    :param str table_name: the name of the database table
    :param str geo_level: one of the geographics levels defined in
           `api.base.geo_levels`, e.g. 'province', or None if the table
           doesn't use them
    :return: ORM model class containing the given fields with type String(128), a 'total' field
             with type Integer and '%(geo_level)s_code' with type ForeignKey('%(geo_level)s.code')
    :rtype: Model
    '''
    if table_name in _census_table_models:
        return _census_table_models[table_name]

    # We build this array in a particular order, with the geo-related fields first,
    # to ensure that SQLAlchemy creates the underlying table with the compound primary
    # key columns in the correct order:
    #
    #  geo_level, geo_code, field, [field, field, ...]
    #
    # This means postgresql will use the first two elements of the compound primary
    # key -- geo_level and geo_code -- when looking up values for a particular
    # geograhy. This saves us from having to create a secondary index.
    table_args = []

    if geo_level:
        # primary/foreign keys
        table_args.append(Column('%s_code' % geo_level, String(10),
                                 ForeignKey('%s.code' % geo_level),
                                 primary_key=True, index=True))
    else:
        # will form a compound primary key on the fields, and the geo id
        table_args.append(Column('geo_level', String(15), nullable=False,
                                 primary_key=True))
        table_args.append(Column('geo_code', String(10), nullable=False,
                                 primary_key=True))

    # Now add the columns; each field participates in the compound PK
    table_args.extend(Column(field, String(128), primary_key=True)
                      for field in fields)

    # and the value column
    table_args.append(Column('total', Integer, nullable=False))

    # create the table model
    class Model(Base):
        __table__ = Table(table_name, Base.metadata, *table_args)
    _census_table_models[table_name] = Model

    # ensure it exists in the DB
    session = get_session()
    try:
        Model.__table__.create(session.get_bind(), checkfirst=True)
    finally:
        session.close()

    return Model
def raw_data_for_geos(self, geos):
    """
    Pull raw data for a list of geo models.

    Returns a dict mapping the geo ids to table data.

    NOTE(review): this is byte-for-byte the same logic as the
    raw_data_for_geos defined earlier in this file (only dict-literal
    formatting differs) -- consider consolidating.
    """
    data = {}

    # group by geo level; groupby() needs input sorted on the same key
    geos = sorted(geos, key=lambda g: g.level)
    for geo_level, geos in groupby(geos, lambda g: g.level):
        model = self.get_model(geo_level)
        geo_codes = [g.code for g in geos]

        # per-level tables key rows by '<level>_code'; otherwise a shared
        # 'geo_code' column plus geo_level is used
        if self.table_per_level:
            code = '%s_code' % geo_level
        else:
            code = 'geo_code'

        code_attr = getattr(model, code)

        # initial values
        for geo_code in geo_codes:
            data['%s-%s' % (geo_level, geo_code)] = {
                'estimate': {},
                'error': {}}

        session = get_session()
        try:
            geo_values = None
            fields = [getattr(model, f) for f in self.fields]

            # summed totals per (geo, field-permutation), ordered so the
            # nested groupby() calls see contiguous runs
            rows = session\
                .query(code_attr,
                       func.sum(model.total).label('total'),
                       *fields)\
                .group_by(code_attr, *fields)\
                .order_by(code_attr, *fields)\
                .filter(code_attr.in_(geo_codes))

            if not self.table_per_level:
                rows = rows.filter(model.geo_level == geo_level)

            rows = rows.all()

            def permute(level, field_keys, rows):
                # walk the field hierarchy recursively, filling the
                # enclosing geo_values and returning the subtree total
                field = self.fields[level]
                total = 0
                denominator = 0

                for key, rows in groupby(rows, lambda r: getattr(r, field)):
                    new_keys = field_keys + [key]
                    col_id = self.column_id(new_keys)

                    if level + 1 < len(self.fields):
                        count = permute(level + 1, new_keys, rows)
                    else:
                        # we've bottomed out
                        count = sum(row.total for row in rows)

                        if self.denominator_key and self.denominator_key == key:
                            # this row must be used as the denominator total,
                            # rather than as an entry in the table
                            denominator = count
                            continue

                    total += count
                    geo_values['estimate'][col_id] = count
                    geo_values['error'][col_id] = 0

                if self.denominator_key:
                    # explicit denominator row replaces the summed total
                    total = denominator

                return total

            # rows for each geo
            for geo_code, geo_rows in groupby(rows, lambda r: getattr(r, code)):
                geo_values = data['%s-%s' % (geo_level, geo_code)]
                total = permute(0, [], geo_rows)

                # total
                geo_values['estimate'][self.total_column] = total
                geo_values['error'][self.total_column] = 0
        finally:
            session.close()

    return data