def main():
    """Aggregate language matrices across all regions and print per-language
    summary statistics (mean, median, MAD) as JavaScript object entries."""
    import sys

    totals = LanguageMatrixTotal()
    collection = stats_db.get_collection()

    # The two 2011 census language matrices are aggregated identically;
    # drive both passes from one table of (field, progress message).
    passes = (
        ('by-mother-tongue', 'Loading mother tongues...'),
        ('by-language-spoken-at-home', 'Loading languages spoken at home...'),
    )
    for field, message in passes:
        print(message, file=sys.stderr)
        query = {'2011.population.%s' % field: {'$exists': True}}
        for region_stats in collection.find(query):
            totals.add_language_matrix(region_stats['2011']['population'][field])

    print('Finding medians...', file=sys.stderr)
    for key in sorted(totals.data):
        values = totals.data[key]
        mean = sum(values) / len(values)
        med = median(values)
        # Median absolute deviation: robust measure of spread around the median.
        mad = median(math.fabs(v - med) for v in values)
        print("""'%s': { name: "%s", mean: %0.2f, median: %0.2f, mad: %0.2f },""" % (
            key, REVERSE_LANGUAGES[key], mean * 100, med * 100, mad * 100))
def main():
    """Import every region-profile CSV file into the statistics collection."""
    import stats_db

    collection = stats_db.get_collection()
    file_loader = FileLoader("db/statistics/region-profiles")
    for region_type, csv_file in file_loader.region_types_and_csv_files():
        print("Importing a CSV of %s..." % region_type)
        # One importer per (region type, CSV) pair; import_all() does the work.
        RegionProfileCsvImporter(collection, region_type, csv_file).import_all()
def main():
    """Run the region-profile CSV import for every region type found on disk."""
    import stats_db

    collection = stats_db.get_collection()
    loader = FileLoader('db/statistics/region-profiles')
    pairs = loader.region_types_and_csv_files()
    for region_type, csv_file in pairs:
        print('Importing a CSV of %s...' % region_type)
        importer = RegionProfileCsvImporter(collection, region_type, csv_file)
        importer.import_all()
def main():
    """Roll child census-subdivision (CS) statistics up into each consolidated
    census subdivision (CCS) and write the merged result back to the collection."""
    # Fix: sys.stderr is used below but sys was never imported here; the
    # sibling entry points in this project import sys locally inside main().
    import sys

    connection = db.connect()
    collection = stats_db.get_collection()
    css = all_css_by_region_id(collection)  # child statistics keyed by region id
    ccss = each_ccs(connection)

    print('Consolidating...', file=sys.stderr)
    for ccs in ccss:
        # Accumulate every child's statistics into the parent CCS.
        for cs_id in ccs.child_region_ids:
            cs = css[cs_id]
            ccs.statistics.add(cs.statistics)
        ccs.postprocess()
        ccs.write(collection)
def main():
    """Consolidate child CS statistics into each CCS region and persist it.

    For every consolidated census subdivision, sums the statistics of all its
    child regions, post-processes the totals, then writes the CCS back.
    """
    import sys  # fix: needed for sys.stderr below (previously missing)

    connection = db.connect()
    collection = stats_db.get_collection()
    css = all_css_by_region_id(collection)
    ccss = each_ccs(connection)

    print('Consolidating...', file=sys.stderr)
    for ccs in ccss:
        for cs_id in ccs.child_region_ids:
            ccs.statistics.add(css[cs_id].statistics)
        ccs.postprocess()
        ccs.write(collection)
def main():
    """Query PostGIS for every region's bounding box and store it on the
    corresponding region document."""
    import sys

    connection = db.connect()
    collection = stats_db.get_collection()
    cursor = connection.cursor()

    print('Querying for bounding boxes of all regions...', file=sys.stderr)
    # NOTE(review): ST_Transform to SRID 4326 after ST_SetSRID(..., 4326) looks
    # like a no-op — confirm whether a different source SRID was intended.
    cursor.execute("""
        SELECT
            json_id,
            ST_XMin(bbox) AS sw_longitude,
            ST_YMin(bbox) AS sw_latitude,
            ST_XMax(bbox) AS ne_longitude,
            ST_YMax(bbox) AS ne_latitude
        FROM (
            SELECT
                type || '-' || uid AS json_id,
                ST_Transform(ST_SetSRID(geometry, 4326), 4326) AS bbox
            FROM regions
        ) x
    """)

    print('Storing bounding boxes. "." = 10,000 regions: ',
          file=sys.stderr, end='', flush=True)
    progress = 0
    for row in cursor:
        region_id = row[0]
        # NOTE(review): country-level rows are redirected onto 'Province-01' —
        # presumably the country has no document of its own; confirm.
        if region_id.startswith('Country'):
            region_id = 'Province-01'
        region = collection.get_region(region_id)
        region.set('bounding-box', row[1:5])  # (sw_lng, sw_lat, ne_lng, ne_lat)
        region.save()
        progress += 1
        if progress == 10000:
            progress = 0
            print('.', file=sys.stderr, end='', flush=True)
    print('', file=sys.stderr)
def main():
    """Fetch each region's bounding box from PostGIS and save it onto the
    region document, printing one '.' per 10,000 regions processed."""
    import sys

    conn = db.connect()
    regions = stats_db.get_collection()
    c = conn.cursor()

    print('Querying for bounding boxes of all regions...', file=sys.stderr)
    c.execute("""
        SELECT json_id,
               ST_XMin(bbox) AS sw_longitude,
               ST_YMin(bbox) AS sw_latitude,
               ST_XMax(bbox) AS ne_longitude,
               ST_YMax(bbox) AS ne_latitude
        FROM (
            SELECT type || '-' || uid AS json_id,
                   ST_Transform(ST_SetSRID(geometry, 4326), 4326) AS bbox
            FROM regions
        ) x
    """)

    print('Storing bounding boxes. "." = 10,000 regions: ',
          file=sys.stderr, end='', flush=True)
    for count, row in enumerate(c, start=1):
        region_id = row[0]
        if region_id.startswith('Country'):
            # Country rows are stored under the Province-01 document instead.
            # NOTE(review): confirm this redirection is still intended.
            region_id = 'Province-01'
        region = regions.get_region(region_id)
        region.set('bounding-box', row[1:5])
        region.save()
        if count % 10000 == 0:  # same cadence as the original 10,000 counter
            print('.', file=sys.stderr, end='', flush=True)
    print('', file=sys.stderr)
def main():
    """Collect 2011 language matrices from every region and emit, per language,
    JavaScript-style entries with mean, median and median absolute deviation."""
    import sys

    matrix_total = LanguageMatrixTotal()
    stats = stats_db.get_collection()

    print('Loading mother tongues...', file=sys.stderr)
    mother_tongue_query = {'2011.population.by-mother-tongue': {'$exists': True}}
    for doc in stats.find(mother_tongue_query):
        matrix_total.add_language_matrix(
            doc['2011']['population']['by-mother-tongue'])

    print('Loading languages spoken at home...', file=sys.stderr)
    at_home_query = {
        '2011.population.by-language-spoken-at-home': {'$exists': True}}
    for doc in stats.find(at_home_query):
        matrix_total.add_language_matrix(
            doc['2011']['population']['by-language-spoken-at-home'])

    print('Finding medians...', file=sys.stderr)
    for language_key in sorted(matrix_total.data.keys()):
        samples = matrix_total.data[language_key]
        average = sum(samples) / len(samples)
        mid = median(samples)
        # MAD = median of absolute deviations from the median.
        deviation = median(math.fabs(s - mid) for s in samples)
        display_name = REVERSE_LANGUAGES[language_key]
        line = """'%s': { name: "%s", mean: %0.2f, median: %0.2f, mad: %0.2f },""" % (
            language_key, display_name,
            average * 100, mid * 100, deviation * 100)
        print(line)
# --- fragment: tail of a correspondence-file parser; the enclosing function
# (and the loop over `line`s that binds `uid2011`/`data`) starts before this
# chunk, so only the record-handling tail is visible here. ---
uid2006 = line[11:21]  # fixed-width field: 2006 dissemination-block UID
flag = line[22]
# Flags '1' and '2' appear to mark 2006 blocks that correspond to the 2011
# block. NOTE(review): flag semantics inferred from use — confirm against the
# 2011_92-156_DB_ID correspondence-file record layout.
if flag in ('1', '2'):
    if uid2011 not in data:
        data[uid2011] = []
    data[uid2011].append(uid2006)
return data


if __name__ == '__main__':
    import os
    import sys

    collection = stats_db.get_collection()

    # All three inputs live under db/statistics/dissemination-blocks,
    # resolved relative to this script's directory.
    print('Loading correspondence file...', file=sys.stderr)
    uid2011_to_uids2006 = load_uid2011_to_uids2006(
        os.path.join(os.path.dirname(__file__), '..', 'db', 'statistics',
                     'dissemination-blocks', '2011_92-156_DB_ID_txt.zip'))

    print('Opening 2006 file...', file=sys.stderr)
    regions2006 = iterate_regions_2006(
        os.path.join(os.path.dirname(__file__), '..', 'db', 'statistics',
                     'dissemination-blocks', '2006_92-151_XBB_txt.zip'))

    print('Opening 2011 file...', file=sys.stderr)
    # Chunk ends here; processing of regions2011 presumably continues below
    # this visible fragment.
    regions2011 = iterate_regions_2011(
        os.path.join(os.path.dirname(__file__), '..', 'db', 'statistics',
                     'dissemination-blocks', '2011_92-151_XBB_txt.zip'))
        FROM region_parents rp
        INNER JOIN region_min_zoom_levels rmzl ON rp.region_id = rmzl.region_id
        GROUP BY rp.parent_region_id
        ''')
        # NOTE(review): this fragment begins inside a SQL string belonging to a
        # method whose definition starts before the visible chunk; the SELECT
        # list and the meaning of r[1] cannot be confirmed from here.
        for r in c:
            regions_by_id[r[0]] = r[1]

    def regions_in_slices(self, slice_size):
        # Yield self.regions in consecutive chunks of at most slice_size items.
        for i in range(0, len(self.regions), slice_size):
            yield self.regions[i:(i+slice_size)]


if __name__ == '__main__':
    import sys

    connection = db.connect()
    stats_collection = stats_db.get_collection()

    print('Creating output database...', file=sys.stderr)
    # SQL statements go to stdout — presumably piped into sqlite3 by the
    # caller — while progress messages go to stderr.
    print('PRAGMA synchronous = OFF;')
    print('CREATE TABLE region_statistics (region_id INTEGER PRIMARY KEY, statistics BLOB);')

    store = DbRegionStore(connection)
    print('Loading statistics per region and writing to SQLite ("." = %d regions read and written): ' % (SLICE_SIZE,), file=sys.stderr, end='', flush=True)
    # Batch the Mongo lookups: one find() per slice of SLICE_SIZE regions.
    for region_slice in store.regions_in_slices(SLICE_SIZE):
        regions_by_key = dict((r.key, r) for r in region_slice)
        stats_with_keys = stats_collection.find({ '_id': { '$in': list(regions_by_key.keys()) } })
        # Chunk ends mid-loop; the write of each region's statistics continues
        # past the visible fragment.
        for region_stats in stats_with_keys:
            key = region_stats['_id']
            region = regions_by_key[key]