def main():
    """Print mean, median and median absolute deviation for each language key.

    Every region document that has a ``by-mother-tongue`` matrix, and then
    every one that has a ``by-language-spoken-at-home`` matrix, is added to a
    single ``LanguageMatrixTotal``. For each key of the accumulated data (in
    sorted order) one formatted line is written to stdout, with the three
    statistics multiplied by 100. Progress messages go to stderr.
    """
    import sys

    total_matrix = LanguageMatrixTotal()
    stats_collection = stats_db.get_collection()

    # (progress message, query field path, key under ['2011']['population'])
    sources = (
        ('Loading mother tongues...',
         '2011.population.by-mother-tongue',
         'by-mother-tongue'),
        ('Loading languages spoken at home...',
         '2011.population.by-language-spoken-at-home',
         'by-language-spoken-at-home'),
    )
    for message, field_path, field_name in sources:
        print(message, file=sys.stderr)
        matching_regions = stats_collection.find({field_path: {'$exists': True}})
        for region_stats in matching_regions:
            population = region_stats['2011']['population']
            total_matrix.add_language_matrix(population[field_name])

    print('Finding medians...', file=sys.stderr)
    row_format = """'%s': { name: "%s", mean: %0.2f, median: %0.2f, mad: %0.2f },"""
    for key in sorted(total_matrix.data.keys()):
        values = total_matrix.data[key]

        average = sum(values) / len(values)
        center = median(values)
        # Median of the absolute distances from the median.
        spread = median(math.fabs(value - center) for value in values)

        name = REVERSE_LANGUAGES[key]
        print(row_format % (key, name, average * 100, center * 100, spread * 100))
def main():
    """Import every region-profile CSV into the statistics collection.

    The (region type, CSV file) pairs come from a ``FileLoader`` rooted at
    ``db/statistics/region-profiles``; each pair is announced on stdout and
    then imported with a ``RegionProfileCsvImporter``.
    """
    import stats_db

    csv_source = FileLoader("db/statistics/region-profiles")
    target_collection = stats_db.get_collection()

    for region_type, csv_file in csv_source.region_types_and_csv_files():
        print("Importing a CSV of %s..." % region_type)
        RegionProfileCsvImporter(target_collection, region_type, csv_file).import_all()
Ejemplo n.º 3
0
def main():
    """Run the region-profile CSV import for each region type.

    A ``FileLoader`` for ``db/statistics/region-profiles`` supplies pairs of
    region type and CSV file. For each pair a progress line is printed and a
    ``RegionProfileCsvImporter`` writes the file into the stats collection.
    """
    import stats_db

    loader = FileLoader('db/statistics/region-profiles')
    collection = stats_db.get_collection()

    for pair in loader.region_types_and_csv_files():
        region_type, csv_file = pair
        progress_line = 'Importing a CSV of %s...' % region_type
        print(progress_line)

        csv_importer = RegionProfileCsvImporter(collection, region_type, csv_file)
        csv_importer.import_all()
def main():
    """Sum child-region statistics into each parent and write the result.

    Loads the objects returned by ``all_css_by_region_id`` (indexed by region
    id) and iterates the objects produced by ``each_ccs``. For every one of
    the latter, the statistics of each region listed in its
    ``child_region_ids`` are added to it, then it is post-processed and
    written to the stats collection.
    """
    connection = db.connect()
    collection = stats_db.get_collection()

    children_by_id = all_css_by_region_id(collection)
    parents = each_ccs(connection)

    print('Consolidating...', file=sys.stderr)
    for parent in parents:
        for child_id in parent.child_region_ids:
            child = children_by_id[child_id]
            parent.statistics.add(child.statistics)

        # All children are accumulated; finalize and persist this parent.
        parent.postprocess()
        parent.write(collection)
def main():
    """Consolidate statistics of child regions into their parent regions.

    Each object yielded by ``each_ccs`` receives, via ``statistics.add``, the
    statistics of every region named in its ``child_region_ids`` (looked up
    in the mapping from ``all_css_by_region_id``). It is then post-processed
    and written back to the collection.
    """
    connection = db.connect()
    collection = stats_db.get_collection()

    css = all_css_by_region_id(collection)
    ccss = each_ccs(connection)

    print('Consolidating...', file=sys.stderr)
    for ccs in ccss:
        # Lazily resolved, so lookups and additions interleave one by one.
        child_statistics = (css[cs_id].statistics for cs_id in ccs.child_region_ids)
        for statistics in child_statistics:
            ccs.statistics.add(statistics)
        ccs.postprocess()
        ccs.write(collection)
Ejemplo n.º 6
0
def main():
    """Store a bounding box on every region in the stats collection.

    Queries the ``regions`` table for each region's id (``type-uid``) and the
    min/max X and Y of its geometry, then saves the four numbers, in the
    order south-west longitude, south-west latitude, north-east longitude,
    north-east latitude, under the region's ``bounding-box`` key. Ids
    starting with ``Country`` are stored under ``Province-01`` instead.
    A dot is written to stderr after every 10,000 regions.
    """
    import sys

    connection = db.connect()
    collection = stats_db.get_collection()

    cursor = connection.cursor()

    print('Querying for bounding boxes of all regions...', file=sys.stderr)
    cursor.execute("""
        SELECT
            json_id,
            ST_XMin(bbox) AS sw_longitude,
            ST_YMin(bbox) AS sw_latitude,
            ST_XMax(bbox) AS ne_longitude,
            ST_YMax(bbox) AS ne_latitude
        FROM (
            SELECT
                type || '-' || uid AS json_id,
                ST_Transform(ST_SetSRID(geometry, 4326), 4326) AS bbox
            FROM regions
        ) x
        """)

    print('Storing bounding boxes. "." = 10,000 regions: ',
          file=sys.stderr, end='', flush=True)
    for stored_count, row in enumerate(cursor, start=1):
        region_id = row[0]
        if region_id.startswith('Country'):
            region_id = 'Province-01'

        region = collection.get_region(region_id)
        region.set('bounding-box', row[1:5])
        region.save()

        if stored_count % 10000 == 0:
            print('.', file=sys.stderr, end='', flush=True)

    # Terminate the progress line.
    print('', file=sys.stderr)
Ejemplo n.º 7
0
def main():
    """Compute each region's bounding box in SQL and save it to the stats store.

    One row per entry of the ``regions`` table is read: the id built as
    ``type || '-' || uid`` followed by the geometry's XMin, YMin, XMax and
    YMax. Those four values are saved as the region's ``bounding-box``. A row
    whose id starts with ``Country`` is saved to region ``Province-01``.
    Progress is reported on stderr, one dot per 10,000 regions.
    """
    import sys

    connection = db.connect()
    collection = stats_db.get_collection()

    c = connection.cursor()

    def report(text, **print_options):
        # All progress output goes to stderr.
        print(text, file=sys.stderr, **print_options)

    report('Querying for bounding boxes of all regions...')
    c.execute("""
        SELECT
            json_id,
            ST_XMin(bbox) AS sw_longitude,
            ST_YMin(bbox) AS sw_latitude,
            ST_XMax(bbox) AS ne_longitude,
            ST_YMax(bbox) AS ne_latitude
        FROM (
            SELECT
                type || '-' || uid AS json_id,
                ST_Transform(ST_SetSRID(geometry, 4326), 4326) AS bbox
            FROM regions
        ) x
        """)

    report('Storing bounding boxes. "." = 10,000 regions: ', end='', flush=True)
    until_next_dot = 10000
    for row in c:
        json_id = row[0]
        region_id = 'Province-01' if json_id.startswith('Country') else json_id
        region = collection.get_region(region_id)
        region.set('bounding-box', row[1:5])
        region.save()
        until_next_dot -= 1
        if until_next_dot == 0:
            until_next_dot = 10000
            report('.', end='', flush=True)

    report('')
Ejemplo n.º 8
0
def main():
    """Summarize the 2011 language matrices across all regions.

    Adds the ``by-mother-tongue`` matrix and then the
    ``by-language-spoken-at-home`` matrix of every region that has one to a
    ``LanguageMatrixTotal``. Afterwards, for each key in the total's ``data``
    (sorted), prints the language name from ``REVERSE_LANGUAGES`` together
    with the mean, median and median absolute deviation, each times 100.
    Progress messages are written to stderr.
    """
    import sys

    total_matrix = LanguageMatrixTotal()

    stats_collection = stats_db.get_collection()

    print('Loading mother tongues...', file=sys.stderr)
    mother_tongue_query = {'2011.population.by-mother-tongue': {'$exists': True}}
    for region_stats in stats_collection.find(mother_tongue_query):
        population = region_stats['2011']['population']
        total_matrix.add_language_matrix(population['by-mother-tongue'])

    print('Loading languages spoken at home...', file=sys.stderr)
    home_language_query = {
        '2011.population.by-language-spoken-at-home': {'$exists': True},
    }
    for region_stats in stats_collection.find(home_language_query):
        population = region_stats['2011']['population']
        total_matrix.add_language_matrix(population['by-language-spoken-at-home'])

    print('Finding medians...', file=sys.stderr)
    for key in sorted(total_matrix.data.keys()):
        values = total_matrix.data[key]

        mean = sum(values) / len(values)
        median1 = median(values)
        absolute_deviations = (math.fabs(value - median1) for value in values)
        median_deviation = median(absolute_deviations)

        name = REVERSE_LANGUAGES[key]
        scaled = tuple(stat * 100 for stat in (mean, median1, median_deviation))
        line = (
            """'%s': { name: "%s", mean: %0.2f, median: %0.2f, mad: %0.2f },"""
            % ((key, name) + scaled))
        print(line)
Ejemplo n.º 9
0
        uid2006 = line[11:21]
        flag = line[22]

        if flag in ('1', '2'):
            if uid2011 not in data:
                data[uid2011] = []
            data[uid2011].append(uid2006)

    return data


if __name__ == '__main__':
    import os
    import sys

    collection = stats_db.get_collection()

    # All three input archives live in the same directory, resolved relative
    # to this script's location.
    _blocks_dir = os.path.join(os.path.dirname(__file__), '..', 'db',
                               'statistics', 'dissemination-blocks')

    print('Loading correspondence file...', file=sys.stderr)
    uid2011_to_uids2006 = load_uid2011_to_uids2006(
        os.path.join(_blocks_dir, '2011_92-156_DB_ID_txt.zip'))

    print('Opening 2006 file...', file=sys.stderr)
    regions2006 = iterate_regions_2006(
        os.path.join(_blocks_dir, '2006_92-151_XBB_txt.zip'))

    print('Opening 2011 file...', file=sys.stderr)
    regions2011 = iterate_regions_2011(
        os.path.join(_blocks_dir, '2011_92-151_XBB_txt.zip'))
            FROM region_parents rp
            INNER JOIN region_min_zoom_levels rmzl ON rp.region_id = rmzl.region_id
            GROUP BY rp.parent_region_id
            ''')
        for r in c:
            regions_by_id[r[0]] = r[1]

    def regions_in_slices(self, slice_size):
        """Yield consecutive slices of ``self.regions``.

        Each slice starts ``slice_size`` items after the previous one and
        holds at most ``slice_size`` items; the last slice may be shorter.
        """
        slice_starts = range(0, len(self.regions), slice_size)
        for start in slice_starts:
            stop = start + slice_size
            yield self.regions[start:stop]

if __name__ == '__main__':
    import sys

    connection = db.connect()
    stats_collection = stats_db.get_collection()

    print('Creating output database...', file=sys.stderr)
    print('PRAGMA synchronous = OFF;')
    print('CREATE TABLE region_statistics (region_id INTEGER PRIMARY KEY, statistics BLOB);')

    store = DbRegionStore(connection)

    print('Loading statistics per region and writing to SQLite ("." = %d regions read and written): ' % (SLICE_SIZE,), file=sys.stderr, end='', flush=True)
    for region_slice in store.regions_in_slices(SLICE_SIZE):
        regions_by_key = dict((r.key, r) for r in region_slice)

        stats_with_keys = stats_collection.find({ '_id': { '$in': list(regions_by_key.keys()) } })
        for region_stats in stats_with_keys:
            key = region_stats['_id']
            region = regions_by_key[key]