Exemple #1
0
def status(ctx):
    """Display some informations and statistics"""
    # ctx.obj supplies "home" (echoed as GEOZONES_HOME) and "levels", whose
    # items expose `id`, `label` and `urls`.
    title("Current status")

    section("Settings")
    click.echo("GEOZONES_HOME: {0}".format(ctx.obj["home"]))

    section("Levels")
    levels = ctx.obj["levels"]
    for lvl in levels:
        click.echo("{id}: {label}".format(**lvl.__dict__))

    section("downloads")
    # The same URL may be listed by several levels: check each one only once.
    wanted = {url for lvl in levels if lvl.urls for url in lvl.urls}
    for url in wanted:
        name = basename(url).strip()
        click.echo("{0} ... ".format(name), nl=False)
        if not os.path.exists(os.path.join(DL_DIR, name)):
            error("absent")
            continue
        success("present")

    section("coverage")
    zones = DB()
    properties = ("population", "area", "wikipedia")
    level_ids = [lvl.id for lvl in levels]

    def countprop(name):
        # Count, per level, the zones in which the `name` field exists.
        pipeline = [
            {"$match": {name: {"$exists": True}, "level": {"$in": level_ids}}},
            {"$group": {"_id": "$level", "value": {"$sum": 1}}},
        ]
        return {row["_id"]: row["value"] for row in zones.aggregate(pipeline)}

    def display_prop(name, count, total):
        # error when nothing is covered, success when everything is,
        # warning for partial coverage.
        click.echo("\t{0}: ".format(name), nl=False)
        ratio = "{0}/{1}".format(count, total)
        if count == 0:
            error(ratio)
        elif count == total:
            success(ratio)
        else:
            warning(ratio)

    # All aggregations are run up front, before the per-level report.
    counts = {prop: countprop(prop) for prop in properties}
    totals = dict.fromkeys(properties, 0)
    total = 0
    for lvl in levels:
        level_count = zones.count({"level": lvl.id})
        total += level_count
        click.echo("{0}: {1}".format(lvl.id, level_count))

        for prop in properties:
            # Levels missing from the aggregation result have no zone
            # carrying the property.
            found = counts[prop].get(lvl.id, 0)
            totals[prop] += found
            display_prop(prop, found, level_count)

    click.secho("TOTAL: {0}".format(total), bold=True)
    for prop in properties:
        display_prop(prop, totals[prop], total)
Exemple #2
0
def explore(debug, launch):
    """A web interface to explore data"""
    # debug:  forwarded to the web application's run(); also hides the title.
    # launch: when truthy, the interface URL is opened with click.launch().
    if not debug:  # Avoid dual title
        title('Running the exploration Web interface')

    # Imported at call time and aliased so that the module does not shadow
    # this function's own name inside its body.
    import explore as webapp

    if launch:
        click.launch('http://localhost:5000/')
    webapp.run(debug)
Exemple #3
0
def explore(debug, launch):
    """A web interface to explore data"""
    # The title is only printed outside of debug mode, to avoid showing it
    # twice.
    show_title = not debug
    if show_title:
        title('Running the exploration Web interface')

    # Local import under an alias: a plain `import explore` would rebind the
    # name of this very function within its body.
    import explore as explorer

    if launch:
        # Open the interface URL before handing control to the application.
        click.launch('http://localhost:5000/')
    explorer.run(debug)
Exemple #4
0
def postprocess(ctx, only):
    """Perform some postprocessing"""
    # ctx.obj['levels']: levels to post-process, visited in iteration order.
    # only: forwarded untouched to every level's postprocess().
    title('Performing post-processing')
    collection = DB()

    levels = ctx.obj['levels']
    for lvl in levels:
        lvl.postprocess(DL_DIR, collection, only)

    success('Post-processing done')
Exemple #5
0
def postprocess(ctx):
    """Perform some postprocessing"""
    # Every level found in ctx.obj['levels'] post-processes itself against
    # the download directory and the zones collection.
    title('Performing post-processing')
    collection = DB()

    levels = ctx.obj['levels']
    for lvl in levels:
        lvl.postprocess(DL_DIR, collection)

    success('Post-processing done')
Exemple #6
0
def postprocess(ctx, only):
    """Perform some postprocessing"""
    # `only` is passed through as-is to each level; this command itself
    # never inspects it.
    title("Performing post-processing")
    store = DB()

    for current in ctx.obj["levels"]:
        current.postprocess(DL_DIR, store, only)

    success("Post-processing done")
Exemple #7
0
def explore(debug, launch):
    """A web interface to explore data"""
    # debug:  handed to the web application's run(); the title is skipped
    #         when it is truthy to avoid a dual title.
    # launch: truthy to open the interface URL through click.launch().
    if not debug:
        title("Running the exploration Web interface")

    # Deferred import, aliased to keep this function's name unshadowed.
    import explore as web_ui

    if launch:
        click.launch("http://localhost:5000/")
    web_ui.run(debug)
Exemple #8
0
def aggregate(ctx):
    """Perform zones aggregations"""
    title("Performing zones aggregations")
    zones = DB()

    # Levels are visited in the reverse order of ctx.obj["levels"]; each
    # build_aggregates() call returns a count that is added to the total.
    built = sum(
        lvl.build_aggregates(zones) for lvl in reversed(ctx.obj["levels"])
    )

    success("Done: Built {0} zones by aggregation".format(built))
Exemple #9
0
def aggregate(ctx):
    """Perform zones aggregations"""
    title('Performing zones aggregations')
    collection = DB()

    # Walk the levels backwards and accumulate the number of zones each
    # level reports having built.
    built = 0
    for lvl in reversed(ctx.obj['levels']):
        built = built + lvl.build_aggregates(collection)

    success('Done: Built {0} zones by aggregation'.format(built))
Exemple #10
0
def aggregate(ctx):
    """Perform zones aggregations"""
    title('Performing zones aggregations')
    zones = DB()

    # ctx.obj['levels'] is traversed last-to-first; the values returned by
    # build_aggregates() are summed for the final report.
    levels_backwards = reversed(ctx.obj['levels'])
    built = sum(lvl.build_aggregates(zones) for lvl in levels_backwards)

    success('Done: Built {0} zones by aggregation'.format(built))
Exemple #11
0
def download(ctx):
    """Download sources datasets"""
    # ctx.obj['levels'] items expose `urls`; every distinct URL is fetched
    # once into DL_DIR, under the filename reported by
    # extract_meta_from_headers().
    title('Downloading required datasets')
    if not exists(DL_DIR):
        os.makedirs(DL_DIR)

    # De-duplicate: several levels may rely on the same dataset.
    urls = {url for level in ctx.obj['levels'] if level.urls
            for url in level.urls}
    for url in urls:
        info('Dealing with {0}'.format(url))
        filename, size = extract_meta_from_headers(url)
        info('Downloading {0}'.format(filename))
        with click.progressbar(length=size) as bar:
            # urlretrieve reports a cumulative block count, whereas
            # bar.update() advances the bar by a number of steps: only the
            # difference with the previous report must be fed to the bar.
            reported = [0]  # mutable cell updated by the nested hook

            def reporthook(blocknum, blocksize, totalsize):
                read = blocknum * blocksize
                if read <= 0:
                    return
                # The last block may overshoot the announced total size.
                position = size if read > totalsize else read
                step = position - reported[0]
                if step > 0:
                    bar.update(step)
                    reported[0] = position

            urlretrieve(url, join(DL_DIR, filename), reporthook=reporthook)
Exemple #12
0
def load(ctx, drop):
    """Load zones from a folder of zip files containing shapefiles"""
    # ctx.obj['levels']: levels to load, each one loading itself from DL_DIR.
    # drop: when truthy, the collection is dropped before anything else.
    title('Extracting zones from datasets')
    zones = DB()

    if drop:
        info('Drop existing collection')
        zones.drop()

    # Indexes are created before any zone is loaded.
    by_level_and_code = [('level', ASCENDING), ('code', ASCENDING)]
    with ok('Creating index (level,code)'):
        zones.create_index(by_level_and_code)

    remaining_indexes = (
        ('Creating index (level,keys)',
         [('level', ASCENDING), ('keys', ASCENDING)]),
        ('Creating index (parents)', 'parents'),
    )
    for message, spec in remaining_indexes:
        info(message)
        zones.create_index(spec)

    loaded = 0
    for lvl in ctx.obj['levels']:
        info('Processing level "{0}"'.format(lvl.id))
        loaded += lvl.load(DL_DIR, zones)

    success('Done: Loaded {0} zones'.format(loaded))
Exemple #13
0
def load(ctx, drop):
    """Load zones from a folder of zip files containing shapefiles"""
    title('Extracting zones from datasets')
    collection = DB()

    # Optional reset of the collection before loading.
    if drop:
        info('Drop existing collection')
        collection.drop()

    # Three indexes: (level, code), (level, keys) and parents.
    level_asc = ('level', ASCENDING)
    with ok('Creating index (level,code)'):
        collection.create_index([level_asc, ('code', ASCENDING)])
    info('Creating index (level,keys)')
    collection.create_index([level_asc, ('keys', ASCENDING)])
    info('Creating index (parents)')
    collection.create_index('parents')

    # Each level returns a count that is added to the reported total.
    loaded = 0
    for lvl in ctx.obj['levels']:
        info('Processing level "{0}"'.format(lvl.id))
        loaded = loaded + lvl.load(DL_DIR, collection)

    success('Done: Loaded {0} zones'.format(loaded))
Exemple #14
0
def load(ctx, drop):
    """Load zones from a folder of zip files containing shapefiles"""
    # drop: truthy to drop the existing collection first.
    title("Extracting zones from datasets")
    store = DB()

    if drop:
        info("Drop existing collection")
        store.drop()

    with ok("Creating index (level,code)"):
        store.create_index([("level", ASCENDING), ("code", ASCENDING)])

    # The two remaining indexes are announced with info() rather than ok().
    for message, spec in (
        ("Creating index (level,keys)", [("level", ASCENDING), ("keys", ASCENDING)]),
        ("Creating index (parents)", "parents"),
    ):
        info(message)
        store.create_index(spec)

    zone_count = 0
    for lvl in ctx.obj["levels"]:
        info('Processing level "{0}"'.format(lvl.id))
        zone_count += lvl.load(DL_DIR, store)

    success("Done: Loaded {0} zones".format(zone_count))
Exemple #15
0
def download(ctx):
    """Download sources datasets"""
    # Each distinct URL listed by the levels of ctx.obj['levels'] is
    # retrieved into DL_DIR; the target filename and the expected size come
    # from extract_meta_from_headers().
    title('Downloading required datasets')
    if not exists(DL_DIR):
        os.makedirs(DL_DIR)

    # A dataset shared by several levels is only downloaded once.
    urls = {url for level in ctx.obj['levels'] if level.urls
            for url in level.urls}
    for url in urls:
        info('Dealing with {0}'.format(url))
        filename, size = extract_meta_from_headers(url)
        info('Downloading {0}'.format(filename))
        with click.progressbar(length=size) as bar:
            # bar.update() expects the number of steps to advance by, not
            # an absolute position, while the hook receives a cumulative
            # block count: remember what was already reported and only
            # push the difference.
            reported = [0]  # mutable cell updated by the nested hook

            def reporthook(blocknum, blocksize, totalsize):
                read = blocknum * blocksize
                if read <= 0:
                    return
                # Clamp to the bar length once past the announced total.
                position = size if read > totalsize else read
                step = position - reported[0]
                if step > 0:
                    bar.update(step)
                    reported[0] = position

            urlretrieve(url, join(DL_DIR, filename), reporthook=reporthook)
Exemple #16
0
def status(ctx):
    """Display some informations and statistics"""
    # Four sections are printed: settings, levels, downloads and coverage.
    title('Current status')

    section('Settings')
    click.echo('GEOZONES_HOME: {0}'.format(ctx.obj['home']))

    section('Levels')
    levels = ctx.obj['levels']
    for lvl in levels:
        click.echo('{id}: {label}'.format(**lvl.__dict__))

    section('downloads')
    # Distinct URLs only: a dataset can be referenced by several levels.
    wanted = {url for lvl in levels if lvl.urls for url in lvl.urls}
    for url in wanted:
        name = basename(url).strip()
        click.echo('{0} ... '.format(name), nl=False)
        if not os.path.exists(os.path.join(DL_DIR, name)):
            error('absent')
            continue
        success('present')

    section('coverage')
    zones = DB()
    properties = ('population', 'area', 'wikipedia')
    level_ids = [lvl.id for lvl in levels]

    def countprop(name):
        # Per-level number of zones in which the `name` field exists.
        pipeline = [
            {'$match': {
                name: {'$exists': True},
                'level': {'$in': level_ids},
            }},
            {'$group': {'_id': '$level', 'value': {'$sum': 1}}},
        ]
        return {row['_id']: row['value'] for row in zones.aggregate(pipeline)}

    def display_prop(name, count, total):
        # Nothing covered -> error, everything -> success, else warning.
        click.echo('\t{0}: '.format(name), nl=False)
        ratio = '{0}/{1}'.format(count, total)
        if count == 0:
            error(ratio)
        elif count == total:
            success(ratio)
        else:
            warning(ratio)

    # Aggregations are all executed before the per-level report starts.
    counts = {prop: countprop(prop) for prop in properties}
    totals = dict.fromkeys(properties, 0)
    total = 0
    for lvl in levels:
        level_count = zones.count({'level': lvl.id})
        total += level_count
        click.echo('{0}: {1}'.format(lvl.id, level_count))

        for prop in properties:
            # A level absent from the aggregation result counts as zero.
            found = counts[prop].get(lvl.id, 0)
            totals[prop] += found
            display_prop(prop, found, level_count)

    click.secho('TOTAL: {0}'.format(total), bold=True)
    for prop in properties:
        display_prop(prop, totals[prop], total)
Exemple #17
0
def dist(ctx, pretty, split, compress, serialization):
    """Dump a distributable file"""
    # Parameters (documented here rather than in the docstring, which
    # presumably doubles as the click --help text -- TODO confirm):
    #   ctx           -- ctx.obj['levels'] lists the levels to export and
    #                    ctx.obj['home'] contains the 'translations' folder.
    #   pretty        -- forwarded to geojson.dump(); also indents the
    #                    levels JSON file by 4 spaces.
    #   split         -- truthy: one zones file per level, else a single one.
    #   compress      -- truthy: also build the .tar.xz archives.
    #   serialization -- 'json' writes JSON, any other value writes msgpack.
    title(
        'Dumping data to {serialization}'.format(serialization=serialization))
    geozones = DB()
    filenames = []
    as_json = serialization == 'json'

    def write_msgpack(items, filename):
        # Pack every item back to back into `filename`.
        packer = msgpack.Packer(use_bin_type=True)
        with open(filename, 'wb') as out:
            for item in items:
                out.write(packer.pack(item))

    def write_zones(zones, filename):
        # Serialize the given zones into `filename` in the requested format.
        if as_json:
            with open(filename, 'w') as out:
                geojson.dump(zones, out, pretty=pretty)
        else:
            write_msgpack(zones, filename)

    if not exists(DIST_DIR):
        os.makedirs(DIST_DIR)

    # Output files are addressed by bare filename, hence the chdir. The
    # finally clause brings the process back to the home directory even
    # when an export step fails.
    os.chdir(DIST_DIR)
    try:
        level_ids = [level.id for level in ctx.obj['levels']]

        if split:
            for level_id in level_ids:
                # Level identifiers may contain '/', invalid in a filename.
                filename = 'zones-{level}.{serialization}'.format(
                    level=level_id.replace('/', '-'),
                    serialization=serialization)
                with ok('Generating {filename}'.format(filename=filename)):
                    write_zones(geozones.find({'level': level_id}), filename)
                filenames.append(filename)
        else:
            filename = 'zones.{serialization}'.format(
                serialization=serialization)
            with ok('Generating {filename}'.format(filename=filename)):
                write_zones(
                    geozones.find({'level': {'$in': level_ids}}), filename)
            filenames.append(filename)

        filename = 'levels.{serialization}'.format(
            serialization=serialization)
        with ok('Generating {filename}'.format(filename=filename)):
            data = [{
                'id': level.id,
                'label': level.label,
                'parents': [p.id for p in level.parents]
            } for level in ctx.obj['levels']]
            if as_json:
                with open(filename, 'w') as out:
                    # indent=None is json.dump's compact default.
                    json.dump(data, out, indent=4 if pretty else None)
            else:
                write_msgpack(data, filename)
            filenames.append(filename)

        if compress:
            translations = join(ctx.obj['home'], 'translations')

            # Standalone translations archive.
            filename = 'geozones-translations.tar.xz'
            with ok('Compressing to {0}'.format(filename)):
                with tarfile.open(filename, 'w:xz') as txz:
                    txz.add(translations, 'translations')

            # Main archive: every generated file plus the translations.
            filename = 'geozones{split}-{serialization}.tar.xz'.format(
                split='-split' if split else '', serialization=serialization)
            with ok('Compressing to {0}'.format(filename)):
                with tarfile.open(filename, 'w:xz') as txz:
                    for name in filenames:
                        txz.add(name)
                    txz.add(translations, 'translations')
    finally:
        os.chdir(ctx.obj['home'])
Exemple #18
0
def status(ctx):
    """Display some informations and statistics"""
    # Reads ctx.obj['home'] and ctx.obj['levels']; level items expose `id`,
    # `label` and `urls`.
    title('Current status')

    section('Settings')
    click.echo('GEOZONES_HOME: {0}'.format(ctx.obj['home']))

    section('Levels')
    levels = ctx.obj['levels']
    for lvl in levels:
        click.echo('{id}: {label}'.format(**lvl.__dict__))

    section('downloads')
    # Each distinct URL is checked once, whatever the number of levels
    # referencing it.
    distinct_urls = {url for lvl in levels if lvl.urls for url in lvl.urls}
    for url in distinct_urls:
        name = basename(url).strip()
        click.echo('{0} ... '.format(name), nl=False)
        target = os.path.join(DL_DIR, name)
        if not os.path.exists(target):
            error('absent')
            continue
        success('present')

    section('coverage')
    zones = DB()
    properties = ('population', 'area', 'wikipedia')
    level_ids = [lvl.id for lvl in levels]

    def countprop(name):
        # Number of zones, per level, in which the `name` field exists.
        having_property = {
            '$match': {
                name: {'$exists': True},
                'level': {'$in': level_ids},
            }
        }
        per_level = {
            '$group': {'_id': '$level', 'value': {'$sum': 1}}
        }
        rows = zones.aggregate([having_property, per_level])
        return {row['_id']: row['value'] for row in rows}

    def display_prop(name, count, total):
        # error for no coverage, success for full coverage, warning between.
        click.echo('\t{0}: '.format(name), nl=False)
        ratio = '{0}/{1}'.format(count, total)
        if count == 0:
            error(ratio)
        elif count == total:
            success(ratio)
        else:
            warning(ratio)

    # Every aggregation is run before the per-level report begins.
    counts = {prop: countprop(prop) for prop in properties}
    totals = dict.fromkeys(properties, 0)
    total = 0
    for lvl in levels:
        level_count = zones.count({'level': lvl.id})
        total += level_count
        click.echo('{0}: {1}'.format(lvl.id, level_count))

        for prop in properties:
            # Levels missing from the aggregation output count as zero.
            found = counts[prop].get(lvl.id, 0)
            totals[prop] += found
            display_prop(prop, found, level_count)

    click.secho('TOTAL: {0}'.format(total), bold=True)
    for prop in properties:
        display_prop(prop, totals[prop], total)
Exemple #19
0
def dist(ctx, pretty, split, compress, serialization, keys):
    """Dump a distributable file"""
    # Parameters (documented here rather than in the docstring, which
    # presumably doubles as the click --help text -- TODO confirm):
    #   ctx           -- ctx.obj["levels"] lists the levels to export and
    #                    ctx.obj["home"] contains the "translations" folder.
    #   pretty        -- forwarded to geojson.dump(); also indents the
    #                    levels JSON file by 4 spaces.
    #   split         -- truthy: one zones file per level, else a single one.
    #   compress      -- truthy: also build the .tar.xz archives.
    #   serialization -- "json" writes JSON, any other value writes msgpack.
    #   keys          -- comma-separated string, split and forwarded to
    #                    geojson.dump(); falsy values are forwarded as-is.
    keys = keys.split(",") if keys else keys
    title("Dumping data to {serialization} with keys {keys}".format(serialization=serialization, keys=keys))
    geozones = DB()
    filenames = []
    as_json = serialization == "json"

    def write_msgpack(items, filename):
        # Pack every item back to back into `filename`.
        packer = msgpack.Packer(use_bin_type=True)
        with open(filename, "wb") as out:
            for item in items:
                out.write(packer.pack(item))

    def write_zones(zones, filename):
        # Serialize the given zones into `filename` in the requested format.
        if as_json:
            with open(filename, "w") as out:
                geojson.dump(zones, out, pretty=pretty, keys=keys)
        else:
            # NOTE(review): `keys` is only honoured by the JSON path; the
            # msgpack output always holds whole zones -- confirm intended.
            write_msgpack(zones, filename)

    if not exists(DIST_DIR):
        os.makedirs(DIST_DIR)

    # Output files are addressed by bare filename, hence the chdir. The
    # finally clause brings the process back to the home directory even
    # when an export step fails.
    os.chdir(DIST_DIR)
    try:
        level_ids = [level.id for level in ctx.obj["levels"]]

        if split:
            for level_id in level_ids:
                # Level identifiers may contain "/", invalid in a filename.
                filename = "zones-{level}.{serialization}".format(
                    level=level_id.replace("/", "-"), serialization=serialization
                )
                with ok("Generating {filename}".format(filename=filename)):
                    write_zones(geozones.find({"level": level_id}), filename)
                filenames.append(filename)
        else:
            filename = "zones.{serialization}".format(serialization=serialization)
            with ok("Generating {filename}".format(filename=filename)):
                write_zones(geozones.find({"level": {"$in": level_ids}}), filename)
            filenames.append(filename)

        filename = "levels.{serialization}".format(serialization=serialization)
        with ok("Generating {filename}".format(filename=filename)):
            data = [
                {"id": level.id, "label": level.label, "parents": [p.id for p in level.parents]}
                for level in ctx.obj["levels"]
            ]
            if as_json:
                with open(filename, "w") as out:
                    # indent=None is json.dump's compact default.
                    json.dump(data, out, indent=4 if pretty else None)
            else:
                write_msgpack(data, filename)
            filenames.append(filename)

        if compress:
            translations = join(ctx.obj["home"], "translations")

            # Standalone translations archive.
            filename = "geozones-translations.tar.xz"
            with ok("Compressing to {0}".format(filename)):
                with tarfile.open(filename, "w:xz") as txz:
                    txz.add(translations, "translations")

            # Main archive: every generated file plus the translations.
            filename = "geozones{split}-{serialization}.tar.xz".format(
                split="-split" if split else "", serialization=serialization
            )
            with ok("Compressing to {0}".format(filename)):
                with tarfile.open(filename, "w:xz") as txz:
                    for name in filenames:
                        txz.add(name)
                    txz.add(translations, "translations")
    finally:
        os.chdir(ctx.obj["home"])
Exemple #20
0
def dist(ctx, pretty, split, compress, serialization):
    """Dump a distributable file"""
    # Parameters (documented here rather than in the docstring, which
    # presumably doubles as the click --help text -- TODO confirm):
    #   ctx           -- ctx.obj['levels'] lists the levels to export and
    #                    ctx.obj['home'] contains the 'translations' folder.
    #   pretty        -- forwarded to geojson.dump(); also indents the
    #                    levels JSON file by 4 spaces.
    #   split         -- truthy: one zones file per level, else a single one.
    #   compress      -- truthy: also build the .tar.xz archives.
    #   serialization -- 'json' writes JSON, any other value writes msgpack.
    title('Dumping data to {serialization}'.format(
        serialization=serialization))
    geozones = DB()
    filenames = []
    as_json = serialization == 'json'

    def write_msgpack(items, filename):
        # Pack every item back to back into `filename`.
        packer = msgpack.Packer(use_bin_type=True)
        with open(filename, 'wb') as out:
            for item in items:
                out.write(packer.pack(item))

    def write_zones(zones, filename):
        # Serialize the given zones into `filename` in the requested format.
        if as_json:
            with open(filename, 'w') as out:
                geojson.dump(zones, out, pretty=pretty)
        else:
            write_msgpack(zones, filename)

    if not exists(DIST_DIR):
        os.makedirs(DIST_DIR)

    # Everything below writes files by bare name, so the working directory
    # is switched to DIST_DIR; try/finally ensures the switch back to the
    # home directory also happens when a step raises.
    os.chdir(DIST_DIR)
    try:
        level_ids = [level.id for level in ctx.obj['levels']]

        if split:
            for level_id in level_ids:
                # Level identifiers may contain '/', invalid in a filename.
                filename = 'zones-{level}.{serialization}'.format(
                    level=level_id.replace('/', '-'),
                    serialization=serialization)
                with ok('Generating {filename}'.format(filename=filename)):
                    write_zones(geozones.find({'level': level_id}), filename)
                filenames.append(filename)
        else:
            filename = 'zones.{serialization}'.format(
                serialization=serialization)
            with ok('Generating {filename}'.format(filename=filename)):
                write_zones(
                    geozones.find({'level': {'$in': level_ids}}), filename)
            filenames.append(filename)

        filename = 'levels.{serialization}'.format(
            serialization=serialization)
        with ok('Generating {filename}'.format(filename=filename)):
            data = [{
                'id': level.id,
                'label': level.label,
                'parents': [p.id for p in level.parents]
            } for level in ctx.obj['levels']]
            if as_json:
                with open(filename, 'w') as out:
                    # indent=None is json.dump's compact default.
                    json.dump(data, out, indent=4 if pretty else None)
            else:
                write_msgpack(data, filename)
            filenames.append(filename)

        if compress:
            translations = join(ctx.obj['home'], 'translations')

            # Standalone translations archive.
            filename = 'geozones-translations.tar.xz'
            with ok('Compressing to {0}'.format(filename)):
                with tarfile.open(filename, 'w:xz') as txz:
                    txz.add(translations, 'translations')

            # Main archive: every generated file plus the translations.
            filename = 'geozones{split}-{serialization}.tar.xz'.format(
                split='-split' if split else '', serialization=serialization)
            with ok('Compressing to {0}'.format(filename)):
                with tarfile.open(filename, 'w:xz') as txz:
                    for name in filenames:
                        txz.add(name)
                    txz.add(translations, 'translations')
    finally:
        os.chdir(ctx.obj['home'])