Example #1
0
def load(filename=DEFAULT_GEOZONES_FILE, drop=False):
    '''
    Load a geozones archive from <filename>

    <filename> can be either a local path or a remote URL.
    '''
    ts = datetime.now().isoformat().replace('-', '').replace(':', '').split('.')[0]
    prefix = 'geozones-{0}'.format(ts)
    if filename.startswith('http'):
        log.info('Downloading GeoZones bundle: %s', filename)
        # Use tmp.open to make sure that the directory exists in FS
        with tmp.open(GEOZONE_FILENAME, 'wb') as newfile:
            newfile.write(requests.get(filename).content)
            filename = tmp.path(GEOZONE_FILENAME)

    log.info('Extracting GeoZones bundle')
    with handle_error(prefix):
        with contextlib.closing(lzma.LZMAFile(filename)) as xz:
            with tarfile.open(fileobj=xz) as f:
                f.extractall(tmp.path(prefix))

    log.info('Loading GeoZones levels')

    log.info('Loading levels.msgpack')
    levels_filepath = tmp.path(prefix + '/levels.msgpack')
    if drop and GeoLevel.objects.count():
        name = '_'.join((GeoLevel._get_collection_name(), ts))
        target = GeoLevel._get_collection_name()
        with switch_collection(GeoLevel, name):
            with handle_error(prefix, GeoLevel):
                total = load_levels(GeoLevel, levels_filepath)
                GeoLevel.objects._collection.rename(target, dropTarget=True)
    else:
        with handle_error(prefix):
            total = load_levels(GeoLevel, levels_filepath)
    log.info('Loaded {total} levels'.format(total=total))

    log.info('Loading zones.msgpack')
    zones_filepath = tmp.path(prefix + '/zones.msgpack')
    if drop and GeoZone.objects.count():
        name = '_'.join((GeoZone._get_collection_name(), ts))
        target = GeoZone._get_collection_name()
        with switch_collection(GeoZone, name):
            with handle_error(prefix, GeoZone):
                total = load_zones(GeoZone, zones_filepath)
                GeoZone.objects._collection.rename(target, dropTarget=True)
    else:
        with handle_error(prefix):
            total = load_zones(GeoZone, zones_filepath)
    log.info('Loaded {total} zones'.format(total=total))

    cleanup(prefix)
Example #2
0
def load(filename, drop=False):
    '''
    Load a geozones archive from <filename>

    <filename> can be either a local path or a remote URL.
    '''
    if filename.startswith('http'):
        log.info('Downloading GeoZones bundle: %s', filename)
        filename, _ = urlretrieve(filename, tmp.path('geozones.tar.xz'))

    log.info('Extracting GeoZones bundle')
    with contextlib.closing(lzma.LZMAFile(filename)) as xz:
        with tarfile.open(fileobj=xz) as f:
            f.extractall(tmp.root)

    log.info('Loading GeoZones levels')

    if drop:
        log.info('Dropping existing levels')
        GeoLevel.drop_collection()

    log.info('Loading levels.msgpack')
    levels_filepath = tmp.path('levels.msgpack')
    with open(levels_filepath) as fp:
        unpacker = msgpack.Unpacker(fp, encoding=str('utf-8'))
        for i, level in enumerate(unpacker, start=1):
            GeoLevel.objects(id=level['id']).modify(
                upsert=True,
                set__name=level['label'],
                set__parents=[level_ref(p) for p in level['parents']],
                set__admin_level=level.get('admin_level')
            )
    os.remove(levels_filepath)
    log.info('Loaded {total} levels'.format(total=i))

    if drop:
        log.info('Dropping existing spatial zones')
        GeoZone.drop_collection()

    log.info('Loading zones.msgpack')
    zones_filepath = tmp.path('zones.msgpack')
    with open(zones_filepath) as fp:
        unpacker = msgpack.Unpacker(fp, encoding=str('utf-8'))
        unpacker.next()  # Skip headers.
        for i, geozone in enumerate(unpacker):
            params = {
                'slug': slugify.slugify(geozone['name'], separator='-'),
                'level': geozone['level'],
                'code': geozone['code'],
                'name': geozone['name'],
                'keys': geozone.get('keys'),
                'parents': geozone.get('parents', []),
                'ancestors': geozone.get('ancestors', []),
                'successors': geozone.get('successors', []),
                'validity': geozone.get('validity'),
                'population': geozone.get('population'),
                'dbpedia': geozone.get('dbpedia'),
                'flag': geozone.get('flag'),
                'blazon': geozone.get('blazon'),
                'wikipedia': geozone.get('wikipedia'),
                'area': geozone.get('area'),
            }
            if geozone.get('geom') and (
                geozone['geom']['type'] != 'GeometryCollection' or
                    geozone['geom']['geometries']):
                params['geom'] = geozone['geom']
            try:
                GeoZone.objects(id=geozone['_id']).modify(upsert=True, **{
                    'set__{0}'.format(k): v for k, v in params.items()
                })
            except errors.ValidationError as e:
                log.warning('Validation error (%s) for %s with %s',
                            e, geozone['_id'], params)
                continue
    os.remove(zones_filepath)
    log.info('Loaded {total} zones'.format(total=i))

    shutil.rmtree(tmp.path('translations'))  # Not in use for now.
Example #3
0
def level_ref(level):
    return DBRef(GeoLevel._get_collection_name(), level)
Example #4
0
def load(filename, drop=False):
    '''
    Load a geozones archive from <filename>

    <filename> can be either a local path or a remote URL.
    '''
    if filename.startswith('http'):
        log.info('Downloading GeoZones bundle: %s', filename)
        filename, _ = urlretrieve(filename, tmp.path('geozones.tar.xz'))

    log.info('Extracting GeoZones bundle')
    with contextlib.closing(lzma.LZMAFile(filename)) as xz:
        with tarfile.open(fileobj=xz) as f:
            f.extractall(tmp.root)

    log.info('Loading GeoZones levels')

    if drop:
        log.info('Dropping existing levels')
        GeoLevel.drop_collection()

    log.info('Loading levels.msgpack')
    levels_filepath = tmp.path('levels.msgpack')
    with open(levels_filepath) as fp:
        unpacker = msgpack.Unpacker(fp, encoding=str('utf-8'))
        for i, level in enumerate(unpacker, start=1):
            GeoLevel.objects.create(
                id=level['id'],
                name=level['label'],
                parents=level['parents'],
                admin_level=level.get('admin_level')
            )
    os.remove(levels_filepath)
    log.info('Loaded {total} levels'.format(total=i))

    if drop:
        log.info('Dropping existing spatial zones')
        GeoZone.drop_collection()

    log.info('Loading zones.msgpack')
    zones_filepath = tmp.path('zones.msgpack')
    with open(zones_filepath) as fp:
        unpacker = msgpack.Unpacker(fp, encoding=str('utf-8'))
        unpacker.next()  # Skip headers.
        for i, geozone in enumerate(unpacker):
            if not geozone.get('geom') or (
                geozone['geom']['type'] == 'GeometryCollection' and
                    not geozone['geom']['geometries']):
                geom = None
            else:
                geom = geozone['geom']
            params = {
                'id': geozone['_id'],
                'slug': slugify.slugify(geozone['name'], separator='-'),
                'level': geozone['level'],
                'code': geozone['code'],
                'name': geozone['name'],
                'keys': geozone.get('keys'),
                'parents': geozone.get('parents'),
                'ancestors': geozone.get('ancestors'),
                'successors': geozone.get('successors'),
                'validity': geozone.get('validity'),
                'population': geozone.get('population'),
                'dbpedia': geozone.get('dbpedia'),
                'flag': geozone.get('flag'),
                'blazon': geozone.get('blazon'),
                'wikipedia': geozone.get('wikipedia'),
                'area': geozone.get('area'),
                'geom': geom
            }
            try:
                GeoZone.objects.create(**params)
            except errors.ValidationError as e:
                log.warning('Validation error (%s) for %s with %s',
                            e, geozone, params)
                continue
    os.remove(zones_filepath)
    log.info('Loaded {total} zones'.format(total=i))

    shutil.rmtree(tmp.path('translations'))  # Not in use for now.
Example #5
0
def level_ref(level):
    return DBRef(GeoLevel._get_collection_name(), level)