def load(filename=DEFAULT_GEOZONES_FILE, drop=False):
    '''
    Load a geozones archive from <filename>

    <filename> can be either a local path or a remote URL.

    The archive (an xz-compressed tarball) is extracted into a timestamped
    directory under the temporary storage, then ``levels.msgpack`` and
    ``zones.msgpack`` are loaded, in that order, before the extracted
    files are cleaned up.

    :param filename: local path or URL (anything starting with ``http``)
        of the archive to load.
    :param drop: when true, a collection that already holds documents is
        replaced instead of being loaded in place.
    :raises requests.HTTPError: when the remote archive cannot be
        downloaded (non-success HTTP status).
    '''
    # Compact timestamp (YYYYMMDDTHHMMSS) shared by the extraction
    # directory and the side collections.
    ts = datetime.now().isoformat().split('.')[0]
    ts = ts.replace('-', '').replace(':', '')
    prefix = 'geozones-{0}'.format(ts)

    if filename.startswith('http'):
        log.info('Downloading GeoZones bundle: %s', filename)
        response = requests.get(filename)
        # Fail now instead of saving an HTTP error page as the archive
        # and hitting an obscure decompression error later on.
        response.raise_for_status()
        # Use tmp.open to make sure that the directory exists in FS
        with tmp.open(GEOZONE_FILENAME, 'wb') as newfile:
            newfile.write(response.content)
        filename = tmp.path(GEOZONE_FILENAME)

    log.info('Extracting GeoZones bundle')
    with handle_error(prefix):
        with contextlib.closing(lzma.LZMAFile(filename)) as xz:
            with tarfile.open(fileobj=xz) as f:
                # NOTE(review): extractall() trusts the member paths stored
                # in the archive; only load bundles from trusted sources.
                f.extractall(tmp.path(prefix))

    log.info('Loading GeoZones levels')

    log.info('Loading levels.msgpack')
    levels_filepath = tmp.path(prefix + '/levels.msgpack')
    total = _load_collection(
        GeoLevel, load_levels, levels_filepath, prefix, ts, drop)
    log.info('Loaded %s levels', total)

    log.info('Loading zones.msgpack')
    zones_filepath = tmp.path(prefix + '/zones.msgpack')
    total = _load_collection(
        GeoZone, load_zones, zones_filepath, prefix, ts, drop)
    log.info('Loaded %s zones', total)

    cleanup(prefix)


def _load_collection(model, loader, filepath, prefix, ts, drop):
    '''
    Run ``loader(model, filepath)`` and return its result.

    When ``drop`` is true and the collection already holds documents, the
    data is loaded into a side collection named ``<collection>_<ts>``
    which is then renamed over the original one (``dropTarget=True``).
    Otherwise the data is loaded straight into the current collection.
    '''
    if drop and model.objects.count():
        target = model._get_collection_name()
        name = '_'.join((target, ts))
        with switch_collection(model, name):
            with handle_error(prefix, model):
                total = loader(model, filepath)
                # Only reached when loading succeeded: swap the freshly
                # loaded collection in place of the previous one.
                model.objects._collection.rename(target, dropTarget=True)
    else:
        with handle_error(prefix):
            total = loader(model, filepath)
    return total
def load(filename, drop=False):
    '''
    Load a geozones archive from <filename>

    <filename> can be either a local path or a remote URL.

    The archive (an xz-compressed tarball) is extracted into the temporary
    storage root, then ``levels.msgpack`` and ``zones.msgpack`` are loaded.
    Every level and zone is upserted by id. Both msgpack files and the
    extracted ``translations`` directory are removed once processed.

    :param filename: local path or URL (anything starting with ``http``)
        of the archive to load.
    :param drop: when true, the levels and zones collections are dropped
        before being loaded.
    '''
    if filename.startswith('http'):
        log.info('Downloading GeoZones bundle: %s', filename)
        filename, _ = urlretrieve(filename, tmp.path('geozones.tar.xz'))

    log.info('Extracting GeoZones bundle')
    with contextlib.closing(lzma.LZMAFile(filename)) as xz:
        with tarfile.open(fileobj=xz) as f:
            # NOTE(review): extractall() trusts the member paths stored in
            # the archive; only load bundles from trusted sources.
            f.extractall(tmp.root)

    log.info('Loading GeoZones levels')

    if drop:
        log.info('Dropping existing levels')
        GeoLevel.drop_collection()

    log.info('Loading levels.msgpack')
    levels_filepath = tmp.path('levels.msgpack')
    total = 0  # Stays 0 when the file holds no level at all.
    # msgpack is a binary format: never read it in text mode.
    with open(levels_filepath, 'rb') as fp:
        unpacker = msgpack.Unpacker(fp, encoding=str('utf-8'))
        for total, level in enumerate(unpacker, start=1):
            GeoLevel.objects(id=level['id']).modify(
                upsert=True,
                set__name=level['label'],
                set__parents=[level_ref(p) for p in level['parents']],
                set__admin_level=level.get('admin_level')
            )
    os.remove(levels_filepath)
    log.info('Loaded {total} levels'.format(total=total))

    if drop:
        log.info('Dropping existing spatial zones')
        GeoZone.drop_collection()

    log.info('Loading zones.msgpack')
    zones_filepath = tmp.path('zones.msgpack')
    # Counts every zone record read, including the ones skipped below
    # because of a validation error.
    total = 0
    with open(zones_filepath, 'rb') as fp:
        unpacker = msgpack.Unpacker(fp, encoding=str('utf-8'))
        # Skip headers (the default avoids StopIteration on an empty file).
        next(unpacker, None)
        for total, geozone in enumerate(unpacker, start=1):
            params = {
                'slug': slugify.slugify(geozone['name'], separator='-'),
                'level': geozone['level'],
                'code': geozone['code'],
                'name': geozone['name'],
                'keys': geozone.get('keys'),
                'parents': geozone.get('parents', []),
                'ancestors': geozone.get('ancestors', []),
                'successors': geozone.get('successors', []),
                'validity': geozone.get('validity'),
                'population': geozone.get('population'),
                'dbpedia': geozone.get('dbpedia'),
                'flag': geozone.get('flag'),
                'blazon': geozone.get('blazon'),
                'wikipedia': geozone.get('wikipedia'),
                'area': geozone.get('area'),
            }
            # Only store a geometry when there is one and it is not an
            # empty GeometryCollection.
            if geozone.get('geom') and (
                    geozone['geom']['type'] != 'GeometryCollection' or
                    geozone['geom']['geometries']):
                params['geom'] = geozone['geom']
            try:
                GeoZone.objects(id=geozone['_id']).modify(upsert=True, **{
                    'set__{0}'.format(k): v for k, v in params.items()
                })
            except errors.ValidationError as e:
                # Best effort: report the invalid zone and keep loading.
                log.warning('Validation error (%s) for %s with %s',
                            e, geozone['_id'], params)
    os.remove(zones_filepath)
    log.info('Loaded {total} zones'.format(total=total))

    shutil.rmtree(tmp.path('translations'))  # Not in use for now.
def level_ref(level):
    '''Build a DBRef targeting <level> in the GeoLevel collection.'''
    collection = GeoLevel._get_collection_name()
    return DBRef(collection, level)
def load(filename, drop=False):
    '''
    Load a geozones archive from <filename>

    <filename> can be either a local path or a remote URL.

    The archive (an xz-compressed tarball) is extracted into the temporary
    storage root, then ``levels.msgpack`` and ``zones.msgpack`` are loaded.
    Every level and zone is inserted with ``objects.create``. Both msgpack
    files and the extracted ``translations`` directory are removed once
    processed.

    :param filename: local path or URL (anything starting with ``http``)
        of the archive to load.
    :param drop: when true, the levels and zones collections are dropped
        before being loaded.
    '''
    # NOTE(review): documents are created, not upserted; loading over
    # existing data without ``drop`` presumably fails on duplicate ids.
    # Confirm against the models.
    if filename.startswith('http'):
        log.info('Downloading GeoZones bundle: %s', filename)
        filename, _ = urlretrieve(filename, tmp.path('geozones.tar.xz'))

    log.info('Extracting GeoZones bundle')
    with contextlib.closing(lzma.LZMAFile(filename)) as xz:
        with tarfile.open(fileobj=xz) as f:
            # NOTE(review): extractall() trusts the member paths stored in
            # the archive; only load bundles from trusted sources.
            f.extractall(tmp.root)

    log.info('Loading GeoZones levels')

    if drop:
        log.info('Dropping existing levels')
        GeoLevel.drop_collection()

    log.info('Loading levels.msgpack')
    levels_filepath = tmp.path('levels.msgpack')
    total = 0  # Stays 0 when the file holds no level at all.
    # msgpack is a binary format: never read it in text mode.
    with open(levels_filepath, 'rb') as fp:
        unpacker = msgpack.Unpacker(fp, encoding=str('utf-8'))
        for total, level in enumerate(unpacker, start=1):
            GeoLevel.objects.create(
                id=level['id'],
                name=level['label'],
                parents=level['parents'],
                admin_level=level.get('admin_level')
            )
    os.remove(levels_filepath)
    log.info('Loaded {total} levels'.format(total=total))

    if drop:
        log.info('Dropping existing spatial zones')
        GeoZone.drop_collection()

    log.info('Loading zones.msgpack')
    zones_filepath = tmp.path('zones.msgpack')
    # Counts every zone record read, including the ones skipped below
    # because of a validation error.
    total = 0
    with open(zones_filepath, 'rb') as fp:
        unpacker = msgpack.Unpacker(fp, encoding=str('utf-8'))
        # Skip headers (the default avoids StopIteration on an empty file).
        next(unpacker, None)
        for total, geozone in enumerate(unpacker, start=1):
            # A missing geometry or an empty GeometryCollection is stored
            # as no geometry at all.
            if not geozone.get('geom') or (
                    geozone['geom']['type'] == 'GeometryCollection' and
                    not geozone['geom']['geometries']):
                geom = None
            else:
                geom = geozone['geom']
            params = {
                'id': geozone['_id'],
                'slug': slugify.slugify(geozone['name'], separator='-'),
                'level': geozone['level'],
                'code': geozone['code'],
                'name': geozone['name'],
                'keys': geozone.get('keys'),
                'parents': geozone.get('parents'),
                'ancestors': geozone.get('ancestors'),
                'successors': geozone.get('successors'),
                'validity': geozone.get('validity'),
                'population': geozone.get('population'),
                'dbpedia': geozone.get('dbpedia'),
                'flag': geozone.get('flag'),
                'blazon': geozone.get('blazon'),
                'wikipedia': geozone.get('wikipedia'),
                'area': geozone.get('area'),
                'geom': geom
            }
            try:
                GeoZone.objects.create(**params)
            except errors.ValidationError as e:
                # Best effort: report the invalid zone and keep loading.
                log.warning('Validation error (%s) for %s with %s',
                            e, geozone, params)
    os.remove(zones_filepath)
    log.info('Loaded {total} zones'.format(total=total))

    shutil.rmtree(tmp.path('translations'))  # Not in use for now.