def _load_linestring(big_endian, type_bytes, data_bytes):
    """
    Convert byte data for a LineString to a GeoJSON `dict`.

    :param bool big_endian:
        If `True`, unpack the vertex count and the coordinates in big
        endian order, else little endian.
    :param type_bytes:
        Geometry type bytes. They are looked up in the values of
        ``WKB_2D``, ``WKB_Z``, ``WKB_M`` and ``WKB_ZM`` to decide how many
        values each vertex carries (2, 3, 3 and 4 respectively).
    :param data_bytes:
        Source of the remaining bytes: a 4-byte vertex count followed by
        8 bytes per coordinate value. It is read incrementally through
        ``take``, so it is expected to be an iterator (`loads` passes
        one).
    :returns:
        `dict` with ``type='LineString'`` and the list of vertices under
        ``coordinates``. For XYM input a ``0.0`` Z value is inserted at
        index 2 of every vertex, so the output is XYZM.
    """
    endian_token = '>' if big_endian else '<'

    is_m = False
    if type_bytes in WKB_2D.values():
        num_dims = 2
    elif type_bytes in WKB_Z.values():
        num_dims = 3
    elif type_bytes in WKB_M.values():
        num_dims = 3
        is_m = True
    elif type_bytes in WKB_ZM.values():
        num_dims = 4

    [num_verts] = struct.unpack('%sl' % endian_token,
                                as_bin_str(take(4, data_bytes)))

    # Every vertex has the same layout, so build the format only once.
    vert_fmt = endian_token + 'd' * num_dims

    coords = []
    # Loop exactly `num_verts` times so that a vertex count of 0 produces
    # an empty coordinate list instead of attempting to unpack a vertex
    # that is not there.
    for _ in range(num_verts):
        vert_wkb = as_bin_str(take(8 * num_dims, data_bytes))
        vert = list(struct.unpack(vert_fmt, vert_wkb))
        if is_m:
            vert.insert(2, 0.0)
        coords.append(vert)

    return dict(type='LineString', coordinates=coords)
def loads(string):
    """
    Construct a GeoJSON `dict` from WKB (`string`).

    The first byte selects the byte order, the next four bytes identify
    the geometry type, and everything after that is handed to the importer
    registered for that type in ``_loads_registry``.

    :param string:
        Iterable of WKB bytes. It is wrapped with `iter`, so passing an
        iterator that was already partly consumed continues from its
        current position.
    :returns:
        GeoJSON `dict` produced by the type-specific importer.
    :raises ValueError:
        If the first byte equals neither ``BIG_ENDIAN`` nor
        ``LITTLE_ENDIAN``.
    """
    string = iter(string)

    endianness = as_bin_str(take(1, string))
    if endianness == BIG_ENDIAN:
        big_endian = True
    elif endianness == LITTLE_ENDIAN:
        big_endian = False
    else:
        # Hexlify the raw byte string directly: bytes objects have no
        # `.encode()` on Python 3, so encoding first would replace this
        # ValueError with an AttributeError.
        raise ValueError("Invalid endian byte: '0x%s'. Expected 0x00 or 0x01"
                         % binascii.hexlify(endianness).decode())

    type_bytes = as_bin_str(take(4, string))
    if not big_endian:
        # To identify the type, order the type bytes in big endian:
        type_bytes = type_bytes[::-1]

    geom_type = _BINARY_TO_GEOM_TYPE.get(type_bytes)

    importer = _loads_registry.get(geom_type)
    if importer is None:
        # NOTE(review): expected to raise for unknown types -- confirm.
        _unsupported_geom_type(geom_type)

    # `string` is already an iterator, so the importer picks up right
    # after the 5 header bytes consumed above.
    return importer(big_endian, type_bytes, string)
def _load_multilinestring(big_endian, type_bytes, data_bytes):
    """
    Convert byte data for a MultiLineString to a GeoJSON `dict`.

    The payload is a 4-byte LineString count followed by that many
    LineStrings. Each one carries its own 1-byte endian marker and 4-byte
    type, which are asserted to match the parent's byte order and
    dimensionality, then a 4-byte vertex count and the vertex values.

    :param bool big_endian:
        If `True`, unpack counts and coordinates in big endian order, else
        little endian.
    :param type_bytes:
        Geometry type bytes. They are looked up in the values of
        ``WKB_2D``, ``WKB_Z``, ``WKB_M`` and ``WKB_ZM`` to decide how many
        values each vertex carries.
    :param data_bytes:
        Iterable of the bytes that follow the geometry header.
    :returns:
        `dict` with ``type='MultiLineString'``; ``coordinates`` holds one
        list of vertices per LineString. For XYM input a ``0.0`` Z value
        is inserted at index 2 of every vertex.
    :raises AssertionError:
        If a nested header does not match the expected byte order or the
        expected LineString type.
    """
    endian_token = '>' if big_endian else '<'
    data_bytes = iter(data_bytes)

    is_m = False
    if type_bytes in WKB_2D.values():
        num_dims = 2
    elif type_bytes in WKB_Z.values():
        num_dims = 3
    elif type_bytes in WKB_M.values():
        num_dims = 3
        is_m = True
    elif type_bytes in WKB_ZM.values():
        num_dims = 4

    # M geometries have 3 values per vertex like Z, so the label cannot be
    # derived from the value count alone.
    dim = 'M' if is_m else _INT_TO_DIM_LABEL[num_dims]

    count_fmt = '%sl' % endian_token
    [num_ls] = struct.unpack(count_fmt, as_bin_str(take(4, data_bytes)))

    coords = []
    # Loop exactly `num_ls` times so that a count of 0 returns an empty
    # coordinate list instead of trying to read a nested header that is
    # not there.
    for _ in range(num_ls):
        ls_endian = as_bin_str(take(1, data_bytes))
        ls_type = as_bin_str(take(4, data_bytes))
        if big_endian:
            assert ls_endian == BIG_ENDIAN
            assert ls_type == _WKB[dim]['LineString']
        else:
            assert ls_endian == LITTLE_ENDIAN
            # Nested type bytes arrive little endian; reverse to compare.
            assert ls_type[::-1] == _WKB[dim]['LineString']

        [num_verts] = struct.unpack(count_fmt,
                                    as_bin_str(take(4, data_bytes)))
        num_values = num_dims * num_verts
        values = struct.unpack(endian_token + 'd' * num_values,
                               as_bin_str(take(8 * num_values, data_bytes)))
        # Regroup the flat run of doubles into one block per vertex.
        values = list(block_splitter(values, num_dims))
        if is_m:
            for vert in values:
                vert.insert(2, 0.0)
        coords.append(values)

    return dict(type='MultiLineString', coordinates=coords)
def _load_geometrycollection(big_endian, type_bytes, data_bytes):
    """
    Convert byte data for a GeometryCollection to a GeoJSON `dict`.

    The payload is a 4-byte geometry count followed by that many complete
    WKB geometries, each of which is parsed by calling `loads` on the same
    byte source.

    :param bool big_endian:
        If `True`, unpack the geometry count in big endian order, else
        little endian.
    :param type_bytes:
        Geometry type bytes. They are looked up in the values of
        ``WKB_2D``, ``WKB_Z``, ``WKB_M`` and ``WKB_ZM`` to determine the
        dimensionality the members are checked against.
    :param data_bytes:
        Source of the bytes that follow the geometry header. It is handed
        to `loads` for every member, so it is expected to be an iterator
        that keeps its position between calls.
    :returns:
        `dict` with ``type='GeometryCollection'`` and the parsed members
        under ``geometries``.
    """
    endian_token = '>' if big_endian else '<'

    is_m = False
    if type_bytes in WKB_2D.values():
        num_dims = 2
    elif type_bytes in WKB_Z.values():
        num_dims = 3
    elif type_bytes in WKB_M.values():
        num_dims = 3
        is_m = True
    elif type_bytes in WKB_ZM.values():
        num_dims = 4

    [num_geoms] = struct.unpack('%sl' % endian_token,
                                as_bin_str(take(4, data_bytes)))

    geometries = []
    # Loop exactly `num_geoms` times so that an empty collection (count 0)
    # returns no members instead of calling `loads` on exhausted data.
    for _ in range(num_geoms):
        geometry = loads(data_bytes)
        # M members are checked against 4, not 3 (the loaders in this
        # module insert a Z value into XYM coordinates).
        _check_dimensionality(geometry, 4 if is_m else num_dims)
        # TODO(LB): Add type assertions for the geometry; collections should
        # not mix 2d, 3d, 4d, etc.
        geometries.append(geometry)

    return dict(type='GeometryCollection', geometries=geometries)
def _load_multipoint(big_endian, type_bytes, data_bytes):
    """
    Convert byte data for a MultiPoint to a GeoJSON `dict`.

    The payload is a 4-byte point count followed by that many points. Each
    point carries its own 1-byte endian marker and 4-byte type, which are
    asserted to match the parent's byte order and dimensionality, followed
    by 8 bytes per coordinate value.

    :param bool big_endian:
        If `True`, unpack the count and coordinates in big endian order,
        else little endian.
    :param type_bytes:
        Geometry type bytes. They are looked up in the values of
        ``WKB_2D``, ``WKB_Z``, ``WKB_M`` and ``WKB_ZM`` to decide how many
        values each point carries.
    :param data_bytes:
        Iterable of the bytes that follow the geometry header.
    :returns:
        `dict` with ``type='MultiPoint'`` and one coordinate list per
        point. For XYM input a ``0.0`` Z value is inserted at index 2.
    :raises AssertionError:
        If a nested header does not match the expected byte order or the
        expected Point type.
    """
    endian_token = '>' if big_endian else '<'
    data_bytes = iter(data_bytes)

    is_m = False
    if type_bytes in WKB_2D.values():
        num_dims = 2
    elif type_bytes in WKB_Z.values():
        num_dims = 3
    elif type_bytes in WKB_M.values():
        num_dims = 3
        is_m = True
    elif type_bytes in WKB_ZM.values():
        num_dims = 4

    # M geometries have 3 values per point like Z, so the label cannot be
    # derived from the value count alone.
    dim = 'M' if is_m else _INT_TO_DIM_LABEL[num_dims]

    [num_points] = struct.unpack('%sl' % endian_token,
                                 as_bin_str(take(4, data_bytes)))

    # Every point has the same layout, so build the format only once.
    point_fmt = '%s%s' % (endian_token, 'd' * num_dims)

    coords = []
    # Loop exactly `num_points` times so that a count of 0 returns an
    # empty coordinate list instead of trying to read a point that is not
    # there.
    for _ in range(num_points):
        point_endian = as_bin_str(take(1, data_bytes))
        point_type = as_bin_str(take(4, data_bytes))
        values = list(struct.unpack(
            point_fmt, as_bin_str(take(8 * num_dims, data_bytes))))
        if is_m:
            values.insert(2, 0.0)

        if big_endian:
            assert point_endian == BIG_ENDIAN
            assert point_type == _WKB[dim]['Point']
        else:
            assert point_endian == LITTLE_ENDIAN
            # Nested type bytes arrive little endian; reverse to compare.
            assert point_type[::-1] == _WKB[dim]['Point']

        coords.append(values)

    return dict(type='MultiPoint', coordinates=coords)
def _load_polygon(big_endian, type_bytes, data_bytes):
    """
    Convert byte data for a Polygon to a GeoJSON `dict`.

    The payload is a 4-byte ring count followed by that many rings, each
    made of a 4-byte vertex count and 8 bytes per coordinate value.

    :param bool big_endian:
        If `True`, unpack counts and coordinates in big endian order, else
        little endian.
    :param type_bytes:
        Geometry type bytes. They are looked up in the values of
        ``WKB_2D``, ``WKB_Z``, ``WKB_M`` and ``WKB_ZM`` to decide how many
        values each vertex carries.
    :param data_bytes:
        Iterable of the bytes that follow the geometry header.
    :returns:
        `dict` with ``type='Polygon'``; ``coordinates`` holds one list of
        vertices per ring. For XYM input a ``0.0`` Z value is inserted at
        index 2 of every vertex.
    """
    endian_token = '>' if big_endian else '<'
    data_bytes = iter(data_bytes)

    is_m = False
    if type_bytes in WKB_2D.values():
        num_dims = 2
    elif type_bytes in WKB_Z.values():
        num_dims = 3
    elif type_bytes in WKB_M.values():
        num_dims = 3
        is_m = True
    elif type_bytes in WKB_ZM.values():
        num_dims = 4

    count_fmt = '%sl' % endian_token
    [num_rings] = struct.unpack(count_fmt, as_bin_str(take(4, data_bytes)))

    coords = []
    # Loop exactly `num_rings` times so that a ring count of 0 returns an
    # empty coordinate list instead of trying to read a ring that is not
    # there.
    for _ in range(num_rings):
        [num_verts] = struct.unpack(count_fmt,
                                    as_bin_str(take(4, data_bytes)))
        num_values = num_verts * num_dims
        # Unpack the whole ring in one call (as `_load_multilinestring`
        # does); this avoids re-joining the data byte by byte, which
        # needed separate Python 2 and Python 3 code paths.
        values = struct.unpack(endian_token + 'd' * num_values,
                               as_bin_str(take(8 * num_values, data_bytes)))

        ring = []
        for start in range(0, num_values, num_dims):
            vert = list(values[start:start + num_dims])
            if is_m:
                vert.insert(2, 0.0)
            ring.append(vert)
        coords.append(ring)

    return dict(type='Polygon', coordinates=coords)
def _load_polygon(big_endian, type_bytes, data_bytes):
    """
    Convert byte data for a Polygon to a GeoJSON `dict`.

    Layout read from ``data_bytes``: a 4-byte ring count, then for every
    ring a 4-byte vertex count followed by 8 bytes per coordinate value.

    :param bool big_endian:
        If `True`, interpret counts and coordinates as big endian, else as
        little endian.
    :param type_bytes:
        Geometry type bytes; membership in the values of ``WKB_2D``,
        ``WKB_Z``, ``WKB_M`` or ``WKB_ZM`` selects 2, 3, 3 or 4 values per
        vertex.
    :param data_bytes:
        Iterable of the bytes that follow the geometry header.
    :returns:
        `dict` with ``type='Polygon'`` and a list of rings under
        ``coordinates``, each ring being a list of vertices. XYM vertices
        get ``0.0`` inserted as Z at index 2.
    """
    endian_token = '>' if big_endian else '<'
    data_bytes = iter(data_bytes)

    is_m = False
    if type_bytes in WKB_2D.values():
        num_dims = 2
    elif type_bytes in WKB_Z.values():
        num_dims = 3
    elif type_bytes in WKB_M.values():
        num_dims = 3
        is_m = True
    elif type_bytes in WKB_ZM.values():
        num_dims = 4

    count_fmt = '%sl' % endian_token
    [num_rings] = struct.unpack(count_fmt, as_bin_str(take(4, data_bytes)))

    coords = []
    # A counted loop (rather than "loop until len(coords) == num_rings")
    # lets a ring count of 0 fall straight through to an empty result.
    for _ in range(num_rings):
        [num_verts] = struct.unpack(count_fmt,
                                    as_bin_str(take(4, data_bytes)))
        num_values = num_verts * num_dims
        ring_wkb = as_bin_str(take(8 * num_values, data_bytes))
        # One unpack call per ring; no per-byte re-joining and therefore
        # no Python 2 / Python 3 special casing.
        flat = struct.unpack(endian_token + 'd' * num_values, ring_wkb)

        ring = [list(flat[i:i + num_dims])
                for i in range(0, num_values, num_dims)]
        if is_m:
            for vert in ring:
                vert.insert(2, 0.0)
        coords.append(ring)

    return dict(type='Polygon', coordinates=coords)
def _load_point(big_endian, type_bytes, data_bytes):
    """
    Convert byte data for a Point to a GeoJSON `dict`.

    :param bool big_endian:
        If `True`, interpret the ``data_bytes`` in big endian order, else
        little endian.
    :param str type_bytes:
        4-byte integer (as a binary string) indicating the geometry type
        (Point) and the dimensions (2D, Z, M or ZM). For consistency, these
        bytes are expected to always be in big endian order, regardless of
        the value of ``big_endian``.
    :param str data_bytes:
        Coordinate data in a binary string.

    :returns:
        GeoJSON `dict` representing the Point geometry.
    """
    endian_token = '>' if big_endian else '<'

    # Work out how many doubles to read and whether a Z value has to be
    # added afterwards.
    pad_z = False
    if type_bytes == WKB_2D['Point']:
        num_dims = 2
    elif type_bytes == WKB_Z['Point']:
        num_dims = 3
    elif type_bytes == WKB_M['Point']:
        # NOTE: The use of XYM types geometries is quite rare. In the
        # interest of removing ambiguity, we will treat all XYM geometries
        # as XYZM when generating the GeoJSON. A default Z value of `0.0`
        # will be given in this case.
        num_dims = 3
        pad_z = True
    elif type_bytes == WKB_ZM['Point']:
        num_dims = 4

    point_wkb = as_bin_str(take(8 * num_dims, data_bytes))
    coords = list(struct.unpack(endian_token + 'd' * num_dims, point_wkb))
    if pad_z:
        coords.insert(2, 0.0)

    return dict(type='Point', coordinates=coords)
def _load_point(big_endian, type_bytes, data_bytes):
    """
    Convert byte data for a Point to a GeoJSON `dict`.

    :param bool big_endian:
        If `True`, interpret the ``data_bytes`` in big endian order, else
        little endian.
    :param str type_bytes:
        4-byte integer (as a binary string) indicating the geometry type
        (Point) and the dimensions (2D, Z, M or ZM). For consistency, these
        bytes are expected to always be in big endian order, regardless of
        the value of ``big_endian``.
    :param str data_bytes:
        Coordinate data in a binary string.

    :returns:
        GeoJSON `dict` representing the Point geometry.
    """
    endian_token = '>' if big_endian else '<'

    # (type table, number of doubles stored, insert a default Z?)
    # NOTE: The use of XYM types geometries is quite rare. In the interest
    # of removing ambiguity, all XYM geometries are treated as XYZM when
    # generating the GeoJSON; a default Z value of `0.0` is given in that
    # case.
    layouts = (
        (WKB_2D, 2, False),
        (WKB_Z, 3, False),
        (WKB_M, 3, True),
        (WKB_ZM, 4, False),
    )

    for table, width, needs_z in layouts:
        if type_bytes == table['Point']:
            raw = as_bin_str(take(8 * width, data_bytes))
            coords = list(struct.unpack(endian_token + 'd' * width, raw))
            if needs_z:
                coords.insert(2, 0.0)
            break

    return dict(type='Point', coordinates=list(coords))
def _get_geom_type(type_bytes):
    """Get the GeoJSON geometry type label from a WKB type byte string.

    :param type_bytes:
        4 byte string in big endian byte order containing a WKB type
        number. It may also contain a "has SRID" flag in the high byte
        (the first byte, since this is big endian byte order), indicated
        as 0x20. If the SRID flag is not set, the high byte will always be
        null (0x00).
    :returns:
        3-tuple of GeoJSON geometry type label, the bytes representing the
        geometry type, and a separate "has SRID" flag. If the input
        `type_bytes` contains an SRID flag, it will be removed.

    >>> # Z Point, with SRID flag
    >>> _get_geom_type(b'\\x20\\x00\\x03\\xe9') == (
    ...     'Point', b'\\x00\\x00\\x03\\xe9', True)
    True

    >>> # 2D MultiLineString, without SRID flag
    >>> _get_geom_type(b'\\x00\\x00\\x00\\x05') == (
    ...     'MultiLineString', b'\\x00\\x00\\x00\\x05', False)
    True
    """
    # The first byte is the only place the SRID flag can appear.
    lead = type_bytes[0]
    if six.PY3:
        # Indexing bytes on Python 3 gives an int; turn it back into a
        # one-byte string so it can be compared with a bytes literal.
        lead = bytes([lead])

    has_srid = lead == b'\x20'

    if has_srid:
        # Blank out the flag byte so the lookup below sees a plain type.
        normalized = b'\x00' + type_bytes[1:]
    else:
        normalized = type_bytes
    type_bytes = as_bin_str(normalized)

    return _BINARY_TO_GEOM_TYPE.get(type_bytes), type_bytes, has_srid
def _get_geom_type(type_bytes):
    """Map a WKB type byte string to its GeoJSON geometry type label.

    :param type_bytes:
        4 byte string in big endian byte order containing a WKB type
        number. The high byte (the first one, since this is big endian
        byte order) may carry a "has SRID" flag, indicated as 0x20. If the
        SRID flag is not set, the high byte will always be null (0x00).
    :returns:
        3-tuple of GeoJSON geometry type label, the bytes representing the
        geometry type, and a separate "has SRID" flag. If the input
        `type_bytes` contains an SRID flag, it will be removed.

    >>> # Z Point, with SRID flag
    >>> _get_geom_type(b'\\x20\\x00\\x03\\xe9') == (
    ...     'Point', b'\\x00\\x00\\x03\\xe9', True)
    True

    >>> # 2D MultiLineString, without SRID flag
    >>> _get_geom_type(b'\\x00\\x00\\x00\\x05') == (
    ...     'MultiLineString', b'\\x00\\x00\\x00\\x05', False)
    True
    """
    # Pull out the high byte; on Python 3 indexing yields an int, which is
    # wrapped back into a one-byte string for the comparison.
    high_byte = type_bytes[0]
    high_byte = bytes([high_byte]) if six.PY3 else high_byte
    has_srid = (high_byte == b'\x20')

    # When flagged, swap the high byte for a null byte before the lookup.
    type_bytes = as_bin_str(
        b'\x00' + type_bytes[1:] if has_srid else type_bytes)

    geom_type = _BINARY_TO_GEOM_TYPE.get(type_bytes)
    return geom_type, type_bytes, has_srid
def _load_multipolygon(big_endian, type_bytes, data_bytes):
    """
    Convert byte data for a MultiPolygon to a GeoJSON `dict`.

    The payload is a 4-byte polygon count followed by that many polygons.
    Each polygon carries its own 1-byte endian marker and 4-byte type,
    which are asserted to match the parent's byte order and
    dimensionality, then a 4-byte ring count; each ring is a 4-byte vertex
    count followed by 8 bytes per coordinate value.

    :param bool big_endian:
        If `True`, unpack counts and coordinates in big endian order, else
        little endian.
    :param type_bytes:
        Geometry type bytes. They are looked up in the values of
        ``WKB_2D``, ``WKB_Z``, ``WKB_M`` and ``WKB_ZM`` to decide how many
        values each vertex carries.
    :param data_bytes:
        Source of the bytes that follow the geometry header. It is read
        incrementally through ``take``, so it is expected to be an
        iterator (`loads` passes one).
    :returns:
        `dict` with ``type='MultiPolygon'``; ``coordinates`` is a list of
        polygons, each a list of rings, each a list of vertices. For XYM
        input a ``0.0`` Z value is inserted at index 2 of every vertex.
    :raises AssertionError:
        If a nested header does not match the expected byte order or the
        expected Polygon type.
    """
    endian_token = '>' if big_endian else '<'

    is_m = False
    if type_bytes in WKB_2D.values():
        num_dims = 2
    elif type_bytes in WKB_Z.values():
        num_dims = 3
    elif type_bytes in WKB_M.values():
        num_dims = 3
        is_m = True
    elif type_bytes in WKB_ZM.values():
        num_dims = 4

    # M geometries have 3 values per vertex like Z, so the label cannot be
    # derived from the value count alone.
    dim = 'M' if is_m else _INT_TO_DIM_LABEL[num_dims]

    count_fmt = '%sl' % endian_token
    # Every vertex has the same layout, so build the format only once.
    vert_fmt = endian_token + 'd' * num_dims

    [num_polys] = struct.unpack(count_fmt, as_bin_str(take(4, data_bytes)))

    coords = []
    # Loop exactly `num_polys` times so that a polygon count of 0 returns
    # an empty coordinate list instead of trying to read a nested header
    # that is not there.
    for _poly in range(num_polys):
        poly_endian = as_bin_str(take(1, data_bytes))
        poly_type = as_bin_str(take(4, data_bytes))
        if big_endian:
            assert poly_endian == BIG_ENDIAN
            assert poly_type == _WKB[dim]['Polygon']
        else:
            assert poly_endian == LITTLE_ENDIAN
            # Nested type bytes arrive little endian; reverse to compare.
            assert poly_type[::-1] == _WKB[dim]['Polygon']

        [num_rings] = struct.unpack(count_fmt,
                                    as_bin_str(take(4, data_bytes)))

        polygon = []
        for _ring in range(num_rings):
            [num_verts] = struct.unpack(count_fmt,
                                        as_bin_str(take(4, data_bytes)))
            ring = []
            for _vert in range(num_verts):
                vert_wkb = as_bin_str(take(8 * num_dims, data_bytes))
                vert = list(struct.unpack(vert_fmt, vert_wkb))
                if is_m:
                    vert.insert(2, 0.0)
                ring.append(vert)
            polygon.append(ring)

        coords.append(polygon)

    return dict(type='MultiPolygon', coordinates=coords)
def loads(string):
    """
    Construct a GeoJSON `dict` from WKB (`string`).

    The resulting GeoJSON `dict` will include the SRID as an integer in the
    `meta` object. This was an arbitrary decision made by `geomet`, the
    discussion of which took place here:
    https://github.com/geomet/geomet/issues/28.

    In order to be consistent with other libraries [1] and (deprecated)
    specifications [2], also include the same information in a `crs`
    object. This isn't ideal, but the `crs` member is no longer part of the
    GeoJSON standard, according to RFC7946 [3]. However, it's still useful to
    include this information in GeoJSON payloads because it supports
    conversion to EWKT/EWKB (which are canonical formats used by PostGIS and
    the like).

    Example:

        {'type': 'Point',
         'coordinates': [0.0, 1.0],
         'meta': {'srid': 4326},
         'crs': {'type': 'name', 'properties': {'name': 'EPSG4326'}}}

    NOTE(larsbutler): I'm not sure if it's valid to just prefix EPSG
    (European Petroleum Survey Group) to an SRID like this, but we'll stick
    with it for now until it becomes a problem.

    NOTE(larsbutler): Ideally, we should use URNs instead of this notation,
    according to the new GeoJSON spec [4]. However, in order to be consistent
    with [1], we'll stick with this approach for now.

    References:

    [1] - https://github.com/bryanjos/geo/issues/76
    [2] - http://geojson.org/geojson-spec.html#coordinate-reference-system-objects
    [3] - https://tools.ietf.org/html/rfc7946#appendix-B.1
    [4] - https://tools.ietf.org/html/rfc7946#section-4

    :param string:
        Iterable of WKB bytes. It is wrapped with `iter`, so passing an
        iterator that was already partly consumed continues from its
        current position.
    :returns:
        GeoJSON `dict` produced by the type-specific importer, with `meta`
        and `crs` members added when the type bytes carry the SRID flag.
    :raises ValueError:
        If the first byte equals neither ``BIG_ENDIAN`` nor
        ``LITTLE_ENDIAN``.
    """  # noqa
    string = iter(string)

    endianness = as_bin_str(take(1, string))
    if endianness == BIG_ENDIAN:
        big_endian = True
    elif endianness == LITTLE_ENDIAN:
        big_endian = False
    else:
        # Hexlify the raw byte string directly: bytes objects have no
        # `.encode()` on Python 3, so encoding first would replace this
        # ValueError with an AttributeError.
        raise ValueError("Invalid endian byte: '0x%s'. Expected 0x00 or 0x01"
                         % binascii.hexlify(endianness).decode())

    endian_token = '>' if big_endian else '<'

    type_bytes = as_bin_str(take(4, string))
    if not big_endian:
        # To identify the type, order the type bytes in big endian:
        type_bytes = type_bytes[::-1]

    geom_type, type_bytes, has_srid = _get_geom_type(type_bytes)

    srid = None
    if has_srid:
        # When flagged, a 4-byte SRID sits between the type and the data.
        srid_field = as_bin_str(take(4, string))
        [srid] = struct.unpack('%si' % endian_token, srid_field)

    importer = _loads_registry.get(geom_type)
    if importer is None:
        # NOTE(review): expected to raise for unknown types -- confirm.
        _unsupported_geom_type(geom_type)

    # `string` is already an iterator, so the importer picks up right
    # after the header bytes consumed above.
    result = importer(big_endian, type_bytes, string)

    if has_srid:
        # As mentioned in the docstring above, include both approaches to
        # indicating the SRID.
        result['meta'] = {'srid': int(srid)}
        result['crs'] = {
            'type': 'name',
            'properties': {'name': 'EPSG%s' % srid},
        }
    return result
def loads(string):
    """
    Construct a GeoJSON `dict` from WKB (`string`).

    The resulting GeoJSON `dict` will include the SRID as an integer in the
    `meta` object. This was an arbitrary decision made by `geomet`, the
    discussion of which took place here:
    https://github.com/geomet/geomet/issues/28.

    In order to be consistent with other libraries [1] and (deprecated)
    specifications [2], also include the same information in a `crs`
    object. This isn't ideal, but the `crs` member is no longer part of the
    GeoJSON standard, according to RFC7946 [3]. However, it's still useful to
    include this information in GeoJSON payloads because it supports
    conversion to EWKT/EWKB (which are canonical formats used by PostGIS and
    the like).

    Example:

        {'type': 'Point',
         'coordinates': [0.0, 1.0],
         'meta': {'srid': 4326},
         'crs': {'type': 'name', 'properties': {'name': 'EPSG4326'}}}

    NOTE(larsbutler): I'm not sure if it's valid to just prefix EPSG
    (European Petroleum Survey Group) to an SRID like this, but we'll stick
    with it for now until it becomes a problem.

    NOTE(larsbutler): Ideally, we should use URNs instead of this notation,
    according to the new GeoJSON spec [4]. However, in order to be consistent
    with [1], we'll stick with this approach for now.

    References:

    [1] - https://github.com/bryanjos/geo/issues/76
    [2] - http://geojson.org/geojson-spec.html#coordinate-reference-system-objects
    [3] - https://tools.ietf.org/html/rfc7946#appendix-B.1
    [4] - https://tools.ietf.org/html/rfc7946#section-4

    :param string:
        Iterable of WKB bytes. It is wrapped with `iter`, so an iterator
        that was already partly consumed is read from its current
        position.
    :returns:
        The `dict` built by the importer registered for the geometry type.
        When the type bytes carry the SRID flag, `meta` and `crs` members
        are added to it.
    :raises ValueError:
        If the first byte equals neither ``BIG_ENDIAN`` nor
        ``LITTLE_ENDIAN``.
    """  # noqa
    string = iter(string)

    # Byte 0: byte order marker.
    endianness = as_bin_str(take(1, string))
    if endianness == BIG_ENDIAN:
        big_endian = True
    elif endianness == LITTLE_ENDIAN:
        big_endian = False
    else:
        # Pass the byte string straight to hexlify. Calling `.encode()` on
        # it first fails on Python 3 (bytes has no such method), which
        # would hide this ValueError behind an AttributeError.
        raise ValueError("Invalid endian byte: '0x%s'. Expected 0x00 or 0x01"
                         % binascii.hexlify(endianness).decode())

    endian_token = '>' if big_endian else '<'

    # Bytes 1-4: geometry type.
    type_bytes = as_bin_str(take(4, string))
    if not big_endian:
        # To identify the type, order the type bytes in big endian:
        type_bytes = type_bytes[::-1]

    geom_type, type_bytes, has_srid = _get_geom_type(type_bytes)

    srid = None
    if has_srid:
        # The SRID, when present, is the next 4 bytes before the data.
        srid_field = as_bin_str(take(4, string))
        [srid] = struct.unpack('%si' % endian_token, srid_field)

    importer = _loads_registry.get(geom_type)
    if importer is None:
        # NOTE(review): expected to raise for unknown types -- confirm.
        _unsupported_geom_type(geom_type)

    # The importer continues reading from the same iterator, i.e. from the
    # first byte after the header.
    result = importer(big_endian, type_bytes, string)

    if has_srid:
        # As mentioned in the docstring above, include both approaches to
        # indicating the SRID.
        result['meta'] = {'srid': int(srid)}
        result['crs'] = {
            'type': 'name',
            'properties': {'name': 'EPSG%s' % srid},
        }
    return result