def loads(self, obj, encoding="bytes"):
    """
    Deserialize a binary payload with the parser selected by its leading integer.

    :param obj: raw data handed to ``BinaryParser``
    :param encoding: accepted for interface compatibility; not used by this method
    :return: whatever the selected parser's ``deserialize`` returns
    """
    reader = BinaryParser(obj)
    # The first integer in the stream picks the spatial parser for the rest of it.
    parser_id = reader.read_int()
    return self.get_parser(parser_id).deserialize(reader)
def deserialize(cls, parser: BinaryParser) -> MultiPoint:
    """
    Build a ``MultiPoint`` from the binary stream.

    :param parser: BinaryParser positioned at the start of the multipoint payload
    :return: MultiPoint built from the coordinates read from the stream
    """
    # Four leading doubles are consumed and discarded
    # (presumably the bounding box - TODO confirm).
    for _ in range(4):
        parser.read_double()

    point_count = parser.read_int()
    points = read_coordinates(parser, point_count)

    # NOTE(review): the user-data flag is consumed but not acted on here, while
    # other geometry deserializers in this file skip three bytes when the flag
    # is set - confirm whether MultiPoint needs the same handling.
    parser.read_boolean()
    return MultiPoint(points)
def deserialize(cls, parser: BinaryParser) -> Point:
    """
    Build a ``Point`` from two consecutive doubles in the stream.

    :param parser: BinaryParser positioned at the point's x coordinate
    :return: Point(x, y)
    """
    x_coord = parser.read_double()
    y_coord = parser.read_double()

    # When the user-data flag is set, three further bytes are consumed and
    # dropped so the parser ends up past them.
    if parser.read_boolean():
        parser.read_byte()
        parser.read_byte()
        parser.read_byte()

    return Point(x_coord, y_coord)
def deserialize(cls, parser: BinaryParser) -> Union[LineString, MultiLineString]:
    """
    Build a ``LineString`` or ``MultiLineString`` from the binary stream.

    :param parser: BinaryParser positioned at the start of the polyline payload
    :return: LineString when exactly one part is present, MultiLineString when
        there are several
    :raises InvalidGeometryException: when the part count is lower than one
    """
    # Four leading doubles are consumed and discarded
    # (presumably the bounding box - TODO confirm).
    for _ in range(4):
        parser.read_double()

    num_parts = parser.read_int()
    num_points = parser.read_int()
    offsets = OffsetsReader.read_offsets(parser, num_parts, num_points)

    # The size of each part is the distance between neighbouring offsets.
    parts = [
        LineString(read_coordinates(parser, offsets[idx + 1] - offsets[idx]))
        for idx in range(num_parts)
    ]

    # Validation happens only after the parts were read, so the parser has
    # already consumed the offsets by the time this can raise.
    if num_parts < 1:
        raise InvalidGeometryException("Invalid geometry")
    geometry = parts[0] if num_parts == 1 else MultiLineString(parts)

    # When the user-data flag is set, three further bytes are consumed and dropped.
    if parser.read_boolean():
        for _ in range(3):
            parser.read_byte()

    return geometry
def geometry_from_bytes(cls, bytes: bytearray) -> BaseGeometry:
    """
    Deserialize a geometry from its raw byte representation.

    :param bytes: bytearray wrapped in a ``BinaryParser``
    :return: geometry produced by the parser registered in ``cls.parsers``
    :raises GeometryUnavailableException: when the geometry type byte is not a
        value known to ``GeomEnum``
    """
    reader = BinaryParser(bytes)
    # The first byte is consumed but its value is not used here.
    reader.read_byte()
    geom_code = reader.read_byte()

    if not GeomEnum.has_value(geom_code):
        raise GeometryUnavailableException(f"Can not deserialize object")

    parser_name = GeomEnum.get_name(geom_code)
    return cls.parsers[parser_name].deserialize(reader)
def _deserialize_geom(cls, bin_parser: BinaryParser) -> GeoData:
    """
    Read one geometry and its optional user data into a ``GeoData``.

    :param bin_parser: BinaryParser positioned at the user-data length integer
    :return: GeoData whose ``userData`` is the string read from the stream, or
        an empty string when the recorded length is not positive
    """
    from geo_pyspark.sql.geometry import GeometryFactory

    # The user-data length precedes the geometry; the user data itself follows it.
    user_data_length = bin_parser.read_int()
    geometry = GeometryFactory.geometry_from_bytes(bin_parser)

    payload = bin_parser.read_string(user_data_length) if user_data_length > 0 else ""
    return GeoData(geom=geometry, userData=payload)
def SpatialKnnQuery(self, spatialRDD: SpatialRDD, originalQueryPoint: Point, k: int, useIndex: bool):
    """
    Run the JVM-side KNN query and deserialize its result on the Python side.

    :param spatialRDD: spatialRDD
    :param originalQueryPoint: shapely.geometry.Point
    :param k: int
    :param useIndex: bool
    :return: list with one object per serialized neighbour, each produced by
        ``SpatialRDDParserData.deserialize``
    """
    jvm = spatialRDD._jvm

    # Rebuild the query point as a JVM object from its x/y coordinates.
    coordinate = JvmCoordinate(jvm, originalQueryPoint.x, originalQueryPoint.y)
    jvm_point = JvmPoint(jvm, coordinate.jvm_instance).jvm_instance

    knn_neighbours = jvm.KNNQuery.SpatialKnnQuery(spatialRDD._srdd, jvm_point, k, useIndex)
    serialized = jvm.GeoSerializerData.serializeToPython(knn_neighbours)

    # Each serialized element gets its own parser so that read positions are
    # independent between neighbours.
    return [
        SpatialRDDParserData.deserialize(BinaryParser(arr))
        for arr in serialized
    ]
def deserialize(self, datum):
    """
    Turn a serialized datum into a geometry.

    :param datum: raw data handed to ``BinaryParser``
    :return: result of ``GeometryFactory.geometry_from_bytes``
    """
    # Imported inside the function, as in the original implementation.
    from geo_pyspark.sql.geometry import GeometryFactory
    from geo_pyspark.utils.binary_parser import BinaryParser

    return GeometryFactory.geometry_from_bytes(BinaryParser(datum))
def geometry_from_bytes(cls, bin_parser: BinaryParser) -> BaseGeometry:
    """
    Deserialize a geometry from an already positioned parser.

    The first byte selects the shape kind via ``ShapeEnum``. Circles are
    delegated to ``CircleParser``; for plain shapes a second byte selects the
    concrete geometry parser from ``PARSERS``.

    :param bin_parser: BinaryParser positioned at the shape type byte
    :return: deserialized geometry
    :raises GeometryUnavailableException: when the shape kind is neither circle
        nor shape, or when the geometry type byte is not known to ``GeomEnum``
    """
    g_type = bin_parser.read_byte()
    shape_type = ShapeEnum.get_name(g_type)

    if shape_type == ShapeEnum.circle.name:
        return CircleParser.deserialize(bin_parser)

    if shape_type == ShapeEnum.shape.name:
        gm_type = bin_parser.read_byte()
        if GeomEnum.has_value(gm_type):
            return PARSERS[GeomEnum.get_name(gm_type)].deserialize(bin_parser)

    # Reached for an unknown geometry type and for an unhandled shape kind.
    # The latter previously fell off the end and returned None, contradicting
    # the declared return type.
    raise GeometryUnavailableException("Can not deserialize object")
def deserialize(cls, bin_parser: BinaryParser):
    """
    Read a pair of geometries from the stream.

    :param bin_parser: BinaryParser positioned at the first geometry
    :return: two-element list ``[left, right]`` of ``_deserialize_geom`` results
    """
    left = cls._deserialize_geom(bin_parser)
    # An integer sits between the two geometries; it is consumed and ignored.
    bin_parser.read_int()
    right = cls._deserialize_geom(bin_parser)
    return [left, right]
def deserialize(cls, parser: BinaryParser) -> Union[Polygon, MultiPolygon]:
    """
    Build a ``Polygon`` or ``MultiPolygon`` from the binary stream.

    Rings with fewer than three coordinates are skipped. The first remaining
    ring becomes a shell and fixes the reference orientation. Later rings with
    the opposite orientation are holes of the current shell, and a ring with
    the reference orientation closes the current polygon and starts a new one.

    :param parser: BinaryParser positioned at the start of the polygon payload
    :return: the single Polygon when exactly one was assembled, otherwise a
        MultiPolygon of all assembled polygons (possibly none)
    """
    # Four leading doubles are consumed and discarded
    # (presumably the bounding box - TODO confirm).
    for _ in range(4):
        parser.read_double()

    num_rings = parser.read_int()
    num_points = parser.read_int()
    offsets = OffsetsReader.read_offsets(parser, num_parts=num_rings, max_offset=num_points)

    polygons = []
    shell = None
    holes = []
    shell_is_ccw = False

    for idx in range(num_rings):
        ring_coords = read_coordinates(parser, offsets[idx + 1] - offsets[idx])
        if len(ring_coords) < 3:
            continue

        ring = LinearRing(ring_coords)
        if shell is None:
            # The reference orientation is taken from the first shell only.
            shell, shell_is_ccw = ring, ring.is_ccw
            continue

        if ring.is_ccw != shell_is_ccw:
            holes.append(ring)
            continue

        # Same orientation as the reference: finish the current polygon and
        # start collecting holes for a new shell.
        polygons.append(Polygon(shell, holes))
        shell, holes = ring, []

    if shell is not None:
        polygons.append(Polygon(shell, holes))

    # When the user-data flag is set, three further bytes are consumed and dropped.
    if parser.read_boolean():
        for _ in range(3):
            parser.read_byte()

    return polygons[0] if len(polygons) == 1 else MultiPolygon(polygons)
def deserialize(cls, bin_parser: BinaryParser):
    """
    Read one left geometry followed by a counted run of right geometries.

    :param bin_parser: BinaryParser positioned at the left geometry
    :return: ``[left, [right, ...]]`` when at least one right geometry was
        read; otherwise just the left ``_deserialize_geom`` result
    """
    left = cls._deserialize_geom(bin_parser)
    right_count = bin_parser.read_int()
    rights = [cls._deserialize_geom(bin_parser) for _ in range(right_count)]

    # With no right-hand geometries the left one is returned bare, not in a list.
    if not rights:
        return left
    return [left, rights]
def __setstate__(self, attributes):
    """
    Restore the instance from its pickled state.

    :param attributes: mapping with a ``"geom"`` entry holding the serialized
        geometry and a ``"userData"`` entry stored as-is
    """
    # Imported inside the function, as in the original implementation.
    from geo_pyspark.sql.geometry import GeometryFactory
    from geo_pyspark.utils.binary_parser import BinaryParser

    geom_reader = BinaryParser(attributes["geom"])
    self._geom = GeometryFactory.geometry_from_bytes(geom_reader)
    self._userData = attributes["userData"]
def deserialize(cls, bin_parser: BinaryParser):
    """
    Build a ``Circle`` from a radius followed by an embedded geometry.

    :param bin_parser: BinaryParser positioned at the radius
    :return: Circle wrapping the embedded geometry and the radius
    """
    # The radius is read with read_double_reverse, unlike the coordinate
    # reads elsewhere in this file, which use read_double.
    radius = bin_parser.read_double_reverse()

    # The next byte names the embedded geometry type, which selects its parser.
    geom_code = bin_parser.read_byte()
    inner_parser = PARSERS[GeomEnum.get_name(geom_code)]
    return Circle(inner_parser.deserialize(bin_parser), radius)
def read_coordinates(parser: BinaryParser, read_scale: int):
    """
    Read ``read_scale`` coordinate pairs from the parser.

    :param parser: object exposing ``read_double``
    :param read_scale: number of pairs to read
    :return: list of ``(first, second)`` tuples in stream order; empty when
        ``read_scale`` is not positive
    """
    # Tuple elements are evaluated left to right, so each pair keeps stream order.
    return [
        (parser.read_double(), parser.read_double())
        for _ in range(read_scale)
    ]
def deserialize(cls, parser: BinaryParser) -> Point:
    """
    Build a ``Point`` from two consecutive doubles in the stream.

    :param parser: BinaryParser positioned at the point's x coordinate
    :return: Point(x, y)
    """
    # Arguments are evaluated left to right: x is read first, then y.
    return Point(parser.read_double(), parser.read_double())