def setRadius(self, givenRadius: float):
    """
    Update ``self.radius`` and rebuild ``self.MBR`` around ``self.centerPoint``.

    The stored radius is ``givenRadius`` only when it is strictly larger than
    half the diagonal of the center geometry's envelope; otherwise that half
    diagonal is stored instead.

    :param givenRadius: requested radius.
    """
    bounds = Envelope.from_shapely_geom(self.centerGeometry)
    span_x = bounds.maxx - bounds.minx
    span_y = bounds.maxy - bounds.miny
    half_diagonal = sqrt(span_x**2 + span_y**2) / 2

    if givenRadius > half_diagonal:
        self.radius = givenRadius
    else:
        self.radius = half_diagonal

    # Square envelope of side 2 * radius centered on centerPoint.
    self.MBR = Envelope(
        self.centerPoint.x - self.radius,
        self.centerPoint.x + self.radius,
        self.centerPoint.y - self.radius,
        self.centerPoint.y + self.radius,
    )
class TestPolygonRange(TestBase):
    """Range-query tests for a PolygonRDD built from ``input_location``."""

    loop_times = 5
    query_envelope = Envelope(-85.01, -60.01, 34.01, 50.01)

    def test_spatial_range_query(self):
        """Query without building an index; expect 704 matches carrying user data."""
        spatial_rdd = PolygonRDD(
            self.sc, input_location, splitter, True, StorageLevel.MEMORY_ONLY
        )

        for _ in range(self.loop_times):
            result_size = RangeQuery.SpatialRangeQuery(
                spatial_rdd, self.query_envelope, False, False
            ).count()
            assert result_size == 704

        sample = RangeQuery.SpatialRangeQuery(
            spatial_rdd, self.query_envelope, False, False
        ).take(10)
        assert sample[0].getUserData() is not None

    def test_spatial_range_query_using_index(self):
        """Build an R-tree, then query through it; expect the same 704 matches."""
        spatial_rdd = PolygonRDD(
            self.sc, input_location, splitter, True, StorageLevel.MEMORY_ONLY
        )
        spatial_rdd.buildIndex(IndexType.RTREE, False)

        # The final flag is passed as True so the index built above is actually
        # used (assumed to be SpatialRangeQuery's ``usingIndex`` switch); with
        # False this test merely repeated the unindexed one.
        for _ in range(self.loop_times):
            result_size = RangeQuery.SpatialRangeQuery(
                spatial_rdd, self.query_envelope, False, True
            ).count()
            assert result_size == 704

        sample = RangeQuery.SpatialRangeQuery(
            spatial_rdd, self.query_envelope, False, True
        ).take(10)
        assert sample[0].getUserData() is not None
class TestPointRange(TestBase):
    """Range-query tests for a PointRDD built from ``input_location``."""

    loop_times = 5
    query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01)

    def test_spatial_range_query(self):
        """Query without building an index; expect 2830 matches carrying user data."""
        spatial_rdd = PointRDD(self.sc, input_location, offset, splitter, False)

        for _ in range(self.loop_times):
            result_size = RangeQuery.SpatialRangeQuery(
                spatial_rdd, self.query_envelope, False, False
            ).count()
            assert result_size == 2830

        sample = RangeQuery.SpatialRangeQuery(
            spatial_rdd, self.query_envelope, False, False
        ).take(10)
        assert sample[1].getUserData() is not None

    def test_spatial_range_query_using_index(self):
        """Build an R-tree, then query through it; expect the same 2830 matches."""
        spatial_rdd = PointRDD(self.sc, input_location, offset, splitter, False)
        spatial_rdd.buildIndex(IndexType.RTREE, False)

        # The final flag is passed as True so the index built above is actually
        # used (assumed to be SpatialRangeQuery's ``usingIndex`` switch); with
        # False this test merely repeated the unindexed one.
        for _ in range(self.loop_times):
            result_size = RangeQuery.SpatialRangeQuery(
                spatial_rdd, self.query_envelope, False, True
            ).count()
            assert result_size == 2830

        sample = RangeQuery.SpatialRangeQuery(
            spatial_rdd, self.query_envelope, False, True
        ).take(10)
        assert sample[1].getUserData() is not None
class TestRectangleRange(TestBase):
    """Range-query tests for a RectangleRDD built from ``inputLocation``."""

    query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01)
    loop_times = 5

    def test_spatial_range_query(self):
        """Query with the last flag False; expect 193 matches carrying user data."""
        rectangles = RectangleRDD(
            self.sc, inputLocation, offset, splitter, True, StorageLevel.MEMORY_ONLY
        )

        for _ in range(self.loop_times):
            matches = RangeQuery.SpatialRangeQuery(
                rectangles, self.query_envelope, False, False
            )
            assert matches.count() == 193

        head = RangeQuery.SpatialRangeQuery(
            rectangles, self.query_envelope, False, False
        ).take(10)
        assert head[1].getUserData() is not None

    def test_spatial_range_query_using_index(self):
        """Build an R-tree, query with the last flag True; expect 193 matches."""
        rectangles = RectangleRDD(
            self.sc, inputLocation, offset, splitter, True, StorageLevel.MEMORY_ONLY
        )
        rectangles.buildIndex(IndexType.RTREE, False)

        for _ in range(self.loop_times):
            matches = RangeQuery.SpatialRangeQuery(
                rectangles, self.query_envelope, False, True
            )
            assert matches.count() == 193

        head = RangeQuery.SpatialRangeQuery(
            rectangles, self.query_envelope, False, True
        ).take(10)
        assert head[1].getUserData() is not None
def boundaryEnvelope(self) -> Envelope:
    """
    Return the ``boundaryEnvelope`` field of ``self._srdd`` as an Envelope.

    The field is fetched with ``get_field`` and converted with
    ``Envelope.from_jvm_instance``.

    :return: Envelope converted from the ``boundaryEnvelope`` field.
    :raises TypeError: when ``self._is_analyzed`` is falsy.
    """
    if self._is_analyzed:
        return Envelope.from_jvm_instance(get_field(self._srdd, "boundaryEnvelope"))

    raise TypeError("Please use analyze before")
def boundary(self) -> Envelope:
    """
    Call ``boundary()`` on ``self._srdd`` and convert the result to an Envelope.

    :return: Envelope built by ``Envelope.from_jvm_instance`` from that result.
    """
    return Envelope.from_jvm_instance(self._srdd.boundary())
def test_to_spatial_rdd_df(self):
    """Adapter.toSpatialRdd on the point table yields the expected count and boundary."""
    expected_boundary = Envelope(
        -179.147236, 179.475569, -14.548699, 71.35513400000001
    )

    point_rdd = Adapter.toSpatialRdd(self._create_spatial_point_table())
    point_rdd.analyze()

    assert point_rdd.approximateTotalCount == 121960
    assert point_rdd.boundaryEnvelope == expected_boundary
def __init__(self, centerGeometry: BaseGeometry, givenRadius: float):
    """
    Build a circle around the center of ``centerGeometry``'s envelope.

    ``centerPoint`` is the midpoint of that envelope. The stored radius is
    ``givenRadius`` only when it is strictly larger than half the envelope's
    diagonal; otherwise the half diagonal is used. ``MBR`` is the square
    envelope of side ``2 * radius`` centered on ``centerPoint``, and the base
    class is initialised with ``centerPoint.buffer(radius)``.

    :param centerGeometry: geometry the circle is centered on.
    :param givenRadius: requested radius.
    """
    self.centerGeometry = centerGeometry
    center_geometry_mbr = Envelope.from_shapely_geom(self.centerGeometry)

    # Single assignment: the original repeated the target
    # (``self.centerPoint = self.centerPoint = ...``), which was redundant.
    self.centerPoint = Point(
        (center_geometry_mbr.minx + center_geometry_mbr.maxx) / 2.0,
        (center_geometry_mbr.miny + center_geometry_mbr.maxy) / 2.0,
    )

    width = center_geometry_mbr.maxx - center_geometry_mbr.minx
    length = center_geometry_mbr.maxy - center_geometry_mbr.miny
    center_geometry_internal_radius = sqrt(width**2 + length**2) / 2.0

    # radius and MBR are assigned exactly once here; the earlier placeholder
    # stores (``self.MBR = None`` / ``self.radius = givenRadius``) were always
    # overwritten before being read, so they are dropped.
    if givenRadius > center_geometry_internal_radius:
        self.radius = givenRadius
    else:
        self.radius = center_geometry_internal_radius

    self.MBR = Envelope(
        self.centerPoint.x - self.radius,
        self.centerPoint.x + self.radius,
        self.centerPoint.y - self.radius,
        self.centerPoint.y + self.radius,
    )
    super().__init__(self.centerPoint.buffer(self.radius))
def test_read_to_point_rdd(self):
    """Read the point shapefile as PointRDD and geometry RDD; check count and JVM types."""
    shapefile_dir = os.path.join(tests_path, "resources/shapefiles/point")
    point_rdd = ShapefileReader.readToPointRDD(self.sc, shapefile_dir)
    generic_rdd = ShapefileReader.readToGeometryRDD(self.sc, shapefile_dir)

    # A range query over this window must return as many rows as the raw RDD holds.
    whole_window = Envelope(-180.0, 180.0, -90.0, 90.0)
    matched = RangeQuery.SpatialRangeQuery(
        point_rdd, whole_window, False, False
    ).count()
    assert point_rdd.rawSpatialRDD.count() == matched

    generic_description = generic_rdd._srdd.toString()
    assert 'org.datasyslab.geospark.spatialRDD.SpatialRDD' in generic_description

    point_description = point_rdd._srdd.toString()
    assert 'org.datasyslab.geospark.spatialRDD.PointRDD' in point_description
def test_to_rdd_from_dataframe(self):
    """Adapter.toRdd output, attached to a SpatialRDD, yields the expected count and boundary."""
    expected_boundary = Envelope(
        -179.147236, 179.475569, -14.548699, 71.35513400000001
    )

    point_table = self._create_spatial_point_table()
    point_table.show()

    wrapper = SpatialRDD(self.sc)
    wrapper.rawJvmSpatialRDD = Adapter.toRdd(point_table)
    wrapper.analyze()

    assert wrapper.approximateTotalCount == 121960
    assert wrapper.boundaryEnvelope == expected_boundary
def grids(self) -> Optional[List[Envelope]]:
    """
    Return the grids of this SpatialRDD as a list of Envelopes.

    >> spatial_rdd.grids
    >> [Envelope(minx=10.0, maxx=12.0, miny=10.0, maxy=12.0)]

    :return: one Envelope per entry of ``self.jvm_grids.jgrid``, or ``None``
        when that value is falsy.
    """
    jvm_grids = self.jvm_grids.jgrid
    if not jvm_grids:
        return None

    envelopes = []
    for position in range(jvm_grids.size()):
        envelopes.append(Envelope.from_jvm_instance(jvm_grids[position]))
    return envelopes
# Module-level constants built around resources/arealm-small.csv
# (presumably shared by the point-data tests -- confirm against importers).
import os

from geospark.core.enums import FileDataSplitter
from geospark.core.geom.envelope import Envelope
from tests.tools import tests_path

input_location = os.path.join(tests_path, "resources/arealm-small.csv")
# NOTE(review): unlike input_location, the two *_set values below are not
# joined with tests_path, so they are bare file names -- confirm this is intended.
query_window_set = os.path.join("zcta510-small.csv")
offset = 1
splitter = FileDataSplitter.CSV
grid_type = "rtree"
index_type = "rtree"
num_partitions = 11
distance = 0.01
query_polygon_set = "primaryroads-polygon.csv"
input_count = 3000
input_boundary = Envelope(minx=-173.120769, maxx=-84.965961, miny=30.244859, maxy=71.355134)
rectangle_match_count = 103
rectangle_with_original_duplicates_count = 103
polygon_match_count = 472
polygon_match_with_original_duplicates_count = 562
transformed_envelope = Envelope(14313844.29433424, 16587207.463797055, 942450.5989896542, 6697987.652517834)
crs_point_test = os.path.join(tests_path, "resources/crs-test-point.csv")
# NOTE(review): relative to input_boundary above, the first and second value
# pairs look swapped here (presumably a different axis order) -- confirm.
crs_envelope = Envelope(26.992172, 71.35513400000001, -179.147236, 179.475569)
crs_envelope_transformed = Envelope(-5446655.086752228, 1983668.382852457, 534241.8622328962, 6143259.02554563)
def test_boundary(self):
    """``boundary()`` of the freshly created RDD equals the known envelope."""
    expected = Envelope(
        minx=-173.120769, maxx=-84.965961, miny=30.244859, maxy=71.355134
    )
    actual = self.create_spatial_rdd().boundary()
    assert actual == expected
def test_boundary_envelope(self):
    """After ``analyze()``, ``boundaryEnvelope`` equals the known envelope."""
    expected = Envelope(
        minx=-173.120769, maxx=-84.965961, miny=30.244859, maxy=71.355134
    )

    analyzed_rdd = self.create_spatial_rdd()
    analyzed_rdd.analyze()

    assert expected == analyzed_rdd.boundaryEnvelope
def _compute_envelope_internal(self):
    """Return ``self.MBR``, or a default-constructed Envelope when ``self.is_empty`` is truthy."""
    return Envelope() if self.is_empty else self.MBR
def test_jvm_envelope(self):
    """A 5 x 5 Envelope converted to its JVM instance reports an area of 25."""
    jvm = self.spark.sparkContext._jvm
    jvm_instance = Envelope(0.0, 5.0, 0.0, 5.0).create_jvm_instance(jvm)

    envelope_area = jvm_instance.getArea()

    assert envelope_area == 25.0, f"Expected area to be equal 25 but {envelope_area} was found"
# Module-level constants built around resources/primaryroads-linestring.csv
# (presumably shared by the linestring tests -- confirm against importers).
from geospark.core.geom.envelope import Envelope
from tests.tools import tests_path

input_location = os.path.join(tests_path, "resources/primaryroads-linestring.csv")
query_window_set = os.path.join(tests_path, "resources/zcta510-small.csv")
offset = 0
splitter = FileDataSplitter.CSV
grid_type = "rtree"
index_type = "rtree"
num_partitions = 5
distance = 0.01
query_polygon_set = os.path.join(tests_path, "resources/primaryroads-polygon.csv")
input_count = 3000
# input_boundary and input_boundary_2 differ only in maxx.
input_boundary = Envelope(minx=-123.393766, maxx=-65.648659, miny=17.982169, maxy=49.002374)
input_boundary_2 = Envelope(minx=-123.393766, maxx=-65.649956, miny=17.982169, maxy=49.002374)
match_count = 535
match_with_origin_with_duplicates_count = 875
# transformed_envelope and transformed_envelope_2 differ only in the last value.
transformed_envelope = Envelope(14313844.294334238, 16791709.853587367, 942450.5989896103, 8474779.278028358)
transformed_envelope_2 = Envelope(14313844.294334238, 16791709.853587367, 942450.5989896103, 8474645.488977432)
from geospark.core.spatialOperator import RangeQuery from tests.test_base import TestBase from tests.tools import tests_path input_location = os.path.join(tests_path, "resources/arealm-small.csv") queryWindowSet = os.path.join("zcta510-small.csv") offset = 1 splitter = FileDataSplitter.CSV gridType = "rtree" indexType = "rtree" numPartitions = 11 distance = 0.01 queryPolygonSet = "primaryroads-polygon.csv" inputCount = 3000 inputBoundary = Envelope(minx=-173.120769, maxx=-84.965961, miny=30.244859, maxy=71.355134) rectangleMatchCount = 103 rectangleMatchWithOriginalDuplicatesCount = 103 polygonMatchCount = 472 polygonMatchWithOriginalDuplicatesCount = 562 class TestPointRange(TestBase): loop_times = 5 query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01) def test_spatial_range_query(self): spatial_rdd = PointRDD(self.sc, input_location, offset, splitter, False) for i in range(self.loop_times):
class TestRectangleKNN(TestBase):
    """K-nearest-neighbour query tests for a RectangleRDD built from ``inputLocation``."""

    query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01)
    loop_times = 5
    query_point = Point(-84.01, 34.01)
    top_k = 100
    query_polygon = Polygon([(-84.01, 34.01), (-84.01, 34.11), (-83.91, 34.11),
                             (-83.91, 34.01), (-84.01, 34.01)])
    query_line = LineString([(-84.01, 34.01), (-84.01, 34.11), (-83.91, 34.11),
                             (-83.91, 34.01)])

    def test_spatial_knn_query(self):
        """KNN around ``query_point`` without an index returns rows with user data."""
        rectangle_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter, True)

        for _ in range(self.loop_times):
            result = KNNQuery.SpatialKnnQuery(
                rectangle_rdd, self.query_point, self.top_k, False)
            assert len(result) > -1
            assert result[0].getUserData() is not None

    def test_spatial_knn_query_using_index(self):
        """KNN around ``query_point`` through an R-tree returns rows with user data."""
        rectangle_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter, True)
        rectangle_rdd.buildIndex(IndexType.RTREE, False)

        # The last flag is True so the index built above is actually used, as
        # in the indexed call of test_spatial_knn_query_correctness; with False
        # this test merely repeated the unindexed one.
        for _ in range(self.loop_times):
            result = KNNQuery.SpatialKnnQuery(
                rectangle_rdd, self.query_point, self.top_k, True)
            assert len(result) > -1
            assert result[0].getUserData() is not None

    def test_spatial_knn_query_correctness(self):
        """Indexed and unindexed KNN results coincide once both are sorted by distance."""
        rectangle_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter, True)
        result_no_index = KNNQuery.SpatialKnnQuery(
            rectangle_rdd, self.query_point, self.top_k, False)
        rectangle_rdd.buildIndex(IndexType.RTREE, False)
        result_with_index = KNNQuery.SpatialKnnQuery(
            rectangle_rdd, self.query_point, self.top_k, True)

        sorted_result_no_index = sorted(
            result_no_index,
            key=lambda geo_data: distance_sorting_functions(geo_data, self.query_point))
        sorted_result_with_index = sorted(
            result_with_index,
            key=lambda geo_data: distance_sorting_functions(geo_data, self.query_point))

        # Sum of pairwise distances between the two rankings; zero means every
        # pair of geometries at the same rank is at distance 0 from each other.
        difference = 0
        for x in range(self.top_k):
            difference += sorted_result_no_index[x].geom.distance(
                sorted_result_with_index[x].geom)

        assert difference == 0

    def test_spatial_knn_using_polygon(self):
        """Smoke test: KNN with a polygon as the query geometry runs and prints its result."""
        rectangle_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter, True)
        result_no_index = KNNQuery.SpatialKnnQuery(
            rectangle_rdd, self.query_polygon, self.top_k, False)
        print(result_no_index)

    def test_spatial_knn_using_linestring(self):
        """Smoke test: KNN with a linestring as the query geometry runs and prints its result."""
        rectangle_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter, True)
        result_no_index = KNNQuery.SpatialKnnQuery(
            rectangle_rdd, self.query_line, self.top_k, False)
        print(result_no_index)
from tests.tools import tests_path

# Module-level settings built around resources/arealm-small.csv.
resource_folder = "resources"
point_rdd_input_location = os.path.join(tests_path, resource_folder, "arealm-small.csv")
point_rdd_splitter = FileDataSplitter.CSV
point_rdd_index_type = IndexType.RTREE
point_rdd_num_partitions = 5
point_rdd_offset = 1
knn_query_point = Point(-84.01, 34.01)
range_query_window = Envelope(-90.01, -80.01, 30.01, 40.01)
# NOTE(review): "partitionin" looks like a typo for "partitioning"; the name is
# left as-is because other code may reference it.
join_query_partitionin_type = GridType.QUADTREE
each_query_loop_times = 1


class TestSpatialRDD(TestBase):
    # NOTE(review): only this one method is visible here; the class may continue.

    def test_empty_constructor_test(self):
        """Copy the raw JVM RDD of a loaded PointRDD into an empty PointRDD and analyze it."""
        object_rdd = PointRDD(sparkContext=self.sc, InputLocation=point_rdd_input_location, Offset=point_rdd_offset, splitter=point_rdd_splitter, carryInputData=False)
        # PointRDD() is created with no arguments, then given the loaded RDD's raw JVM RDD.
        object_rdd_copy = PointRDD()
        object_rdd_copy.rawJvmSpatialRDD = object_rdd.rawJvmSpatialRDD
        object_rdd_copy.analyze()
from geospark.core.spatialOperator import KNNQuery from tests.test_base import TestBase from tests.tools import tests_path, distance_sorting_functions inputLocation = os.path.join(tests_path, "resources/zcta510-small.csv") queryWindowSet = os.path.join(tests_path, "resources/zcta510-small.csv") offset = 0 splitter = FileDataSplitter.CSV gridType = "rtree" indexType = "rtree" numPartitions = 11 distance = 0.001 queryPolygonSet = os.path.join(tests_path, "resources/primaryroads-polygon.csv") inputCount = 3000 inputBoundary = Envelope(-171.090042, 145.830505, -14.373765, 49.00127) matchCount = 17599 matchWithOriginalDuplicatesCount = 17738 class TestRectangleKNN(TestBase): query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01) loop_times = 5 query_point = Point(-84.01, 34.01) top_k = 100 query_polygon = Polygon([(-84.01, 34.01), (-84.01, 34.11), (-83.91, 34.11), (-83.91, 34.01), (-84.01, 34.01)]) query_line = LineString([(-84.01, 34.01), (-84.01, 34.11), (-83.91, 34.11), (-83.91, 34.01)]) def test_spatial_knn_query(self):
from tests.test_base import TestBase
from tests.tools import tests_path

# Module-level settings built around resources/zcta510-small.csv.
inputLocation = os.path.join(tests_path, "resources/zcta510-small.csv")
queryWindowSet = os.path.join(tests_path, "resources/zcta510-small.csv")
offset = 0
splitter = FileDataSplitter.CSV
gridType = "rtree"
indexType = "rtree"
numPartitions = 11
distance = 0.001
queryPolygonSet = os.path.join(tests_path, "resources/primaryroads-polygon.csv")
inputCount = 3000
inputBoundary = Envelope(minx=-171.090042, maxx=145.830505, miny=-14.373765, maxy=49.00127)
matchCount = 17599
matchWithOriginalDuplicatesCount = 17738


class TestRectangleRDD(TestBase):
    # NOTE(review): only this one method is visible here; the class may continue.

    def test_constructor(self):
        """Construct a RectangleRDD from the module-level settings."""
        # NOTE(review): no assertions are visible in this view -- the method
        # body may continue beyond what is shown.
        spatial_rdd = RectangleRDD(sparkContext=self.sc, InputLocation=inputLocation, Offset=offset, splitter=splitter, carryInputData=True, partitions=numPartitions, newLevel=StorageLevel.MEMORY_ONLY)
# Module-level constants built around resources/crs-test-point.csv
# (presumably shared by the CRS-transformation tests -- confirm against importers).
import os

from shapely.geometry import Point

from geospark.core.enums import FileDataSplitter, GridType, IndexType
from geospark.core.geom.envelope import Envelope
from tests.tools import tests_path

input_location = os.path.join(tests_path, "resources/crs-test-point.csv")
offset = 0
splitter = FileDataSplitter.CSV
grid_type = GridType.RTREE
index_type = IndexType.RTREE
num_partitions = 11
distance = 0.01
input_location_query_polygon = os.path.join(tests_path, "resources/crs-test-polygon.csv")
loop_times = 5
# NOTE(review): values in the 30-40 range come first and the -90..-80 range
# second, in both the envelope and the point -- presumably a latitude-first
# axis order for this data set; confirm.
query_envelope = Envelope(30.01, 40.01, -90.01, -80.01)
query_point = Point(34.01, -84.01)
top_k = 100
# Module-level constants built around resources/primaryroads-polygon.csv
# (presumably shared by the polygon tests -- confirm against importers).
from geospark.core.enums import FileDataSplitter, IndexType
from geospark.core.geom.envelope import Envelope
from tests.tools import tests_path

input_location = os.path.join(tests_path, "resources/primaryroads-polygon.csv")
query_window_set = os.path.join(tests_path, "resources/zcta510-small.csv")
offset = 0
splitter = FileDataSplitter.CSV
grid_type = "rtree"
index_type = "rtree"
num_partitions = 5
distance = 0.01
input_location_query_polygon = os.path.join(tests_path, "resources/crs-test-polygon.csv")
query_polygon_count = 13361
query_envelope = Envelope(14313844.29433424, 16802290.85383074, 942450.5989896542, 8631908.270651951)
# Same file as input_location.
query_polygon_set = os.path.join(tests_path, "resources/primaryroads-polygon.csv")
input_location_geo_json = os.path.join(tests_path, "resources/testPolygon.json")
input_location_wkt = os.path.join(tests_path, "resources/county_small.tsv")
input_location_wkb = os.path.join(tests_path, "resources/county_small_wkb.tsv")
input_count = 3000
input_boundary = Envelope(minx=-158.104182, maxx=-66.03575, miny=17.986328, maxy=48.645133)
contains_match_count = 6941
contains_match_with_original_duplicates_count = 9334
intersects_match_count = 24323
intersects_match_with_original_duplicates_count = 32726
def test_get_envelope_internal(self):
    """A Circle of radius 0.1 at the origin reports the envelope [-0.1, 0.1] x [-0.1, 0.1]."""
    circle = Circle(Point(0.0, 0.0), 0.1)
    expected = Envelope(-0.1, 0.1, -0.1, 0.1)
    assert expected == circle.getEnvelopeInternal()