def setRadius(self, givenRadius: float):
    """
    Set the radius and rebuild the bounding envelope (``self.MBR``).

    The stored radius is never smaller than half the diagonal of the
    center geometry's bounding box; a smaller ``givenRadius`` is replaced
    by that value.

    :param givenRadius: float, requested radius
    """
    bbox = Envelope.from_shapely_geom(self.centerGeometry)
    span_x = bbox.maxx - bbox.minx
    span_y = bbox.maxy - bbox.miny
    # Half of the bounding-box diagonal.
    half_diagonal = sqrt(span_x**2 + span_y**2) / 2

    if givenRadius > half_diagonal:
        self.radius = givenRadius
    else:
        self.radius = half_diagonal

    # Envelope arguments are (minx, maxx, miny, maxy).
    self.MBR = Envelope(
        self.centerPoint.x - self.radius,
        self.centerPoint.x + self.radius,
        self.centerPoint.y - self.radius,
        self.centerPoint.y + self.radius,
    )
class TestPointRange(TestBase):
    """Spatial range-query tests over a PointRDD, with and without an R-tree index."""

    loop_times = 5
    # Query window given as (minx, maxx, miny, maxy).
    query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01)

    def test_spatial_range_query(self):
        """Run the range query without an index and check count and user data."""
        spatial_rdd = PointRDD(self.sc, input_location, offset, splitter, False)

        for _ in range(self.loop_times):
            result_size = RangeQuery.SpatialRangeQuery(
                spatial_rdd, self.query_envelope, False, False
            ).count()
            assert result_size == 2830

        assert RangeQuery.SpatialRangeQuery(
            spatial_rdd, self.query_envelope, False, False
        ).take(10)[1].getUserData() is not None

    def test_spatial_range_query_using_index(self):
        """Run the same range query through the R-tree index built on the RDD."""
        spatial_rdd = PointRDD(self.sc, input_location, offset, splitter, False)
        spatial_rdd.buildIndex(IndexType.RTREE, False)

        for _ in range(self.loop_times):
            # The last argument is ``usingIndex``; it must be True, otherwise
            # the index built above is never exercised by this test.
            result_size = RangeQuery.SpatialRangeQuery(
                spatial_rdd, self.query_envelope, False, True
            ).count()
            assert result_size == 2830

        assert RangeQuery.SpatialRangeQuery(
            spatial_rdd, self.query_envelope, False, True
        ).take(10)[1].getUserData() is not None
class TestPolygonRange(TestBase):
    """Spatial range-query tests over a PolygonRDD, with and without an R-tree index."""

    loop_times = 5
    # Query window given as (minx, maxx, miny, maxy).
    query_envelope = Envelope(-85.01, -60.01, 34.01, 50.01)

    def test_spatial_range_query(self):
        """Run the range query without an index and check count and user data."""
        spatial_rdd = PolygonRDD(
            self.sc, input_location, splitter, True, StorageLevel.MEMORY_ONLY
        )

        for _ in range(self.loop_times):
            result_size = RangeQuery.SpatialRangeQuery(
                spatial_rdd, self.query_envelope, False, False
            ).count()
            assert result_size == 704

        assert RangeQuery.SpatialRangeQuery(
            spatial_rdd, self.query_envelope, False, False
        ).take(10)[0].getUserData() is not None

    def test_spatial_range_query_using_index(self):
        """Run the same range query through the R-tree index built on the RDD."""
        spatial_rdd = PolygonRDD(
            self.sc, input_location, splitter, True, StorageLevel.MEMORY_ONLY
        )
        spatial_rdd.buildIndex(IndexType.RTREE, False)

        for _ in range(self.loop_times):
            # The last argument is ``usingIndex``; it must be True, otherwise
            # the index built above is never exercised by this test.
            result_size = RangeQuery.SpatialRangeQuery(
                spatial_rdd, self.query_envelope, False, True
            ).count()
            assert result_size == 704

        assert RangeQuery.SpatialRangeQuery(
            spatial_rdd, self.query_envelope, False, True
        ).take(10)[0].getUserData() is not None
def test_boundary_envelope(self):
    """After ``analyze()``, ``boundaryEnvelope`` must equal the expected bounds."""
    rdd = self.create_spatial_rdd()
    rdd.analyze()

    expected_bounds = Envelope(
        minx=-173.120769,
        maxx=-84.965961,
        miny=30.244859,
        maxy=71.355134,
    )
    assert expected_bounds == rdd.boundaryEnvelope
class TestRectangleRange(TestBase):
    """Spatial range-query tests over a RectangleRDD, with and without an R-tree index."""

    # Query window given as (minx, maxx, miny, maxy).
    query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01)
    loop_times = 5

    def test_spatial_range_query(self):
        """Query without an index: check the hit count and that user data is carried."""
        rectangles = RectangleRDD(
            self.sc, inputLocation, offset, splitter, True, StorageLevel.MEMORY_ONLY
        )

        for _ in range(self.loop_times):
            hits = RangeQuery.SpatialRangeQuery(
                rectangles, self.query_envelope, False, False
            )
            result_size = hits.count()
            assert result_size == 193

        sample = RangeQuery.SpatialRangeQuery(
            rectangles, self.query_envelope, False, False
        ).take(10)
        assert sample[1].getUserData() is not None

    def test_spatial_range_query_using_index(self):
        """Query through the R-tree index: same expectations as the plain query."""
        rectangles = RectangleRDD(
            self.sc, inputLocation, offset, splitter, True, StorageLevel.MEMORY_ONLY
        )
        rectangles.buildIndex(IndexType.RTREE, False)

        for _ in range(self.loop_times):
            hits = RangeQuery.SpatialRangeQuery(
                rectangles, self.query_envelope, False, True
            )
            result_size = hits.count()
            assert result_size == 193

        sample = RangeQuery.SpatialRangeQuery(
            rectangles, self.query_envelope, False, True
        ).take(10)
        assert sample[1].getUserData() is not None
def test_boundary(self):
    """``boundary()`` must return the expected envelope for the test RDD."""
    rdd = self.create_spatial_rdd()
    actual_bounds = rdd.boundary()

    expected_bounds = Envelope(
        minx=-173.120769,
        maxx=-84.965961,
        miny=30.244859,
        maxy=71.355134,
    )
    assert actual_bounds == expected_bounds
def boundaryEnvelope(self) -> Envelope:
    """
    Read the ``boundaryEnvelope`` field of the wrapped JVM spatial RDD.

    :return: Envelope converted from the JVM-side field
    :raises TypeError: when ``self._is_analyzed`` is falsy
    """
    if self._is_analyzed:
        jvm_envelope = get_field(self._srdd, "boundaryEnvelope")
        return Envelope.from_jvm_instance(jvm_envelope)

    raise TypeError("Please use analyze before")
def test_read_to_point_rdd_multipoint(self):
    """Read the multipoint shapefile as a PointRDD and as a generic geometry RDD."""
    shapefile_dir = os.path.join(tests_path, "resources/shapefiles/multipoint")
    point_rdd = ShapefileReader.readToPointRDD(self.sc, shapefile_dir)
    generic_rdd = ShapefileReader.readToGeometryRDD(self.sc, shapefile_dir)

    # Window with x in [-180, 180] and y in [-90, 90]; the query count is
    # compared with the raw RDD count below.
    full_window = Envelope(-180.0, 180.0, -90.0, 90.0)
    matched = RangeQuery.SpatialRangeQuery(point_rdd, full_window, False, False).count()
    assert point_rdd.rawSpatialRDD.count() == matched

    # The JVM objects behind each reader call must be of the expected class.
    assert 'org.apache.sedona.core.spatialRDD.SpatialRDD' in generic_rdd._srdd.toString()
    assert 'org.apache.sedona.core.spatialRDD.PointRDD' in point_rdd._srdd.toString()
def __init__(self, centerGeometry: BaseGeometry, givenRadius: float):
    """
    Build a circle around the bounding-box center of ``centerGeometry``.

    The effective radius is ``givenRadius`` unless that is not larger than
    half the diagonal of the geometry's bounding box, in which case the
    half-diagonal is used instead. The instance is then initialised from
    ``centerPoint.buffer(radius)``.

    :param centerGeometry: BaseGeometry, geometry the circle is centered on
    :param givenRadius: float, requested radius
    """
    self.centerGeometry = centerGeometry
    center_geometry_mbr = Envelope.from_shapely_geom(self.centerGeometry)

    # Center of the bounding box of the input geometry.
    self.centerPoint = Point(
        (center_geometry_mbr.minx + center_geometry_mbr.maxx) / 2.0,
        (center_geometry_mbr.miny + center_geometry_mbr.maxy) / 2.0,
    )

    width = center_geometry_mbr.maxx - center_geometry_mbr.minx
    length = center_geometry_mbr.maxy - center_geometry_mbr.miny
    # Half of the bounding-box diagonal.
    center_geometry_internal_radius = sqrt(width**2 + length**2) / 2.0

    if givenRadius > center_geometry_internal_radius:
        self.radius = givenRadius
    else:
        self.radius = center_geometry_internal_radius

    # Envelope arguments are (minx, maxx, miny, maxy).
    self.MBR = Envelope(
        self.centerPoint.x - self.radius,
        self.centerPoint.x + self.radius,
        self.centerPoint.y - self.radius,
        self.centerPoint.y + self.radius,
    )
    super().__init__(self.centerPoint.buffer(self.radius))
def boundary(self) -> Envelope:
    """
    Call ``boundary()`` on the wrapped JVM spatial RDD.

    :return: Envelope converted from the JVM result
    """
    return Envelope.from_jvm_instance(self._srdd.boundary())
def test_to_spatial_rdd_df(self):
    """Convert the point table to a spatial RDD and check its count and bounds."""
    point_df = self._create_spatial_point_table()
    rdd = Adapter.toSpatialRdd(point_df, "geometry")
    rdd.analyze()

    assert rdd.approximateTotalCount == 121960

    actual_bounds = rdd.boundaryEnvelope
    assert actual_bounds == Envelope(
        -179.147236, 179.475569, -14.548699, 71.35513400000001
    )
def test_read_to_linestring_rdd(self):
    """Read the polyline shapefile as a LineStringRDD and as a generic geometry RDD."""
    shapefile_dir = os.path.join(tests_resource, "shapefiles/polyline")
    line_rdd = ShapefileReader.readToLineStringRDD(self.sc, shapefile_dir)
    generic_rdd = ShapefileReader.readToGeometryRDD(self.sc, shapefile_dir)

    # Window with x in [-180, 180] and y in [-90, 90]; the query count is
    # compared with the raw RDD count below.
    full_window = Envelope(-180.0, 180.0, -90.0, 90.0)
    matched = RangeQuery.SpatialRangeQuery(line_rdd, full_window, False, False).count()
    assert line_rdd.rawSpatialRDD.count() == matched

    # The JVM objects behind each reader call must be of the expected class.
    assert 'org.apache.sedona.core.spatialRDD.SpatialRDD' in generic_rdd._srdd.toString()
    assert 'org.apache.sedona.core.spatialRDD.LineStringRDD' in line_rdd._srdd.toString()
def grids(self) -> Optional[List[Envelope]]:
    """
    Return the grids of this SpatialRDD as a list of Envelopes.

    >> spatial_rdd.grids
    >> [Envelope(minx=10.0, maxx=12.0, miny=10.0, maxy=12.0)]

    :return: list of Envelope, or None when ``self.jvm_grids.jgrid`` is falsy
    """
    grid_list = self.jvm_grids.jgrid
    if not grid_list:
        return None

    return [
        Envelope.from_jvm_instance(grid_list[position])
        for position in range(grid_list.size())
    ]
# Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. import os from sedona.core.enums import FileDataSplitter from sedona.core.geom.envelope import Envelope from tests.tools import tests_path input_location = os.path.join(tests_path, "resources/primaryroads-linestring.csv") query_window_set = os.path.join(tests_path, "resources/zcta510-small.csv") offset = 0 splitter = FileDataSplitter.CSV grid_type = "kdbtree" index_type = "rtree" num_partitions = 5 distance = 0.01 query_polygon_set = os.path.join(tests_path, "resources/primaryroads-polygon.csv") input_count = 3000 input_boundary = Envelope(minx=-123.393766, maxx=-65.648659, miny=17.982169, maxy=49.002374) input_boundary_2 = Envelope(minx=-123.393766, maxx=-65.649956, miny=17.982169, maxy=49.002374) match_count = 535 match_with_origin_with_duplicates_count = 875 transformed_envelope = Envelope(14313844.294334238, 16791709.853587367, 942450.5989896103, 8474779.278028358) transformed_envelope_2 = Envelope(14313844.294334238, 16791709.853587367, 942450.5989896103, 8474645.488977432)
from tests.test_base import TestBase from tests.tools import tests_path inputLocation = os.path.join(tests_path, "resources/zcta510-small.csv") queryWindowSet = os.path.join(tests_path, "resources/zcta510-small.csv") offset = 0 splitter = FileDataSplitter.CSV gridType = "rtree" indexType = "rtree" numPartitions = 11 distance = 0.001 queryPolygonSet = os.path.join(tests_path, "resources/primaryroads-polygon.csv") inputCount = 3000 inputBoundary = Envelope(minx=-171.090042, maxx=145.830505, miny=-14.373765, maxy=49.00127) matchCount = 17599 matchWithOriginalDuplicatesCount = 17738 class TestRectangleRDD(TestBase): def test_constructor(self): spatial_rdd = RectangleRDD(sparkContext=self.sc, InputLocation=inputLocation, Offset=offset, splitter=splitter, carryInputData=True, partitions=numPartitions, newLevel=StorageLevel.MEMORY_ONLY)
from sedona.core.geom.envelope import Envelope from sedona.core.spatialOperator import KNNQuery from tests.test_base import TestBase from tests.tools import tests_resource, distance_sorting_functions inputLocation = os.path.join(tests_resource, "zcta510-small.csv") queryWindowSet = os.path.join(tests_resource, "zcta510-small.csv") offset = 0 splitter = FileDataSplitter.CSV gridType = "rtree" indexType = "rtree" numPartitions = 11 distance = 0.001 queryPolygonSet = os.path.join(tests_resource, "primaryroads-polygon.csv") inputCount = 3000 inputBoundary = Envelope(-171.090042, 145.830505, -14.373765, 49.00127) matchCount = 17599 matchWithOriginalDuplicatesCount = 17738 class TestRectangleKNN(TestBase): query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01) loop_times = 5 query_point = Point(-84.01, 34.01) top_k = 100 query_polygon = Polygon([(-84.01, 34.01), (-84.01, 34.11), (-83.91, 34.11), (-83.91, 34.01), (-84.01, 34.01)]) query_line = LineString([(-84.01, 34.01), (-84.01, 34.11), (-83.91, 34.11), (-83.91, 34.01)]) def test_spatial_knn_query(self):
point_rdd_input_location = os.path.join(tests_resource, "arealm-small.csv") point_rdd_splitter = FileDataSplitter.CSV point_rdd_index_type = IndexType.RTREE point_rdd_num_partitions = 5 point_rdd_offset = 1 polygon_rdd_input_location = os.path.join(tests_resource, "primaryroads-polygon.csv") polygon_rdd_splitter = FileDataSplitter.CSV polygon_rdd_num_partitions = 5 polygon_rdd_start_offset = 0 polygon_rdd_end_offset = 9 knn_query_point = Point(-84.01, 34.01) range_query_window = Envelope(-90.01, -80.01, 30.01, 40.01) join_query_partitioning_type = GridType.QUADTREE each_query_loop_times = 20 shape_file_input_location = os.path.join(tests_resource, "shapefiles/polygon") class TestScalaExample(TestBase): def test_spatial_range_query(self): object_rdd = PointRDD( self.sc, point_rdd_input_location, point_rdd_offset, point_rdd_splitter, True, StorageLevel.MEMORY_ONLY ) object_rdd.rawJvmSpatialRDD.persist(StorageLevel.MEMORY_ONLY) for _ in range(each_query_loop_times): result_size = RangeQuery.SpatialRangeQuery(object_rdd, range_query_window, False, False).count()
from sedona.core.spatialOperator import RangeQuery from tests.test_base import TestBase from tests.tools import tests_path input_location = os.path.join(tests_path, "resources/arealm-small.csv") queryWindowSet = os.path.join("zcta510-small.csv") offset = 1 splitter = FileDataSplitter.CSV gridType = "rtree" indexType = "rtree" numPartitions = 11 distance = 0.01 queryPolygonSet = "primaryroads-polygon.csv" inputCount = 3000 inputBoundary = Envelope(minx=-173.120769, maxx=-84.965961, miny=30.244859, maxy=71.355134) rectangleMatchCount = 103 rectangleMatchWithOriginalDuplicatesCount = 103 polygonMatchCount = 472 polygonMatchWithOriginalDuplicatesCount = 562 class TestPointRange(TestBase): loop_times = 5 query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01) def test_spatial_range_query(self): spatial_rdd = PointRDD(self.sc, input_location, offset, splitter, False) for i in range(self.loop_times):
# with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. import os from shapely.geometry import Point from sedona.core.enums import FileDataSplitter, IndexType from sedona.core.geom.envelope import Envelope from tests.tools import tests_resource input_location = os.path.join(tests_resource, "crs-test-point.csv") offset = 0 splitter = FileDataSplitter.CSV index_type = IndexType.RTREE num_partitions = 11 distance = 0.01 input_location_query_polygon = os.path.join(tests_resource, "crs-test-polygon.csv") loop_times = 5 query_envelope = Envelope(30.01, 40.01, -90.01, -80.01) query_point = Point(34.01, -84.01) top_k = 100
from sedona.core.enums import FileDataSplitter, IndexType
from sedona.core.geom.envelope import Envelope
from tests.tools import tests_path
# Shared constants for tests that read the primary-roads polygon CSV.

# Input data set and query-window data set, resolved under tests_path.
input_location = os.path.join(tests_path, "resources/primaryroads-polygon.csv")
query_window_set = os.path.join(tests_path, "resources/zcta510-small.csv")
# Reader / partitioning / indexing settings.
offset = 0
splitter = FileDataSplitter.CSV
grid_type = "rtree"
index_type = "rtree"
num_partitions = 5
distance = 0.01
input_location_query_polygon = os.path.join(tests_path, "resources/crs-test-polygon.csv")
# NOTE(review): presumably the expected record count for the query-polygon
# input above - confirm against the tests that import it.
query_polygon_count = 13361
query_envelope = Envelope(14313844.294334238, 16802290.853830762, 942450.5989896103, 8631908.270651892)
query_polygon_set = os.path.join(tests_path, "resources/primaryroads-polygon.csv")
# Additional inputs in GeoJSON, WKT and WKB form (per the variable names).
input_location_geo_json = os.path.join(tests_path, "resources/testPolygon.json")
input_location_wkt = os.path.join(tests_path, "resources/county_small.tsv")
input_location_wkb = os.path.join(tests_path, "resources/county_small_wkb.tsv")
# NOTE(review): the values below look like expected results asserted by the
# tests that import this module (record count, boundary, match counts) -
# confirm against those tests.
input_count = 3000
input_boundary = Envelope(minx=-158.104182, maxx=-66.03575, miny=17.986328, maxy=48.645133)
contains_match_count = 6941
contains_match_with_original_duplicates_count = 9334
intersects_match_count = 24323
intersects_match_with_original_duplicates_count = 32726
# Settings under the polygon_rdd_ prefix; the input path is the same file as
# input_location above.
polygon_rdd_input_location = os.path.join(tests_path, "resources/primaryroads-polygon.csv")
polygon_rdd_splitter = FileDataSplitter.CSV
polygon_rdd_index_type = IndexType.RTREE
polygon_rdd_num_partitions = 5
class TestRectangleKNN(TestBase):
    """K-nearest-neighbour query tests over a RectangleRDD."""

    # Envelope arguments are (minx, maxx, miny, maxy).
    query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01)
    loop_times = 5
    query_point = Point(-84.01, 34.01)
    top_k = 100
    query_polygon = Polygon(
        [(-84.01, 34.01), (-84.01, 34.11), (-83.91, 34.11), (-83.91, 34.01), (-84.01, 34.01)]
    )
    query_line = LineString(
        [(-84.01, 34.01), (-84.01, 34.11), (-83.91, 34.11), (-83.91, 34.01)]
    )

    def test_spatial_knn_query(self):
        """KNN around a point without an index returns results carrying user data."""
        rectangle_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter, True)

        for _ in range(self.loop_times):
            result = KNNQuery.SpatialKnnQuery(
                rectangle_rdd, self.query_point, self.top_k, False
            )
            # A non-empty result is required: result[0] is read on the next line.
            assert len(result) > 0
            assert result[0].getUserData() is not None

    def test_spatial_knn_query_using_index(self):
        """KNN around a point through the R-tree index built on the RDD."""
        rectangle_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter, True)
        rectangle_rdd.buildIndex(IndexType.RTREE, False)

        for _ in range(self.loop_times):
            # The last argument enables index use; it must be True, otherwise
            # the index built above is never exercised by this test.
            result = KNNQuery.SpatialKnnQuery(
                rectangle_rdd, self.query_point, self.top_k, True
            )
            # A non-empty result is required: result[0] is read on the next line.
            assert len(result) > 0
            assert result[0].getUserData() is not None

    def test_spatial_knn_query_correctness(self):
        """Indexed and non-indexed KNN must return the same geometries."""
        rectangle_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter, True)
        result_no_index = KNNQuery.SpatialKnnQuery(
            rectangle_rdd, self.query_point, self.top_k, False
        )
        rectangle_rdd.buildIndex(IndexType.RTREE, False)
        result_with_index = KNNQuery.SpatialKnnQuery(
            rectangle_rdd, self.query_point, self.top_k, True
        )

        # Sort both result lists with the same key before pairing them up.
        sorted_result_no_index = sorted(
            result_no_index,
            key=lambda geo_data: distance_sorting_functions(geo_data, self.query_point),
        )
        sorted_result_with_index = sorted(
            result_with_index,
            key=lambda geo_data: distance_sorting_functions(geo_data, self.query_point),
        )

        # Sum of pairwise distances between the two sorted result lists;
        # zero means every pair of geometries is at distance 0.
        difference = 0
        for x in range(self.top_k):
            difference += sorted_result_no_index[x].geom.distance(
                sorted_result_with_index[x].geom
            )

        assert difference == 0

    def test_spatial_knn_using_polygon(self):
        """KNN with a polygon as the query geometry runs without error."""
        rectangle_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter, True)
        result_no_index = KNNQuery.SpatialKnnQuery(
            rectangle_rdd, self.query_polygon, self.top_k, False
        )
        print(result_no_index)

    def test_spatial_knn_using_linestring(self):
        """KNN with a linestring as the query geometry runs without error."""
        rectangle_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter, True)
        result_no_index = KNNQuery.SpatialKnnQuery(
            rectangle_rdd, self.query_line, self.top_k, False
        )
        print(result_no_index)
def _compute_envelope_internal(self): if self.is_empty: return Envelope() return self.MBR
def test_jvm_envelope(self):
    """A 5 x 5 envelope converted to a JVM instance must report an area of 25."""
    square = Envelope(0.0, 5.0, 0.0, 5.0)
    jvm = self.spark.sparkContext._jvm
    envelope_area = square.create_jvm_instance(jvm).getArea()

    assert envelope_area == 25.0, f"Expected area to be equal 25 but {envelope_area} was found"
import os

from sedona.core.enums import FileDataSplitter
from sedona.core.geom.envelope import Envelope
from tests.tools import tests_resource

# Shared constants for tests that read the arealm point CSV.

# Input data set and query-window data set, resolved under tests_resource.
input_location = os.path.join(tests_resource, "arealm-small.csv")
query_window_set = os.path.join(tests_resource, "zcta510-small.csv")

# Reader / partitioning / indexing settings.
offset = 1
splitter = FileDataSplitter.CSV
grid_type = "rtree"
index_type = "rtree"
num_partitions = 11
distance = 0.01

# Anchored under tests_resource like every other data path in this module,
# so it no longer depends on the current working directory. Joining it onto
# an absolute directory again still yields this same path.
query_polygon_set = os.path.join(tests_resource, "primaryroads-polygon.csv")

# NOTE(review): the values below look like expected results asserted by the
# tests that import this module (record count, boundary, match counts) -
# confirm against those tests.
input_count = 3000
input_boundary = Envelope(minx=-173.120769, maxx=-84.965961, miny=30.244859, maxy=71.355134)
rectangle_match_count = 103
rectangle_with_original_duplicates_count = 103
polygon_match_count = 472
polygon_match_with_original_duplicates_count = 562

# NOTE(review): presumably the boundary after a CRS transformation - confirm.
transformed_envelope = Envelope(14313844.294334238, 16587207.463797076, 942450.5989896103, 6697987.652517772)

# CRS test point data set and its envelopes; Envelope arguments are
# (minx, maxx, miny, maxy).
crs_point_test = os.path.join(tests_resource, "crs-test-point.csv")
crs_envelope = Envelope(26.992172, 71.35513400000001, -179.147236, 179.475569)
crs_envelope_transformed = Envelope(-5446655.086752236, 1983668.3828524568, 534241.8622328975, 6143259.025545624)