コード例 #1
0
class TestRectangleRange(TestBase):
    query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01)
    loop_times = 5

    def test_spatial_range_query(self):
        spatial_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter,
                                   True, StorageLevel.MEMORY_ONLY)

        for i in range(self.loop_times):
            result_size = RangeQuery.SpatialRangeQuery(spatial_rdd,
                                                       self.query_envelope,
                                                       False, False).count()
            assert result_size == 193

        assert RangeQuery.SpatialRangeQuery(
            spatial_rdd, self.query_envelope, False,
            False).take(10)[1].getUserData() is not None

    def test_spatial_range_query_using_index(self):
        spatial_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter,
                                   True, StorageLevel.MEMORY_ONLY)

        spatial_rdd.buildIndex(IndexType.RTREE, False)
        for i in range(self.loop_times):
            result_size = RangeQuery.SpatialRangeQuery(spatial_rdd,
                                                       self.query_envelope,
                                                       False, True).count()
            assert result_size == 193

        assert RangeQuery.SpatialRangeQuery(spatial_rdd, self.query_envelope, False, True).take(10)[1].getUserData()\
               is not None
コード例 #2
0
class TestPointRange(TestBase):
    loop_times = 5
    query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01)

    def test_spatial_range_query(self):
        spatial_rdd = PointRDD(self.sc, input_location, offset, splitter,
                               False)
        for i in range(self.loop_times):
            result_size = RangeQuery.\
                SpatialRangeQuery(spatial_rdd, self.query_envelope, False, False)\
                .count()
            assert result_size == 2830
        assert RangeQuery.SpatialRangeQuery(
            spatial_rdd, self.query_envelope, False, False).take(10)[1].\
                   getUserData() is not None

    def test_spatial_range_query_using_index(self):
        spatial_rdd = PointRDD(self.sc, input_location, offset, splitter,
                               False)

        spatial_rdd.buildIndex(IndexType.RTREE, False)

        for i in range(self.loop_times):
            result_size = RangeQuery.\
                SpatialRangeQuery(spatial_rdd, self.query_envelope, False, False)\
                .count()
            assert result_size == 2830
        assert RangeQuery.SpatialRangeQuery(
            spatial_rdd, self.query_envelope, False, False).take(10)[1].\
                   getUserData() is not None
コード例 #3
0
    def SpatialRangeQuery(self, spatialRDD: SpatialRDD,
                          rangeQueryWindow: Envelope,
                          considerBoundaryIntersection: bool,
                          usingIndex: bool):
        """

        :param spatialRDD:
        :param rangeQueryWindow:
        :param considerBoundaryIntersection:
        :param usingIndex:
        :return:
        """

        jvm = spatialRDD._jvm
        sc = spatialRDD._sc

        jvm_envelope = rangeQueryWindow.create_jvm_instance(jvm)

        srdd = jvm.\
            RangeQuery.SpatialRangeQuery(
            spatialRDD._srdd,
            jvm_envelope,
            considerBoundaryIntersection,
            usingIndex
        )

        serlialized = jvm.GeoSerializerData.serializeToPython(srdd)

        return RDD(serlialized, sc, GeoSparkPickler())
コード例 #4
0
class TestPolygonRange(TestBase):
    loop_times = 5
    query_envelope = Envelope(-85.01, -60.01, 34.01, 50.01)

    def test_spatial_range_query(self):
        spatial_rdd = PolygonRDD(self.sc, input_location, splitter, True,
                                 StorageLevel.MEMORY_ONLY)
        for i in range(self.loop_times):
            result_size = RangeQuery.\
                SpatialRangeQuery(spatial_rdd, self.query_envelope, False, False).count()
            assert result_size == 704

        assert RangeQuery.SpatialRangeQuery(
            spatial_rdd, self.query_envelope, False,
            False).take(10)[0].getUserData() is not None

    def test_spatial_range_query_using_index(self):
        spatial_rdd = PolygonRDD(self.sc, input_location, splitter, True,
                                 StorageLevel.MEMORY_ONLY)
        spatial_rdd.buildIndex(IndexType.RTREE, False)
        for i in range(self.loop_times):
            result_size = RangeQuery.\
                SpatialRangeQuery(spatial_rdd, self.query_envelope, False, False).count()
            assert result_size == 704

        assert RangeQuery.SpatialRangeQuery(
            spatial_rdd, self.query_envelope, False,
            False).take(10)[0].getUserData() is not None
コード例 #5
0
class TestRectangleKNN(TestBase):
    query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01)
    loop_times = 5
    query_point = Point(-84.01, 34.01)
    top_k = 100

    def test_spatial_knn_query(self):
        rectangle_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter,
                                     True)

        for i in range(self.loop_times):
            result = KNNQuery.SpatialKnnQuery(rectangle_rdd, self.query_point,
                                              self.top_k, False)

            assert result.__len__() > -1
            assert result[0].getUserData() is not None

    def test_spatial_knn_query_using_index(self):
        rectangle_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter,
                                     True)
        rectangle_rdd.buildIndex(IndexType.RTREE, False)

        for i in range(self.loop_times):
            result = KNNQuery.SpatialKnnQuery(rectangle_rdd, self.query_point,
                                              self.top_k, False)

            assert result.__len__() > -1
            assert result[0].getUserData() is not None

    def test_spatial_knn_query_correctness(self):
        rectangle_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter,
                                     True)

        result_no_index = KNNQuery.SpatialKnnQuery(rectangle_rdd,
                                                   self.query_point,
                                                   self.top_k, False)
        rectangle_rdd.buildIndex(IndexType.RTREE, False)

        result_with_index = KNNQuery.SpatialKnnQuery(rectangle_rdd,
                                                     self.query_point,
                                                     self.top_k, True)

        sorted_result_no_index = sorted(
            result_no_index,
            key=lambda geo_data: distance_sorting_functions(
                geo_data, self.query_point))

        sorted_result_with_index = sorted(
            result_with_index,
            key=lambda geo_data: distance_sorting_functions(
                geo_data, self.query_point))

        difference = 0
        for x in range(self.top_k):
            difference += sorted_result_no_index[x].geom.distance(
                sorted_result_with_index[x].geom)

        assert difference == 0
コード例 #6
0
    def boundaryEnvelope(self) -> Envelope:
        """

        :return:
        """
        if not self._is_analyzed:
            raise TypeError("Please use analyze before")
        java_boundary_envelope = get_field(self._srdd, "boundaryEnvelope")
        return Envelope.from_jvm_instance(java_boundary_envelope)
コード例 #7
0
    def boundary(self) -> Envelope:
        """

        :return:
        """

        jvm_boundary = self._srdd.boundary()

        envelope = Envelope.from_jvm_instance(jvm_boundary)
        return envelope
コード例 #8
0
    def grids(self) -> Optional[List[Envelope]]:
        """
        Returns grids for SpatialRDD, it is a list of Envelopes.

        >> spatial_rdd.grids
        >> [Envelope(minx=10.0, maxx=12.0, miny=10.0, maxy=12.0)]
        :return:
        """
        jvm_grids = self.jvm_grids.jgrid
        if jvm_grids:
            number_of_grids = jvm_grids.size()

            envelopes = [
                Envelope.from_jvm_instance(jvm_grids[index])
                for index in range(number_of_grids)
            ]

            return envelopes
        else:
            return None
コード例 #9
0
 def test_boundary_envelope(self):
     spatial_rdd = self.create_spatial_rdd()
     spatial_rdd.analyze()
     assert Envelope(
         minx=-173.120769, maxx=-84.965961, miny=30.244859, maxy=71.355134) == spatial_rdd.boundaryEnvelope
コード例 #10
0
    def test_boundary(self):
        spatial_rdd = self.create_spatial_rdd()
        envelope = spatial_rdd.boundary()

        assert envelope == Envelope(minx=-173.120769, maxx=-84.965961, miny=30.244859, maxy=71.355134)
コード例 #11
0
from tests.test_base import TestBase
from tests.tools import tests_path

inputLocation = os.path.join(tests_path, "resources/zcta510-small.csv")
queryWindowSet = os.path.join(tests_path, "resources/zcta510-small.csv")
offset = 0
splitter = FileDataSplitter.CSV
gridType = "rtree"
indexType = "rtree"
numPartitions = 11
distance = 0.001
queryPolygonSet = os.path.join(tests_path,
                               "resources/primaryroads-polygon.csv")
inputCount = 3000
inputBoundary = Envelope(minx=-171.090042,
                         maxx=145.830505,
                         miny=-14.373765,
                         maxy=49.00127)
matchCount = 17599
matchWithOriginalDuplicatesCount = 17738


class TestRectangleRDD(TestBase):
    def test_constructor(self):
        spatial_rdd = RectangleRDD(sparkContext=self.sc,
                                   InputLocation=inputLocation,
                                   Offset=offset,
                                   splitter=splitter,
                                   carryInputData=True,
                                   partitions=numPartitions,
                                   newLevel=StorageLevel.MEMORY_ONLY)
コード例 #12
0
ファイル: test_rdd.py プロジェクト: Imbruced/geo_pyspark
from tests.tools import tests_path

resource_folder = "resources"

point_rdd_input_location = os.path.join(tests_path, resource_folder,
                                        "arealm-small.csv")

point_rdd_splitter = FileDataSplitter.CSV

point_rdd_index_type = IndexType.RTREE
point_rdd_num_partitions = 5
point_rdd_offset = 1

knn_query_point = Point(-84.01, 34.01)

range_query_window = Envelope(-90.01, -80.01, 30.01, 40.01)

join_query_partitionin_type = GridType.QUADTREE
each_query_loop_times = 1


class TestSpatialRDD(TestBase):
    def test_empty_constructor_test(self):
        object_rdd = PointRDD(sparkContext=self.sc,
                              InputLocation=point_rdd_input_location,
                              Offset=point_rdd_offset,
                              splitter=point_rdd_splitter,
                              carryInputData=False)
        object_rdd_copy = PointRDD()
        object_rdd_copy.rawJvmSpatialRDD = object_rdd.rawJvmSpatialRDD
        object_rdd_copy.analyze()
コード例 #13
0
from geo_pyspark.core.spatialOperator import KNNQuery
from tests.test_base import TestBase
from tests.tools import tests_path, distance_sorting_functions

inputLocation = os.path.join(tests_path, "resources/zcta510-small.csv")
queryWindowSet = os.path.join(tests_path, "resources/zcta510-small.csv")
offset = 0
splitter = FileDataSplitter.CSV
gridType = "rtree"
indexType = "rtree"
numPartitions = 11
distance = 0.001
queryPolygonSet = os.path.join(tests_path,
                               "resources/primaryroads-polygon.csv")
inputCount = 3000
inputBoundary = Envelope(-171.090042, 145.830505, -14.373765, 49.00127)
matchCount = 17599
matchWithOriginalDuplicatesCount = 17738


class TestRectangleKNN(TestBase):
    query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01)
    loop_times = 5
    query_point = Point(-84.01, 34.01)
    top_k = 100

    def test_spatial_knn_query(self):
        rectangle_rdd = RectangleRDD(self.sc, inputLocation, offset, splitter,
                                     True)

        for i in range(self.loop_times):
コード例 #14
0
from geo_pyspark.core.spatialOperator import RangeQuery
from tests.test_base import TestBase
from tests.tools import tests_path

input_location = os.path.join(tests_path, "resources/arealm-small.csv")
queryWindowSet = os.path.join("zcta510-small.csv")
offset = 1
splitter = FileDataSplitter.CSV
gridType = "rtree"
indexType = "rtree"
numPartitions = 11
distance = 0.01
queryPolygonSet = "primaryroads-polygon.csv"
inputCount = 3000
inputBoundary = Envelope(minx=-173.120769,
                         maxx=-84.965961,
                         miny=30.244859,
                         maxy=71.355134)
rectangleMatchCount = 103
rectangleMatchWithOriginalDuplicatesCount = 103
polygonMatchCount = 472
polygonMatchWithOriginalDuplicatesCount = 562


class TestPointRange(TestBase):
    loop_times = 5
    query_envelope = Envelope(-90.01, -80.01, 30.01, 40.01)

    def test_spatial_range_query(self):
        spatial_rdd = PointRDD(self.sc, input_location, offset, splitter,
                               False)
        for i in range(self.loop_times):
コード例 #15
0
import os

from geo_pyspark.core.enums import FileDataSplitter
from geo_pyspark.core.geom_types import Envelope
from tests.tools import tests_path

input_location = os.path.join(tests_path, "resources/arealm-small.csv")
query_window_set = os.path.join("zcta510-small.csv")
offset = 1
splitter = FileDataSplitter.CSV
grid_type = "rtree"
index_type = "rtree"
num_partitions = 11
distance = 0.01
query_polygon_set = "primaryroads-polygon.csv"
input_count = 3000
input_boundary = Envelope(
    minx=-173.120769,
    maxx=-84.965961,
    miny=30.244859,
    maxy=71.355134
)
rectangle_match_count = 103
rectangle_with_original_duplicates_count = 103
polygon_match_count = 472
polygon_match_with_original_duplicates_count = 562

transformed_envelope = Envelope(14313844.29433424, 16587207.463797055, 942450.5989896542, 6697987.652517834)
crs_point_test = os.path.join(tests_path, "resources/crs-test-point.csv")
crs_envelope = Envelope(26.992172, 71.35513400000001, -179.147236, 179.475569)
crs_envelope_transformed = Envelope(-5446655.086752228, 1983668.382852457, 534241.8622328962, 6143259.02554563)
コード例 #16
0
 def test_jvm_envelope(self):
     envelope = Envelope(0.0, 5.0, 0.0, 5.0)
     jvm_instance = envelope.create_jvm_instance(self.spark.sparkContext._jvm)
     envelope_area = jvm_instance.getArea()
     assert envelope_area == 25.0, f"Expected area to be equal 25 but {envelope_area} was found"
コード例 #17
0
ファイル: test_circle.py プロジェクト: Imbruced/geo_pyspark
 def test_get_envelope_internal(self):
     point = Point(0.0, 0.0)
     circle = Circle(point, 0.1)
     assert Envelope(-0.1, 0.1, -0.1, 0.1) == circle.getEnvelopeInternal()
コード例 #18
0
ファイル: crs_transform.py プロジェクト: Imbruced/geo_pyspark
import os

from shapely.geometry import Point

from geo_pyspark.core.enums import FileDataSplitter, GridType, IndexType
from geo_pyspark.core.geom_types import Envelope
from tests.tools import tests_path

input_location = os.path.join(tests_path, "resources/crs-test-point.csv")
offset = 0
splitter = FileDataSplitter.CSV
grid_type = GridType.RTREE
index_type = IndexType.RTREE
num_partitions = 11
distance = 0.01
input_location_query_polygon = os.path.join(tests_path,
                                            "resources/crs-test-polygon.csv")
loop_times = 5
query_envelope = Envelope(30.01, 40.01, -90.01, -80.01)
query_point = Point(34.01, -84.01)
top_k = 100
コード例 #19
0
from geo_pyspark.core.enums import FileDataSplitter, IndexType
from geo_pyspark.core.geom_types import Envelope
from tests.tools import tests_path

input_location = os.path.join(tests_path, "resources/primaryroads-polygon.csv")
query_window_set = os.path.join(tests_path, "resources/zcta510-small.csv")
offset = 0
splitter = FileDataSplitter.CSV
grid_type = "rtree"
index_type = "rtree"
num_partitions = 5
distance = 0.01
input_location_query_polygon = os.path.join(tests_path, "resources/crs-test-polygon.csv")
query_polygon_count = 13361
query_envelope = Envelope(14313844.29433424, 16802290.85383074, 942450.5989896542, 8631908.270651951)
query_polygon_set = os.path.join(tests_path, "resources/primaryroads-polygon.csv")
input_location_geo_json = os.path.join(tests_path, "resources/testPolygon.json")
input_location_wkt = os.path.join(tests_path, "resources/county_small.tsv")
input_location_wkb = os.path.join(tests_path, "resources/county_small_wkb.tsv")
input_count = 3000
input_boundary = Envelope(minx=-158.104182, maxx=-66.03575, miny=17.986328, maxy=48.645133)
contains_match_count = 6941
contains_match_with_original_duplicates_count = 9334
intersects_match_count = 24323
intersects_match_with_original_duplicates_count = 32726

polygon_rdd_input_location = os.path.join(tests_path, "resources/primaryroads-polygon.csv")
polygon_rdd_splitter = FileDataSplitter.CSV
polygon_rdd_index_type = IndexType.RTREE
polygon_rdd_num_partitions = 5
コード例 #20
0
import os

from geo_pyspark.core.enums import FileDataSplitter
from geo_pyspark.core.geom_types import Envelope
from tests.tools import tests_path

input_location = os.path.join(tests_path, "resources/primaryroads-linestring.csv")
query_window_set = os.path.join(tests_path, "resources/zcta510-small.csv")
offset = 0
splitter = FileDataSplitter.CSV
grid_type = "rtree"
index_type = "rtree"
num_partitions = 5
distance = 0.01
query_polygon_set = os.path.join(tests_path, "resources/primaryroads-polygon.csv")
input_count = 3000
input_boundary = Envelope(minx=-123.393766, maxx=-65.648659, miny=17.982169, maxy=49.002374)
input_boundary_2 = Envelope(minx=-123.393766, maxx=-65.649956, miny=17.982169, maxy=49.002374)
match_count = 535
match_with_origin_with_duplicates_count = 875

transformed_envelope = Envelope(14313844.29433424, 16791709.85358734, 942450.5989896542, 8474779.278028419)
transformed_envelope_2 = Envelope(14313844.29433424, 16791709.85358734, 942450.5989896542, 8474645.488977494)