Example #1
0
 def assertEsriJsonBecomesGeoJson(self, esrijson, geojson):
     """Assert that ``esri2geojson(esrijson)`` is a dict equal to ``geojson``."""
     self.assertDictEqual(esri2geojson(esrijson), geojson)
Example #2
0
 def assertEsriJsonBecomesGeoJson(self, esrijson, geojson):
     """Convert ``esrijson`` with ``esri2geojson`` and compare to ``geojson``.

     The comparison is done with ``assertDictEqual``, with the converted
     value passed as the first argument and ``geojson`` as the second.
     """
     converted = esri2geojson(esrijson)
     expected = geojson
     self.assertDictEqual(converted, expected)
Example #3
0
    def __iter__(self):
        """Yield every feature of the layer, converted with ``esri2geojson``.

        The requests are planned up front using the first strategy that
        applies:

        1. ``resultOffset``/``resultRecordCount`` paging, when the feature
           count is available and the metadata reports pagination support.
        2. Object-ID ranges derived from the minimum and maximum object ID,
           when the metadata reports ``supportsStatistics``.
        3. Object-ID ranges derived from the full, sorted list of object IDs.
        4. Envelope queries via ``_scrape_an_envelope``; features are
           yielded directly and de-duplicated by object ID.

        Raises:
            EsriDownloadError: if no object ID field can be found for the
                non-paginated strategies, if a page request fails, or if
                the server response contains an ``error`` member.
        """
        query_fields = self._fields
        metadata = self.get_metadata()
        # A missing, null or zero maxRecordCount falls back to 500 so that
        # page_size is always a usable, non-zero step for range().
        page_size = min(1000, metadata.get('maxRecordCount') or 500)

        row_count = None
        try:
            row_count = self.get_feature_count()
        except EsriDownloadError:
            self._logger.info("Source does not support feature count")

        def build_page(selector):
            # Combine the per-page selector with the arguments shared by
            # every page. query_fields is read at call time, so a later
            # reset to None (see below) is honoured.
            args = dict(selector)
            args.update({
                'geometryPrecision': self._precision,
                'returnGeometry': self._request_geometry,
                'outSR': self._outSR,
                'outFields': ','.join(query_fields or ['*']),
                'f': 'json',
            })
            return self._build_query_args(args)

        # The pagination flag may live at the top level of the metadata or
        # under advancedQueryCapabilities; either sub-key may be absent.
        advanced = metadata.get('advancedQueryCapabilities') or {}
        supports_pagination = (metadata.get('supportsPagination')
                               or advanced.get('supportsPagination'))

        page_args = []

        if row_count is not None and supports_pagination:
            # If the layer supports pagination, we can use resultOffset/resultRecordCount to paginate

            # There's a bug where some servers won't handle these queries in combination with a list of
            # fields specified. We'll make a single, 1 row query here to check if the server supports this
            # and switch to querying for all fields if specifying the fields fails.
            if query_fields and not self.can_handle_pagination(query_fields):
                self._logger.info(
                    "Source does not support pagination with fields specified, so querying for all fields."
                )
                query_fields = None

            for offset in range(self._startWith, row_count, page_size):
                page_args.append(build_page({
                    'resultOffset': offset,
                    'resultRecordCount': page_size,
                    'where': '1=1',
                }))
            self._logger.info("Built %s requests using resultOffset method",
                              len(page_args))
        else:
            # If not, we can still use the `where` argument to paginate

            use_oids = True
            oid_field_name = self._find_oid_field_name(metadata)

            if not oid_field_name:
                raise EsriDownloadError(
                    "Could not find object ID field name for deduplication")

            if metadata.get('supportsStatistics'):
                # If the layer supports statistics, we can request maximum and minimum object ID
                # to help build the pages
                try:
                    (oid_min,
                     oid_max) = self._get_layer_min_max(oid_field_name)

                    # Pages are half-open on the low side: (page_min, page_max]
                    for page_min in range(oid_min - 1, oid_max, page_size):
                        page_max = min(page_min + page_size, oid_max)
                        page_args.append(build_page({
                            'where': '{} > {} AND {} <= {}'.format(
                                oid_field_name,
                                page_min,
                                oid_field_name,
                                page_max,
                            ),
                        }))
                    self._logger.info(
                        "Built {} requests using OID where clause method".
                        format(len(page_args)))

                    # If we reach this point we don't need to fall through to enumerating all object IDs
                    # because the statistics method worked
                    use_oids = False
                except EsriDownloadError:
                    self._logger.exception(
                        "Finding max/min from statistics failed. Trying OID enumeration."
                    )

            if use_oids:
                # If the layer does not support statistics, we can request
                # all the individual IDs and page through them one chunk at
                # a time.

                try:
                    oids = sorted(map(int, self._get_layer_oids()))

                    # Pages are closed on both ends: [first OID, last OID] of each chunk
                    for i in range(0, len(oids), page_size):
                        oid_chunk = oids[i:i + page_size]
                        page_args.append(build_page({
                            'where': '{} >= {} AND {} <= {}'.format(
                                oid_field_name,
                                oid_chunk[0],
                                oid_field_name,
                                oid_chunk[-1],
                            ),
                        }))
                    self._logger.info(
                        "Built %s requests using OID enumeration method",
                        len(page_args))
                except EsriDownloadError:
                    self._logger.info("Falling back to geo queries")
                    # Use geospatial queries when none of the ID-based methods will work
                    bounds = metadata['extent']
                    saved = set()

                    for feature in self._scrape_an_envelope(
                            bounds, self._outSR, page_size):
                        attrs = feature['attributes']
                        oid = attrs.get(oid_field_name)
                        # Skip features whose object ID was already yielded
                        if oid in saved:
                            continue

                        yield esri2geojson(feature)

                        saved.add(oid)

                    # Features were yielded directly; no planned pages to fetch
                    return

        query_url = self._build_url('/query')
        headers = self._build_headers()
        for query_args in page_args:
            try:
                response = self._request('POST',
                                         query_url,
                                         headers=headers,
                                         data=query_args)
                data = self._handle_esri_errors(
                    response, "Could not retrieve this chunk of objects")
            except EsriDownloadError:
                # Already a download error: propagate it unchanged instead of
                # relabelling it as a connection failure below.
                raise
            except socket.timeout as e:
                raise EsriDownloadError("Timeout when connecting to URL", e)
            except ValueError as e:
                raise EsriDownloadError("Could not parse JSON", e)
            except Exception as e:
                raise EsriDownloadError("Could not connect to URL", e)

            error = data.get('error')
            if error:
                raise EsriDownloadError(
                    "Problem querying ESRI dataset with args {}. Server said: {}"
                    .format(query_args, error['message']))

            # A response without a 'features' member is treated as an empty page
            for feature in data.get('features') or []:
                yield esri2geojson(feature)
Example #4
0
    def __iter__(self):
        """Yield every feature of the layer, converted with ``esri2geojson``.

        The requests are planned up front using the first strategy that
        applies:

        1. ``resultOffset``/``resultRecordCount`` paging, when the feature
           count is available and the metadata reports pagination support.
        2. Object-ID ranges derived from the minimum and maximum object ID,
           when the metadata reports ``supportsStatistics``.
        3. Object-ID ranges derived from the full, sorted list of object IDs.
        4. Envelope queries via ``_scrape_an_envelope``; features are
           yielded directly and de-duplicated by object ID.

        Raises:
            EsriDownloadError: if no object ID field can be found for the
                non-paginated strategies, if a page request fails, or if
                the server response contains an ``error`` member.
        """
        query_fields = self._fields
        metadata = self.get_metadata()
        # A missing, null or zero maxRecordCount falls back to 500 so that
        # page_size is always a usable, non-zero step for range().
        page_size = min(1000, metadata.get('maxRecordCount') or 500)

        row_count = None
        try:
            row_count = self.get_feature_count()
        except EsriDownloadError:
            self._logger.info("Source does not support feature count")

        def build_page(selector):
            # Combine the per-page selector with the arguments shared by
            # every page. query_fields is read at call time, so a later
            # reset to None (see below) is honoured.
            args = dict(selector)
            args.update({
                'geometryPrecision': self._precision,
                'returnGeometry': self._request_geometry,
                'outSR': self._outSR,
                'outFields': ','.join(query_fields or ['*']),
                'f': 'json',
            })
            return self._build_query_args(args)

        # The pagination flag may live at the top level of the metadata or
        # under advancedQueryCapabilities; either sub-key may be absent.
        advanced = metadata.get('advancedQueryCapabilities') or {}
        supports_pagination = metadata.get('supportsPagination') or advanced.get('supportsPagination')

        page_args = []

        if row_count is not None and supports_pagination:
            # If the layer supports pagination, we can use resultOffset/resultRecordCount to paginate

            # There's a bug where some servers won't handle these queries in combination with a list of
            # fields specified. We'll make a single, 1 row query here to check if the server supports this
            # and switch to querying for all fields if specifying the fields fails.
            if query_fields and not self.can_handle_pagination(query_fields):
                self._logger.info("Source does not support pagination with fields specified, so querying for all fields.")
                query_fields = None

            for offset in range(self._startWith, row_count, page_size):
                page_args.append(build_page({
                    'resultOffset': offset,
                    'resultRecordCount': page_size,
                    'where': '1=1',
                }))
            self._logger.info("Built %s requests using resultOffset method", len(page_args))
        else:
            # If not, we can still use the `where` argument to paginate

            use_oids = True
            oid_field_name = self._find_oid_field_name(metadata)

            if not oid_field_name:
                raise EsriDownloadError("Could not find object ID field name for deduplication")

            if metadata.get('supportsStatistics'):
                # If the layer supports statistics, we can request maximum and minimum object ID
                # to help build the pages
                try:
                    (oid_min, oid_max) = self._get_layer_min_max(oid_field_name)

                    # Pages are half-open on the low side: (page_min, page_max]
                    for page_min in range(oid_min - 1, oid_max, page_size):
                        page_max = min(page_min + page_size, oid_max)
                        page_args.append(build_page({
                            'where': '{} > {} AND {} <= {}'.format(
                                oid_field_name,
                                page_min,
                                oid_field_name,
                                page_max,
                            ),
                        }))
                    self._logger.info("Built {} requests using OID where clause method".format(len(page_args)))

                    # If we reach this point we don't need to fall through to enumerating all object IDs
                    # because the statistics method worked
                    use_oids = False
                except EsriDownloadError:
                    self._logger.exception("Finding max/min from statistics failed. Trying OID enumeration.")

            if use_oids:
                # If the layer does not support statistics, we can request
                # all the individual IDs and page through them one chunk at
                # a time.

                try:
                    oids = sorted(map(int, self._get_layer_oids()))

                    # Pages are closed on both ends: [first OID, last OID] of each chunk
                    for i in range(0, len(oids), page_size):
                        oid_chunk = oids[i:i+page_size]
                        page_args.append(build_page({
                            'where': '{} >= {} AND {} <= {}'.format(
                                oid_field_name,
                                oid_chunk[0],
                                oid_field_name,
                                oid_chunk[-1],
                            ),
                        }))
                    self._logger.info("Built %s requests using OID enumeration method", len(page_args))
                except EsriDownloadError:
                    self._logger.info("Falling back to geo queries")
                    # Use geospatial queries when none of the ID-based methods will work
                    bounds = metadata['extent']
                    saved = set()

                    for feature in self._scrape_an_envelope(bounds, self._outSR, page_size):
                        attrs = feature['attributes']
                        oid = attrs.get(oid_field_name)
                        # Skip features whose object ID was already yielded
                        if oid in saved:
                            continue

                        yield esri2geojson(feature)

                        saved.add(oid)

                    # Features were yielded directly; no planned pages to fetch
                    return

        query_url = self._build_url('/query')
        headers = self._build_headers()
        for query_args in page_args:
            try:
                response = self._request('POST', query_url, headers=headers, data=query_args)
                data = self._handle_esri_errors(response, "Could not retrieve this chunk of objects")
            except EsriDownloadError:
                # Already a download error: propagate it unchanged instead of
                # relabelling it as a connection failure below.
                raise
            except socket.timeout as e:
                raise EsriDownloadError("Timeout when connecting to URL", e)
            except ValueError as e:
                raise EsriDownloadError("Could not parse JSON", e)
            except Exception as e:
                raise EsriDownloadError("Could not connect to URL", e)

            error = data.get('error')
            if error:
                raise EsriDownloadError("Problem querying ESRI dataset with args {}. Server said: {}".format(query_args, error['message']))

            # A response without a 'features' member is treated as an empty page
            for feature in data.get('features') or []:
                yield esri2geojson(feature)