def assertEsriJsonBecomesGeoJson(self, esrijson, geojson):
    """Assert that converting *esrijson* produces exactly *geojson*.

    Runs the input through ``esri2geojson`` and compares the result to
    the expected GeoJSON dict with ``assertDictEqual`` so mismatches
    report a readable key-by-key diff.
    """
    converted = esri2geojson(esrijson)
    self.assertDictEqual(converted, geojson)
def __iter__(self):
    """Yield every feature in the layer as a GeoJSON feature dict.

    Builds a list of paged query arguments using the best strategy the
    server supports, in order of preference:

    1. ``resultOffset``/``resultRecordCount`` pagination, when the layer
       advertises pagination support (directly or via
       ``advancedQueryCapabilities``) and a feature count is available.
    2. ``where`` clauses over the object-ID range, when the layer
       supports statistics (min/max OID).
    3. ``where`` clauses over chunks of the fully enumerated object IDs.
    4. As a last resort, recursive geospatial envelope queries,
       deduplicated by object ID; that path yields directly and returns.

    The collected pages are then fetched one POST at a time and each
    returned feature is converted with ``esri2geojson``.

    Raises:
        EsriDownloadError: when no object-ID field can be found, a
            request times out, a response cannot be parsed, or the
            server reports an error for a page.
    """
    query_fields = self._fields
    metadata = self.get_metadata()
    # Respect the server's advertised page-size cap, defaulting to a
    # conservative 500 and never exceeding 1000 per request.
    page_size = min(1000, metadata.get('maxRecordCount', 500))

    row_count = None
    try:
        row_count = self.get_feature_count()
    except EsriDownloadError:
        self._logger.info("Source does not support feature count")

    page_args = []
    if row_count is not None and (metadata.get('supportsPagination') or
            (metadata.get('advancedQueryCapabilities') and metadata['advancedQueryCapabilities']['supportsPagination'])):
        # If the layer supports pagination, we can use resultOffset/resultRecordCount to paginate.
        # There's a bug where some servers won't handle these queries in combination with a list of
        # fields specified. We'll make a single, 1 row query here to check if the server supports this
        # and switch to querying for all fields if specifying the fields fails.
        if query_fields and not self.can_handle_pagination(query_fields):
            self._logger.info(
                "Source does not support pagination with fields specified, so querying for all fields."
            )
            query_fields = None

        for offset in range(self._startWith, row_count, page_size):
            query_args = self._build_query_args({
                'resultOffset': offset,
                'resultRecordCount': page_size,
                'where': '1=1',
                'geometryPrecision': self._precision,
                'returnGeometry': self._request_geometry,
                'outSR': self._outSR,
                'outFields': ','.join(query_fields or ['*']),
                'f': 'json',
            })
            page_args.append(query_args)
        self._logger.info("Built %s requests using resultOffset method", len(page_args))
    else:
        # If not, we can still use the `where` argument to paginate.
        use_oids = True
        oid_field_name = self._find_oid_field_name(metadata)

        if not oid_field_name:
            raise EsriDownloadError(
                "Could not find object ID field name for deduplication")

        if metadata.get('supportsStatistics'):
            # If the layer supports statistics, we can request maximum and minimum object ID
            # to help build the pages.
            try:
                (oid_min, oid_max) = self._get_layer_min_max(oid_field_name)

                # Start one below the first OID because the where clause
                # uses a strict '>' on the lower bound.
                for page_min in range(oid_min - 1, oid_max, page_size):
                    page_max = min(page_min + page_size, oid_max)
                    query_args = self._build_query_args({
                        'where': '{} > {} AND {} <= {}'.format(
                            oid_field_name,
                            page_min,
                            oid_field_name,
                            page_max,
                        ),
                        'geometryPrecision': self._precision,
                        'returnGeometry': self._request_geometry,
                        'outSR': self._outSR,
                        'outFields': ','.join(query_fields or ['*']),
                        'f': 'json',
                    })
                    page_args.append(query_args)
                self._logger.info(
                    "Built {} requests using OID where clause method".format(len(page_args)))

                # If we reach this point we don't need to fall through to enumerating all object IDs
                # because the statistics method worked.
                use_oids = False
            except EsriDownloadError:
                self._logger.exception(
                    "Finding max/min from statistics failed. Trying OID enumeration."
                )

        if use_oids:
            # If the layer does not support statistics, we can request
            # all the individual IDs and page through them one chunk at
            # a time.
            try:
                oids = sorted(map(int, self._get_layer_oids()))

                for i in range(0, len(oids), page_size):
                    oid_chunk = oids[i:i + page_size]
                    page_min = oid_chunk[0]
                    page_max = oid_chunk[-1]
                    query_args = self._build_query_args({
                        'where': '{} >= {} AND {} <= {}'.format(
                            oid_field_name,
                            page_min,
                            oid_field_name,
                            page_max,
                        ),
                        'geometryPrecision': self._precision,
                        'returnGeometry': self._request_geometry,
                        'outSR': self._outSR,
                        'outFields': ','.join(query_fields or ['*']),
                        'f': 'json',
                    })
                    page_args.append(query_args)
                self._logger.info(
                    "Built %s requests using OID enumeration method", len(page_args))
            except EsriDownloadError:
                self._logger.info("Falling back to geo queries")
                # Use geospatial queries when none of the ID-based methods will work.
                bounds = metadata['extent']
                saved = set()

                for feature in self._scrape_an_envelope(bounds, self._outSR, page_size):
                    attrs = feature['attributes']
                    oid = attrs.get(oid_field_name)
                    if oid in saved:
                        # Overlapping envelopes can return the same feature twice.
                        continue

                    yield esri2geojson(feature)
                    saved.add(oid)

                # Envelope scraping yields directly; no pages to fetch below.
                return

    query_url = self._build_url('/query')
    headers = self._build_headers()
    for query_args in page_args:
        try:
            response = self._request('POST', query_url, headers=headers, data=query_args)
            data = self._handle_esri_errors(
                response, "Could not retrieve this chunk of objects")
        except socket.timeout as e:
            raise EsriDownloadError("Timeout when connecting to URL", e)
        except ValueError as e:
            raise EsriDownloadError("Could not parse JSON", e)
        except Exception as e:
            # NOTE(review): this also rewraps EsriDownloadError raised by
            # _handle_esri_errors as "Could not connect to URL" — confirm intended.
            raise EsriDownloadError("Could not connect to URL", e)

        error = data.get('error')
        if error:
            raise EsriDownloadError(
                "Problem querying ESRI dataset with args {}. Server said: {}"
                .format(query_args, error['message']))

        # Some servers omit or null the 'features' key on an empty page;
        # treat that as an empty page instead of raising TypeError.
        features = data.get('features') or []
        for feature in features:
            yield esri2geojson(feature)
def __iter__(self):
    """Yield each feature in the layer, converted to a GeoJSON dict.

    Builds paged query arguments using the first workable strategy:
    resultOffset pagination, OID-range where clauses from min/max
    statistics, chunks of enumerated OIDs, or (last resort) geospatial
    envelope scraping, which yields directly and returns early.

    Raises:
        EsriDownloadError: if no object-ID field is found, a request
            times out, a response cannot be parsed, or the server
            reports an error for a page.
    """
    query_fields = self._fields
    metadata = self.get_metadata()
    # Cap each page at the server's maxRecordCount (default 500), never above 1000.
    page_size = min(1000, metadata.get('maxRecordCount', 500))
    # NOTE(review): geometry_type is unused in this method — confirm before removing.
    geometry_type = metadata.get('geometryType')

    row_count = None
    try:
        row_count = self.get_feature_count()
    except EsriDownloadError:
        self._logger.info("Source does not support feature count")

    page_args = []
    if row_count is not None and (metadata.get('supportsPagination') or \
            (metadata.get('advancedQueryCapabilities') and metadata['advancedQueryCapabilities']['supportsPagination'])):
        # If the layer supports pagination, we can use resultOffset/resultRecordCount to paginate
        # There's a bug where some servers won't handle these queries in combination with a list of
        # fields specified. We'll make a single, 1 row query here to check if the server supports this
        # and switch to querying for all fields if specifying the fields fails.
        if query_fields and not self.can_handle_pagination(query_fields):
            self._logger.info("Source does not support pagination with fields specified, so querying for all fields.")
            query_fields = None

        for offset in range(self._startWith, row_count, page_size):
            query_args = self._build_query_args({
                'resultOffset': offset,
                'resultRecordCount': page_size,
                'where': '1=1',
                'geometryPrecision': self._precision,
                'returnGeometry': self._request_geometry,
                'outSR': self._outSR,
                'outFields': ','.join(query_fields or ['*']),
                'f': 'json',
            })
            page_args.append(query_args)
        self._logger.info("Built %s requests using resultOffset method", len(page_args))
    else:
        # If not, we can still use the `where` argument to paginate
        use_oids = True
        oid_field_name = self._find_oid_field_name(metadata)

        if not oid_field_name:
            raise EsriDownloadError("Could not find object ID field name for deduplication")

        if metadata.get('supportsStatistics'):
            # If the layer supports statistics, we can request maximum and minimum object ID
            # to help build the pages
            try:
                (oid_min, oid_max) = self._get_layer_min_max(oid_field_name)

                # Lower bound starts one below oid_min because the where
                # clause uses a strict '>' comparison on it.
                for page_min in range(oid_min - 1, oid_max, page_size):
                    page_max = min(page_min + page_size, oid_max)
                    query_args = self._build_query_args({
                        'where': '{} > {} AND {} <= {}'.format(
                            oid_field_name,
                            page_min,
                            oid_field_name,
                            page_max,
                        ),
                        'geometryPrecision': self._precision,
                        'returnGeometry': self._request_geometry,
                        'outSR': self._outSR,
                        'outFields': ','.join(query_fields or ['*']),
                        'f': 'json',
                    })
                    page_args.append(query_args)
                self._logger.info("Built {} requests using OID where clause method".format(len(page_args)))

                # If we reach this point we don't need to fall through to enumerating all object IDs
                # because the statistics method worked
                use_oids = False
            except EsriDownloadError:
                self._logger.exception("Finding max/min from statistics failed. Trying OID enumeration.")

        if use_oids:
            # If the layer does not support statistics, we can request
            # all the individual IDs and page through them one chunk at
            # a time.
            try:
                oids = sorted(map(int, self._get_layer_oids()))

                for i in range(0, len(oids), page_size):
                    oid_chunk = oids[i:i+page_size]
                    page_min = oid_chunk[0]
                    page_max = oid_chunk[-1]
                    query_args = self._build_query_args({
                        'where': '{} >= {} AND {} <= {}'.format(
                            oid_field_name,
                            page_min,
                            oid_field_name,
                            page_max,
                        ),
                        'geometryPrecision': self._precision,
                        'returnGeometry': self._request_geometry,
                        'outSR': self._outSR,
                        'outFields': ','.join(query_fields or ['*']),
                        'f': 'json',
                    })
                    page_args.append(query_args)
                self._logger.info("Built %s requests using OID enumeration method", len(page_args))
            except EsriDownloadError:
                self._logger.info("Falling back to geo queries")
                # Use geospatial queries when none of the ID-based methods will work
                bounds = metadata['extent']
                # Track yielded OIDs: overlapping envelopes can return duplicates.
                saved = set()

                for feature in self._scrape_an_envelope(bounds, self._outSR, page_size):
                    attrs = feature['attributes']
                    oid = attrs.get(oid_field_name)
                    if oid in saved:
                        continue

                    yield esri2geojson(feature)

                    saved.add(oid)

                # Envelope scraping yields directly; skip the paged fetch below.
                return

    query_url = self._build_url('/query')
    headers = self._build_headers()
    for query_args in page_args:
        try:
            response = self._request('POST', query_url, headers=headers, data=query_args)
            data = self._handle_esri_errors(response, "Could not retrieve this chunk of objects")
        except socket.timeout as e:
            raise EsriDownloadError("Timeout when connecting to URL", e)
        except ValueError as e:
            raise EsriDownloadError("Could not parse JSON", e)
        except Exception as e:
            # NOTE(review): also rewraps EsriDownloadError from
            # _handle_esri_errors as a connection failure — confirm intended.
            raise EsriDownloadError("Could not connect to URL", e)

        error = data.get('error')
        if error:
            raise EsriDownloadError("Problem querying ESRI dataset with args {}. Server said: {}".format(query_args, error['message']))

        # NOTE(review): data.get('features') may be None if the server omits
        # the key, which would raise TypeError here — confirm servers always send it.
        features = data.get('features')

        for feature in features:
            yield esri2geojson(feature)