def download(self, source_urls, workdir, source_config):
    """Dump every ESRI layer in ``source_urls`` to a CSV file.

    Files are written below ``<workdir>/esri``.  A layer whose target file
    already exists is not downloaded again; the existing path is simply
    reported back.

    Args:
        source_urls: Iterable of layer URLs handed to ``EsriDumper``.
        workdir: Base directory; an ``esri`` sub-directory is created in it.
        source_config: Passed to
            ``EsriRestDownloadTask.field_names_to_request`` to decide which
            attribute fields to keep.  When that returns ``None`` the
            fields listed in the layer metadata are used instead.

    Returns:
        List of CSV file paths, one per source URL, in input order.
    """
    output_files = []
    download_path = os.path.join(workdir, 'esri')
    mkdirsp(download_path)

    query_fields = EsriRestDownloadTask.field_names_to_request(
        source_config)

    for source_url in source_urls:
        size = 0
        file_path = self.get_file_path(source_url, download_path)

        # An existing file is treated as an already-completed download.
        if os.path.exists(file_path):
            output_files.append(file_path)
            _L.debug("File exists %s", file_path)
            continue

        downloader = EsriDumper(source_url, parent_logger=_L, timeout=300)
        metadata = downloader.get_metadata()

        if query_fields is None:
            field_names = [f['name'] for f in metadata['fields']]
        else:
            # Copy so the shared request list is not mutated per URL.
            field_names = query_fields[:]

        if GEOM_FIELDNAME not in field_names:
            field_names.append(GEOM_FIELDNAME)

        # Header names are upper-cased; row keys are upper-cased to match
        # before each row is written.
        field_names = [name.upper() for name in field_names]

        # Get the count of rows in the layer (informational only).
        try:
            row_count = downloader.get_feature_count()
            _L.info("Source has {} rows".format(row_count))
        except EsriDownloadError:
            _L.info("Source doesn't support count")

        # newline='' is required by the csv module so that it controls row
        # terminators itself and embedded newlines survive round-trips.
        with open(file_path, 'w', encoding='utf-8', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=field_names)
            writer.writeheader()

            for feature in downloader:
                try:
                    geom = feature.get('geometry') or {}
                    row = feature.get('properties') or {}

                    if not geom:
                        raise TypeError("No geometry parsed")
                    if any((isinstance(g, float) and math.isnan(g))
                           for g in traverse(geom)):
                        raise TypeError("Geometry has NaN coordinates")

                    shp = shape(geom)
                    row[GEOM_FIELDNAME] = shp.wkt

                    row = {k.upper(): v for k, v in row.items()}

                    writer.writerow(
                        {fn: row.get(fn) for fn in field_names})
                    size += 1
                except TypeError:
                    # Covers the explicit raises above as well as any
                    # TypeError raised while building the geometry; the
                    # feature is skipped, the download continues.
                    _L.debug("Skipping a geometry", exc_info=True)

        _L.info("Downloaded %s ESRI features for file %s", size, file_path)
        output_files.append(file_path)

    return output_files
def download(self, source_urls, workdir, conform=None):
    """Dump every ESRI layer in ``source_urls`` to a CSV file.

    Files are written below ``<workdir>/esri``.  A layer whose target file
    already exists is not downloaded again; the existing path is simply
    reported back.  Each written row carries the feature attributes plus
    the geometry as WKT and a representative X/Y point rounded to seven
    decimals.

    Args:
        source_urls: Iterable of layer URLs handed to ``EsriDumper``.
        workdir: Base directory; an ``esri`` sub-directory is created in it.
        conform: Passed to ``self.field_names_to_request`` to decide which
            attribute fields to keep.  When that returns ``None`` the
            fields listed in the layer metadata are used instead.

    Returns:
        List of CSV file paths, one per source URL, in input order.
    """
    output_files = []
    download_path = os.path.join(workdir, 'esri')
    mkdirsp(download_path)

    query_fields = self.field_names_to_request(conform)

    for source_url in source_urls:
        size = 0
        file_path = self.get_file_path(source_url, download_path)

        # An existing file is treated as an already-completed download.
        if os.path.exists(file_path):
            output_files.append(file_path)
            _L.debug("File exists %s", file_path)
            continue

        downloader = EsriDumper(source_url, parent_logger=_L)
        metadata = downloader.get_metadata()

        if query_fields is None:
            field_names = [f['name'] for f in metadata['fields']]
        else:
            # Copy so the shared request list is not mutated per URL.
            field_names = query_fields[:]

        for extra_field in (X_FIELDNAME, Y_FIELDNAME, GEOM_FIELDNAME):
            if extra_field not in field_names:
                field_names.append(extra_field)

        # Get the count of rows in the layer
        row_count = downloader.get_feature_count()
        _L.info("Source has {} rows".format(row_count))

        with csvopen(file_path, 'w', encoding='utf-8') as f:
            writer = csvDictWriter(f, fieldnames=field_names,
                                   encoding='utf-8')
            writer.writeheader()

            for feature in downloader:
                try:
                    geom = feature.get('geometry') or {}
                    row = feature.get('properties') or {}

                    if not geom:
                        raise TypeError("No geometry parsed")
                    if any((isinstance(g, float) and math.isnan(g))
                           for g in traverse(geom)):
                        raise TypeError("Geometry has NaN coordinates")

                    shp = shape(geom)
                    row[GEOM_FIELDNAME] = shp.wkt

                    try:
                        centroid = shp.centroid
                    except RuntimeError as e:
                        if ('Invalid number of points in LinearRing found'
                                not in str(e)):
                            raise
                        # Fall back to the bounding-box midpoint.  Shapely
                        # reports bounds as (minx, miny, maxx, maxy).
                        xmin, ymin, xmax, ymax = shp.bounds
                        row[X_FIELDNAME] = round(xmin / 2 + xmax / 2, 7)
                        row[Y_FIELDNAME] = round(ymin / 2 + ymax / 2, 7)
                    else:
                        # An empty centroid has no coordinates to read;
                        # skip the feature instead of aborting the layer.
                        if centroid.is_empty:
                            raise TypeError("Geometry has an empty centroid")
                        row[X_FIELDNAME] = round(centroid.x, 7)
                        row[Y_FIELDNAME] = round(centroid.y, 7)

                    writer.writerow(
                        {fn: row.get(fn) for fn in field_names})
                    size += 1
                except TypeError:
                    # Covers the explicit raises above as well as any
                    # TypeError raised while building the geometry; the
                    # feature is skipped, the download continues.
                    _L.debug("Skipping a geometry", exc_info=True)

        _L.info("Downloaded %s ESRI features for file %s", size, file_path)
        output_files.append(file_path)

    return output_files
def download(self, source_urls, workdir, conform=None):
    """Dump every ESRI layer in ``source_urls`` to a CSV file.

    Files are written below ``<workdir>/esri``.  A layer whose target file
    already exists is not downloaded again; the existing path is simply
    reported back.  Each written row carries the feature attributes plus
    the geometry as WKT and a representative X/Y point rounded to seven
    decimals.

    Args:
        source_urls: Iterable of layer URLs handed to ``EsriDumper``.
        workdir: Base directory; an ``esri`` sub-directory is created in it.
        conform: Passed to
            ``EsriRestDownloadTask.field_names_to_request`` to decide which
            attribute fields to keep.  When that returns ``None`` the
            fields listed in the layer metadata are used instead.

    Returns:
        List of CSV file paths, one per source URL, in input order.
    """
    output_files = []
    download_path = os.path.join(workdir, 'esri')
    mkdirsp(download_path)

    query_fields = EsriRestDownloadTask.field_names_to_request(conform)

    for source_url in source_urls:
        size = 0
        file_path = self.get_file_path(source_url, download_path)

        # An existing file is treated as an already-completed download.
        if os.path.exists(file_path):
            output_files.append(file_path)
            _L.debug("File exists %s", file_path)
            continue

        downloader = EsriDumper(source_url, parent_logger=_L, timeout=300)
        metadata = downloader.get_metadata()

        if query_fields is None:
            field_names = [f['name'] for f in metadata['fields']]
        else:
            # Copy so the shared request list is not mutated per URL.
            field_names = query_fields[:]

        for extra_field in (X_FIELDNAME, Y_FIELDNAME, GEOM_FIELDNAME):
            if extra_field not in field_names:
                field_names.append(extra_field)

        # Get the count of rows in the layer (informational only).
        try:
            row_count = downloader.get_feature_count()
            _L.info("Source has {} rows".format(row_count))
        except EsriDownloadError:
            _L.info("Source doesn't support count")

        # newline='' is required by the csv module so that it controls row
        # terminators itself and embedded newlines survive round-trips.
        with open(file_path, 'w', encoding='utf-8', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=field_names)
            writer.writeheader()

            for feature in downloader:
                try:
                    geom = feature.get('geometry') or {}
                    row = feature.get('properties') or {}

                    if not geom:
                        raise TypeError("No geometry parsed")
                    if any((isinstance(g, float) and math.isnan(g))
                           for g in traverse(geom)):
                        raise TypeError("Geometry has NaN coordinates")

                    shp = shape(geom)
                    row[GEOM_FIELDNAME] = shp.wkt

                    try:
                        centroid = shp.centroid
                    except RuntimeError as e:
                        if ('Invalid number of points in LinearRing found'
                                not in str(e)):
                            raise
                        # Fall back to the bounding-box midpoint.  Shapely
                        # reports bounds as (minx, miny, maxx, maxy).
                        xmin, ymin, xmax, ymax = shp.bounds
                        row[X_FIELDNAME] = round(xmin / 2 + xmax / 2, 7)
                        row[Y_FIELDNAME] = round(ymin / 2 + ymax / 2, 7)
                    else:
                        # An empty centroid has no coordinates to read;
                        # skip the feature and log its geometry.
                        if centroid.is_empty:
                            raise TypeError(json.dumps(feature['geometry']))
                        row[X_FIELDNAME] = round(centroid.x, 7)
                        row[Y_FIELDNAME] = round(centroid.y, 7)

                    writer.writerow({fn: row.get(fn) for fn in field_names})
                    size += 1
                except TypeError:
                    # Covers the explicit raises above as well as any
                    # TypeError raised while building the geometry; the
                    # feature is skipped, the download continues.
                    _L.debug("Skipping a geometry", exc_info=True)

        _L.info("Downloaded %s ESRI features for file %s", size, file_path)
        output_files.append(file_path)

    return output_files