def main():
    """Command-line entry point: stream an Esri layer to ``args.outfile``.

    Parses ``sys.argv[1:]``, attaches a stream handler to the ``cli``
    logger, builds an ``EsriDumper`` from the parsed options and writes the
    features it yields.  When ``args.jsonlines`` is truthy each feature is
    written as one JSON document per line; otherwise the features are
    wrapped in a single GeoJSON ``FeatureCollection`` object.
    """
    options = _parse_args(sys.argv[1:])
    extra_headers = _collect_headers(options.headers)
    extra_params = _collect_params(options.params)

    # Log to a stream handler at the level requested on the command line.
    cli_logger = logging.getLogger('cli')
    cli_logger.setLevel(options.loglevel)
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    cli_logger.addHandler(stream_handler)

    # A missing/empty field list means "no explicit field selection".
    if options.fields:
        requested_fields = options.fields.split(',')
    else:
        requested_fields = None

    dumper = EsriDumper(
        options.url,
        extra_query_args=extra_params,
        extra_headers=extra_headers,
        fields=requested_fields,
        request_geometry=options.request_geometry,
        proxy=options.proxy,
        timeout=options.timeout,
        parent_logger=cli_logger,
        max_retries=options.max_retries,
        offset=options.offset,
    )

    out = options.outfile

    if options.jsonlines:
        # One JSON document per line.
        for feature in dumper:
            out.write(json.dumps(feature))
            out.write('\n')
        return

    # Single FeatureCollection: features are separated by ",\n", with no
    # separator before the first or after the last one.
    out.write('{"type":"FeatureCollection","features":[\n')
    wrote_feature = False
    for feature in dumper:
        if wrote_feature:
            out.write(',\n')
        out.write(json.dumps(feature))
        wrote_feature = True
    out.write('\n')
    out.write(']}')
def download(self, source_urls, workdir, conform=None):
    """Download each Esri source URL into a CSV file under ``<workdir>/esri``.

    For every URL the target path comes from ``self.get_file_path``.  If that
    file already exists it is reused as-is; otherwise the features yielded by
    an ``EsriDumper`` are written as CSV rows containing the requested
    attribute fields plus ``X_FIELDNAME``, ``Y_FIELDNAME`` and
    ``GEOM_FIELDNAME`` (the geometry as WKT).

    Features with no geometry or with NaN coordinates are skipped and logged
    at debug level.

    Args:
        source_urls: iterable of Esri layer URLs to download.
        workdir: directory under which the ``esri`` download folder is made.
        conform: passed to ``self.field_names_to_request`` to decide which
            fields to write; when that returns ``None`` every field listed
            in the layer metadata is written.

    Returns:
        List of CSV file paths, one per source URL (pre-existing files
        included).
    """
    output_files = []
    download_path = os.path.join(workdir, 'esri')
    mkdirsp(download_path)

    query_fields = self.field_names_to_request(conform)

    for source_url in source_urls:
        size = 0
        file_path = self.get_file_path(source_url, download_path)

        # An existing file is reused without contacting the server.
        if os.path.exists(file_path):
            output_files.append(file_path)
            _L.debug("File exists %s", file_path)
            continue

        downloader = EsriDumper(source_url, parent_logger=_L)

        metadata = downloader.get_metadata()

        if query_fields is None:
            field_names = [f['name'] for f in metadata['fields']]
        else:
            # Copy so the shared list is not mutated across source URLs.
            field_names = query_fields[:]

        # The computed columns are always part of the output.
        for computed_name in (X_FIELDNAME, Y_FIELDNAME, GEOM_FIELDNAME):
            if computed_name not in field_names:
                field_names.append(computed_name)

        # Get the count of rows in the layer
        row_count = downloader.get_feature_count()

        _L.info("Source has {} rows".format(row_count))

        with csvopen(file_path, 'w', encoding='utf-8') as f:
            writer = csvDictWriter(f, fieldnames=field_names, encoding='utf-8')
            writer.writeheader()

            for feature in downloader:
                try:
                    geom = feature.get('geometry') or {}
                    row = feature.get('properties') or {}

                    # TypeError is the "skip this feature" signal handled below.
                    if not geom:
                        raise TypeError("No geometry parsed")
                    if any((isinstance(g, float) and math.isnan(g))
                           for g in traverse(geom)):
                        raise TypeError("Geometry has NaN coordinates")

                    shp = shape(feature['geometry'])
                    row[GEOM_FIELDNAME] = shp.wkt

                    try:
                        centroid = shp.centroid
                    except RuntimeError as e:
                        if 'Invalid number of points in LinearRing found' not in str(e):
                            raise
                        # Fall back to the centre of the bounding box.
                        # Shapely orders bounds as (minx, miny, maxx, maxy).
                        xmin, ymin, xmax, ymax = shp.bounds
                        row[X_FIELDNAME] = round(xmin / 2 + xmax / 2, 7)
                        row[Y_FIELDNAME] = round(ymin / 2 + ymax / 2, 7)
                    else:
                        row[X_FIELDNAME] = round(centroid.x, 7)
                        row[Y_FIELDNAME] = round(centroid.y, 7)

                    # Only the selected columns are written; missing ones
                    # become empty cells.
                    writer.writerow({fn: row.get(fn) for fn in field_names})
                    size += 1
                except TypeError:
                    _L.debug("Skipping a geometry", exc_info=True)

        _L.info("Downloaded %s ESRI features for file %s", size, file_path)
        output_files.append(file_path)

    return output_files
def download(self, source_urls, workdir, source_config):
    """Download each Esri source URL into a CSV file under ``<workdir>/esri``.

    For every URL the target path comes from ``self.get_file_path``.  If that
    file already exists it is reused as-is; otherwise the features yielded by
    an ``EsriDumper`` are written as CSV rows.  Column names and feature
    property names are upper-cased before writing, and the geometry is
    stored as WKT in the ``GEOM_FIELDNAME`` column.

    Features with no geometry or with NaN coordinates are skipped and logged
    at debug level.

    Args:
        source_urls: iterable of Esri layer URLs to download.
        workdir: directory under which the ``esri`` download folder is made.
        source_config: passed to
            ``EsriRestDownloadTask.field_names_to_request`` to decide which
            fields to write; when that returns ``None`` every field listed
            in the layer metadata is written.

    Returns:
        List of CSV file paths, one per source URL (pre-existing files
        included).
    """
    output_files = []
    download_path = os.path.join(workdir, 'esri')
    mkdirsp(download_path)

    query_fields = EsriRestDownloadTask.field_names_to_request(source_config)

    for source_url in source_urls:
        size = 0
        file_path = self.get_file_path(source_url, download_path)

        # An existing file is reused without contacting the server.
        if os.path.exists(file_path):
            output_files.append(file_path)
            _L.debug("File exists %s", file_path)
            continue

        # NOTE(review): timeout is presumably in seconds - confirm against
        # EsriDumper.
        downloader = EsriDumper(source_url, parent_logger=_L, timeout=300)

        metadata = downloader.get_metadata()

        if query_fields is None:
            field_names = [f['name'] for f in metadata['fields']]
        else:
            # Copy so the shared list is not mutated across source URLs.
            field_names = query_fields[:]

        if GEOM_FIELDNAME not in field_names:
            field_names.append(GEOM_FIELDNAME)

        # Column names are normalised to upper case; row keys get the same
        # treatment below so the lookups line up.
        field_names = [name.upper() for name in field_names]

        # Get the count of rows in the layer
        try:
            row_count = downloader.get_feature_count()
            _L.info("Source has {} rows".format(row_count))
        except EsriDownloadError:
            _L.info("Source doesn't support count")

        # newline='' leaves line endings to the csv module, as its
        # documentation requires; otherwise platforms that translate "\n"
        # would emit "\r\r\n" row terminators.
        with open(file_path, 'w', encoding='utf-8', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=field_names)
            writer.writeheader()

            for feature in downloader:
                try:
                    geom = feature.get('geometry') or {}
                    row = feature.get('properties') or {}

                    # TypeError is the "skip this feature" signal handled below.
                    if not geom:
                        raise TypeError("No geometry parsed")
                    if any((isinstance(g, float) and math.isnan(g))
                           for g in traverse(geom)):
                        raise TypeError("Geometry has NaN coordinates")

                    shp = shape(feature['geometry'])
                    row[GEOM_FIELDNAME] = shp.wkt

                    # Upper-case the keys to match the upper-cased header.
                    row = {key.upper(): value for key, value in row.items()}

                    # Only the selected columns are written; missing ones
                    # become empty cells.
                    writer.writerow({fn: row.get(fn) for fn in field_names})
                    size += 1
                except TypeError:
                    _L.debug("Skipping a geometry", exc_info=True)

        _L.info("Downloaded %s ESRI features for file %s", size, file_path)
        output_files.append(file_path)

    return output_files
def download(self, source_urls, workdir, conform=None):
    """Download each Esri source URL into a CSV file under ``<workdir>/esri``.

    For every URL the target path comes from ``self.get_file_path``.  If that
    file already exists it is reused as-is; otherwise the features yielded by
    an ``EsriDumper`` are written as CSV rows containing the requested
    attribute fields plus ``X_FIELDNAME``, ``Y_FIELDNAME`` and
    ``GEOM_FIELDNAME`` (the geometry as WKT).

    Features with no geometry, NaN coordinates or an empty centroid are
    skipped and logged at debug level.

    Args:
        source_urls: iterable of Esri layer URLs to download.
        workdir: directory under which the ``esri`` download folder is made.
        conform: passed to ``EsriRestDownloadTask.field_names_to_request``
            to decide which fields to write; when that returns ``None``
            every field listed in the layer metadata is written.

    Returns:
        List of CSV file paths, one per source URL (pre-existing files
        included).
    """
    output_files = []
    download_path = os.path.join(workdir, 'esri')
    mkdirsp(download_path)

    query_fields = EsriRestDownloadTask.field_names_to_request(conform)

    for source_url in source_urls:
        size = 0
        file_path = self.get_file_path(source_url, download_path)

        # An existing file is reused without contacting the server.
        if os.path.exists(file_path):
            output_files.append(file_path)
            _L.debug("File exists %s", file_path)
            continue

        # NOTE(review): timeout is presumably in seconds - confirm against
        # EsriDumper.
        downloader = EsriDumper(source_url, parent_logger=_L, timeout=300)

        metadata = downloader.get_metadata()

        if query_fields is None:
            field_names = [f['name'] for f in metadata['fields']]
        else:
            # Copy so the shared list is not mutated across source URLs.
            field_names = query_fields[:]

        # The computed columns are always part of the output.
        for computed_name in (X_FIELDNAME, Y_FIELDNAME, GEOM_FIELDNAME):
            if computed_name not in field_names:
                field_names.append(computed_name)

        # Get the count of rows in the layer
        try:
            row_count = downloader.get_feature_count()
            _L.info("Source has {} rows".format(row_count))
        except EsriDownloadError:
            _L.info("Source doesn't support count")

        # newline='' leaves line endings to the csv module, as its
        # documentation requires; otherwise platforms that translate "\n"
        # would emit "\r\r\n" row terminators.
        with open(file_path, 'w', encoding='utf-8', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=field_names)
            writer.writeheader()

            for feature in downloader:
                try:
                    geom = feature.get('geometry') or {}
                    row = feature.get('properties') or {}

                    # TypeError is the "skip this feature" signal handled below.
                    if not geom:
                        raise TypeError("No geometry parsed")
                    if any((isinstance(g, float) and math.isnan(g))
                           for g in traverse(geom)):
                        raise TypeError("Geometry has NaN coordinates")

                    shp = shape(feature['geometry'])
                    row[GEOM_FIELDNAME] = shp.wkt

                    try:
                        centroid = shp.centroid
                    except RuntimeError as e:
                        if 'Invalid number of points in LinearRing found' not in str(e):
                            raise
                        # Fall back to the centre of the bounding box.
                        # Shapely orders bounds as (minx, miny, maxx, maxy).
                        xmin, ymin, xmax, ymax = shp.bounds
                        row[X_FIELDNAME] = round(xmin / 2 + xmax / 2, 7)
                        row[Y_FIELDNAME] = round(ymin / 2 + ymax / 2, 7)
                    else:
                        # An empty centroid has no coordinates to read;
                        # skip the feature and log its geometry.
                        if centroid.is_empty:
                            raise TypeError(json.dumps(feature['geometry']))
                        row[X_FIELDNAME] = round(centroid.x, 7)
                        row[Y_FIELDNAME] = round(centroid.y, 7)

                    # Only the selected columns are written; missing ones
                    # become empty cells.
                    writer.writerow({fn: row.get(fn) for fn in field_names})
                    size += 1
                except TypeError:
                    _L.debug("Skipping a geometry", exc_info=True)

        _L.info("Downloaded %s ESRI features for file %s", size, file_path)
        output_files.append(file_path)

    return output_files