def __init__(self, code="", area_type=1, epsilon=5, media_path="", with_log=True, catalog="",
             coord_out="EPSG:3857", center_only=False, with_proxy=False):
    """Initialise the area and, when possible, load its data.

    Order of operations:
      1. store the arguments and reset every derived field;
      2. make sure the media directory exists (defaults to the current
         working directory when ``media_path`` is empty);
      3. if ``catalog`` is given, try to restore the area from it and stop
         on a hit;
      4. otherwise, when ``code`` is non-empty, download the feature info
         and the geometry, and store the result in the catalog if both a
         catalog and a geometry are available.

    :param code: area code; an empty value skips any download.
    :param area_type: value substituted into the search / feature-info URLs.
    :param epsilon: stored as ``self.epsilon`` for later contour handling.
    :param media_path: working directory for media; created when missing.
    :param with_log: stored flag that enables ``self.log`` output.
    :param catalog: value handed to ``Catalog(...)``; falsy disables caching.
    :param coord_out: name of the output coordinate system.
    :param center_only: stored flag; only the centre point is wanted.
    :param with_proxy: stored flag passed on when making requests.
    """
    # Caller-supplied settings.
    self.code = code
    self.area_type = area_type
    self.epsilon = epsilon
    self.with_log = with_log
    self.coord_out = coord_out
    self.center_only = center_only
    self.with_proxy = with_proxy
    self.media_path = media_path or os.getcwd()

    # Derived state, filled in by the download / restore steps.
    self.code_id = ""
    self.file_name = code[:].replace(":", "_")
    self.image_url = ""
    self.image_path = ""
    self.width = 0
    self.height = 0
    self.xy = []  # [[[area1], [hole1], [holeN]], [[area2]]]
    self.image_xy_corner = []  # cartesian coord from image, for draw plot
    self.extent = {}
    self.image_extent = {}
    self.center = {'x': None, 'y': None}
    self.attrs = {}

    # Service URLs specialised for the requested area type.
    url_params = {"area_type": area_type}
    self.search_url = string.Template(SEARCH_URL).substitute(url_params)
    self.feature_info_url = string.Template(FEATURE_INFO_URL).substitute(url_params)

    if not os.path.isdir(self.media_path):
        os.makedirs(self.media_path)

    # A catalog hit short-circuits every network request.
    if catalog:
        self.catalog = Catalog(catalog)
        cached = self.catalog.find(self.code)
        if cached:
            self.restore(cached)
            self.log("%s - restored from %s" % (self.code, catalog))
            return

    if not code:
        return

    if not self.download_feature_info():
        self.log("Nothing found")
        return

    geometry = self.get_geometry()
    if geometry and catalog:
        self.catalog.update(self)
        self.catalog.close()
def batch_parser(codes, area_type=1, media_path="", with_log=False, catalog_path="", coord_out="EPSG:3857",
                 file_name="example", output=os.path.join("output"), repeat=0, areas=None, with_attrs=False, delay=1,
                 center_only=False, with_proxy=False):
    """Parse a list of area codes, using the catalog as a cache.

    Every code is first looked up in the catalog; a miss triggers a fresh
    ``Area`` download (preceded by a ``delay`` pause after the first
    successful download).  Each obtained area gets its own JSON and CSV
    output; batch files are written once no further retry is scheduled.
    Codes that failed are retried recursively up to ``repeat`` times.

    :param codes: iterable of area code strings (each is ``strip()``-ed).
    :param area_type: passed to ``Area`` / ``restore_area``.
    :param media_path: passed to ``Area`` / ``restore_area``.
    :param with_log: passed to ``Area`` / ``restore_area``.
    :param catalog_path: passed to ``Catalog``; if the catalog cannot be
        created a message is printed and the function returns ``None``.
    :param coord_out: name of the output coordinate system.
    :param file_name: base name of the batch output files.
    :param output: output location handed to the output helpers.
    :param repeat: how many more retry rounds are allowed for failed codes.
    :param areas: list collecting every obtained area (shared by retries).
    :param with_attrs: passed to the JSON output helpers.
    :param delay: seconds to sleep before a download once a previous
        download has succeeded.
    :param center_only: passed to ``Area`` / ``restore_area``.
    :param with_proxy: passed to ``Area`` / ``restore_area``.
    :return: ``None``.
    """
    if areas is None:
        areas = []
    try:
        catalog = Catalog(catalog_path)
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt
        # and SystemExit.
        print("Catalog is required for batch mode!")
        return

    with_error = []
    with_no_coord = []
    features = []
    success = 0
    from_catalog = 0
    need_sleep = 0
    total = len(codes)

    print("================================")
    print("Launched parsing of %i areas:" % total)
    print("================================")

    for c in codes:
        area = None
        code = c.strip()
        print("%s" % code, end="")
        restore = catalog.find(code)
        if not restore:
            try:
                sleep(need_sleep)
                area = Area(code, media_path=media_path, area_type=area_type, with_log=with_log,
                            coord_out=coord_out, center_only=center_only, with_proxy=with_proxy)
                need_sleep = delay
                catalog.refresh(area)
                if not area.get_coord():
                    print(" - no coord", end="")
                    with_no_coord.append(area)
                else:
                    print(" - ok", end="")
                    success += 1
            except TimeoutException:
                print(" - error")
                print("Your IP is probably blocked. Try later or use proxy")
                break
            except Exception:
                # Best effort: remember the code so it can be retried.
                print(" - error", end="")
                with_error.append(code)
        else:
            from_catalog += 1
            area = restore_area(restore, media_path=media_path, area_type=area_type, with_log=with_log,
                                coord_out=coord_out, center_only=center_only, with_proxy=with_proxy)
            if restore["image_path"]:
                print(" - ok, from catalog", end="")
                success += 1
            else:
                print(" - no_coord, from catalog", end="")
                with_no_coord.append(area)

        # Integer arithmetic: float division followed by "%i" truncation
        # could print e.g. 28% for 29 of 100.
        processed = success + len(with_error) + len(with_no_coord)
        print(", %i%%" % (processed * 100 // total))

        if area:
            areas.append(area)
            feature = area_json_output(output, area, with_attrs)
            if feature:
                features.append(feature)
            area_csv_output(output, area)

    catalog.close()

    print("=================")
    print("Parsing complate:")
    print(" success : %i" % success)
    print(" error : %i" % len(with_error))
    print(" no_coord : %i" % len(with_no_coord))
    print(" from catalog: %i" % from_catalog)
    print("-----------------")

    if with_error and repeat:
        print("Retries parse areas with error")
        # ``with_attrs`` and ``center_only`` were previously not forwarded,
        # so retried codes silently fell back to the defaults.
        batch_parser(with_error, area_type=area_type, media_path=media_path, with_log=with_log,
                     file_name=file_name, catalog_path=catalog_path, coord_out=coord_out,
                     repeat=repeat - 1, areas=areas, output=output, with_attrs=with_attrs,
                     delay=delay, center_only=center_only, with_proxy=with_proxy)
    else:
        path = batch_csv_output(output, areas, file_name)
        print("Create output complete: %s" % path)
        # NOTE(review): ``with_no_coord`` and ``features`` are local to this
        # call, so after retries they only describe the last round, while
        # ``areas`` is shared across rounds - confirm this is intended.
        if with_no_coord:
            path = batch_csv_output(output, with_no_coord, "%s_no_coord" % file_name)
            print("Create output for no_coord complete: %s" % path)
        if features:
            batch_json_output(output, areas, file_name, with_attrs, coord_out)

    if with_error:
        print("-----------------")
        print("Error list:")
        for e in with_error:
            print(e)
class Area:
    """A single area identified by its code.

    An instance holds the attributes, extent and centre returned by the
    feature-info service and, unless ``center_only`` is set, the outline
    extracted from a raster image of the area (``self.xy``).
    """

    image_url = IMAGE_URL
    # Margin added on every side of the extent before requesting the image.
    buffer = 10
    # Attributes copied from a catalog record by ``restore``.
    save_attrs = ["code", "area_type", "attrs", "image_path", "center",
                  "extent", "image_extent", "width", "height"]

    def __init__(self, code="", area_type=1, epsilon=5, media_path="", with_log=True, catalog="",
                 coord_out="EPSG:3857", center_only=False, with_proxy=False):
        """Store the settings and, when ``code`` is given, load the area.

        :param code: area code; an empty value skips any download.
        :param area_type: value substituted into the service URLs.
        :param epsilon: tolerance passed to ``cv2.approxPolyDP``.
        :param media_path: working directory (current directory if empty);
            created when missing.
        :param with_log: enables ``self.log`` output.
        :param catalog: value handed to ``Catalog``; falsy disables caching.
        :param coord_out: output coordinate system; ``"EPSG:4326"``
            triggers conversion through ``xy2lonlat``.
        :param center_only: only the centre point is used as geometry.
        :param with_proxy: forwarded to ``make_request``.
        """
        self.with_log = with_log
        self.area_type = area_type
        self.media_path = media_path
        self.image_url = ""
        self.xy = []  # [[[area1], [hole1], [holeN]], [[area2]]]
        self.image_xy_corner = []  # cartesian coord from image, for draw plot
        self.width = 0
        self.height = 0
        self.image_path = ""
        self.extent = {}
        self.image_extent = {}
        self.center = {'x': None, 'y': None}
        self.center_only = center_only
        self.attrs = {}
        self.epsilon = epsilon
        self.code = code
        self.code_id = ""
        self.file_name = self.code[:].replace(":", "_")
        self.with_proxy = with_proxy

        self.coord_out = coord_out

        t = string.Template(SEARCH_URL)
        self.search_url = t.substitute({"area_type": area_type})
        t = string.Template(FEATURE_INFO_URL)
        self.feature_info_url = t.substitute({"area_type": area_type})

        if not self.media_path:
            self.media_path = os.getcwd()
        if not os.path.isdir(self.media_path):
            os.makedirs(self.media_path)
        if catalog:
            self.catalog = Catalog(catalog)
            restore = self.catalog.find(self.code)
            if restore:
                # A catalog hit replaces the feature-info download.
                self.restore(restore)
                self.log("%s - restored from %s" % (self.code, catalog))
                return
        if not code:
            return

        feature_info = self.download_feature_info()
        if feature_info:
            geometry = self.get_geometry()
            if catalog and geometry:
                self.catalog.update(self)
                self.catalog.close()
        else:
            self.log("Nothing found")

    def restore(self, restore):
        """Load the saved attributes from a catalog record and rebuild geometry.

        :param restore: mapping that contains every key of ``save_attrs``.
        """
        for a in self.save_attrs:
            setattr(self, a, restore[a])
        # The restored code doubles as the id used for image requests.
        self.code_id = self.code
        self.get_geometry()
        self.file_name = self.code.replace(":", "_")

    def get_coord(self):
        """Return the outline if known, else the centre point, else ``[]``."""
        if self.xy:
            return self.xy
        center = self.get_center_xy()
        if center:
            return center
        return []

    def get_attrs(self):
        """Return the attribute dict of the area."""
        return self.attrs

    def _get_attrs_to_geojson(self):
        """Return ``self.attrs`` with string values UTF-8 encoded and stripped.

        The values are replaced in place; a value that cannot be encoded is
        left unchanged.
        """
        if self.attrs:
            for a in self.attrs:
                attr = self.attrs[a]
                # NOTE(review): ``basestring`` is not a Python 3 builtin -
                # presumably a compatibility alias is defined in this file.
                if isinstance(attr, basestring):
                    try:
                        attr = attr.encode('utf-8').strip()
                        self.attrs[a] = attr
                    except UnicodeError:
                        pass
        return self.attrs

    def to_geojson_poly(self, with_attrs=False, dumps=True):
        """Shortcut for ``to_geojson("polygon", with_attrs, dumps)``."""
        return self.to_geojson("polygon", with_attrs, dumps)

    def to_geojson_center(self, with_attrs=False, dumps=True):
        """Return the centre point as GeoJSON regardless of ``center_only``."""
        current_center_status = self.center_only
        self.center_only = True
        try:
            return self.to_geojson("point", with_attrs, dumps)
        finally:
            # Restore the flag even when the conversion raises.
            self.center_only = current_center_status

    def to_geojson(self, geom_type="point", with_attrs=False, dumps=True):
        """Convert the area to GeoJSON through ``coords2geojson``.

        :param geom_type: geometry type; forced to ``"point"`` when
            ``center_only`` is set.
        :param with_attrs: include the area attributes.
        :param dumps: return a JSON string instead of the raw object.
        :return: the GeoJSON (string or object) or ``False`` when there are
            no coordinates or the conversion yields nothing.
        """
        attrs = self._get_attrs_to_geojson() if with_attrs else False
        if self.center_only:
            xy = self.get_center_xy()
            geom_type = "point"
        else:
            xy = self.xy
        if xy:
            feature_collection = coords2geojson(xy, geom_type, self.coord_out, attrs=attrs)
            if feature_collection:
                if dumps:
                    return json.dumps(feature_collection)
                return feature_collection
        return False

    def get_center_xy(self):
        """Return the centre as ``[[[[x, y]]]]`` or ``False`` when unknown."""
        center = self.attrs.get("center")
        if center:
            return [[[[center["x"], center["y"]]]]]
        return False

    def make_request(self, url):
        """Fetch ``url`` with the module-level ``make_request`` helper."""
        return make_request(url, self.with_proxy)

    def download_feature_info(self):
        """Download the feature info and fill attrs, extent and centre.

        :return: the ``feature`` object of the response, or a falsy value
            when nothing was found or an error occurred.
        :raises TimeoutException: propagated unchanged from the request.
        """
        try:
            search_url = self.feature_info_url + self.clear_code(self.code)
            self.log("Start downloading area info: %s" % search_url)
            data = json.loads(self.make_request(search_url))
            if data:
                feature = data.get("feature")
                if feature:
                    attrs = feature.get("attrs")
                    if attrs:
                        self.attrs = attrs
                        self.code_id = attrs["id"]
                    if feature.get("extent"):
                        self.extent = feature["extent"]
                    if feature.get("center"):
                        x = feature["center"]["x"]
                        y = feature["center"]["y"]
                        if self.coord_out == "EPSG:4326":
                            (x, y) = xy2lonlat(x, y)
                        self.center = {"x": x, "y": y}
                        self.attrs["center"] = self.center
                    self.log("Area info downloaded.")
                    return feature
        except TimeoutException:
            # Bare raise keeps the original message and traceback.
            raise
        except Exception as error:
            self.error(error)
        return False

    @staticmethod
    def clear_code(code):
        """Remove leading zeros from each part of a code: xxxx:00xx >> xxxx:xx.

        :raises ValueError: if a ``:``-separated part is not an integer.
        """
        return ":".join(str(int(part)) for part in code.split(":"))

    @staticmethod
    def get_extent_list(extent):
        """Convert an extent dict to the ordered list [xmin, ymin, xmax, ymax]."""
        return [extent["xmin"], extent["ymin"], extent["xmax"], extent["ymax"]]

    def get_buffer_extent_list(self):
        """Return the extent as an ordered list enlarged by ``self.buffer``.

        :return: ``[xmin, ymin, xmax, ymax]`` with the buffer applied, or an
            empty list when the extent is empty or its ``xmin`` is missing
            or falsy (a message is logged in that case).
        """
        ex = self.extent
        buf = self.buffer
        if ex and ex.get("xmin"):
            return [ex["xmin"] - buf, ex["ymin"] - buf, ex["xmax"] + buf, ex["ymax"] + buf]
        self.log("Area has no coordinates")
        # Always falsy, so callers never mistake a raw extent dict for a bbox.
        return []

    def get_geometry(self):
        """Return the centre point or the image-derived outline."""
        if self.center_only:
            return self.get_center_xy()
        return self.parse_geometry_from_image()

    def parse_geometry_from_image(self):
        """Download the area image and extract the outline from it.

        The image is produced by ``PkkAreaMerger`` inside ``<media_path>/tmp``.

        :return: the outline (see ``get_image_geometry``) or ``None`` when
            the area has no usable extent.
        """
        formats = ["png"]
        tmp_dir = os.path.join(self.media_path, "tmp")
        if not os.path.isdir(tmp_dir):
            os.makedirs(tmp_dir)
        for f in formats:
            bbox = self.get_buffer_extent_list()
            if not bbox:
                continue
            # The bbox is computed once and reused for the request.
            image = PkkAreaMerger(bbox=bbox, output_format=f, with_log=self.with_log,
                                  clear_code=self.clear_code(self.code_id), output_dir=tmp_dir,
                                  make_request=self.make_request)
            image.download()
            self.image_path = image.merge_tiles()
            self.width = image.real_width
            self.height = image.real_height
            self.image_extent = image.image_extent
            if image:
                return self.get_image_geometry()
        return None

    def get_image_geometry(self):
        """Return the outline from the downloaded image in spatial coordinates.

        Layout of the result::

            [polygon, polygon, ...]          several polygons = multipolygon
            polygon = [outer, hole_1, ...]   holes are optional
            contour = [[x, y], [x, y], ...]  at least three coordinate pairs

        :return: the nested list described above (also stored in
            ``self.xy``) or ``[]`` when no contour was found.
        """
        image_xy_corner = self.image_xy_corner = self.get_image_xy_corner()
        if image_xy_corner:
            # New lists are built, so ``image_xy_corner`` stays in pixels.
            self.xy = [[self.image_corners_to_coord(contour) for contour in geom]
                       for geom in image_xy_corner]
            return self.xy
        return []

    def get_image_xy_corner(self):
        """Get cartesian (pixel) contour coordinates from the raster image.

        :return: ``False`` when there is no image path, otherwise a list of
            polygons, each a list of contours of ``[x, y]`` pixel pairs
            (empty when contour detection fails).
        :raises TypeError: if the image file cannot be read.
        """
        import cv2

        if not self.image_path:
            return False
        image_xy_corners = []
        img = cv2.imread(self.image_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # Same exception type as the former implicit ``255 - None``
            # failure, but with a message that names the cause.
            raise TypeError("cv2.imread could not read image: %s" % self.image_path)
        imagem = (255 - img)
        try:
            ret, thresh = cv2.threshold(imagem, 10, 128, cv2.THRESH_BINARY)
            try:
                contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            except ValueError:
                # Some OpenCV versions return three values; unpacking two fails.
                im2, contours, hierarchy = cv2.findContours(
                    thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)

            hierarchy = hierarchy[0]
            hierarhy_contours = [[] for _ in range(len(hierarchy))]
            for index, (contour, node) in enumerate(zip(contours, hierarchy)):
                approx = cv2.approxPolyDP(contour, self.epsilon, True)
                if len(approx) > 2:
                    cc = [[point[0][0], point[0][1]] for point in approx]
                    # node[3] is the parent index; a contour with a parent
                    # is filed under that parent, otherwise under itself.
                    parent_index = node[3]
                    target = index if parent_index < 0 else parent_index
                    hierarhy_contours[target].append(cc)

            image_xy_corners = [c for c in hierarhy_contours if len(c) > 0]
            return image_xy_corners
        except Exception as ex:
            self.error(ex)
        return image_xy_corners

    def image_corners_to_coord(self, image_xy_corners):
        """Calculate spatial coordinates from cartesian (pixel) ones.

        :param image_xy_corners: iterable of ``(x, y)`` pixel pairs.
        :return: list of ``[x, y]`` pairs in ``coord_out`` coordinates.
        """
        ex = self.get_extent_list(self.image_extent)
        dx = ((ex[2] - ex[0]) / self.width)
        dy = ((ex[3] - ex[1]) / self.height)
        xy_corners = []
        for im_x, im_y in image_xy_corners:
            x = ex[0] + (im_x * dx)
            # The image y axis points down, the spatial one up.
            y = ex[3] - (im_y * dy)
            if self.coord_out == "EPSG:4326":
                (x, y) = xy2lonlat(x, y)
            xy_corners.append([x, y])
        return xy_corners

    def show_plot(self):
        """Development tool: show the image with the detected corners marked.

        :raises ImportError: if matplotlib is not installed.
        """
        import cv2
        try:
            from matplotlib import pyplot as plt
        except ImportError:
            self.error('Matplotlib is not installed.')
            raise ImportError('Matplotlib is not installed.')

        img = cv2.imread(self.image_path)
        # image_xy_corner is polygons -> contours -> points.
        for geom in self.image_xy_corner:
            for contour in geom:
                for x, y in contour:
                    cv2.circle(img, (int(x), int(y)), 3, 255, -1)
        plt.imshow(img)
        plt.show()

    def log(self, msg):
        """Print ``msg`` when logging is enabled."""
        if self.with_log:
            print(msg)

    def error(self, msg):
        """Print a fixed error marker; ``msg`` itself is not printed."""
        print('err')
def batch_parser(codes, area_type=1, media_path="", with_log=False, catalog_path="", coord_out="EPSG:3857",
                 file_name="example", output=os.path.join("output"), repeat=5, areas=None, with_attrs=False):
    """Parse a list of area codes, using the catalog as a cache.

    Every code is first looked up in the catalog; a miss triggers a fresh
    ``Area`` download.  An area without coordinates counts as an error.
    Each obtained area gets its own JSON and CSV output.  Failed codes are
    retried recursively up to ``repeat`` times; the batch CSV is written
    by the call that schedules no further retry.

    :param codes: iterable of area code strings (each is ``strip()``-ed).
    :param area_type: passed to ``Area``.
    :param media_path: passed to ``Area``.
    :param with_log: passed to ``Area``.
    :param catalog_path: passed to ``Catalog``.
    :param coord_out: name of the output coordinate system.
    :param file_name: base name of the batch output file.
    :param output: output location handed to the output helpers.
    :param repeat: how many more retry rounds are allowed for failed codes.
    :param areas: list collecting every obtained area (shared by retries).
    :param with_attrs: passed to ``area_json_output``.
    :return: ``None``.
    """
    if areas is None:
        areas = []
    catalog = Catalog(catalog_path)
    with_error = []
    success = 0
    from_catalog = 0
    total = len(codes)

    print("================================")
    print("Launched parsing of %i areas:" % total)
    print("================================")

    for c in codes:
        code = c.strip()
        print("%s" % code, end="")
        restore = catalog.find(code)
        if not restore:
            try:
                area = Area(code, media_path=media_path, area_type=area_type, with_log=with_log,
                            coord_out=coord_out)
                # Explicit check instead of ``assert``: assertions are
                # removed under ``python -O`` and empty areas would then be
                # counted as successes.
                if not area.get_coord():
                    raise ValueError("Area %s has no coordinates" % code)
                catalog.update(area)
                print(" - ok", end="")
                success += 1
            except Exception:
                area = None
                print(" - error", end="")
                with_error.append(code)
        else:
            print(" - ok, from catalog", end="")
            success += 1
            from_catalog += 1
            area = restore_area(restore, coord_out)

        # Integer arithmetic: float division followed by "%i" truncation
        # could print e.g. 28% for 29 of 100.
        print(", %i%%" % ((success + len(with_error)) * 100 // total))

        if area:
            areas.append(area)
            area_json_output(output, area, with_attrs)
            area_csv_output(output, area)

    catalog.close()

    print("=================")
    print("Parsing complate:")
    print(" success : %i" % success)
    print(" error : %i" % len(with_error))
    print(" from catalog: %i" % from_catalog)
    print("-----------------")

    if with_error and repeat:
        print("Retries parse areas with error")
        # ``with_attrs`` was previously not forwarded, so retried areas
        # were exported without attributes.
        batch_parser(with_error, area_type=area_type, media_path=media_path, with_log=with_log,
                     file_name=file_name, catalog_path=catalog_path, coord_out=coord_out,
                     repeat=repeat - 1, areas=areas, output=output, with_attrs=with_attrs)
    else:
        path = batch_csv_output(output, areas, file_name)
        print("Create output complete: %s" % path)

    if with_error:
        print("-----------------")
        print("Error list:")
        for e in with_error:
            print(e)