Esempio n. 1
0
    def __init__(self,
                 code="",
                 area_type=1,
                 epsilon=5,
                 media_path="",
                 with_log=True,
                 catalog="",
                 coord_out="EPSG:3857",
                 center_only=False,
                 with_proxy=False):
        """Initialise the area and, if ``code`` is given, load its data.

        The instance is first filled with empty defaults. Afterwards, in
        order: the media directory is created when missing, the catalog
        (when ``catalog`` is truthy) is asked for a stored record of
        ``code``, and only if nothing was restored the feature info is
        downloaded and the geometry is resolved.

        Args:
            code: area code; an empty value skips every download.
            area_type: value substituted into the search / feature-info
                URL templates.
            epsilon: stored on the instance as ``self.epsilon``.
            media_path: working directory; defaults to the current working
                directory when empty and is created if it does not exist.
            with_log: stored as ``self.with_log``.
            catalog: argument handed to ``Catalog``; falsy disables the
                catalog altogether.
            coord_out: identifier of the output coordinate system.
            center_only: stored as ``self.center_only``.
            with_proxy: stored as ``self.with_proxy``.
        """
        # Identification of the area.
        self.code = code
        self.code_id = ""
        self.area_type = area_type
        self.file_name = self.code.replace(":", "_")

        # Behaviour switches.
        self.with_log = with_log
        self.with_proxy = with_proxy
        self.center_only = center_only
        self.epsilon = epsilon
        self.coord_out = coord_out

        # Data that is filled in later by download / parsing steps.
        self.attrs = {}
        self.extent = {}
        self.image_extent = {}
        self.center = {'x': None, 'y': None}
        self.xy = []  # [[[area1], [hole1], [holeN]], [[area2]]]
        self.image_xy_corner = []  # cartesian coord from image, for draw plot
        self.width = 0
        self.height = 0
        self.image_url = ""
        self.image_path = ""
        self.media_path = media_path

        # Both service URLs are templates parameterised by the area type.
        substitutions = {"area_type": area_type}
        self.search_url = string.Template(SEARCH_URL).substitute(
            substitutions)
        self.feature_info_url = string.Template(FEATURE_INFO_URL).substitute(
            substitutions)

        # Fall back to the current working directory and make sure it exists.
        if not self.media_path:
            self.media_path = os.getcwd()
        if not os.path.isdir(self.media_path):
            os.makedirs(self.media_path)

        # A catalog hit short-circuits any network access.
        if catalog:
            self.catalog = Catalog(catalog)
            stored = self.catalog.find(self.code)
            if stored:
                self.restore(stored)
                self.log("%s - restored from %s" % (self.code, catalog))
                return

        if not code:
            return

        if not self.download_feature_info():
            self.log("Nothing found")
            return

        geometry = self.get_geometry()
        if not (catalog and geometry):
            return
        # Persist the freshly parsed area and release the catalog.
        self.catalog.update(self)
        self.catalog.close()
Esempio n. 2
0
def batch_parser(codes,
                 area_type=1,
                 media_path="",
                 with_log=False,
                 catalog_path="",
                 coord_out="EPSG:3857",
                 file_name="example",
                 output=os.path.join("output"),
                 repeat=0,
                 areas=None,
                 with_attrs=False,
                 delay=1,
                 center_only=False,
                 with_proxy=False):
    """Parse a list of area codes and write per-area and batch outputs.

    Every code is looked up in the catalog first. Codes that are not stored
    there are downloaded through ``Area``; ``delay`` seconds are slept before
    each download that follows a successful one. Progress and a final summary
    are printed to stdout.

    Args:
        codes: iterable of code strings (surrounding whitespace is stripped);
            must support ``len()``.
        area_type: forwarded to ``Area`` / ``restore_area``.
        media_path: forwarded to ``Area`` / ``restore_area``.
        with_log: forwarded to ``Area`` / ``restore_area``.
        catalog_path: argument for ``Catalog``; if the catalog cannot be
            created a message is printed and the function returns.
        coord_out: output coordinate system identifier.
        file_name: base name for the batch output files.
        output: output location passed to the ``*_output`` helpers.
        repeat: how many additional passes to make over codes that failed.
        areas: list that collects parsed areas; it is extended in place and
            shared with retry passes. A new list is used when ``None``.
        with_attrs: forwarded to the JSON output helpers.
        delay: seconds to sleep between consecutive downloads.
        center_only: forwarded to ``Area`` / ``restore_area``.
        with_proxy: forwarded to ``Area`` / ``restore_area``.

    Returns:
        None.
    """
    if areas is None:
        areas = []
    try:
        catalog = Catalog(catalog_path)
    except Exception:
        # Narrowed from a bare ``except`` so Ctrl-C / SystemExit still work.
        print("Catalog is required for batch mode!")
        return
    with_error = []
    with_no_coord = []
    success = 0
    from_catalog = 0
    print("================================")
    print("Launched parsing of %i areas:" % len(codes))
    print("================================")
    need_sleep = 0
    features = []
    try:
        for c in codes:
            area = None
            code = c.strip()
            print("%s" % code, end="")
            restore = catalog.find(code)
            if not restore:
                try:
                    sleep(need_sleep)
                    area = Area(code,
                                media_path=media_path,
                                area_type=area_type,
                                with_log=with_log,
                                coord_out=coord_out,
                                center_only=center_only,
                                with_proxy=with_proxy)
                    # Only throttle after a request has actually been made.
                    need_sleep = delay
                    catalog.refresh(area)
                    if len(area.get_coord()) > 0:
                        print(" - ok", end="")
                        success += 1
                    else:
                        print(" - no coord", end="")
                        with_no_coord.append(area)
                except TimeoutException:
                    print(" - error")
                    print("Your IP is probably blocked. "
                          "Try later or use proxy")
                    # Further requests are pointless; stop the whole batch.
                    break
                except Exception:
                    print(" - error", end="")
                    with_error.append(code)
            else:
                from_catalog += 1
                area = restore_area(restore,
                                    media_path=media_path,
                                    area_type=area_type,
                                    with_log=with_log,
                                    coord_out=coord_out,
                                    center_only=center_only,
                                    with_proxy=with_proxy)
                if restore["image_path"]:
                    print(" - ok, from catalog", end="")
                    success += 1
                else:
                    print(" - no_coord, from catalog", end="")
                    with_no_coord.append(area)
            percent = ((success + len(with_error) + len(with_no_coord)) /
                       len(codes)) * 100
            print(", %i%%" % percent)

            if area:
                areas.append(area)
                feature = area_json_output(output, area, with_attrs)
                if feature:
                    features.append(feature)
                area_csv_output(output, area)
    finally:
        # Release the catalog even if an output helper raised.
        catalog.close()

    print("=================")
    print("Parsing complate:")
    print("  success     : %i" % success)
    print("  error       : %i" % len(with_error))
    print("  no_coord    : %i" % len(with_no_coord))
    print("  from catalog: %i" % from_catalog)
    print("-----------------")

    if with_error and repeat:
        print("Retries parse areas with error")
        # Forward *all* options so retried codes are handled exactly like
        # the first pass (with_attrs / center_only used to be dropped here).
        batch_parser(with_error,
                     area_type=area_type,
                     media_path=media_path,
                     with_log=with_log,
                     file_name=file_name,
                     catalog_path=catalog_path,
                     coord_out=coord_out,
                     repeat=repeat - 1,
                     areas=areas,
                     output=output,
                     with_attrs=with_attrs,
                     delay=delay,
                     center_only=center_only,
                     with_proxy=with_proxy)
    else:
        path = batch_csv_output(output, areas, file_name)
        print("Create output complete: %s" % path)
        if with_no_coord:
            path = batch_csv_output(output, with_no_coord,
                                    "%s_no_coord" % file_name)
            print("Create output for no_coord complete: %s" % path)
        # NOTE(review): ``features`` only covers the current pass, so after a
        # retry the batch JSON is skipped when the retry itself produced no
        # feature even if earlier passes did - confirm whether intended.
        if features:
            batch_json_output(output, areas, file_name, with_attrs, coord_out)
        if with_error:
            print("-----------------")
            print("Error list:")
            for e in with_error:
                print(e)
Esempio n. 3
0
def batch_parser(codes, area_type=1, media_path="", with_log=False, catalog_path="", coord_out="EPSG:3857",
                 file_name="example", output=os.path.join("output"), repeat=0, areas=None, with_attrs=False, delay=1,
                 center_only=False, with_proxy=False):
    """Parse a list of area codes and write per-area and batch outputs.

    Each code is first searched in the catalog; codes not found there are
    downloaded via ``Area``, sleeping ``delay`` seconds before every download
    that follows a successful one. Progress lines and a summary are printed.

    :param codes: sized iterable of code strings (whitespace is stripped)
    :param area_type: forwarded to ``Area`` / ``restore_area``
    :param media_path: forwarded to ``Area`` / ``restore_area``
    :param with_log: forwarded to ``Area`` / ``restore_area``
    :param catalog_path: argument for ``Catalog``; when the catalog cannot be
        created a message is printed and the function returns
    :param coord_out: output coordinate system identifier
    :param file_name: base name of the batch output files
    :param output: output location handed to the ``*_output`` helpers
    :param repeat: number of extra passes over the codes that failed
    :param areas: list collecting parsed areas, extended in place and shared
        with retry passes; a fresh list is created when ``None``
    :param with_attrs: forwarded to the JSON output helpers
    :param delay: seconds to sleep between consecutive downloads
    :param center_only: forwarded to ``Area`` / ``restore_area``
    :param with_proxy: forwarded to ``Area`` / ``restore_area``
    :return: None
    """
    if areas is None:
        areas = []
    try:
        catalog = Catalog(catalog_path)
    except Exception:
        # A bare ``except`` would also swallow KeyboardInterrupt/SystemExit.
        print("Catalog is required for batch mode!")
        return
    with_error = []
    with_no_coord = []
    success = 0
    from_catalog = 0
    print("================================")
    print("Launched parsing of %i areas:" % len(codes))
    print("================================")
    need_sleep = 0
    features = []
    try:
        for c in codes:
            area = None
            code = c.strip()
            print("%s" % code, end="")
            restore = catalog.find(code)
            if not restore:
                try:
                    sleep(need_sleep)
                    area = Area(code, media_path=media_path, area_type=area_type, with_log=with_log,
                                coord_out=coord_out, center_only=center_only, with_proxy=with_proxy)
                    # Throttle only once a real request has been issued.
                    need_sleep = delay
                    catalog.refresh(area)
                    if len(area.get_coord()) > 0:
                        print(" - ok", end="")
                        success += 1
                    else:
                        print(" - no coord", end="")
                        with_no_coord.append(area)
                except TimeoutException:
                    print(" - error")
                    print("Your IP is probably blocked. Try later or use proxy")
                    # No point in hammering the service any further.
                    break
                except Exception:
                    print(" - error", end="")
                    with_error.append(code)
            else:
                from_catalog += 1
                area = restore_area(restore, media_path=media_path, area_type=area_type, with_log=with_log,
                                    coord_out=coord_out, center_only=center_only, with_proxy=with_proxy)
                if restore["image_path"]:
                    print(" - ok, from catalog", end="")
                    success += 1
                else:
                    print(" - no_coord, from catalog", end="")
                    with_no_coord.append(area)
            percent = ((success + len(with_error) + len(with_no_coord)) / len(codes)) * 100
            print(", %i%%" % percent)

            if area:
                areas.append(area)
                feature = area_json_output(output, area, with_attrs)
                if feature:
                    features.append(feature)
                area_csv_output(output, area)
    finally:
        # Always release the catalog, even when an output helper fails.
        catalog.close()

    print("=================")
    print("Parsing complate:")
    print("  success     : %i" % success)
    print("  error       : %i" % len(with_error))
    print("  no_coord    : %i" % len(with_no_coord))
    print("  from catalog: %i" % from_catalog)
    print("-----------------")

    if with_error and repeat:
        print("Retries parse areas with error")
        # Pass every option on: with_attrs and center_only were previously
        # lost here, so retried codes were processed with the defaults.
        batch_parser(with_error, area_type=area_type, media_path=media_path, with_log=with_log, file_name=file_name,
                     catalog_path=catalog_path, coord_out=coord_out, repeat=repeat - 1, areas=areas, output=output,
                     with_attrs=with_attrs, delay=delay, center_only=center_only, with_proxy=with_proxy)
    else:
        path = batch_csv_output(output, areas, file_name)
        print("Create output complete: %s" % path)
        if with_no_coord:
            path = batch_csv_output(output, with_no_coord, "%s_no_coord" % file_name)
            print("Create output for no_coord complete: %s" % path)
        # NOTE(review): ``features`` is local to this pass; after a retry the
        # batch JSON is skipped if the retry produced no feature, even when
        # ``areas`` holds earlier results - confirm whether that is intended.
        if features:
            batch_json_output(output, areas, file_name, with_attrs, coord_out)
        if with_error:
            print("-----------------")
            print("Error list:")
            for e in with_error:
                print(e)
Esempio n. 4
0
class Area:
    """An area identified by ``code``: its attributes, extent and geometry.

    Attributes are downloaded from the feature-info URL; the polygon
    geometry is reconstructed from a raster image of the area by contour
    detection (see ``get_image_xy_corner``).
    """

    image_url = IMAGE_URL
    # Margin added on every side of the extent before the image is requested.
    buffer = 10
    # Attribute names copied back onto the instance by ``restore``.
    save_attrs = [
        "code", "area_type", "attrs", "image_path", "center", "extent",
        "image_extent", "width", "height"
    ]

    def __init__(self,
                 code="",
                 area_type=1,
                 epsilon=5,
                 media_path="",
                 with_log=True,
                 catalog="",
                 coord_out="EPSG:3857",
                 center_only=False,
                 with_proxy=False):
        """Create the area and, if ``code`` is given, load its data.

        Args:
            code: area code; when empty nothing is downloaded.
            area_type: substituted into the search / feature-info URLs.
            epsilon: approximation accuracy passed to ``cv2.approxPolyDP``.
            media_path: working directory (created if missing); defaults to
                the current working directory.
            with_log: when true, ``log`` prints its messages.
            catalog: argument for ``Catalog``; when truthy a stored record
                for ``code`` is restored instead of downloading.
            coord_out: output coordinate system; ``"EPSG:4326"`` makes
                coordinates go through ``xy2lonlat``.
            center_only: use only the centre point as geometry.
            with_proxy: forwarded to the module-level ``make_request``.
        """
        self.with_log = with_log
        self.area_type = area_type
        self.media_path = media_path
        self.image_url = ""
        self.xy = []  # [[[area1], [hole1], [holeN]], [[area2]]]
        self.image_xy_corner = []  # cartesian coord from image, for draw plot
        self.width = 0
        self.height = 0
        self.image_path = ""
        self.extent = {}
        self.image_extent = {}
        self.center = {'x': None, 'y': None}
        self.center_only = center_only
        self.attrs = {}
        self.epsilon = epsilon
        self.code = code
        self.code_id = ""
        self.file_name = self.code.replace(":", "_")
        self.with_proxy = with_proxy

        self.coord_out = coord_out

        t = string.Template(SEARCH_URL)
        self.search_url = t.substitute({"area_type": area_type})
        t = string.Template(FEATURE_INFO_URL)
        self.feature_info_url = t.substitute({"area_type": area_type})

        if not self.media_path:
            self.media_path = os.getcwd()
        if not os.path.isdir(self.media_path):
            os.makedirs(self.media_path)
        if catalog:
            self.catalog = Catalog(catalog)
            restore = self.catalog.find(self.code)
            if restore:
                self.restore(restore)
                self.log("%s - restored from %s" % (self.code, catalog))
                return
        if not code:
            return

        feature_info = self.download_feature_info()
        if feature_info:
            geometry = self.get_geometry()
            if catalog and geometry:
                self.catalog.update(self)
                self.catalog.close()
        else:
            self.log("Nothing found")

    def restore(self, restore):
        """Load the attributes listed in ``save_attrs`` from a mapping.

        ``restore`` must contain every key of ``save_attrs``. The geometry
        is recomputed afterwards via ``get_geometry``.
        """
        for a in self.save_attrs:
            setattr(self, a, restore[a])
        self.code_id = self.code
        self.get_geometry()
        self.file_name = self.code.replace(":", "_")

    def get_coord(self):
        """Return the polygon coordinates, else the centre, else ``[]``."""
        return self.xy or self.get_center_xy() or []

    def get_attrs(self):
        """Return the attribute dictionary of the area."""
        return self.attrs

    def _get_attrs_to_geojson(self):
        """Return ``self.attrs`` with string values utf-8 encoded, stripped.

        The values are replaced in place inside ``self.attrs``.
        """
        # NOTE(review): ``basestring`` only exists on Python 2 unless the
        # module defines a compatibility alias - confirm.
        if self.attrs:
            for a in self.attrs:
                attr = self.attrs[a]
                if isinstance(attr, basestring):
                    try:
                        attr = attr.encode('utf-8').strip()
                        self.attrs[a] = attr
                    except Exception:
                        # Best effort: keep the original value unchanged.
                        pass
        return self.attrs

    def to_geojson_poly(self, with_attrs=False, dumps=True):
        """Shortcut for ``to_geojson("polygon", ...)``."""
        return self.to_geojson("polygon", with_attrs, dumps)

    def to_geojson_center(self, with_attrs=False, dumps=True):
        """Return the centre point as GeoJSON regardless of ``center_only``.

        ``center_only`` is forced on for the duration of the call and is
        restored afterwards, even if the conversion raises.
        """
        current_center_status = self.center_only
        self.center_only = True
        try:
            return self.to_geojson("point", with_attrs, dumps)
        finally:
            self.center_only = current_center_status

    def to_geojson(self, geom_type="point", with_attrs=False, dumps=True):
        """Convert the area to GeoJSON via ``coords2geojson``.

        Args:
            geom_type: geometry type handed to ``coords2geojson``; forced
                to ``"point"`` when ``center_only`` is set.
            with_attrs: include the area attributes.
            dumps: return a JSON string instead of the raw structure.

        Returns:
            The JSON string / structure, or ``False`` when there are no
            coordinates or ``coords2geojson`` returned nothing.
        """
        attrs = self._get_attrs_to_geojson() if with_attrs else False
        if self.center_only:
            xy = self.get_center_xy()
            geom_type = "point"
        else:
            xy = self.xy
        if xy:
            feature_collection = coords2geojson(xy,
                                                geom_type,
                                                self.coord_out,
                                                attrs=attrs)
            if feature_collection:
                if dumps:
                    return json.dumps(feature_collection)
                return feature_collection
        return False

    def get_center_xy(self):
        """Return the centre as ``[[[[x, y]]]]`` or ``False`` if unknown."""
        center = self.attrs.get("center")
        if center:
            return [[[[center["x"], center["y"]]]]]
        return False

    def make_request(self, url):
        """Fetch ``url`` through the module-level ``make_request``."""
        return make_request(url, self.with_proxy)

    def download_feature_info(self):
        """Download the feature info and store attrs, extent and centre.

        Returns:
            The ``"feature"`` entry of the response (may be ``None``), or
            ``False`` when the response is empty or any error occurred.

        Raises:
            TimeoutException: propagated unchanged so callers can abort.
        """
        try:
            search_url = self.feature_info_url + self.clear_code(self.code)
            self.log("Start downloading area info: %s" % search_url)
            data = json.loads(self.make_request(search_url))
            if data:
                feature = data.get("feature")
                if feature:
                    attrs = feature.get("attrs")
                    if attrs:
                        self.attrs = attrs
                        self.code_id = attrs["id"]
                    if feature.get("extent"):
                        self.extent = feature["extent"]
                    if feature.get("center"):
                        x = feature["center"]["x"]
                        y = feature["center"]["y"]
                        if self.coord_out == "EPSG:4326":
                            (x, y) = xy2lonlat(x, y)
                        self.center = {"x": x, "y": y}
                        self.attrs["center"] = self.center
                        self.log("Area info downloaded.")
                return feature
        except TimeoutException:
            # Re-raise the original exception to keep message and traceback.
            raise
        except Exception as error:
            self.error(error)
        return False

    @staticmethod
    def clear_code(code):
        """Remove leading zeros from code parts: xxxx:00xx >> xxxx:xx"""
        return ":".join(str(int(part)) for part in code.split(":"))

    @staticmethod
    def get_extent_list(extent):
        """Convert an extent dict to ``[xmin, ymin, xmax, ymax]``."""
        return [extent["xmin"], extent["ymin"], extent["xmax"], extent["ymax"]]

    def get_buffer_extent_list(self):
        """Return the extent as an ordered list enlarged by ``buffer``.

        When the extent is empty or its ``xmin`` is falsy a message is
        logged and the stored extent itself is returned unchanged.
        """
        ex = self.extent
        buf = self.buffer
        if ex and ex["xmin"]:
            ex = [
                ex["xmin"] - buf, ex["ymin"] - buf, ex["xmax"] + buf,
                ex["ymax"] + buf
            ]
        else:
            self.log("Area has no coordinates")
        return ex

    def get_geometry(self):
        """Return the centre or the image-derived geometry."""
        if self.center_only:
            return self.get_center_xy()
        return self.parse_geometry_from_image()

    def parse_geometry_from_image(self):
        """Download the area image and extract the geometry from it.

        Tiles are stored in ``<media_path>/tmp``. Returns the result of
        ``get_image_geometry`` or ``None`` when there is no extent.
        """
        formats = ["png"]
        tmp_dir = os.path.join(self.media_path, "tmp")
        if not os.path.isdir(tmp_dir):
            os.makedirs(tmp_dir)
        for f in formats:
            bbox = self.get_buffer_extent_list()
            if bbox:
                # Reuse the extent computed above instead of recomputing it.
                image = PkkAreaMerger(bbox=bbox,
                                      output_format=f,
                                      with_log=self.with_log,
                                      clear_code=self.clear_code(self.code_id),
                                      output_dir=tmp_dir,
                                      make_request=self.make_request)
                image.download()
                self.image_path = image.merge_tiles()
                self.width = image.real_width
                self.height = image.real_height
                self.image_extent = image.image_extent

                if image:
                    return self.get_image_geometry()

    def get_image_geometry(self):
        """
        get corner geometry array from downloaded image
        [area1],[area2] - may be multipolygon geometry
           |
        [self],[hole_1],[hole_N]     - holes is optional
           |
        [coord1],[coord2],[coord3]   - min 3 coord for polygon
           |
         [x,y]                       - coordinate pair

         Example:
             [[ [ [x,y],[x,y],[x,y] ], [ [x,y],[x,y],[x,y] ], ], [ [x,y],[x,y],[x,y] ], [ [x,y],[x,y],[x,y] ] ]
                -----------------first polygon-----------------  ----------------second polygon--------------
                ----outer contour---   --first hole contour-
        """
        image_xy_corner = self.image_xy_corner = self.get_image_xy_corner()
        if image_xy_corner:
            # Build fresh lists so the pixel coordinates kept in
            # ``image_xy_corner`` stay untouched.
            self.xy = [[self.image_corners_to_coord(contour)
                        for contour in polygon]
                       for polygon in image_xy_corner]
            return self.xy
        return []

    def get_image_xy_corner(self):
        """Get cartesian (pixel) contour coordinates from the raster.

        Returns a list of polygons, each a list of contours, each a list
        of ``[x, y]`` pixel pairs; ``False`` when no image is available
        and ``[]`` when contour detection failed.
        """
        import cv2

        if not self.image_path:
            return False
        image_xy_corners = []
        img = cv2.imread(self.image_path, cv2.IMREAD_GRAYSCALE)
        imagem = (255 - img)

        try:
            _, thresh = cv2.threshold(imagem, 10, 128, cv2.THRESH_BINARY)
            try:
                contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE,
                                                       cv2.CHAIN_APPROX_SIMPLE)
            except Exception:
                # Fallback for a findContours that returns three values.
                im2, contours, hierarchy = cv2.findContours(
                    thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)

            hierarchy = hierarchy[0]
            grouped = [[] for _ in range(len(hierarchy))]
            for idx, (contour, node) in enumerate(zip(contours, hierarchy)):
                approx = cv2.approxPolyDP(contour, self.epsilon, True)
                if len(approx) > 2:
                    points = [[c[0][0], c[0][1]] for c in approx]
                    # node[3] is the parent index: a contour with a parent
                    # is stored in the parent's group, otherwise in its own.
                    parent_index = node[3]
                    index = idx if parent_index < 0 else parent_index
                    grouped[index].append(points)

            image_xy_corners = [c for c in grouped if len(c) > 0]
            return image_xy_corners
        except Exception as ex:
            self.error(ex)
        return image_xy_corners

    def image_corners_to_coord(self, image_xy_corners):
        """Calculate spatial coordinates from cartesian (pixel) ones.

        ``image_xy_corners`` is an iterable of ``(x, y)`` pixel pairs; the
        image extent and size are used to scale them. The y axis is
        flipped because pixel rows grow downwards.
        """
        ex = self.get_extent_list(self.image_extent)
        dx = ((ex[2] - ex[0]) / self.width)
        dy = ((ex[3] - ex[1]) / self.height)
        xy_corners = []
        for im_x, im_y in image_xy_corners:
            x = ex[0] + (im_x * dx)
            y = ex[3] - (im_y * dy)
            if self.coord_out == "EPSG:4326":
                (x, y) = xy2lonlat(x, y)
            xy_corners.append([x, y])
        return xy_corners

    def show_plot(self):
        """Development tool: show the image with detected corners marked."""
        import cv2
        try:
            from matplotlib import pyplot as plt
        except ImportError:
            self.error('Matplotlib is not installed.')
            raise ImportError('Matplotlib is not installed.')

        img = cv2.imread(self.image_path)
        # image_xy_corner is polygons -> contours -> [x, y] pairs; the
        # previous two-level loop tried to unpack whole contours as (x, y).
        for polygon in self.image_xy_corner:
            for contour in polygon:
                for x, y in contour:
                    cv2.circle(img, (int(x), int(y)), 3, 255, -1)
        plt.imshow(img)
        plt.show()

    def log(self, msg):
        """Print ``msg`` when logging is enabled."""
        if self.with_log:
            print(msg)

    def error(self, msg):
        """Report an error; the message is included so it is not lost."""
        print('err: %s' % (msg,))
Esempio n. 5
0
def batch_parser(codes, area_type=1, media_path="", with_log=False, catalog_path="", coord_out="EPSG:3857",
                 file_name="example", output=os.path.join("output"), repeat=5, areas=None, with_attrs=False):
    """Parse a list of area codes and write per-area and batch outputs.

    Each code is searched in the catalog first; codes not found there are
    downloaded via ``Area`` and stored in the catalog. An area without
    coordinates counts as an error. Progress and a summary are printed.

    :param codes: sized iterable of code strings (whitespace is stripped)
    :param area_type: forwarded to ``Area``
    :param media_path: forwarded to ``Area``
    :param with_log: forwarded to ``Area``
    :param catalog_path: argument for ``Catalog``
    :param coord_out: output coordinate system identifier
    :param file_name: base name of the batch CSV output
    :param output: output location handed to the ``*_output`` helpers
    :param repeat: number of extra passes over the codes that failed
    :param areas: list collecting parsed areas, extended in place and shared
        with retry passes; a fresh list is created when ``None``
    :param with_attrs: forwarded to ``area_json_output``
    :return: None
    """
    if areas is None:
        areas = []
    catalog = Catalog(catalog_path)
    with_error = []
    success = 0
    from_catalog = 0
    print("================================")
    print("Launched parsing of %i areas:" % len(codes))
    print("================================")
    try:
        for c in codes:
            code = c.strip()
            print("%s" % code, end="")
            restore = catalog.find(code)
            if not restore:
                try:
                    area = Area(code, media_path=media_path, area_type=area_type, with_log=with_log,
                                coord_out=coord_out)
                    # Explicit check instead of ``assert``: assertions are
                    # stripped under ``python -O`` and would let areas
                    # without coordinates pass as successes.
                    if not len(area.get_coord()) > 0:
                        raise ValueError("no coordinates for %s" % code)
                    catalog.update(area)
                    print(" - ok", end="")
                    success += 1
                except Exception:
                    area = None
                    print(" - error", end="")
                    with_error.append(code)
            else:
                print(" - ok, from catalog", end="")
                success += 1
                from_catalog += 1
                area = restore_area(restore, coord_out)
            percent = ((success + len(with_error)) / len(codes)) * 100
            print(", %i%%" % percent)

            if area:
                areas.append(area)
                area_json_output(output, area, with_attrs)
                area_csv_output(output, area)
    finally:
        # Always release the catalog, even when an output helper fails.
        catalog.close()

    print("=================")
    print("Parsing complate:")
    print("  success     : %i" % success)
    print("  error       : %i" % len(with_error))
    print("  from catalog: %i" % from_catalog)
    print("-----------------")

    if with_error and repeat:
        print("Retries parse areas with error")
        # with_attrs is forwarded too; it used to be dropped on retries.
        batch_parser(with_error, area_type=area_type, media_path=media_path, with_log=with_log, file_name=file_name,
                     catalog_path=catalog_path, coord_out=coord_out, repeat=repeat - 1, areas=areas, output=output,
                     with_attrs=with_attrs)
    else:
        path = batch_csv_output(output, areas, file_name)
        print("Create output complete: %s" % path)
        if with_error:
            print("-----------------")
            print("Error list:")
            for e in with_error:
                print(e)