def __detect_directs(self, start):
    """Probe every direction in ``self.directs`` starting from ``start``.

    For each direction the start point is moved sideways, along
    ``abs(direct.ort())``, by every integer offset in
    ``[-self.line_width, self.line_width]``. The largest value returned by
    ``self.__iter_direct`` is kept together with the offset that produced
    it, and the offset scan stops as soon as ``self.__is_detect`` accepts
    the best value found so far.

    :param start: point the probing starts from; it must support ``+`` with
        the offset ``abs(direct.ort()) * step_width``.
    :return: ``None`` when ``self.components`` is ``None``; otherwise a
        tuple ``(detected, shift)`` where ``detected`` holds one
        ``self.__is_detect`` result per direction (in ``self.directs``
        order) and ``shift`` accumulates the offsets of accepted
        directions.
    """
    if self.components is None:
        return None

    best_steps = []
    shift = Point()
    for direct in self.directs:
        best_step = 0
        best_offset = 0
        for step_width in range(-self.line_width, self.line_width + 1):
            offset = abs(direct.ort()) * step_width
            candidate = self.__iter_direct(start + offset, direct)
            if best_step < candidate:
                best_step = candidate
                best_offset = offset
            if self.__is_detect(best_step):
                break

        if self.__is_detect(best_step):
            # An axis of ``shift`` is only filled while it is still zero.
            # The y condition is evaluated after the x update on purpose.
            fills_x = shift.x == 0 and best_offset.x != 0
            shift += best_offset if fills_x else Point(0, 0)
            fills_y = shift.y == 0 and best_offset.y != 0
            shift += best_offset if fills_y else Point(0, 0)
        best_steps.append(best_step)

    return [self.__is_detect(step) for step in best_steps], shift
def process_image(self):
    """Detect the table structure in the current image and build the table.

    The image is loaded, reduced to line points by ``self.lines_detector``
    and labelled by ``self.connected_components``. Active points selected
    from the components are merged when they lie close together, their x
    and y coordinates are aligned, and the sorted unique coordinates are
    stored in ``self.__x`` / ``self.__y`` before the table is created.

    :return: ``None`` if the image fails to load; ``[]`` if there are no
        line points, if ``get_zone_labels()`` equals 0, or if no active
        points are selected; otherwise the table produced by
        ``self.__create_table()``.
    """
    if not self.__image.load():
        return None

    self.__unique_clusters = defaultdict()
    self.__x = []
    self.__y = []

    # TODO: move the connected-component extraction here
    self.__image = self.lines_detector.get_points(self.__image)
    if len(self.__image.description.points) == 0:
        return []

    self.__image, components = self.connected_components.transform(
        self.__image)
    if self.__image.get_zone_labels() == 0:
        return []

    # TODO: with broken borders, look at the first several clusters
    # TODO: (see 9.pdf, page 1). It may be enough to compute the Minkowski
    # TODO: dimension instead of extracting connected components
    # TODO: (https://habrahabr.ru/post/208368/)
    # Select the nodal points.
    active_points = self.__select_points(components)
    if len(active_points) == 0:
        return []

    # Take the first element of every group, then merge points that lie
    # close to each other into a single representative.
    active_points_list = [next(iter(group)) for group in active_points]
    unique_active_points = self.__get_almost_unique_points(
        active_points_list, Point.mean_points)

    # Pair each coordinate with the index of the point it came from.
    indexed_points = list(enumerate(unique_active_points))
    self.__x = sorted(([point.x, ind] for ind, point in indexed_points),
                      key=itemgetter(0))
    self.__y = sorted(([point.y, ind] for ind, point in indexed_points),
                      key=itemgetter(0))

    def replace_x(arg, x):
        return Point(x=x, y=arg.y)

    def replace_y(arg, y):
        return Point(y=y, x=arg.x)

    self.__x, unique_active_points = self.__alignment_coords(
        self.__x, unique_active_points, replace_x)
    self.__y, unique_active_points = self.__alignment_coords(
        self.__y, unique_active_points, replace_y)

    self.__x = sorted(set(self.__x))
    self.__y = sorted(set(self.__y))

    # NOTE(review): the OCR engine is cleared right before it is checked,
    # so the recognition call below is currently unreachable - confirm
    # whether this is a deliberate switch-off.
    self.ocr = None
    if self.ocr is None:
        return self.__create_table()
    return self.ocr.recognize_table(self.__image.matrix,
                                    self.__create_table())
def __init__(self, max_steps=12, line_width=2, detected_steps=11):
    """Store the search parameters and set up the four probing directions.

    :param max_steps: stored as ``self.max_steps`` (presumably the longest
        walk along one direction - confirm against ``__iter_direct``).
    :param line_width: stored as ``self.line_width``; ``__detect_directs``
        tries lateral offsets in ``[-line_width, line_width]``.
    :param detected_steps: stored as ``self.detected_steps`` (presumably
        the threshold used by ``__is_detect`` - confirm).
    """
    self.max_steps = max_steps
    self.line_width = line_width
    self.detected_steps = detected_steps

    # Unit steps along the axes, in the order the results are reported.
    unit_offsets = ((0, 1), (1, 0), (-1, 0), (0, -1))
    self.directs = [Point(x=dx, y=dy) for dx, dy in unit_offsets]

    # One flag per direction: True when its absolute value is (0, 1).
    reference = Point(x=0, y=1)
    self.the_same_directs = [
        abs(direct) == reference for direct in self.directs
    ]

    self.components = None
    self.labels = set()
def __get_points(cls, horizontal, vertical):
    """Build a point for every pairing of a horizontal and a vertical value.

    :param horizontal: iterable of values used as the ``y`` coordinate.
    :param vertical: iterable of values used as the ``x`` coordinate; it is
        iterated once per element of ``horizontal``.
    :return: list of ``Point`` objects, ordered by ``horizontal`` first and
        ``vertical`` second.
    """
    return [
        Point(y=y_value, x=x_value)
        for y_value in horizontal
        for x_value in vertical
    ]
def __create_zone(cls, label, components):
    """Build the ``Rectangle`` describing one labelled connected component.

    :param label: index of the component in the stats and centroid tables.
    :param components: indexable whose element ``[2]`` is the stats table,
        addressed as ``[label, cv2.CC_STAT_*]``, and whose element ``[3]``
        is the centroid table with one ``(x, y)`` row per label. This
        matches the tuple returned by
        ``cv2.connectedComponentsWithStats`` - confirm against the caller.
    :return: ``Rectangle`` with the bounding box, area, label and centroid
        of the component.
    """
    stats = components[2]
    left = stats[label, cv2.CC_STAT_LEFT]
    top = stats[label, cv2.CC_STAT_TOP]

    # The centroid table is indexed by label first. Reading rows 1 and 0
    # directly would return the centroids of labels 1 and 0 for every zone.
    centroid = components[3][label]

    return Rectangle(
        min_x=left,
        min_y=top,
        max_x=left + stats[label, cv2.CC_STAT_WIDTH],
        max_y=top + stats[label, cv2.CC_STAT_HEIGHT],
        total_area=stats[label, cv2.CC_STAT_AREA],
        label=label,
        centroid=Point(y=centroid[1], x=centroid[0]),
    )
def __get_almost_unique_points(self, active_points, mean):
    """Collapse groups of nearby points into one representative each.

    Every point is queried against a ``KDTree`` built from all points.
    Points are visited in input order; a point that has not yet been
    absorbed by an earlier group forms a new group from those of its
    nearest neighbours that lie closer than ``self.opt_dist``, and the
    group is reduced to a single point by ``mean``.

    :param active_points: sequence of points indexable as ``p[0]`` (x) and
        ``p[1]`` (y), in a form accepted by ``KDTree``.
    :param mean: callable that receives a list of ``Point`` objects and
        returns their representative.
    :return: list with one representative per group, in visiting order;
        empty when ``active_points`` is empty.
    """
    if len(active_points) == 0:
        return []

    # KDTree.query rejects k greater than the number of indexed points, so
    # small inputs must not ask for more neighbours than exist.
    neighbour_count = min(self.knn, len(active_points))
    tree = KDTree(active_points, leaf_size=self.leaf_size)
    distances, indices = tree.query(active_points, k=neighbour_count)

    representatives = []
    consumed = set()
    for row, (row_distances, row_indices) in enumerate(
            zip(distances, indices)):
        if row in consumed:
            continue
        consumed.add(row)

        # Boolean mask keeps only the neighbours within the merge radius.
        close = row_indices[row_distances < self.opt_dist]
        consumed.update(close)

        group = [
            Point(x=active_points[neighbour][0],
                  y=active_points[neighbour][1])
            for neighbour in close
        ]
        representatives.append(mean(group))
    return representatives