def __init__(self, engine_sim_interpolation: int = None, *_, **__):
    """
    Prepare the engine and pick the interpolation flag it will use.

    :param engine_sim_interpolation: interpolation flag (presumably an
        OpenCV ``cv2.INTER_*`` value - confirm against the caller);
        ``None`` selects ``self.DEFAULT_INTERPOLATION``
    :param _: extra positional arguments, ignored
    :param __: extra keyword arguments, ignored
    """
    logger.info(f'engine {self.get_type()} preparing ...')

    # Compare against None instead of relying on truthiness: 0 is a valid
    # flag value (cv2.INTER_NEAREST == 0) and must not be replaced by the
    # default just because it is falsy.
    if engine_sim_interpolation is None:
        engine_sim_interpolation = self.DEFAULT_INTERPOLATION
    self.engine_sim_interpolation = engine_sim_interpolation

    # report the value actually in effect, not the class default
    logger.debug(f'interpolation: {self.engine_sim_interpolation}')
    logger.info(f'engine {self.get_type()} loaded')
def __init__(self,
             engine_template_cv_method_name: str = None,
             engine_template_scale: typing.Sequence = None,
             engine_template_multi_target_max_threshold: float = None,
             engine_template_multi_target_distance_threshold: float = None,
             *_, **__):
    """
    Prepare the engine; every ``engine_template_*`` argument is stored
    without its prefix.

    eg: engine_template_cv_method_name -> cv_method_name

    :param engine_template_cv_method_name: expression naming the matching
        method; falls back to ``self.DEFAULT_CV_METHOD_NAME``
    :param engine_template_scale: scale settings; falls back to
        ``self.DEFAULT_SCALE``
    :param engine_template_multi_target_max_threshold: ``None`` selects
        ``self.DEFAULT_MULTI_TARGET_MAX_THRESHOLD``
    :param engine_template_multi_target_distance_threshold: ``None``
        selects ``self.DEFAULT_MULTI_TARGET_DISTANCE_THRESHOLD``
    :param _: extra positional arguments, ignored
    :param __: extra keyword arguments, ignored
    """
    logger.info(f'engine {self.get_type()} preparing ...')

    # cv
    self.cv_method_name = (engine_template_cv_method_name
                           or self.DEFAULT_CV_METHOD_NAME)
    # NOTE(review): eval() runs the configured string as Python code. Only
    # pass trusted values here; consider resolving the name with getattr()
    # on the cv2 module instead.
    self.cv_method_code = eval(self.cv_method_name)

    # scale (an empty sequence is unusable, so any falsy value -> default)
    self.scale = engine_template_scale or self.DEFAULT_SCALE

    # multi target max threshold ( max_val * max_threshold == real threshold )
    # Thresholds are tested against None so an explicit 0.0 is honoured
    # instead of being silently replaced by the default.
    if engine_template_multi_target_max_threshold is None:
        engine_template_multi_target_max_threshold = \
            self.DEFAULT_MULTI_TARGET_MAX_THRESHOLD
    self.multi_target_max_threshold = engine_template_multi_target_max_threshold

    if engine_template_multi_target_distance_threshold is None:
        engine_template_multi_target_distance_threshold = \
            self.DEFAULT_MULTI_TARGET_DISTANCE_THRESHOLD
    self.multi_target_distance_threshold = \
        engine_template_multi_target_distance_threshold

    logger.debug(f'cv method: {self.cv_method_name}')
    logger.debug(f'scale: {self.scale}')
    logger.debug(
        f'multi target max threshold: {self.multi_target_max_threshold}')
    logger.debug(
        f'multi target distance threshold: {self.multi_target_distance_threshold}'
    )
    logger.info(f'engine {self.get_type()} loaded')
def __init__(self,
             engine_feature_cluster_num: int = None,
             engine_feature_distance_threshold: float = None,
             *_, **__):
    """
    Prepare the engine and store its two tunables.

    :param engine_feature_cluster_num: stored as ``cluster_num``; a falsy
        value selects ``self.DEFAULT_CLUSTER_NUM``
    :param engine_feature_distance_threshold: stored as
        ``distance_threshold``; a falsy value selects
        ``self.DEFAULT_DISTANCE_THRESHOLD``
    :param _: extra positional arguments, ignored
    :param __: extra keyword arguments, ignored
    """
    logger.info('engine {} preparing ...'.format(self.get_type()))

    # for kmeans calculation
    chosen_cluster_num = engine_feature_cluster_num
    if not chosen_cluster_num:
        chosen_cluster_num = self.DEFAULT_CLUSTER_NUM
    self.cluster_num: int = chosen_cluster_num

    # for feature matching
    chosen_distance = engine_feature_distance_threshold
    if not chosen_distance:
        chosen_distance = self.DEFAULT_DISTANCE_THRESHOLD
    self.distance_threshold: float = chosen_distance

    logger.debug('cluster num: {}'.format(self.cluster_num))
    logger.debug('distance threshold: {}'.format(self.distance_threshold))
    logger.info('engine {} loaded'.format(self.get_type()))
def __init__(
    self,
    engine_template_cv_method_name: str = None,
    engine_template_scale: typing.Sequence = None,
    engine_template_multi_target_max_threshold: float = None,
    engine_template_multi_target_distance_threshold: float = None,
    engine_template_compress_rate: float = None,
    *_,
    **__,
):
    """
    Prepare the engine and store every ``engine_template_*`` setting.

    :param engine_template_cv_method_name: expression naming the matching
        method; falls back to ``self.DEFAULT_CV_METHOD_NAME``
    :param engine_template_scale: scale settings; falls back to
        ``self.DEFAULT_SCALE``
    :param engine_template_multi_target_max_threshold: ``None`` selects
        ``self.DEFAULT_MULTI_TARGET_MAX_THRESHOLD``
    :param engine_template_multi_target_distance_threshold: ``None``
        selects ``self.DEFAULT_MULTI_TARGET_DISTANCE_THRESHOLD``
    :param engine_template_compress_rate: falls back to
        ``self.DEFAULT_COMPRESS_RATE``
    :param _: extra positional arguments, ignored
    :param __: extra keyword arguments, ignored
    """
    logger.info(f"engine {self.get_type()} preparing ...")

    # cv
    self.engine_template_cv_method_name = (engine_template_cv_method_name
                                           or self.DEFAULT_CV_METHOD_NAME)
    # NOTE(review): eval() runs the configured string as Python code. Only
    # pass trusted values here; consider resolving the name with getattr()
    # on the cv2 module instead.
    self.engine_template_cv_method_code = eval(
        self.engine_template_cv_method_name)

    # scale (an empty sequence is unusable, so any falsy value -> default)
    self.engine_template_scale = engine_template_scale or self.DEFAULT_SCALE

    # multi target max threshold ( max_val * max_threshold == real threshold )
    # Thresholds are tested against None so an explicit 0.0 is honoured
    # instead of being silently replaced by the default.
    if engine_template_multi_target_max_threshold is None:
        engine_template_multi_target_max_threshold = (
            self.DEFAULT_MULTI_TARGET_MAX_THRESHOLD)
    self.engine_template_multi_target_max_threshold = (
        engine_template_multi_target_max_threshold)

    if engine_template_multi_target_distance_threshold is None:
        engine_template_multi_target_distance_threshold = (
            self.DEFAULT_MULTI_TARGET_DISTANCE_THRESHOLD)
    self.engine_template_multi_target_distance_threshold = (
        engine_template_multi_target_distance_threshold)

    # compression
    # NOTE(review): a rate of 0 is presumably unusable, so the falsy
    # fallback is kept here on purpose - confirm.
    self.engine_template_compress_rate = (engine_template_compress_rate
                                          or self.DEFAULT_COMPRESS_RATE)

    logger.debug(f"cv method: {self.engine_template_cv_method_name}")
    logger.debug(f"scale: {self.engine_template_scale}")
    logger.debug(
        f"multi target max threshold: {self.engine_template_multi_target_max_threshold}"
    )
    logger.debug(
        f"multi target distance threshold: {self.engine_template_multi_target_distance_threshold}"
    )
    logger.debug(f"compress rate: {self.engine_template_compress_rate}")
    logger.info(f"engine {self.get_type()} loaded")
def _find_without_template(self,
                           target_pic_object: np.ndarray,
                           target_pic_name: str = None,
                           *args, **kwargs) -> dict:
    """
    Run every engine in ``self.engine_list`` on the target, with no template.

    :param target_pic_object: picture handed to each engine
    :param target_pic_name: key of the returned dict, also used in logs
    :param args: forwarded to each engine's ``execute``
    :param kwargs: forwarded to each engine's ``execute``
    :return: ``{target_pic_name: {engine_type: pruned_result}}``
    """
    logger.debug(f'start analysing: [{target_pic_name}] ...')

    per_engine = {}
    for engine in self.engine_list:
        # engines receive None in the template slot
        raw_result = engine.execute(None, target_pic_object, *args, **kwargs)
        # result filter
        per_engine[engine.get_type()] = self._prune_result(raw_result)

    logger.debug(
        f'result for [{target_pic_name}]: {json.dumps(per_engine)}')
    return {target_pic_name: per_engine}
def get_feature_point_list(
        self,
        template_pic_object: np.ndarray,
        target_pic_object: np.ndarray) -> typing.Sequence[Point]:
    """
    compare via feature matching

    Detects SURF key points in both pictures, matches the template
    descriptors against the target descriptors and returns the target
    positions of the matches that survive the distance-ratio filter.

    :param template_pic_object: picture whose features are searched for
    :param target_pic_object: picture that is searched
    :return: list of ``Point`` in target coordinates; empty when either
        picture yields no descriptors or no match passes the filter
    """
    # Initiate SURF detector
    surf = cv2.xfeatures2d.SURF_create(self.engine_feature_min_hessian)

    # find the keypoints and descriptors with SURF
    template_kp, template_desc = surf.detectAndCompute(
        template_pic_object, None)
    target_kp, target_desc = surf.detectAndCompute(target_pic_object, None)

    # key points count
    logger.debug(f'template key point count: {len(template_kp)}')
    logger.debug(f'target key point count: {len(target_kp)}')

    # detectAndCompute yields None descriptors when nothing was detected;
    # the matcher cannot work with that, so report "no points" instead.
    if template_desc is None or target_desc is None:
        logger.debug('no descriptors found, skip matching')
        return []

    # find 2 points, which are the closest
    # Finding correspondences between frames means looking up, for every
    # descriptor of one set (the query set), its nearest neighbours in the
    # other set (the train set). Here we fetch the nearest and the
    # second-nearest neighbour of each descriptor.
    # A correct match is clearly closer to its first neighbour, while for a
    # wrong match both neighbours lie at similar distances, so the ratio of
    # the two distances tells good matches from bad ones.
    # more details: https://blog.csdn.net/liangjiubujiu/article/details/80418079
    flann = cv2.FlannBasedMatcher()
    matches = flann.knnMatch(template_desc, target_desc, k=2)
    # matches are something like:
    # [[<DMatch 0x12400a350>, <DMatch 0x12400a430>], [<DMatch 0x124d6a170>, <DMatch 0x124d6a450>]]
    logger.debug(f'matches num: {len(matches)}')

    # TODO here is a sample to show feature points
    # temp = cv2.drawMatchesKnn(template_pic_object, kp1, target_pic_object, kp2, matches, None, flags=2)
    # cv2.imshow('feature_points', temp)
    # cv2.waitKey(0)

    # filter for invalid points
    if len(matches) == 1:
        # only one matches: keep every neighbour of that single row
        good = matches[0]
    else:
        # more than one matches: ratio test; rows that do not carry both
        # neighbours cannot be judged and are skipped
        ratio = self.engine_feature_distance_threshold
        good = [
            pair[0] for pair in matches
            if len(pair) == 2 and pair[0].distance < ratio * pair[1].distance
        ]

    # get positions (trainIdx indexes the target key points)
    return [Point(*target_kp[each.trainIdx].pt) for each in good]
def __init__(self, engine_ocr_lang: str = None, *_, **__):
    """
    Prepare the OCR engine and record what ``tesserocr`` reports.

    :param engine_ocr_lang: language to use; a falsy value selects
        ``self.DEFAULT_LANGUAGE``
    :param _: extra positional arguments, ignored
    :param __: extra keyword arguments, ignored
    """
    logger.info(f'engine {self.get_type()} preparing ...')

    # check language data before execute function, not here.
    self.engine_ocr_lang = (engine_ocr_lang
                            if engine_ocr_lang
                            else self.DEFAULT_LANGUAGE)

    # get_languages() hands back the data dir and the language list
    tess_data_dir, available_langs = tesserocr.get_languages()
    self.engine_ocr_tess_data_dir = tess_data_dir
    self.engine_ocr_available_lang_list = available_langs

    logger.debug(f'target lang: {self.engine_ocr_lang}')
    logger.debug(f'tess data dir: {self.engine_ocr_tess_data_dir}')
    logger.debug(
        f'available language: {self.engine_ocr_available_lang_list}')
    logger.info(f'engine {self.get_type()} loaded')
def __init__(self, engine_ocr_lang: str = None, *_, **__):
    """
    Prepare the OCR engine and its ``findtext`` backend.

    :param engine_ocr_lang: language to use; a falsy value selects
        ``self.DEFAULT_LANGUAGE``
    :param _: extra positional arguments, ignored
    :param __: extra keyword arguments, ignored
    :raises AssertionError: if ``findtext`` is falsy
    """
    logger.info(f"engine {self.get_type()} preparing ...")

    # check language data before execute function, not here.
    self.engine_ocr_lang = engine_ocr_lang or self.DEFAULT_LANGUAGE
    self.engine_ocr_offset = self.DEFAULT_OFFSET
    self.engine_ocr_deep = self.DEFAULT_DEEP

    # NOTE(review): this check only helps if the module-level import leaves
    # `findtext` bound (e.g. to None) when the package is missing - confirm.
    assert findtext, "findtext should be installed if you want to use OCR engine"

    # Hand the backend the resolved language, not the raw argument, so that
    # it works with the same language this engine stores and logs (the raw
    # argument is None whenever the default applies).
    self._ft = findtext.FindText(lang=self.engine_ocr_lang)

    self.engine_ocr_tess_data_dir = self._ft.get_data_home()
    self.engine_ocr_available_lang_list = self._ft.get_available_lang()

    logger.debug(f"target lang: {self.engine_ocr_lang}")
    logger.debug(f"tess data dir: {self.engine_ocr_tess_data_dir}")
    logger.debug(
        f"available language: {self.engine_ocr_available_lang_list}")
    logger.info(f"engine {self.get_type()} loaded")
def _find_with_template(self,
                        target_pic_object: np.ndarray,
                        _mark_pic: bool = None,
                        *args, **kwargs) -> dict:
    """
    Run every engine against the target once per loaded template.

    :param target_pic_object: picture handed to each engine
    :param _mark_pic: debug switch; when truthy, a marked copy of the
        target is dumped through ``toolbox`` for each engine result
    :param args: forwarded to each engine's ``execute``
    :param kwargs: forwarded to each engine's ``execute``
    :return: ``{template_name: {engine_type: pruned_result}}``
    :raises AssertionError: if ``self.template`` is empty
    """
    # pre assert
    assert self.template, 'template is empty'

    all_results = {}
    for tpl_name, tpl_object in self.template.items():
        logger.debug(f'start analysing: [{tpl_name}] ...')

        per_engine = {}
        for engine in self.engine_list:
            outcome = engine.execute(tpl_object, target_pic_object,
                                     *args, **kwargs)

            # for debug ONLY!
            if _mark_pic:
                marked_pic = toolbox.mark_point(target_pic_object,
                                                outcome['target_point'],
                                                cover=False)
                dumped_path = toolbox.debug_cv_object(marked_pic)
                logger.debug(f'template: {tpl_name}, '
                             f'engine: {engine.get_type()}, '
                             f'path: {dumped_path}')

            # result filter
            per_engine[engine.get_type()] = self._prune_result(outcome)

        logger.debug(
            f'result for [{tpl_name}]: {json.dumps(per_engine)}'
        )
        all_results[tpl_name] = per_engine

    return all_results
def _find_with_template(self,
                        target_pic_object: np.ndarray,
                        _mark_pic: bool = None,
                        *args, **kwargs) -> dict:
    """
    Run every engine against the target once per template from
    ``self.template.load()``.

    :param target_pic_object: picture handed to each engine
    :param _mark_pic: debug switch; when truthy, a marked copy of the
        target is dumped through ``toolbox`` for each engine result
    :param args: forwarded to each engine's ``execute``
    :param kwargs: forwarded to each engine's ``execute``
    :return: ``{template_name: {engine_type: pruned_result}}``
    :raises AssertionError: if the template store reports it is empty
    """
    # pre assert
    assert not self.template.is_empty(), "template is empty"

    all_results = {}
    for tpl_name, tpl_object in self.template.load():
        logger.debug(f"start analysing: [{tpl_name}] ...")

        # todo lazy load
        per_engine = {}
        for engine in self.engine_list:
            outcome = engine.execute(tpl_object, target_pic_object,
                                     *args, **kwargs)

            # for debug ONLY!
            if _mark_pic:
                marked_pic = toolbox.mark_point(target_pic_object,
                                                outcome["target_point"],
                                                cover=False)
                dumped_path = toolbox.debug_cv_object(marked_pic)
                logger.debug(f"template: {tpl_name}, "
                             f"engine: {engine.get_type()}, "
                             f"path: {dumped_path}")

            # result filter
            per_engine[engine.get_type()] = self._prune_result(outcome)

        # objects json cannot encode natively are dumped via their __dict__
        logger.debug(
            f"result for [{tpl_name}]: {json.dumps(per_engine, default=lambda x: x.__dict__)}"
        )
        all_results[tpl_name] = per_engine

    return all_results
def __init__(self,
             engine_feature_cluster_num: int = None,
             engine_feature_distance_threshold: float = None,
             engine_feature_min_hessian: int = None,
             *_, **__):
    """
    Prepare the engine and store its three tunables.

    :param engine_feature_cluster_num: a falsy value selects
        ``self.DEFAULT_CLUSTER_NUM``
    :param engine_feature_distance_threshold: a falsy value selects
        ``self.DEFAULT_DISTANCE_THRESHOLD``
    :param engine_feature_min_hessian: a falsy value selects
        ``self.DEFAULT_MIN_HESSIAN``
    :param _: extra positional arguments, ignored
    :param __: extra keyword arguments, ignored
    """
    logger.info(f'engine {self.get_type()} preparing ...')

    # for kmeans calculation
    self.engine_feature_cluster_num: int = (
        engine_feature_cluster_num
        if engine_feature_cluster_num
        else self.DEFAULT_CLUSTER_NUM
    )

    # for feature matching
    self.engine_feature_distance_threshold: float = (
        engine_feature_distance_threshold
        if engine_feature_distance_threshold
        else self.DEFAULT_DISTANCE_THRESHOLD
    )

    # for determining if a point is a feature point
    # higher threshold, less points
    self.engine_feature_min_hessian: int = (
        engine_feature_min_hessian
        if engine_feature_min_hessian
        else self.DEFAULT_MIN_HESSIAN
    )

    logger.debug(f'cluster num: {self.engine_feature_cluster_num}')
    logger.debug(
        f'distance threshold: {self.engine_feature_distance_threshold}')
    logger.debug(f'hessian threshold: {self.engine_feature_min_hessian}')
    logger.info(f'engine {self.get_type()} loaded')
def _compare_template(self,
                      template_pic_object: np.ndarray,
                      target_pic_object: np.ndarray,
                      scale: typing.Sequence,
                      mask_pic_object: np.ndarray = None) -> typing.Sequence:
    """
    compare via template matching
    (https://www.pyimagesearch.com/2015/01/26/multi-scale-template-matching-using-python-opencv/)

    The template is resized once per scale step and matched against the
    target; the step with the highest ``max_val`` wins.

    :param template_pic_object: template picture
    :param target_pic_object: picture that is searched
    :param scale: arguments for ``np.linspace``, default to (1, 3, 10)
    :param mask_pic_object: optional mask, resized with the template
    :return: min_val, max_val, min_loc, max_loc, point_list
    :raises IndexError: if no scale step produced a template that fits
        inside the target
    """
    pic_height, pic_width = target_pic_object.shape[:2]
    result_list = list()

    for each_scale in np.linspace(*scale):
        # resize template
        resize_template_pic_object = toolbox.resize_pic_scale(
            template_pic_object, each_scale)

        # resize mask
        # Always start from the ORIGINAL mask. Re-assigning the parameter
        # here would compound the scale on every iteration and leave the
        # mask with a different size than the template.
        resize_mask_pic_object = None
        if mask_pic_object is not None:
            resize_mask_pic_object = toolbox.resize_pic_scale(
                mask_pic_object, each_scale)

        # if template's size is larger than raw picture, break
        template_height, template_width = resize_template_pic_object.shape[:2]
        if template_height > pic_height or template_width > pic_width:
            break

        res = cv2.matchTemplate(target_pic_object,
                                resize_template_pic_object,
                                self.cv_method_code,
                                mask=resize_mask_pic_object)
        # each of current result is:
        # [(min_val, max_val, min_loc, max_loc), point_list, shape]
        current_result = [
            *self._parse_res(res), resize_template_pic_object.shape
        ]
        result_list.append(current_result)

    # too much log here, remove it.
    # logger.debug('scale search result: {}'.format(result_list))

    # Same exception type the bare [-1] lookup used to raise, but with a
    # message that says what actually went wrong.
    if not result_list:
        raise IndexError(
            'template picture is larger than the target picture at every '
            'scale; pick another template or change the scale')

    # get the best one (highest max_val; the last one wins on ties)
    loc_val, point_list, shape = sorted(result_list,
                                        key=lambda i: i[0][1])[-1]
    min_val, max_val, min_loc, max_loc = loc_val

    # fix position
    logger.debug('raw compare result: {}, {}, {}, {}'.format(
        min_val, max_val, min_loc, max_loc))
    min_loc, max_loc = (
        list(toolbox.fix_location(shape, each_location))
        for each_location in (min_loc, max_loc)
    )
    point_list = [
        list(toolbox.fix_location(shape, each))
        for each in toolbox.point_list_filter(
            point_list, self.multi_target_distance_threshold)
    ]
    logger.debug('fixed compare result: {}, {}, {}, {}'.format(
        min_val, max_val, min_loc, max_loc))

    return min_val, max_val, min_loc, max_loc, point_list
import numpy as np
import warnings
import typing

from findit.logger import logger
from findit.engine.base import FindItEngine, FindItEngineResponse

try:
    import findtext
except ImportError:
    # Keep the name bound: without this, any later use of `findtext` fails
    # with a NameError instead of a meaningful "not installed" error.
    findtext = None
    logger.debug("findtext should be installed if you want to use OCR engine")


class OCREngine(FindItEngine):
    """
    OCR engine, binding to tesseract
    """

    # language settings, same as tesseract
    # if you want to use chi_sim and eng, you can set it 'chi_sim+eng'
    DEFAULT_LANGUAGE: str = "eng"
    # offset for words ( sometimes causes out of range, take care )
    DEFAULT_OFFSET: int = 0
    # deep query
    DEFAULT_DEEP: bool = False

    def __init__(self, engine_ocr_lang: str = None, *_, **__):
        """
        Store the OCR settings.

        :param engine_ocr_lang: language to use; a falsy value selects
            ``DEFAULT_LANGUAGE``
        :param _: extra positional arguments, ignored
        :param __: extra keyword arguments, ignored
        """
        logger.info(f"engine {self.get_type()} preparing ...")

        # check language data before execute function, not here.
        self.engine_ocr_lang = engine_ocr_lang or self.DEFAULT_LANGUAGE
        self.engine_ocr_offset = self.DEFAULT_OFFSET
        self.engine_ocr_deep = self.DEFAULT_DEEP
def _compare_template(
    self,
    template_pic_object: np.ndarray,
    target_pic_object: np.ndarray,
    scale: typing.Sequence,
    mask_pic_object: np.ndarray = None,
) -> typing.Sequence:
    """
    compare via template matching
    (https://www.pyimagesearch.com/2015/01/26/multi-scale-template-matching-using-python-opencv/)

    The target is compressed first, then the template is resized once per
    scale step and matched against it; the step with the highest
    ``max_val`` wins.

    :param template_pic_object: template picture
    :param target_pic_object: picture that is searched
    :param scale: arguments for ``np.linspace``, default to (1, 3, 10)
    :param mask_pic_object: optional mask, resized with the template
    :return: min_val, max_val, min_loc, max_loc, point_list
    :raises IndexError: if no scale step produced a template that fits
        inside the (compressed) target
    """
    result_list = list()

    # compress
    # shape[:2] of an image array is (rows, cols), i.e. (height, width)
    pic_height, pic_width = target_pic_object.shape[:2]
    logger.debug(
        f"target object size before compressing: w={pic_width}, h={pic_height}"
    )
    target_pic_object = toolbox.compress_frame(
        target_pic_object, self.engine_template_compress_rate)
    pic_height, pic_width = target_pic_object.shape[:2]
    logger.debug(
        f"target object size after compressing: w={pic_width}, h={pic_height}"
    )

    for each_scale in np.linspace(*scale):
        # resize template
        resize_template_pic_object = toolbox.resize_pic_scale(
            template_pic_object, each_scale)

        # resize mask
        # Always start from the ORIGINAL mask. Re-assigning the parameter
        # here would compound the scale on every iteration and leave the
        # mask with a different size than the template.
        resize_mask_pic_object = None
        if mask_pic_object is not None:
            resize_mask_pic_object = toolbox.resize_pic_scale(
                mask_pic_object, each_scale)

        # if template's size is larger than raw picture, break
        template_height, template_width = resize_template_pic_object.shape[:2]
        if template_height > pic_height or template_width > pic_width:
            break

        res = cv2.matchTemplate(
            target_pic_object,
            resize_template_pic_object,
            self.engine_template_cv_method_code,
            mask=resize_mask_pic_object,
        )
        # each of current result is:
        # [(min_val, max_val, min_loc, max_loc), point_list, shape]
        current_result = [
            *self._parse_res(res), resize_template_pic_object.shape
        ]
        result_list.append(current_result)

    # too much log here, remove it.
    # logger.debug('scale search result: {}'.format(result_list))

    if not result_list:
        raise IndexError("""
template picture is larger than your target.

1. pick another template picture.
2. set engine_template_scale in __init__, see demo.py for details.
""")

    # get the best one (highest max_val; the last one wins on ties)
    loc_val, point_list, shape = sorted(result_list,
                                        key=lambda i: i[0][1])[-1]
    min_val, max_val, min_loc, max_loc = loc_val

    # fix position
    logger.debug(f"raw compare result: {max_loc}, {max_val}")
    min_loc, max_loc = (
        list(toolbox.fix_location(shape, each_location))
        for each_location in (min_loc, max_loc)
    )
    logger.debug(f"fixed compare result: {max_loc}, {max_val}")

    # de compress
    min_loc, max_loc = (
        toolbox.decompress_point(each_location,
                                 self.engine_template_compress_rate)
        for each_location in (min_loc, max_loc)
    )
    logger.debug(f"decompress compare result: {max_loc}, {max_val}")

    # NOTE(review): unlike min_loc/max_loc, these points are not passed
    # through decompress_point, so they look like they stay in compressed
    # coordinates - confirm whether callers expect that.
    point_list = [
        list(toolbox.fix_location(shape, each))
        for each in toolbox.point_list_filter(
            point_list,
            self.engine_template_multi_target_distance_threshold)
    ]
    # sort point list
    point_list.sort(key=lambda i: i[0])

    return min_val, max_val, min_loc, max_loc, point_list
def find(self,
         target_pic_name: str,
         target_pic_path: str = None,
         target_pic_object: np.ndarray = None,
         mark_pic: bool = None,
         *args, **kwargs):
    """
    start match

    Runs every engine for every loaded template against the target.

    :param target_pic_name: eg: 'your_target_picture_1'
    :param target_pic_path: '/path/to/your/target.png'
    :param target_pic_object: your_pic_cv_object (loaded by cv2)
    :param mark_pic: enable this, and you will get a picture file with a mark of result
    :param args: forwarded to each engine's ``execute``
    :param kwargs: forwarded to each engine's ``execute``
    :return: dict with the keys 'target_name', 'target_path' and 'data',
        where 'data' is ``{template_name: {engine_type: pruned_result}}``
    """
    # pre assert
    assert self.template, 'template is empty'
    assert not (target_pic_path is None and target_pic_object is None), 'need path or cv object'

    # load target
    logger.info('start finding ...')
    target_pic_object = toolbox.pre_pic(target_pic_path, target_pic_object)

    # the timestamp doubles as the output directory for marked pictures
    start_time = toolbox.get_timestamp()

    data = {}
    for tpl_name, tpl_object in self.template.items():
        logger.debug(
            'start analysing: [{}] ...'.format(tpl_name))

        per_engine = {}
        for engine in self.engine_list:
            outcome = engine.execute(tpl_object, target_pic_object,
                                     *args, **kwargs)

            # need mark?
            if mark_pic:
                marked_pic = toolbox.mark_point(target_pic_object,
                                                outcome['target_point'],
                                                cover=False)
                os.makedirs(start_time, exist_ok=True)
                mark_pic_path = '{}/{}_{}.png'.format(
                    start_time, tpl_name, engine.get_type())
                cv2.imwrite(mark_pic_path, marked_pic)
                logger.debug(
                    'save marked picture to {}'.format(mark_pic_path))

            # result filter
            per_engine[engine.get_type()] = self._prune_result(outcome)

        logger.debug('result for [{}]: {}'.format(
            tpl_name, json.dumps(per_engine)))
        data[tpl_name] = per_engine

    final_result = {
        'target_name': target_pic_name,
        'target_path': target_pic_path,
        'data': data,
    }
    logger.info('result: {}'.format(json.dumps(final_result)))
    return final_result
def get_feature_point_list(
        self,
        template_pic_object: np.ndarray,
        target_pic_object: np.ndarray) -> typing.Sequence[Point]:
    """
    compare via feature matching

    Detects ORB key points in both pictures, matches the template
    descriptors against the target descriptors with a cross-checked
    brute-force matcher and returns target positions of the result.

    :param template_pic_object: picture whose features are searched for
    :param target_pic_object: picture that is searched
    :return: list of ``Point`` in target coordinates; empty when either
        picture yields no descriptors or nothing matched
    """
    # IMPORTANT
    # sift and surf can not be used in python >= 3.8
    # so we switch it to ORB detector
    # maybe not enough precisely now

    # Initiate ORB detector
    orb = cv2.ORB_create()

    # find the keypoints and descriptors with ORB
    template_kp, template_desc = orb.detectAndCompute(
        template_pic_object, None)
    target_kp, target_desc = orb.detectAndCompute(target_pic_object, None)

    # key points count
    logger.debug(f"template key point count: {len(template_kp)}")
    logger.debug(f"target key point count: {len(target_kp)}")

    # detectAndCompute yields None descriptors when nothing was detected;
    # the matcher cannot work with that, so report "no points" instead.
    if template_desc is None or target_desc is None:
        logger.debug("no descriptors found, skip matching")
        return []

    # Finding correspondences between frames means looking up, for every
    # descriptor of one set (the query set), its nearest neighbour in the
    # other set (the train set).
    # more details: https://blog.csdn.net/liangjiubujiu/article/details/80418079
    # (the previous FLANN + k=2 ratio test was replaced by the matcher below)
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    # match feature descriptors
    matches = bf.knnMatch(template_desc, target_desc, k=1)
    # matches is a list with one inner list of DMatch per query descriptor
    # routine diagnostic: debug level, and no stray print() to stdout
    logger.debug(f"matches num: {len(matches)}")

    # TODO here is a sample to show feature points
    # temp = cv2.drawMatchesKnn(template_pic_object, kp1, target_pic_object, kp2, matches, None, flags=2)
    # cv2.imshow('feature_points', temp)
    # cv2.waitKey(0)

    # NOTE(review): only the first row of `matches` is used, so at most one
    # point comes back. This looks unintended for a "point list" - confirm
    # before collecting every row.
    good = matches[0] if matches else []

    # get positions (trainIdx indexes the target key points)
    return [Point(*target_kp[each.trainIdx].pt) for each in good]