def __crawler(self):
    # Load the page and parse its source.
    self.browser.get(self.url)
    self.soup = BeautifulSoup(self.browser.page_source, 'html.parser')
    # Resize the window to the full document height so the screenshot
    # captures the whole page, not just the visible viewport.
    page_height = self.browser.find_element_by_tag_name("body").rect["height"]
    self.browser.set_window_size(setting.SCREEN_WIDTH, page_height)
    common.prepare_clean_dir(self.output_folder)
    self.browser.save_screenshot(self.output_folder + "/screenshot.png")
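# Sketch: one way `self.browser` could be set up before __crawler runs
# (hypothetical; the class's real initializer is not part of this excerpt).
# Assumes Selenium with headless Chrome; any driver exposing the calls used
# above would work.
from selenium import webdriver

def _make_browser():  # hypothetical helper, for illustration only
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    return webdriver.Chrome(options=options)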
def gen_candidate_database():
    # Requires numpy as np, sklearn.metrics.pairwise.cosine_similarity, and
    # the project-level `common` and `setting` modules.
    from imagefeature import ImageFeature

    print("Candidate Matching Database Generation Start")
    common.prepare_clean_dir(Path("temp/"))
    IF = ImageFeature()

    # Extract a feature vector for every query image.
    query_features, query_pathes = [], []
    for img_file in sorted(Path("input/query/").glob("*")):
        query_pathes.append(img_file)
        query_features.append(IF.get_feature(img_file))
        print("Extracting Query Feature", img_file)

    # Extract a feature vector for every target image, grouped by class.
    target_class_names, target_features, target_pathes = [], [], []
    for folder in sorted(Path("input/target/").glob("*")):
        class_name = folder.stem
        for img_file in sorted(Path("input/target/%s/" % class_name).glob("*")):
            target_pathes.append(img_file)
            target_class_names.append(class_name)
            target_features.append(IF.get_feature(img_file))
            print("Extracting Target Feature", img_file)

    print("Calculating Similarities...")
    sims = cosine_similarity(query_features, target_features)

    # For each query, keep the top candidates per class, sorted by
    # descending cosine similarity, capped at MAX_NUMBER_ONE_CLASS.
    candidate_matching_database = {}
    for query_index, row in enumerate(sims):
        query_file = query_pathes[query_index]
        candidate_matching_database[query_file] = {}
        for arg in np.argsort(row)[::-1]:
            target_path = target_pathes[arg]
            target_class_name = target_class_names[arg]
            candidates = candidate_matching_database[query_file].setdefault(target_class_name, [])
            if len(candidates) < setting.MAX_NUMBER_ONE_CLASS:
                candidates.append((target_path, row[arg]))

    common.save_pickle(Path("temp/candidate_matching_database.pickle"), candidate_matching_database)
    print("Candidate Matching Database Generation Finish")
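# Sketch: loading and inspecting the database written above. The nesting
# (query path -> class name -> [(target path, similarity), ...]) mirrors how
# gen_candidate_database builds it; no assumptions beyond the `common`
# helpers already used in this module.
def _dump_candidate_database():  # hypothetical helper, for illustration only
    db = common.load_pickle(Path("temp/candidate_matching_database.pickle"))
    for query_file, per_class in db.items():
        for class_name, entries in per_class.items():
            for target_path, similarity in entries:
                print(query_file, class_name, target_path, "%.4f" % similarity)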
def __output_images(self):
    tmp_path = self.output_folder + "/tmp"
    path = self.output_folder + "/images"
    common.prepare_clean_dir(tmp_path)
    common.prepare_clean_dir(path)

    for segment in self.json_data["segments"]:
        for record in segment["records"]:
            for i, image in enumerate(record["images"]):
                try:
                    file_name = "%s_%s" % (record["record_id"], i)
                    source_file_name_only = tmp_path + "/" + file_name
                    # Derive the original extension, dropping any query string.
                    original_extension = image["src"].split('/')[-1].split('.')[-1].split("?")[0]
                    source_file_name = source_file_name_only + "." + original_extension
                    target_file_name = path + "/" + file_name + "." + setting.OUTPUT_IMAGE_TYPE

                    # Download the image into the temporary directory.
                    r = requests.get(image["src"], stream=True,
                                     headers={'User-agent': 'Mozilla/5.0'})
                    if r.status_code != 200:
                        continue
                    with open(source_file_name, 'wb') as f:
                        r.raw.decode_content = True
                        shutil.copyfileobj(r.raw, f)

                    # Flatten any transparency onto the detected background color.
                    [R, G, B] = [int(a) for a in image["bg_color"].split(",")]
                    im = Image.open(source_file_name).convert('RGBA')
                    bg = Image.new("RGB", im.size, (R, G, B))
                    bg.paste(im, mask=im)  # the alpha channel masks the paste
                    bg.save(target_file_name)
                    image["path"] = target_file_name
                except Exception:
                    # Skip images that fail to download or convert.
                    pass

    common.save_json(self.output_folder + "/result.json", self.json_data,
                     encoding=setting.OUTPUT_JSON_ENCODING)
    shutil.rmtree(tmp_path)
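# Sketch: the alpha-flattening idiom from __output_images in isolation, using
# only Pillow's documented API (the helper name and file names here are
# illustrative, not part of the original code).
from PIL import Image

def _flatten_alpha(src, dst, bg_color=(255, 255, 255)):  # hypothetical helper
    im = Image.open(src).convert("RGBA")
    bg = Image.new("RGB", im.size, bg_color)
    bg.paste(im, mask=im)  # transparent pixels keep the background color
    bg.save(dst)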
def match():
    import deepmatching_wrapper as dm
    import cv2

    candidate_matching_database = common.load_pickle(
        Path("temp/candidate_matching_database.pickle"))
    common.prepare_clean_dir(Path("output/"))
    common.prepare_clean_dir(Path("output/images/"))

    output = {}
    for query_file, candidates in candidate_matching_database.items():
        query_name = Path(query_file).stem
        matching_result = []
        for target_class_name, target_images in candidates.items():
            for i, (target_path, similarity) in enumerate(target_images):
                print("Matching", query_file, "with target image", target_path)
                matches, name1, name2, qw, qh, tw, th, img1, img2 = dm.match(
                    query_file, target_path)
                if len(matches) < 4:
                    continue  # findHomography needs at least 4 correspondences

                # Estimate a homography with RANSAC; matches consistent with
                # it are counted as inliers.
                src_pts = np.float32([[m[0], m[1]] for m in matches])
                dst_pts = np.float32([[m[2], m[3]] for m in matches])
                M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC,
                                             setting.RANSAC_THRESHOLD)
                if mask is None:
                    continue  # no homography could be estimated
                inlier = [matches[index]
                          for index, m in enumerate(mask) if np.isclose(m, 1)]

                # Name the visualization by the candidate's rank in its class.
                output_name = "%s_%s_%02d.jpg" % (query_name, target_class_name, i)
                dm.draw(img1, img2, inlier, Path("output/images/") / output_name)
                matching_result.append({
                    "class_name": target_class_name,
                    "inlier": len(inlier),
                })
        # Rank candidate classes by their inlier counts, best first.
        output[query_file.name] = sorted(matching_result,
                                         key=lambda x: x["inlier"], reverse=True)

    common.write_json(Path("output/result.json"), output)
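# Sketch of a possible driver, assuming gen_candidate_database and match live
# in the same module; the ordering (features first, matching second) follows
# the temp/candidate_matching_database.pickle handoff above.
if __name__ == "__main__":
    gen_candidate_database()  # writes temp/candidate_matching_database.pickle
    match()                   # reads the pickle, writes output/result.json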