def calculate_word_features(word, feature_extractor, feature_size, nb_epoch):
    word_images = generate_word_images(word)
    # Normalize each rendered image to the full 8-bit range.
    word_images = [np.uint8(wi * (255 / wi.max())) for wi in word_images]
    write_image_list(word_images, "word-images")
    print("Generated {} word images for {}".format(len(word_images), word))
    # Split each word image into connected components (stems, diacritics, ...).
    word_components = [aui.text_image_segmentation(wi) for wi in word_images]
    components_per_word = [len(c) for c in word_components]
    max_components_per_word = max(components_per_word)
    # Tables are padded with NaN so that words with fewer components than
    # max_components_per_word share the same fixed-size layout.
    word_feature_table = np.empty(
        shape=(len(word_components), max_components_per_word, feature_size),
        dtype=np.float64)
    word_feature_table.fill(np.nan)
    size_table = np.empty(
        shape=(len(word_components), max_components_per_word),
        dtype=np.float64)
    size_table.fill(np.nan)
    distance_table = np.empty(
        shape=(len(word_components), max_components_per_word),
        dtype=np.float64)
    distance_table.fill(np.nan)
    for i, wc in enumerate(word_components):
        wf, bb = wc
        feature_vals = feature_extractor.predict(wf)
        # Relative component size and a rough position measure per component.
        size_vals = bb['area'] / bb['area'].sum()
        distance_vals = (bb['cx'] + bb['cy']) / bb['area'].max()
        word_feature_table[i, :wf.shape[0], :] = feature_vals
        size_table[i, :wf.shape[0]] = size_vals
        distance_table[i, :wf.shape[0]] = distance_vals
    return word_feature_table, size_table, distance_table
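
# Hedged usage sketch (not part of the original module): the tables returned by
# calculate_word_features are padded with NaN up to max_components_per_word, so
# any aggregation over them should be NaN-aware. The helper below is a minimal,
# self-contained illustration of that idea using plain numpy; weighting each
# component's features by its relative size is an assumption for the example,
# not the project's method.
def summarize_word_feature_table(word_feature_table, size_table):
    """Collapse a (n_images, n_components, feature_size) table into one
    per-image feature vector, weighting components by relative size."""
    import numpy as np
    weights = np.nan_to_num(size_table)            # NaN padding -> zero weight
    features = np.nan_to_num(word_feature_table)   # NaN padding -> zero contribution
    weighted = features * weights[:, :, np.newaxis]
    totals = weights.sum(axis=1, keepdims=True)
    totals[totals == 0] = 1.0                      # avoid division by zero
    return weighted.sum(axis=1) / totals           # shape (n_images, feature_size)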
def create_letter_group_image(lg, font, dpi, output_dir):
    otm = aua.visenc_to_arabic(lg)
    word_image = aui.get_gray_word_image(otm,
                                         fontname=font,
                                         dpi=dpi,
                                         background=0,
                                         foreground=255)
    components, stats = aui.text_image_segmentation(word_image)
    LOG.debug("=== lg ===")
    LOG.debug(lg)
    LOG.debug("=== components.shape ===")
    LOG.debug(components.shape)
    LOG.debug("=== stats.shape ===")
    LOG.debug(stats.shape)
    # aui.write_image_array(components, lg)
    stats_sorted_area = np.argsort(stats['area'])
    stem_label = aua.delete_visenc_diacritics(lg)
    li = stats_sorted_area[-1]
    largest = components[li]
    largest_dir = os.path.join(output_dir, stem_label)
    largest_file = os.path.join(
        largest_dir,
        "cc-{}-font-{}-dpi-{}-w-{}-h-{}-area-{}.png".format(
            stem_label, font.replace(" ", "").lower(), dpi,
            stats[li]["w"], stats[li]["h"], stats[li]["area"]))
    LOG.debug("=== stem_label ===")
    LOG.debug(stem_label)
    if not os.path.isdir(largest_dir):
        os.makedirs(largest_dir)
    cv2.imwrite(largest_file, largest)
    if len(stats) == 2:  # if there is a single diacritic
        di = stats_sorted_area[0]
        diacritic = components[di]
        diacritic_label = aua.delete_visenc_stems(lg)
        n_diacritics = (diacritic_label.count('o') +
                        diacritic_label.count('u') +
                        diacritic_label.count('i'))
        if n_diacritics == 1:
            diacritic_label = re.sub(r'^[oui]?(.*)', r'\1', diacritic_label)
        LOG.debug("=== diacritic_label ===")
        LOG.debug(diacritic_label)
        diacritic_dir = os.path.join(output_dir, diacritic_label)
        diacritic_file = os.path.join(
            diacritic_dir,
            "cc-{}-font-{}-dpi-{}-w-{}-h-{}-area-{}.png".format(
                diacritic_label, font.replace(" ", "").lower(), dpi,
                stats[di]["w"], stats[di]["h"], stats[di]["area"]))
        if not os.path.isdir(diacritic_dir):
            os.makedirs(diacritic_dir)
        cv2.imwrite(diacritic_file, diacritic)
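
# Hedged sketch (not part of the original file): the n_diacritics == 1 branch above
# strips a single leading visenc diacritic marker ('o', 'u' or 'i') from the label
# before it is used as a directory name. The tiny helper below restates that regex
# on its own so it can be exercised in isolation; the example labels are made up
# for illustration.
import re

def strip_single_diacritic_marker(label):
    """Remove at most one leading 'o'/'u'/'i' marker from a visenc label."""
    return re.sub(r'^[oui]?(.*)', r'\1', label)

# e.g. strip_single_diacritic_marker("o1") == "1"
#      strip_single_diacritic_marker("xyz") == "xyz"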
def test_generate_component_map():
    words = [("abcd", 4), ("ijkl", 6), ("şğuf", 5), ("ibadullah", 10)]
    for w, expected in words:
        w_img = aui.get_gray_word_image(w,
                                        fontname="Amiri",
                                        dpi=600,
                                        background=0,
                                        foreground=255)
        LOG.debug("=== w_img.shape ===")
        LOG.debug(w_img.shape)
        imgs, stats = aui.text_image_segmentation(w_img)
        component_map_1 = aui.generate_component_map(stats, PROXIMITY_BOUND=1)
        component_map_2 = aui.generate_component_map(stats, PROXIMITY_BOUND=2)
        assert component_map_1.shape[0] < component_map_2.shape[0]
def test_text_image_segmentation():
    words = [("abcd", 4), ("ijkl", 6), ("şğuf", 5), ("ibadullah", 10)]
    for w, expected in words:
        w_img = aui.get_gray_word_image(w,
                                        fontname="Amiri",
                                        dpi=600,
                                        background=0,
                                        foreground=255)
        LOG.debug("=== w_img.shape ===")
        LOG.debug(w_img.shape)
        imgs, stats = aui.text_image_segmentation(w_img)
        res_n, res_rows, res_cols = imgs.shape
        assert (res_rows, res_cols) == w_img.shape
        assert res_n == expected, "w: {}, res_n: {}, expected: {}".format(
            w, res_n, expected)
def test_generate_component_graph():
    import matplotlib.pyplot as plt
    import networkx as nx
    words = [("abcd", 4), ("ijkl", 6), ("şğuf", 5), ("ibadullah", 10)]
    for w, expected in words:
        w_img = aui.get_gray_word_image(w,
                                        fontname="Amiri",
                                        dpi=600,
                                        background=0,
                                        foreground=255)
        LOG.debug("=== w_img.shape ===")
        LOG.debug(w_img.shape)
        imgs, stats = aui.text_image_segmentation(w_img)
        component_map = aui.generate_component_map(stats, PROXIMITY_BOUND=0.5)
        graph = aui.component_map_to_nx(stats, component_map)
        pos = nx.spring_layout(graph)
        nx.draw_networkx_nodes(graph, pos)
        nx.draw_networkx_edges(graph, pos)
        plt.savefig('/tmp/{}-graph.png'.format(w))
        plt.clf()
        assert nx.number_of_nodes(graph) == expected
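
# Hedged sketch (not part of the original tests): component_map_to_nx is assumed
# to turn an array of component-index pairs into an undirected graph. The helper
# below shows one plain-networkx way to do the same with a made-up edge list, so
# the test above can be read without the aui internals at hand.
def _component_pairs_to_graph(pairs, n_components):
    import networkx as nx
    g = nx.Graph()
    g.add_nodes_from(range(n_components))   # one node per connected component
    g.add_edges_from((int(a), int(b)) for a, b in pairs)
    return g

# e.g. _component_pairs_to_graph([(0, 1), (1, 2)], 4) has 4 nodes and 2 edges.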
def text_object_segmentation(layers):
    all_results = []
    for i in range(layers.shape[0]):
        all_results.append(aui.text_image_segmentation(layers[i]))
    if len(all_results) > 0:
        tot_len = sum([ir.shape[0] for ir, sr in all_results])
        first_img = all_results[0][0]
        first_stat = all_results[0][1]
        all_images = np.empty(shape=(tot_len,
                                     first_img.shape[1],
                                     first_img.shape[2]),
                              dtype=first_img.dtype)
        all_stats = np.empty(shape=(tot_len,), dtype=first_stat.dtype)
        ii = 0
        for ir, sr in all_results:
            li = ir.shape[0]
            ni = ii + li
            all_images[ii:ni] = ir[0:li]
            all_stats[ii:ni] = sr[0:li]
            ii = ni
        return all_images, all_stats
    else:  # if len(all_results) > 0
        return None, None
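
# Hedged note (not part of the original function): text_object_segmentation
# preallocates the output arrays and copies each layer's results into a running
# slice. With numpy alone the same stacking can be written more compactly, at the
# cost of an extra pass over the data; the sketch below only illustrates that
# alternative, assuming every layer yields images of the same height and width.
def _stack_layer_results(all_results):
    import numpy as np
    images = np.concatenate([ir for ir, sr in all_results], axis=0)
    stats = np.concatenate([sr for ir, sr in all_results], axis=0)
    return images, stats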
def search_in_image_via_components(word_features, img, feature_extractor,
                                   text_detection_model, feature_size=64):
    LOG.debug("=== word_features.shape ===")
    LOG.debug(word_features.shape)
    # Quantize the page into color layers and binarize each layer.
    image_layers = aui.color_quantization(img)
    aui.write_image_array(image_layers, "layers")
    image_layers[image_layers.nonzero()] = 255
    image_layers = image_layers[:, :, :, 0]
    dist_tr_layers = [
        aui.distance_transform(image_layers[i])
        for i in range(image_layers.shape[0])
    ]
    dist_tr_layers = [np.uint8(wi * (255 / wi.max())) for wi in dist_tr_layers]
    # Segment each layer into components and resize them to the feature size.
    image_component_layers_list = [
        aui.text_image_segmentation(layer_img, min_area=5, max_area=0.3)
        for layer_img in dist_tr_layers
    ]
    image_components_list = [
        aui.resize_components(img_comp, img_stat, feature_size, feature_size)
        for img_comp, img_stat in image_component_layers_list
    ]
    search_tables = []
    search_list = []
    for i, component_array in enumerate(image_components_list):
        aui.write_image_array(component_array, str(i))
        ica = component_array.reshape(
            (component_array.shape[0], 1,
             component_array.shape[1], component_array.shape[2]))
        ica_features = calculate_component_features(ica, feature_extractor,
                                                    feature_size)
        ica_stats = image_component_layers_list[i][1]
        search_table = np.empty(shape=(img.shape[0], img.shape[1]),
                                dtype=np.float64)
        n_wc = word_features.shape[0]
        n_ic = ica_features.shape[0]
        # Record the feature distance of every (word component, image component) pair.
        for i_w in range(n_wc):
            for i_c in range(n_ic):
                loc = ica_stats[i_c]
                dist = nl.norm(word_features[i_w] - ica_features[i_c])
                x1, x2 = (loc['x'], loc['x'] + loc['w'])
                y1, y2 = (loc['y'], loc['y'] + loc['h'])
                search_table[x1:x2, y1:y2] = dist
                search_list.append((x1, x2, y1, y2, loc['area'], dist))
        search_tables.append(search_table)
    res_img = img.copy()
    LOG.debug("=== search_list ===")
    LOG.debug(search_list)
    # Draw rectangles around the ten closest matches by feature distance.
    sorted_locs = sorted(search_list, key=lambda s: s[5])
    for s in sorted_locs[:10]:
        cv2.rectangle(res_img, (s[2], s[3]), (s[0], s[1]), (255, 0, 0))
    return res_img
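
# Hedged sketch (not part of the original function): the search above ranks every
# (word component, image component) pair by Euclidean feature distance and keeps
# the ten closest. The helper below isolates that ranking step on plain numpy
# arrays; the names and the default top_k value are illustrative assumptions.
def _rank_component_matches(word_features, image_features, top_k=10):
    import numpy as np
    # Pairwise Euclidean distances, shape (n_word_components, n_image_components).
    diffs = word_features[:, np.newaxis, :] - image_features[np.newaxis, :, :]
    dists = np.linalg.norm(diffs, axis=2)
    # Indices of the top_k smallest distances as (word index, image index) pairs.
    flat_order = np.argsort(dists, axis=None)[:top_k]
    return [np.unravel_index(f, dists.shape) for f in flat_order]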