def test_decompose_and_resize():
    """Check the output of ``aui.decompose_and_resize`` on sample words.

    Every sample word is rendered with Amiri at 600 dpi (background 0,
    foreground 255) and decomposed into 64x64 component images. The test
    asserts the number of components and their size, and, for every
    component whose bounding box has a positive entry, that rows 0 and 1
    of the box are both ``(0, 64)``.
    """
    side = 64
    render_opts = dict(fontname="Amiri", dpi=600, background=0, foreground=255)
    cases = [("abcd", 4), ("ijkl", 6), ("şğuf", 5), ("ibadullah", 10)]

    for w, expected in cases:
        rendered = aui.get_gray_word_image(w, **render_opts)
        component_imgs = aui.decompose_and_resize(rendered,
                                                  size_x=side,
                                                  size_y=side)
        LOG.debug("=== component_imgs.shape ===")
        LOG.debug(component_imgs.shape)

        res_n, res_rows, res_cols = component_imgs.shape
        assert (res_rows, res_cols) == (side, side)
        assert res_n == expected, "w: {}, res_n: {}, expected: {}".format(
            w, res_n, expected)

        for idx in range(res_n):
            bb = aui.img_bounding_box(component_imgs[idx])
            LOG.debug("=== bb ===")
            LOG.debug(bb)
            if not np.any(bb > 0):
                # Bounding box has no positive entry: nothing to check.
                continue
            # NOTE(review): presumably this means the component fills the
            # whole 64x64 frame -- confirm against img_bounding_box.
            assert (bb[0, 0], bb[0, 1]) == (0, side)
            assert (bb[1, 0], bb[1, 1]) == (0, side)
def random_img():
    """Render a gray image of randomly picked words with random styling.

    All ranges and collections (``num_words_min``/``num_words_max``,
    ``words``, ``fg_min``/``fg_max``, ``bg_min``/``bg_max``, ``fonts``,
    ``dpi_min``/``dpi_max``) are read from names defined outside this
    function. The random draws happen in a fixed order: word count, words,
    foreground, background, font, dpi.

    Returns:
        Whatever ``aui.get_gray_word_image`` returns for the drawn settings.
    """
    word_count = nr.randint(num_words_min, num_words_max)
    picked_words = nr.choice(words, word_count)
    text = " ".join(word.strip() for word in picked_words)

    # Keep this draw order; it determines how the RNG state is consumed.
    fg = nr.randint(fg_min, fg_max)
    bg = nr.randint(bg_min, bg_max)
    font = nr.choice(fonts)
    dpi = nr.randint(dpi_min, dpi_max)

    image = aui.get_gray_word_image(text, font, dpi, bg, fg)
    return image
def _write_component_image(component, label, font, dpi, stat, output_dir):
    """Write one component image to ``<output_dir>/<label>/``.

    The file name encodes the label, the font name (spaces removed,
    lower-cased), the dpi and the ``w``, ``h`` and ``area`` fields of
    ``stat``. The label directory is created when it does not exist.
    """
    target_dir = os.path.join(output_dir, label)
    target_file = os.path.join(
        target_dir,
        "cc-{}-font-{}-dpi-{}-w-{}-h-{}-area-{}.png".format(
            label, font.replace(" ", "").lower(), dpi,
            stat["w"], stat["h"], stat["area"]))
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(target_dir, exist_ok=True)
    cv2.imwrite(target_file, component)


def create_letter_group_image(lg, font, dpi, output_dir):
    """Render a letter group and save its components as labelled PNG files.

    The letter group is converted with ``aua.visenc_to_arabic``, rendered
    with the given font and dpi (background 0, foreground 255) and split
    with ``aui.text_image_segmentation``. The component with the largest
    ``area`` is written under a directory named after the letter group with
    its diacritics removed. When the segmentation yields exactly two
    components, the smaller one is additionally written under a directory
    named after its diacritic label.

    Args:
        lg: Letter group in the encoding accepted by
            ``aua.visenc_to_arabic``.
        font: Font name passed to the renderer; also embedded in file names.
        dpi: Rendering resolution; also embedded in file names.
        output_dir: Root directory under which label directories are created.
    """
    otm = aua.visenc_to_arabic(lg)
    word_image = aui.get_gray_word_image(otm,
                                         fontname=font,
                                         dpi=dpi,
                                         background=0,
                                         foreground=255)
    components, stats = aui.text_image_segmentation(word_image)
    LOG.debug("=== lg ===")
    LOG.debug(lg)
    LOG.debug("=== components.shape ===")
    LOG.debug(components.shape)
    LOG.debug("=== stats.shape ===")
    LOG.debug(stats.shape)

    # Component indices ordered by ascending area: last is the largest.
    stats_sorted_area = np.argsort(stats['area'])

    stem_label = aua.delete_visenc_diacritics(lg)
    li = stats_sorted_area[-1]
    LOG.debug("=== stem_label ===")
    LOG.debug(stem_label)
    _write_component_image(components[li], stem_label, font, dpi,
                           stats[li], output_dir)

    if len(stats) == 2:
        # Exactly two components: treat the smaller one as a single diacritic.
        di = stats_sorted_area[0]
        diacritic_label = aua.delete_visenc_stems(lg)
        n_diacritics = (diacritic_label.count('o') +
                        diacritic_label.count('u') +
                        diacritic_label.count('i'))
        if n_diacritics == 1:
            # Drop a leading 'o', 'u' or 'i' marker from the label, if any.
            diacritic_label = re.sub(r'^[oui]?(.*)', r'\1', diacritic_label)
        LOG.debug("=== diacritic_label ===")
        LOG.debug(diacritic_label)
        _write_component_image(components[di], diacritic_label, font, dpi,
                               stats[di], output_dir)
def create_word_images(word,
                       fonts=('Amiri', 'Droid Sans Arabic'),
                       dpis=(1200,)):
    """Render ``word`` once for every combination of font and dpi.

    The defaults are immutable tuples rather than lists so that no shared
    mutable default object exists; any iterable of fonts / dpis is accepted.

    Args:
        word: Text passed to ``aui.get_gray_word_image``.
        fonts: Iterable of font names (outer loop).
        dpis: Iterable of resolutions (inner loop).

    Returns:
        A list of rendered images (background 0, foreground 255), ordered
        font-major: all dpis for the first font, then the next font, etc.
    """
    return [
        aui.get_gray_word_image(word,
                                fontname=font,
                                dpi=dpi,
                                background=0,
                                foreground=255)
        for font in fonts
        for dpi in dpis
    ]
def test_generate_component_map():
    """Check that a larger PROXIMITY_BOUND yields a longer component map.

    Each sample word is rendered with Amiri at 600 dpi (background 0,
    foreground 255) and segmented; the component map built with
    ``PROXIMITY_BOUND=1`` must have fewer rows (``shape[0]``) than the one
    built with ``PROXIMITY_BOUND=2``.
    """
    render_opts = dict(fontname="Amiri", dpi=600, background=0, foreground=255)
    samples = [("abcd", 4), ("ijkl", 6), ("şğuf", 5), ("ibadullah", 10)]

    # The expected component counts in `samples` are not used by this test.
    for w, _expected in samples:
        w_img = aui.get_gray_word_image(w, **render_opts)
        LOG.debug("=== w_img.shape ===")
        LOG.debug(w_img.shape)

        _imgs, stats = aui.text_image_segmentation(w_img)
        maps = [aui.generate_component_map(stats, PROXIMITY_BOUND=bound)
                for bound in (1, 2)]
        rows_bound_1, rows_bound_2 = [m.shape[0] for m in maps]
        assert rows_bound_1 < rows_bound_2
def test_text_image_segmentation():
    """Check component count and image size from text_image_segmentation.

    Each sample word is rendered with Amiri at 600 dpi (background 0,
    foreground 255). The segmentation result must be a 3-D stack whose
    trailing two dimensions equal the rendered image's shape and whose
    first dimension equals the expected number of components.
    """
    render_opts = dict(fontname="Amiri", dpi=600, background=0, foreground=255)
    cases = [("abcd", 4), ("ijkl", 6), ("şğuf", 5), ("ibadullah", 10)]

    for w, expected in cases:
        w_img = aui.get_gray_word_image(w, **render_opts)
        LOG.debug("=== w_img.shape ===")
        LOG.debug(w_img.shape)

        segmentation = aui.text_image_segmentation(w_img)
        imgs, _stats = segmentation
        res_n, res_rows, res_cols = imgs.shape

        assert (res_rows, res_cols) == w_img.shape
        assert res_n == expected, "w: {}, res_n: {}, expected: {}".format(
            w, res_n, expected)
def test_generate_component_graph():
    """Check that the component graph has one node per expected component.

    Each sample word is rendered with Amiri at 600 dpi (background 0,
    foreground 255), segmented, and turned into a component map with
    ``PROXIMITY_BOUND=0.5`` and then into a networkx graph. The graph is
    drawn with a spring layout and saved as ``<word>-graph.png`` in the
    system temporary directory, after which the node count is asserted.
    """
    import os
    import tempfile

    import matplotlib.pyplot as plt
    import networkx as nx

    words = [("abcd", 4), ("ijkl", 6), ("şğuf", 5), ("ibadullah", 10)]
    # Resolve the temp directory portably instead of hard-coding '/tmp'.
    out_dir = tempfile.gettempdir()

    for w, expected in words:
        w_img = aui.get_gray_word_image(w,
                                        fontname="Amiri",
                                        dpi=600,
                                        background=0,
                                        foreground=255)
        LOG.debug("=== w_img.shape ===")
        LOG.debug(w_img.shape)
        _imgs, stats = aui.text_image_segmentation(w_img)
        component_map = aui.generate_component_map(stats,
                                                   PROXIMITY_BOUND=0.5)
        graph = aui.component_map_to_nx(stats, component_map)
        pos = nx.spring_layout(graph)
        try:
            nx.draw_networkx_nodes(graph, pos)
            nx.draw_networkx_edges(graph, pos)
            plt.savefig(os.path.join(out_dir, '{}-graph.png'.format(w)))
        finally:
            # Always reset the current figure so a failed draw or save
            # does not leak pyplot state into the next word or test.
            plt.clf()
        assert nx.number_of_nodes(graph) == expected