Example #1
def passing_test_image_to_labelmap():
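    # Run image_to_labelmap on every image under the working set and check
    # that an existing labelmap (.lm2) file is produced for each one.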
    image_files = aui.recursive_file_list(
        os.path.expandvars('$HOME/Annex/Arabic/working-set-1'))
    for f in image_files:
        labelmap_file, imgs, stats, labels, cmap = vs.image_to_labelmap(f)

        assert labelmap_file.endswith("lm2")
        assert os.path.exists(labelmap_file)
Example #2
def test_chain_code_from_image():
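    # Compute a chain code for each component image (read as grayscale) and
    # save it as a text file of integers under output_dir.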
    dataset = "/home/iesahin/Annex/Arabic/arabic-component-dataset-518"
    output_dir = "/tmp/chain-code-results"
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    image_files = aui.recursive_file_list(dataset)
    samples = []
    for imgf in image_files:
        img = cv2.imread(imgf, 0)
        cc = afcc.chain_code_from_img(img)
        outfname = "{}/{}.txt".format(output_dir, os.path.basename(imgf))
        np.savetxt(outfname, cc, fmt='%i')
Example #3
def search_string_in_dir(directory,
                         visenc_string,
                         num_results=100):
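    # Search the visenc string in every labelmap (.lm2) file under the
    # directory, then merge and rank the per-file results.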
    labelmap_files = aui.recursive_file_list(directory,
                                             f_regex=r'.*\.lm2$')
    assert len(labelmap_files) > 0

    lm_results = {}
    for lmf in labelmap_files:
        lm = load_labelmap(lmf)
        lm_results[lmf] = search_string_in_component_map(lm,
                                                         visenc_string)
    final_results = _merge_and_sort_lm_results(lm_results)
    return final_results[:num_results]
Example #4
def test_classify_image_with_pretrained():
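    # Classify 200 randomly sampled component images with a pretrained model;
    # the expected class is taken from each file's parent directory name.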
    model = anmccv5.ComponentModelCTC_v5(weights_file)
    image_files_dir = os.path.expandvars(
        "$HOME/Annex/Arabic/Avians/arabic-component-dataset-4170/")
    image_files = aui.recursive_file_list(image_files_dir)
    image_files = np.random.choice(image_files, 200)
    assert len(image_files) > 0
    for image_file in image_files:
        assert os.path.exists(image_file)
        img = cv2.imread(image_file, 0)
        cls = model.classify_image(img)
        mm = re.match(image_files_dir + r'([a-z]+)/cc.*png', image_file)
        correct_cls = mm.group(1)
        yield check_cls, correct_cls, cls
Example #5
def ngrams_for_dir(the_dir, max_component_distance=1, max_word_distance=1):
    "Builds ngrams for all files in the_dir"

    files = aui.recursive_file_list(the_dir, f_regex=r"\.txt$")
    component_ngrams = _init_component_ngram_table(size=0)
    word_ngrams = _init_word_ngram_table(size=0)
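    # Merge each file's component and word ngrams into the running tables.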
    for f in files:
        full_f = os.path.join(the_dir, f)
        fcng = generate_component_ngrams(full_f,
                                         max_distance=max_component_distance)
        component_ngrams = _merge_component_arrays(component_ngrams, fcng)
        fwng = generate_word_ngrams(full_f, max_distance=max_word_distance)
        word_ngrams = _merge_word_arrays(word_ngrams, fwng)

    return component_ngrams, word_ngrams
Example #6
def test_train_for_a_single_word():
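    # Train a model, save its weights, reload them into a fresh model, and
    # check the reloaded model's predictions against the directory labels.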
    model = anmccv5.ComponentModelCTC_v5(None)
    model.train_model(dataset_dir)
    save_file = "/tmp/small-classifier.h5"
    model.save_weights(save_file)
    model_reloaded = anmccv5.ComponentModelCTC_v5(save_file)
    image_files_dir = os.path.expandvars(
        "$HOME/Annex/Arabic/arabic-component-dataset-test-small/")
    image_files = aui.recursive_file_list(image_files_dir)
    assert len(image_files) > 0
    for image_file in image_files:
        assert os.path.exists(image_file)
        img = cv2.imread(image_file, 0)
        # cls = model.classify_image(img)
        cls_reloaded = model_reloaded.classify_image(img)
        mm = re.match(image_files_dir + r'([a-z]+)/cc.*png', image_file)
        correct_cls = mm.group(1)
        yield check_cls, correct_cls, cls_reloaded
Example #7
def test_train_and_classify():
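    # Load cached weights if available; otherwise train for 100 epochs and
    # cache the weights, then classify 200 randomly sampled dataset images.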
    current_weights_file = artifact_dir + "/weights.h5"
    if os.path.exists(current_weights_file):
        model = anmccv5.ComponentModelCTC_v5(current_weights_file)
    else:
        model = anmccv5.ComponentModelCTC_v5(None)
        model.train_model(dataset_dir=dataset_dir, nb_epoch=100)
        if not os.path.exists(artifact_dir):
            os.mkdir(artifact_dir)
        model.save_weights(current_weights_file)

    image_files_dir = dataset_dir
    image_files = aui.recursive_file_list(image_files_dir)
    image_files = np.random.choice(image_files, 200)
    assert len(image_files) > 0
    for image_file in image_files:
        assert os.path.exists(image_file)
        img = cv2.imread(image_file, 0)
        cls = model.classify_image(img)
        mm = re.match(image_files_dir + r'([a-z]+)/.*png', image_file)
        correct_cls = mm.group(1)
        yield check_cls_among, correct_cls, cls
Example #8
def search_word_in_library(word,
                           image_lib_dir,
                           feature_extractor,
                           fonts=None,
                           feature_size=64,
                           strides=32,
                           max_results=10,
                           output_dir="/tmp"):

    features_table, size_table, distance_table = calculate_word_features(
        word, feature_extractor, feature_size=feature_size)

    largest_component_indices = find_largest_component_indices(size_table)
    largest_component_features = features_table[
        :, largest_component_indices, :]
    min_relative_size = size_table.min()
    max_relative_distance = distance_table.max()

    # word_patches = _word_patches(word, feature_size, strides=strides)
    image_files = aui.recursive_file_list(image_lib_dir)
    image_file_dict = {fn: i for i, fn in enumerate(image_files)}
    images = [cv2.imread(img_f) for img_f in image_files]
    page_results = []
    for i, img in enumerate(images):
        res_img = search_in_image_via_components(features_table,
                                                 largest_component_indices,
                                                 min_relative_size,
                                                 max_relative_distance,
                                                 img,
                                                 feature_extractor,
                                                 feature_size=feature_size)
        page_results.append((image_files[i], res_img))


    output_imgs = {f: img for f, img in page_results}

    for rf, out_img in output_imgs.items():
        out_fn = os.path.join(output_dir, os.path.basename(rf) + ".png")
        cv2.imwrite(out_fn, out_img)
    return output_imgs