def main(opt):

    # Load all context and repost text collected for the first keyword.
    k1 = Keyword(opt['keyword1'], opt['data_dir'])
    text1 = k1.get_all_context_text()
    text1_repost = k1.get_all_repost_text()

    # Load the same text collections for the second keyword.
    k2 = Keyword(opt['keyword2'], opt['data_dir'])
    text2 = k2.get_all_context_text()
    text2_repost = k2.get_all_repost_text()

    # Word-segment the four text collections into a single document.
    seg = Segmentation()
    document = seg.segmentation([text1, text2, text1_repost, text2_repost])

    # Fit the sub-keyword extractor on the document and extract the top 20 sub-keywords.
    ex = SubKeyExtractor()
    ex.fit(document)
    results = ex.extract(document, 20)

    print(results)

    # Score the similarity between the two keywords using their top context text
    # and the extracted sub-keyword lists.
    scorer = SimilarityScore(topn=20)
    score = scorer.score(k1.get_top_context_text(), results[0], results[2],
                         k2.get_top_context_text(), results[1], results[3])

    print(score)

    print('done')
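A minimal invocation sketch, assuming opt is a plain dict holding the three keys main() reads (the two keyword strings and the data directory); the key values below are placeholders, not taken from the original project.

if __name__ == '__main__':
    # Hypothetical options dict; keys mirror those accessed in main() above.
    opt = {
        'keyword1': 'keyword_a',   # placeholder: first topic to compare
        'keyword2': 'keyword_b',   # placeholder: second topic to compare
        'data_dir': './data',      # placeholder: directory the Keyword loader reads from
    }
    main(opt)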
Example #2
def pre_processing(image_file, file):

    # Cut out the largest component of the image (calibration marks removed).
    biggest_component = Remove_Calibration.cutout_img(image_file)
    #Remove_Calibration.save_image(biggest_component, file.split(".")[0])

    # Binarize the cropped component.
    bin_image = Binarization.binarize(biggest_component)
    #Binarization.save_image(bin_image, file.split(".")[0])

    # Returns the list of segmented characters, the number of rows, and the number of characters
    cropped_characters, row, no_ofChar = Segmentation.segmentation(bin_image)
    # Segmentation.save_segmented_characters(cropped_characters, row, file)

    return cropped_characters, row, no_ofChar
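A minimal usage sketch, assuming the input image is loaded with OpenCV before being passed to pre_processing(); cv2.imread and the sample filename are assumptions for illustration, not part of the original pipeline.

import cv2  # assumption: OpenCV is available for loading the image

file = 'plate_sample.png'          # placeholder filename
image_file = cv2.imread(file)      # load the image as a NumPy array (BGR)
chars, rows, n_chars = pre_processing(image_file, file)
print('segmented', n_chars, 'characters across', rows, 'row(s)')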