def main(opt):
    """Run the sub-keyword extraction and similarity-scoring pipeline.

    Builds two Keyword corpora from ``opt['keyword1']`` / ``opt['keyword2']``,
    segments their context and repost text, extracts the top-20 sub-keywords
    from each, and prints the extraction results and their similarity score.
    """
    kw_a = Keyword(opt['keyword1'], opt['data_dir'])
    kw_b = Keyword(opt['keyword2'], opt['data_dir'])

    # Segment the four text collections in one pass; the extractor expects
    # them in the order: context A, context B, repost A, repost B.
    corpus = Segmentation().segmentation([
        kw_a.get_all_context_text(),
        kw_b.get_all_context_text(),
        kw_a.get_all_repost_text(),
        kw_b.get_all_repost_text(),
    ])

    extractor = SubKeyExtractor()
    extractor.fit(corpus)
    results = extractor.extract(corpus, 20)
    print(results)

    # results[0]/results[2] belong to keyword A, results[1]/results[3] to B
    # (context vs. repost sub-keywords, respectively) — mirrors the corpus order.
    scorer = SimilarityScore(topn=20)
    score = scorer.score(
        kw_a.get_top_context_text(), results[0], results[2],
        kw_b.get_top_context_text(), results[1], results[3],
    )
    print(score)
    print('done')
def pre_processing(image_file, file):
    """Prepare a raw scanned image for character recognition.

    Pipeline: crop the image to its biggest connected component (dropping
    calibration marks / background), binarize it, then segment it into
    individual character crops.

    Args:
        image_file: Raw input image in whatever form
            ``Remove_Calibration.cutout_img`` expects — TODO confirm.
        file: Original file name; unused here but kept for interface
            compatibility (it was only consumed by now-removed debug saves).

    Returns:
        Tuple ``(cropped_characters, row, no_ofChar)``: the list of
        segmented character images, the number of rows detected, and the
        total character count.
    """
    # Keep only the largest connected component of the image.
    biggest_component = Remove_Calibration.cutout_img(image_file)
    # Threshold to black/white so segmentation can find character boundaries.
    bin_image = Binarization.binarize(biggest_component)
    # Split the binary image into per-character crops plus row/char counts.
    cropped_characters, row, no_ofChar = Segmentation.segmentation(bin_image)
    return cropped_characters, row, no_ofChar