def _compare_texts(self, text_a, text_b):
    """Run a sliding-window comparison of two texts and store the result.

    Both arguments are mappings read for the keys ``author``, ``language``,
    ``title`` and ``text``. The comparisons produced by ``TextReuse`` are
    handed to ``self._save_comparisons``; nothing is returned.
    """
    # Metadata references passed to TextReuse; a text's title is stored
    # under the 'work' key.
    text_ref_a, text_ref_b = (
        {
            'author': source['author'],
            'language': source['language'],
            'work': source['title'],
        }
        for source in (text_a, text_b)
    )

    # Progress output naming the pair currently being compared.
    print(
        " -- comparing",
        text_a['author'],
        text_a['title'],
        "to",
        text_b['author'],
        text_b['title'],
    )

    # One TextReuse instance per pair, constructed with input sanitizing
    # switched on.
    reuse = TextReuse(text_ref_a, text_ref_b, sanitize_input=True)

    # Compare the raw texts and pass the comparisons on for saving.
    self._save_comparisons(
        reuse.compare_sliding_window(text_a['text'], text_b['text'])
    )
def test_distance_sliding_window(self):
    """Check one ratio from a sliding-window comparison of the demo passages."""
    results = TextReuse().compare_sliding_window(demo_verg, demo_prop)
    # Spot-check a single comparison entry against its expected ratio.
    observed_ratio = results[19][3].ratio
    self.assertEqual(observed_ratio, 0.64)
def test_distance_sliding_window(self):
    """Check one ratio from a sliding-window comparison of the demo passages."""
    results = TextReuse().compare_sliding_window(demo_verg, demo_prop)
    # Spot-check a single comparison entry against its expected ratio.
    observed_ratio = results[19][3].ratio
    self.assertEqual(observed_ratio, 0.64)
def test_distance_sentences(self):
    """Check one ratio from a sentence-level comparison of the demo passages."""
    results = TextReuse().compare_sentences(demo_verg, demo_prop, 'latin')
    # Spot-check a single comparison entry against its expected ratio.
    observed_ratio = results[1][0].ratio
    self.assertEqual(observed_ratio, 0.40)
def test_distance_sentences(self):
    """Check one ratio from a sentence-level comparison of the demo passages."""
    results = TextReuse().compare_sentences(demo_verg, demo_prop, 'latin')
    # Spot-check a single comparison entry against its expected ratio.
    observed_ratio = results[1][0].ratio
    self.assertEqual(observed_ratio, 0.40)