def test_non_overlapping_blocks_overlap_case(self):
    """Expect the block for 'in the in the bleach' among the repeating blocks.

    Two plain witnesses are scored through a Scorer built on a token index;
    the block spanning token ranges 0-4 and 6-10 must be in the result.
    """
    collation = Collation()
    for sigil, text in (("W1", "in the in the bleach"),
                        ("W2", "in the in the bleach in the")):
        collation.add_plain_witness(sigil, text)
    scorer = Scorer(TokenIndex.create_token_index(collation))
    found = scorer._get_non_overlapping_repeating_blocks()
    expected = Block(RangeSet("0-4, 6-10"))  # in the in the bleach
    self.assertIn(expected, found)
Exemple #2
0
 def test_blocks_splitting_token_case(self):
     """Expect the 'a c b' block when W1 carries an extra trailing 'c'.

     The block covering token ranges 0-2 and 5-7 must be among the
     non-overlapping repeating blocks reported by the Scorer.
     """
     collation = Collation()
     for sigil, text in (("W1", "a c b c"), ("W2", "a c b")):
         collation.add_plain_witness(sigil, text)
     found = Scorer(collation)._get_non_overlapping_repeating_blocks()
     expected = Block(RangeSet("0-2, 5-7"))  # a c b
     self.assertIn(expected, found)
Exemple #3
0
 def test_non_overlapping_blocks_black_cat(self):
     """Two identical witnesses must yield exactly one block.

     Both witnesses read 'the black cat'; the only expected block spans
     token ranges 0-2 and 4-6.
     """
     collation = Collation()
     for sigil in ("W1", "W2"):
         collation.add_plain_witness(sigil, "the black cat")
     found = Scorer(collation)._get_non_overlapping_repeating_blocks()
     self.assertEqual([Block(RangeSet("0-2, 4-6"))], found)
Exemple #4
0
 def test_block_witnesses_Hermans_case_two_witnesses(self):
     """Both witnesses must decompose into the same two blocks.

     W1 contains an extra 'K !' between the shared runs; the block witness
     of each witness is still expected to render as
     ['a b c d F g h i !', 'q r s t'].
     """
     collation = Collation()
     collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
     collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
     algorithm = Scorer(collation)
     expected = ["a b c d F g h i !", "q r s t"]
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     block_witness = algorithm._get_block_witness(collation.witnesses[0])
     self.assertEqual(expected, block_witness.debug())
     block_witness = algorithm._get_block_witness(collation.witnesses[1])
     self.assertEqual(expected, block_witness.debug())
 def test_non_overlapping_blocks_Hermans(self):
     """Expect both shared runs as blocks when scoring via a token index.

     The blocks for 'a b c d F g h i !' (ranges 0-8, 16-24) and 'q r s t'
     (ranges 11-14, 25-28) must be among the reported blocks.
     """
     collation = Collation()
     for sigil, text in (("W1", "a b c d F g h i ! K ! q r s t"),
                         ("W2", "a b c d F g h i ! q r s t")):
         collation.add_plain_witness(sigil, text)
     scorer = Scorer(TokenIndex.create_token_index(collation))
     found = scorer._get_non_overlapping_repeating_blocks()
     first_run = Block(RangeSet("0-8, 16-24"))  # a b c d F g h i !
     self.assertIn(first_run, found)
     second_run = Block(RangeSet("11-14, 25-28"))  # q r s t
     self.assertIn(second_run, found)
 def test_filter_potential_blocks(self):
     """Filtering must leave no potential blocks for 'a a' versus 'a'.

     The LCP intervals of the collation's extended suffix array are handed
     to Scorer.filter_potential_blocks; afterwards the same collection is
     asserted to be falsy.
     """
     collation = Collation()
     for sigil, text in (("W1", "a a"), ("w2", "a")):
         collation.add_plain_witness(sigil, text)
     suffix_array = collation.to_extended_suffix_array()
     candidates = suffix_array.split_lcp_array_into_intervals()
     scorer = Scorer(collation)
     scorer.filter_potential_blocks(candidates)
     self.assertFalse(candidates)
 def test_non_overlapping_blocks_Hermans(self):
     """Expect both shared runs as blocks when scoring the collation directly.

     The blocks for 'a b c d F g h i !' (ranges 0-8, 17-25) and 'q r s t'
     (ranges 11-14, 26-29) must be among the reported blocks.
     """
     collation = Collation()
     for sigil, text in (("W1", "a b c d F g h i ! K ! q r s t"),
                         ("W2", "a b c d F g h i ! q r s t")):
         collation.add_plain_witness(sigil, text)
     found = Scorer(collation)._get_non_overlapping_repeating_blocks()
     first_run = Block(RangeSet("0-8, 17-25"))  # a b c d F g h i !
     self.assertIn(first_run, found)
     second_run = Block(RangeSet("11-14, 26-29"))  # q r s t
     self.assertIn(second_run, found)
Exemple #8
0
 def test_blocks_failing_transposition_use_case_old_algorithm(self):
     """Transposed phrases must yield exactly three blocks, in order.

     W1 is 'the cat and the dog' and W2 is 'the dog and the cat'; the
     result is compared for equality against the blocks with ranges
     '0-1, 9-10', '3-4, 6-7' and '2, 8'.
     """
     collation = Collation()
     for sigil, text in (("W1", "the cat and the dog"),
                         ("W2", "the dog and the cat")):
         collation.add_plain_witness(sigil, text)
     found = Scorer(collation)._get_non_overlapping_repeating_blocks()
     expected = [Block(RangeSet(spec))
                 for spec in ("0-1, 9-10", "3-4, 6-7", "2, 8")]
     self.assertEqual(expected, found)
 def __init__(self,
              collation,
              near_match=False,
              astar=False,
              debug_scores=False):
     """Set up the aligner for *collation* using the a* alignment function.

     Args:
         collation: stored on the instance and handed to the Scorer.
         near_match: passed to Scorer as its second positional argument.
         astar: accepted but not read by this constructor; the a* aligner
             is selected regardless of its value.
         debug_scores: stored on the instance as-is.
     """
     self.collation, self.debug_scores = collation, debug_scores
     self.scorer = Scorer(collation, near_match)
     # The notice is printed on every construction.
     print("INFO: Aligning using a* search algorithm. BETA quality.")
     self.align_function = self._align_astar
Exemple #10
0
 def test_block_witnesses_Hermans_case(self):
     """Check the block decomposition of each of three witnesses.

     W1 and W2 share the token 'F' where W3 has 'E', so W1 and W2 are
     expected to render as four blocks and W3 as three.
     """
     collation = Collation()
     collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
     collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
     collation.add_plain_witness("W3", "a b c d E g h i ! q r s t")
     algorithm = Scorer(collation)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     block_witness1 = algorithm._get_block_witness(collation.witnesses[0])
     self.assertEqual(["a b c d", "F", "g h i", "! q r s t"], block_witness1.debug())
     block_witness2 = algorithm._get_block_witness(collation.witnesses[1])
     self.assertEqual(["a b c d", "F", "g h i", "! q r s t"], block_witness2.debug())
     block_witness3 = algorithm._get_block_witness(collation.witnesses[2])
     self.assertEqual(["a b c d", "g h i", "! q r s t"], block_witness3.debug())
 def test_blocks_Hermans_case_three_witnesses(self):
     """Expect four specific blocks for three witnesses via a token index.

     Each expected block is checked for membership in the Scorer's
     non-overlapping repeating blocks, one assertion per block.
     """
     collation = Collation()
     for sigil, text in (("W1", "a b c d F g h i ! K ! q r s t"),
                         ("W2", "a b c d F g h i ! q r s t"),
                         ("W3", "a b c d E g h i ! q r s t")):
         collation.add_plain_witness(sigil, text)
     scorer = Scorer(TokenIndex.create_token_index(collation))
     found = scorer._get_non_overlapping_repeating_blocks()
     expected_ranges = (
         "0-3, 16-19, 30-33",     # a b c d
         "5-7, 21-23, 35-37",     # g h i
         "10-14, 24-28, 38-42",   # ! q r s t
         "4, 20",                 # F
     )
     for ranges in expected_ranges:
         self.assertIn(Block(RangeSet(ranges)), found)
 def test_blocks_Hermans_case_three_witnesses(self):
     """Expect four specific blocks for three witnesses scored directly.

     Each expected block is checked for membership in the Scorer's
     non-overlapping repeating blocks, one assertion per block.
     """
     collation = Collation()
     for sigil, text in (("W1", "a b c d F g h i ! K ! q r s t"),
                         ("W2", "a b c d F g h i ! q r s t"),
                         ("W3", "a b c d E g h i ! q r s t")):
         collation.add_plain_witness(sigil, text)
     found = Scorer(collation)._get_non_overlapping_repeating_blocks()
     expected_ranges = (
         "0-3, 17-20, 32-35",     # a b c d
         "5-7, 22-24, 37-39",     # g h i
         "10-14, 25-29, 40-44",   # ! q r s t
         "4, 21",                 # F
     )
     for ranges in expected_ranges:
         self.assertIn(Block(RangeSet(ranges)), found)
 def __init__(self,
              collation,
              near_match=False,
              debug_scores=False,
              detect_transpositions=False,
              properties_filter=None):
     """Set up the aligner for *collation* using the table alignment function.

     Args:
         collation: stored on the instance; its ``witnesses`` feed the
             TokenIndex.
         near_match: forwarded to Scorer as a keyword argument.
         debug_scores: stored on the instance as-is.
         detect_transpositions: stored on the instance as-is.
         properties_filter: forwarded to Scorer as a keyword argument.
     """
     self.collation = collation
     self.debug_scores = debug_scores
     self.detect_transpositions = detect_transpositions
     self.token_index = TokenIndex(collation.witnesses)
     # The Scorer is built on the token index, not on the collation itself.
     scorer_options = {
         "near_match": near_match,
         "properties_filter": properties_filter,
     }
     self.scorer = Scorer(self.token_index, **scorer_options)
     self.align_function = self._align_table