def test_non_overlapping_blocks_overlap_case(self):
    # Two witnesses in which the phrase "in the" repeats; the scorer is
    # built from a token index created for the collation.
    witness_texts = (
        ("W1", "in the in the bleach"),
        ("W2", "in the in the bleach in the"),
    )
    collation = Collation()
    for witness_id, text in witness_texts:
        collation.add_plain_witness(witness_id, text)
    token_index = TokenIndex.create_token_index(collation)
    scorer = Scorer(token_index)
    repeating_blocks = scorer._get_non_overlapping_repeating_blocks()
    expected_block = Block(RangeSet("0-4, 6-10"))  # in the in the bleach
    self.assertIn(expected_block, repeating_blocks)
def test_blocks_splitting_token_case(self):
    # W1 carries one more trailing "c" than W2; the block "a c b" must be
    # among the reported blocks.
    witness_texts = (
        ("W1", "a c b c"),
        ("W2", "a c b"),
    )
    collation = Collation()
    for witness_id, text in witness_texts:
        collation.add_plain_witness(witness_id, text)
    scorer = Scorer(collation)
    repeating_blocks = scorer._get_non_overlapping_repeating_blocks()
    expected_block = Block(RangeSet("0-2, 5-7"))  # a c b
    self.assertIn(expected_block, repeating_blocks)
def test_non_overlapping_blocks_black_cat(self):
    # Both witnesses hold the same text; exactly one block is expected.
    collation = Collation()
    for witness_id in ("W1", "W2"):
        collation.add_plain_witness(witness_id, "the black cat")
    scorer = Scorer(collation)
    repeating_blocks = scorer._get_non_overlapping_repeating_blocks()
    expected_blocks = [Block(RangeSet("0-2, 4-6"))]
    self.assertEqual(expected_blocks, repeating_blocks)
def test_block_witnesses_Hermans_case_two_witnesses(self):
    # For each of the two witnesses, the block witness produced by the
    # scorer must render (via debug()) as the same two blocks.
    # Uses assertEqual: the assertEquals alias was deprecated and no longer
    # exists in Python 3.12+, where calling it raises AttributeError.
    collation = Collation()
    collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
    collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
    algorithm = Scorer(collation)
    block_witness = algorithm._get_block_witness(collation.witnesses[0])
    self.assertEqual(["a b c d F g h i !", "q r s t"], block_witness.debug())
    block_witness = algorithm._get_block_witness(collation.witnesses[1])
    self.assertEqual(["a b c d F g h i !", "q r s t"], block_witness.debug())
def test_non_overlapping_blocks_Hermans(self):
    # The scorer is built from a token index created for the collation;
    # both expected blocks must be among the reported blocks.
    witness_texts = (
        ("W1", "a b c d F g h i ! K ! q r s t"),
        ("W2", "a b c d F g h i ! q r s t"),
    )
    collation = Collation()
    for witness_id, text in witness_texts:
        collation.add_plain_witness(witness_id, text)
    scorer = Scorer(TokenIndex.create_token_index(collation))
    repeating_blocks = scorer._get_non_overlapping_repeating_blocks()
    expected_ranges = (
        "0-8, 16-24",  # a b c d F g h i !
        "11-14, 25-28",  # q r s t
    )
    for ranges in expected_ranges:
        self.assertIn(Block(RangeSet(ranges)), repeating_blocks)
def test_filter_potential_blocks(self):
    # The return value of filter_potential_blocks is ignored; the assertion
    # inspects the very object that was passed in and expects it to be falsy.
    collation = Collation()
    for witness_id, text in (("W1", "a a"), ("w2", "a")):
        collation.add_plain_witness(witness_id, text)
    suffix_array = collation.to_extended_suffix_array()
    candidates = suffix_array.split_lcp_array_into_intervals()
    scorer = Scorer(collation)
    scorer.filter_potential_blocks(candidates)
    self.assertFalse(candidates)
def test_non_overlapping_blocks_Hermans(self):
    # The scorer is built directly from the collation; both expected blocks
    # must be among the reported blocks.
    witness_texts = (
        ("W1", "a b c d F g h i ! K ! q r s t"),
        ("W2", "a b c d F g h i ! q r s t"),
    )
    collation = Collation()
    for witness_id, text in witness_texts:
        collation.add_plain_witness(witness_id, text)
    scorer = Scorer(collation)
    repeating_blocks = scorer._get_non_overlapping_repeating_blocks()
    expected_ranges = (
        "0-8, 17-25",  # a b c d F g h i !
        "11-14, 26-29",  # q r s t
    )
    for ranges in expected_ranges:
        self.assertIn(Block(RangeSet(ranges)), repeating_blocks)
def test_blocks_failing_transposition_use_case_old_algorithm(self):
    # The two witnesses swap "cat" and "dog"; the reported blocks must equal
    # the three expected blocks, in exactly this order.
    witness_texts = (
        ("W1", "the cat and the dog"),
        ("W2", "the dog and the cat"),
    )
    collation = Collation()
    for witness_id, text in witness_texts:
        collation.add_plain_witness(witness_id, text)
    scorer = Scorer(collation)
    repeating_blocks = scorer._get_non_overlapping_repeating_blocks()
    expected_blocks = [
        Block(RangeSet(ranges))
        for ranges in ("0-1, 9-10", "3-4, 6-7", "2, 8")
    ]
    self.assertEqual(expected_blocks, repeating_blocks)
def __init__(self, collation, near_match=False, astar=False, debug_scores=False):
    """Store the collation, build a scorer and select the A* align routine.

    Args:
        collation: kept on the instance and passed to ``Scorer``.
        near_match: passed to ``Scorer`` as its second positional argument.
        astar: accepted for the caller's benefit but not read here; the
            A* routine is selected regardless of its value.
            NOTE(review): confirm whether a conditional was intended.
        debug_scores: kept on the instance.
    """
    self.debug_scores = debug_scores
    self.collation = collation
    scorer = Scorer(collation, near_match)
    self.scorer = scorer
    # The notice is written to stdout on every construction.
    notice = "INFO: Aligning using a* search algorithm. BETA quality."
    print(notice)
    self.align_function = self._align_astar
def test_block_witnesses_Hermans_case(self):
    # With three witnesses, the block witness of each one must render (via
    # debug()) as the expected list of blocks; W3 has no "F" block.
    # Uses assertEqual: the assertEquals alias was deprecated and no longer
    # exists in Python 3.12+, where calling it raises AttributeError.
    collation = Collation()
    collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
    collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
    collation.add_plain_witness("W3", "a b c d E g h i ! q r s t")
    algorithm = Scorer(collation)
    block_witness1 = algorithm._get_block_witness(collation.witnesses[0])
    self.assertEqual(["a b c d", "F", "g h i", "! q r s t"], block_witness1.debug())
    block_witness2 = algorithm._get_block_witness(collation.witnesses[1])
    self.assertEqual(["a b c d", "F", "g h i", "! q r s t"], block_witness2.debug())
    block_witness3 = algorithm._get_block_witness(collation.witnesses[2])
    self.assertEqual(["a b c d", "g h i", "! q r s t"], block_witness3.debug())
def test_blocks_Hermans_case_three_witnesses(self):
    # The scorer is built from a token index created for the collation;
    # each of the four expected blocks must be among the reported blocks.
    witness_texts = (
        ("W1", "a b c d F g h i ! K ! q r s t"),
        ("W2", "a b c d F g h i ! q r s t"),
        ("W3", "a b c d E g h i ! q r s t"),
    )
    collation = Collation()
    for witness_id, text in witness_texts:
        collation.add_plain_witness(witness_id, text)
    scorer = Scorer(TokenIndex.create_token_index(collation))
    repeating_blocks = scorer._get_non_overlapping_repeating_blocks()
    expected_ranges = (
        "0-3, 16-19, 30-33",  # a b c d
        "5-7, 21-23, 35-37",  # g h i
        "10-14, 24-28, 38-42",  # ! q r s t
        "4, 20",  # F
    )
    for ranges in expected_ranges:
        self.assertIn(Block(RangeSet(ranges)), repeating_blocks)
def test_blocks_Hermans_case_three_witnesses(self):
    # The scorer is built directly from the collation; each of the four
    # expected blocks must be among the reported blocks.
    witness_texts = (
        ("W1", "a b c d F g h i ! K ! q r s t"),
        ("W2", "a b c d F g h i ! q r s t"),
        ("W3", "a b c d E g h i ! q r s t"),
    )
    collation = Collation()
    for witness_id, text in witness_texts:
        collation.add_plain_witness(witness_id, text)
    scorer = Scorer(collation)
    repeating_blocks = scorer._get_non_overlapping_repeating_blocks()
    expected_ranges = (
        "0-3, 17-20, 32-35",  # a b c d
        "5-7, 22-24, 37-39",  # g h i
        "10-14, 25-29, 40-44",  # ! q r s t
        "4, 21",  # F
    )
    for ranges in expected_ranges:
        self.assertIn(Block(RangeSet(ranges)), repeating_blocks)
def __init__(self, collation, near_match=False, debug_scores=False, detect_transpositions=False, properties_filter=None):
    """Store the options, index the witnesses and build the scorer.

    Args:
        collation: kept on the instance; its ``witnesses`` attribute is
            handed to ``TokenIndex``.
        near_match: forwarded to ``Scorer`` as the ``near_match`` keyword.
        debug_scores: kept on the instance.
        detect_transpositions: kept on the instance.
        properties_filter: forwarded to ``Scorer`` as the
            ``properties_filter`` keyword.
    """
    self.collation = collation
    self.debug_scores = debug_scores
    self.detect_transpositions = detect_transpositions

    token_index = TokenIndex(collation.witnesses)
    self.token_index = token_index

    scorer_options = {
        "near_match": near_match,
        "properties_filter": properties_filter,
    }
    self.scorer = Scorer(token_index, **scorer_options)

    # The table-based routine is the one used for alignment.
    self.align_function = self._align_table