def test_non_overlapping_blocks_overlap_case(self):
    """The repeated phrase overlaps itself; the longest repeat must still be found whole."""
    collation = Collation()
    collation.add_plain_witness("W1", "in the in the bleach")
    collation.add_plain_witness("W2", "in the in the bleach in the")
    found_blocks = Scorer(collation)._get_non_overlapping_repeating_blocks()
    # "in the in the bleach" occupies token ranges 0-4 and 7-11
    self.assertIn(Block(RangeSet("0-4, 7-11")), found_blocks)
def test_non_overlapping_blocks_black_cat(self):
    """Two identical witnesses yield exactly one repeating block."""
    collation = Collation()
    collation.add_plain_witness("W1", "the black cat")
    collation.add_plain_witness("W2", "the black cat")
    scorer = Scorer(collation)
    expected_block = Block(RangeSet("0-2, 4-6"))
    self.assertEqual([expected_block], scorer._get_non_overlapping_repeating_blocks())
def test_non_overlapping_blocks_overlap_case(self):
    """Overlap case, token-index API: the full repeated phrase is one block."""
    collation = Collation()
    collation.add_plain_witness("W1", "in the in the bleach")
    collation.add_plain_witness("W2", "in the in the bleach in the")
    scorer = Scorer(TokenIndex.create_token_index(collation))
    result = scorer._get_non_overlapping_repeating_blocks()
    # in the in the bleach
    self.assertIn(Block(RangeSet("0-4, 6-10")), result)
def test_non_overlapping_blocks_Hermans(self):
    """Hermans case, token-index API: two non-overlapping repeats are found."""
    collation = Collation()
    collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
    collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
    scorer = Scorer(TokenIndex.create_token_index(collation))
    found = scorer._get_non_overlapping_repeating_blocks()
    # a b c d F g h i !
    self.assertIn(Block(RangeSet("0-8, 16-24")), found)
    # q r s t
    self.assertIn(Block(RangeSet("11-14, 25-28")), found)
def test_blocks_splitting_token_case(self):
    """A token repeated outside the block ('c') must not split the 'a c b' block."""
    collation = Collation()
    collation.add_plain_witness("W1", "a c b c")
    collation.add_plain_witness("W2", "a c b")
    repeating = Scorer(collation)._get_non_overlapping_repeating_blocks()
    # a c b
    self.assertIn(Block(RangeSet("0-2, 5-7")), repeating)
def test_non_overlapping_blocks_Hermans(self):
    """Hermans case: two non-overlapping repeats across the two witnesses."""
    collation = Collation()
    collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
    collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
    found = Scorer(collation)._get_non_overlapping_repeating_blocks()
    # a b c d F g h i !
    self.assertIn(Block(RangeSet("0-8, 17-25")), found)
    # q r s t
    self.assertIn(Block(RangeSet("11-14, 26-29")), found)
def test_2(self):
    """Three witnesses: the shared phrase and the extra trailing 'in the' are both blocks."""
    collation = Collation()
    collation.add_plain_witness("W1", "in the in the bleach")
    for sigil in ("W2", "W3"):
        collation.add_plain_witness(sigil, "in the in the bleach in the")
    scorer = Scorer(TokenIndex.create_token_index(collation))
    result = scorer._get_non_overlapping_repeating_blocks()
    # in the in the bleach
    self.assertIn(Block(RangeSet("0-4, 6-10, 14-18")), result)
    # in the
    self.assertIn(Block(RangeSet("11-12, 19-20")), result)
def test_filter_potential_blocks(self):
    """Candidate blocks that do not occur in every witness are filtered out entirely."""
    collation = Collation()
    collation.add_plain_witness("W1", "a a")
    # NOTE(review): sigil "w2" is lower-case, unlike "W2" elsewhere — confirm intended
    collation.add_plain_witness("w2", "a")
    extended_suffix_array = collation.to_extended_suffix_array()
    candidates = extended_suffix_array.split_lcp_array_into_intervals()
    Scorer(collation).filter_potential_blocks(candidates)
    self.assertFalse(candidates)
def test_block_witnesses_Hermans_case_two_witnesses(self):
    """Both witnesses should expose the same two block segments.

    Fix: ``assertEquals`` is a deprecated alias (removed in Python 3.12);
    use ``assertEqual`` instead.
    """
    collation = Collation()
    collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
    collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
    algorithm = Scorer(collation)
    block_witness = algorithm._get_block_witness(collation.witnesses[0])
    self.assertEqual(["a b c d F g h i !", "q r s t"], block_witness.debug())
    block_witness = algorithm._get_block_witness(collation.witnesses[1])
    self.assertEqual(["a b c d F g h i !", "q r s t"], block_witness.debug())
def test_blocks_failing_transposition_use_case_old_algorithm(self):
    """Transposed phrases ('the cat' / 'the dog') yield exactly these three blocks, in order."""
    collation = Collation()
    collation.add_plain_witness("W1", "the cat and the dog")
    collation.add_plain_witness("W2", "the dog and the cat")
    actual = Scorer(collation)._get_non_overlapping_repeating_blocks()
    expected = [
        Block(RangeSet("0-1, 9-10")),
        Block(RangeSet("3-4, 6-7")),
        Block(RangeSet("2, 8")),
    ]
    self.assertEqual(expected, actual)
def __init__(self, collation, near_match=False, astar=False, debug_scores=False):
    """Set up the experimental aligner; alignment always uses the a* search.

    :param collation: the Collation whose witnesses will be aligned
    :param near_match: forwarded to the Scorer
    :param astar: accepted but never read here — alignment is unconditionally
        a* (NOTE(review): confirm the ignored flag is intentional; compare the
        variant that branches on it)
    :param debug_scores: flag stored for later use
    """
    self.collation = collation
    self.debug_scores = debug_scores
    self.scorer = Scorer(collation, near_match)
    print("INFO: Aligning using a* search algorithm. BETA quality.")
    self.align_function = self._align_astar
def test_blocks_Hermans_case_three_witnesses(self):
    """Hermans case with three witnesses (token-index API): four repeats expected."""
    collation = Collation()
    collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
    collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
    collation.add_plain_witness("W3", "a b c d E g h i ! q r s t")
    scorer = Scorer(TokenIndex.create_token_index(collation))
    found = scorer._get_non_overlapping_repeating_blocks()
    self.assertIn(Block(RangeSet("0-3, 16-19, 30-33")), found)    # a b c d
    self.assertIn(Block(RangeSet("5-7, 21-23, 35-37")), found)    # g h i
    self.assertIn(Block(RangeSet("10-14, 24-28, 38-42")), found)  # ! q r s t
    self.assertIn(Block(RangeSet("4, 20")), found)                # F
def test_blocks_Hermans_case_three_witnesses(self):
    """Hermans case with three witnesses: four repeats expected."""
    collation = Collation()
    collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
    collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
    collation.add_plain_witness("W3", "a b c d E g h i ! q r s t")
    found = Scorer(collation)._get_non_overlapping_repeating_blocks()
    self.assertIn(Block(RangeSet("0-3, 17-20, 32-35")), found)    # a b c d
    self.assertIn(Block(RangeSet("5-7, 22-24, 37-39")), found)    # g h i
    self.assertIn(Block(RangeSet("10-14, 25-29, 40-44")), found)  # ! q r s t
    self.assertIn(Block(RangeSet("4, 21")), found)                # F
def test_block_witnesses_Hermans_case(self):
    """Each of the three witnesses exposes its expected block segments.

    Fix: ``assertEquals`` is a deprecated alias (removed in Python 3.12);
    use ``assertEqual`` instead.
    """
    collation = Collation()
    collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
    collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
    collation.add_plain_witness("W3", "a b c d E g h i ! q r s t")
    algorithm = Scorer(collation)
    block_witness1 = algorithm._get_block_witness(collation.witnesses[0])
    self.assertEqual(["a b c d", "F", "g h i", "! q r s t"], block_witness1.debug())
    block_witness2 = algorithm._get_block_witness(collation.witnesses[1])
    self.assertEqual(["a b c d", "F", "g h i", "! q r s t"], block_witness2.debug())
    # W3 has "E" where W1/W2 have "F", so "F" is not a block for this witness
    block_witness3 = algorithm._get_block_witness(collation.witnesses[2])
    self.assertEqual(["a b c d", "g h i", "! q r s t"], block_witness3.debug())
def __init__(self, collation, near_match=False, debug_scores=False, detect_transpositions=False,
             properties_filter=None):
    """Table-based aligner built on a TokenIndex of the collation's witnesses.

    :param collation: the Collation whose witnesses will be aligned
    :param near_match: forwarded to the Scorer
    :param debug_scores: flag stored for later use
    :param detect_transpositions: flag stored for later use
    :param properties_filter: optional predicate forwarded to the Scorer
    """
    self.collation = collation
    self.debug_scores = debug_scores
    self.detect_transpositions = detect_transpositions
    # index over all witness tokens; the scorer works against this index
    self.token_index = TokenIndex(collation.witnesses)
    self.scorer = Scorer(self.token_index, near_match=near_match, properties_filter=properties_filter)
    self.align_function = self._align_table
def __init__(self, collation, near_match=False, astar=False, debug_scores=False):
    """Select the alignment strategy: edit-graph table (default) or a* search.

    :param collation: the Collation whose witnesses will be aligned
    :param near_match: forwarded to the Scorer
    :param astar: when True, use the experimental a* search instead of the table
    :param debug_scores: flag stored for later use
    """
    self.collation = collation
    self.debug_scores = debug_scores
    self.scorer = Scorer(collation, near_match)
    if not astar:
        self.align_function = self._align_table
    else:
        print("INFO: Aligning using a* search algorithm. BETA quality.")
        self.align_function = self._align_astar
class ExperimentalAstarAligner(CollationAlgorithm):
    """Aligner that merges witnesses into a variant graph via an experimental a* edit-graph search."""

    def __init__(self, collation, near_match=False, astar=False, debug_scores=False):
        # NOTE(review): the `astar` parameter is never read — this aligner
        # unconditionally uses a*; confirm intentional.
        self.collation = collation
        self.debug_scores = debug_scores
        self.scorer = Scorer(collation, near_match)
        print("INFO: Aligning using a* search algorithm. BETA quality.")
        self.align_function = self._align_astar

    def collate(self, graph, collation):
        '''
        Align all witnesses into the variant graph one at a time, growing a
        "superbase" (self.new_superbase) that accumulates everything aligned so far.

        :type graph: VariantGraph
        :type collation: Collation
        '''
        # Build the variant graph for the first witness
        # this is easy: generate a vertex for every token
        first_witness = collation.witnesses[0]
        tokens = first_witness.tokens()
        token_to_vertex = self.merge(graph, first_witness.sigil, tokens)

        # let the scorer prepare the first witness
        self.scorer.prepare_witness(first_witness)

        # construct superbase
        superbase = tokens

        # align witness 2 - n
        for x in range(1, len(collation.witnesses)):
            next_witness = collation.witnesses[x]

            # let the scorer prepare the next witness
            self.scorer.prepare_witness(next_witness)

            # # FOR VERIFICATION!
            # alignment = self._align_table(superbase, next_witness, token_to_vertex)
            # self.table2 = self.table

            # alignment = token -> vertex
            alignment = self.align_function(superbase, next_witness, token_to_vertex)

            # merge
            token_to_vertex.update(
                self.merge(graph, next_witness.sigil, next_witness.tokens(), alignment))

            # print("actual")
            # self._debug_edit_graph_table(self.table)
            # print("expected")
            # self._debug_edit_graph_table(self.table2)

            # change superbase
            superbase = self.new_superbase

        if self.debug_scores:
            self._debug_edit_graph_table(self.table)

    def _debug_edit_graph_table(self, table):
        """Print the edit-graph table row by row (debug helper); returns the PrettyTable."""
        # print the table horizontal
        x = PrettyTable()
        x.header = False
        for y in range(0, len(table)):
            cells = table[y]
            x.add_row(cells)
        # alignment can only be set after the field names are known.
        # since add_row sets the field names, it has to be set after x.add_row(cells)
        x.align = "l"
        print(x)
        return x

    # method is here for debug purposes
    # at no time in real life a complete table is needed
    def _create_heuristic_table(self, superbase, witness):
        """Precompute the a* heuristic value for every cell of the full edit-graph table."""
        self.tokens_witness_a = superbase
        self.tokens_witness_b = witness.tokens()
        self.length_witness_a = len(self.tokens_witness_a)
        self.length_witness_b = len(self.tokens_witness_b)
        aligner = AstarEditGraphAligner(self.tokens_witness_a, self.tokens_witness_b, self.scorer)
        self.table = [[AstarEditGraphNode(aligner, y, x)
                       for x in range(self.length_witness_a + 1)]
                      for y in range(self.length_witness_b + 1)]
        self.heuristic_table = [[0 for x in range(self.length_witness_a + 1)]
                                for y in range(self.length_witness_b + 1)]
        for y in range(self.length_witness_b + 1):
            for x in range(self.length_witness_a + 1):
                # TODO: I could create node dynamically
                # TODO: creating empty integer is also not really needed
                self.heuristic_table[y][x] = aligner.heuristic(self.table[y][x])

    def _align_astar(self, superbase, witness, token_to_vertex, control_table=None):
        """Run the a* search and convert the resulting path into a token->vertex alignment.

        Side effect: rebuilds self.new_superbase from matched tokens plus the
        unmatched segments between matches.
        """
        self.tokens_witness_a = superbase
        self.tokens_witness_b = witness.tokens()
        self.length_witness_a = len(self.tokens_witness_a)
        self.length_witness_b = len(self.tokens_witness_b)
        self.control_table = control_table
        aligner = AstarEditGraphAligner(self.tokens_witness_a, self.tokens_witness_b, self.scorer)
        self.table = [[AstarEditGraphNode(aligner, y, x)
                       for x in range(self.length_witness_a + 1)]
                      for y in range(self.length_witness_b + 1)]
        aligner.table = self.table
        start = self.table[0][0]
        path = aligner.search(start, self.control_table)
        self._debug_path = path
        # transform path into an alignment
        alignment = {}

        # segment stuff
        # note we traverse from left to right!
        self.last_x = 0
        self.last_y = 0
        self.new_superbase = []

        for element in path:
            # print(element.y, element.x)
            if element.match == True:
                # process segments
                self.newer_add_to_superbase(self.tokens_witness_a, self.tokens_witness_b,
                                            element.x, element.y)
                self.last_x = element.x
                self.last_y = element.y
                # process alignment
                token = self.tokens_witness_a[element.x - 1]
                token2 = self.tokens_witness_b[element.y - 1]
                vertex = token_to_vertex[token]
                alignment[token2] = vertex

                # add match to superbase
                self.new_superbase.append(token)

        # process additions/omissions in the begin of the superbase/witness
        self.newer_add_to_superbase(self.tokens_witness_a, self.tokens_witness_b,
                                    self.length_witness_a, self.length_witness_b)

        return alignment

    def newer_add_to_superbase(self, witness_a, witness_b, x, y):
        """Append the unmatched tokens between the previous match and (x, y) to the superbase."""
        if x - self.last_x - 1 > 0 or y - self.last_y - 1 > 0:
            # create new segment
            omitted_base = witness_a[self.last_x:x - 1]
            added_witness = witness_b[self.last_y:y - 1]
            self.new_superbase += omitted_base
            self.new_superbase += added_witness
class ExperimentalAstarAligner(CollationAlgorithm):
    """Aligner that merges witnesses into a variant graph via an experimental a* edit-graph search."""

    def __init__(self, collation, near_match=False, astar=False, debug_scores=False):
        # NOTE(review): the `astar` parameter is never read — this aligner
        # unconditionally uses a*; confirm intentional.
        self.collation = collation
        self.debug_scores = debug_scores
        self.scorer = Scorer(collation, near_match)
        print("INFO: Aligning using a* search algorithm. BETA quality.")
        self.align_function = self._align_astar

    def collate(self, graph, collation):
        '''
        Align all witnesses into the variant graph one at a time, growing a
        "superbase" (self.new_superbase) that accumulates everything aligned so far.

        :type graph: VariantGraph
        :type collation: Collation
        '''
        # Build the variant graph for the first witness
        # this is easy: generate a vertex for every token
        first_witness = collation.witnesses[0]
        tokens = first_witness.tokens()
        token_to_vertex = self.merge(graph, first_witness.sigil, tokens)

        # let the scorer prepare the first witness
        self.scorer.prepare_witness(first_witness)

        # construct superbase
        superbase = tokens

        # align witness 2 - n
        for x in range(1, len(collation.witnesses)):
            next_witness = collation.witnesses[x]

            # let the scorer prepare the next witness
            self.scorer.prepare_witness(next_witness)

            # # FOR VERIFICATION!
            # alignment = self._align_table(superbase, next_witness, token_to_vertex)
            # self.table2 = self.table

            # alignment = token -> vertex
            alignment = self.align_function(superbase, next_witness, token_to_vertex)

            # merge
            token_to_vertex.update(self.merge(graph, next_witness.sigil, next_witness.tokens(), alignment))

            # print("actual")
            # self._debug_edit_graph_table(self.table)
            # print("expected")
            # self._debug_edit_graph_table(self.table2)

            # change superbase
            superbase = self.new_superbase

        if self.debug_scores:
            self._debug_edit_graph_table(self.table)

    def _debug_edit_graph_table(self, table):
        """Print the edit-graph table row by row (debug helper); returns the PrettyTable."""
        # print the table horizontal
        x = PrettyTable()
        x.header = False
        for y in range(0, len(table)):
            cells = table[y]
            x.add_row(cells)
        # alignment can only be set after the field names are known.
        # since add_row sets the field names, it has to be set after x.add_row(cells)
        x.align = "l"
        print(x)
        return x

    # method is here for debug purposes
    # at no time in real life a complete table is needed
    def _create_heuristic_table(self, superbase, witness):
        """Precompute the a* heuristic value for every cell of the full edit-graph table."""
        self.tokens_witness_a = superbase
        self.tokens_witness_b = witness.tokens()
        self.length_witness_a = len(self.tokens_witness_a)
        self.length_witness_b = len(self.tokens_witness_b)
        aligner = AstarEditGraphAligner(self.tokens_witness_a, self.tokens_witness_b, self.scorer)
        self.table = [[AstarEditGraphNode(aligner, y, x) for x in range(self.length_witness_a + 1)]
                      for y in range(self.length_witness_b + 1)]
        self.heuristic_table = [[0 for x in range(self.length_witness_a + 1)]
                                for y in range(self.length_witness_b + 1)]
        for y in range(self.length_witness_b + 1):
            for x in range(self.length_witness_a + 1):
                # TODO: I could create node dynamically
                # TODO: creating empty integer is also not really needed
                self.heuristic_table[y][x] = aligner.heuristic(self.table[y][x])

    def _align_astar(self, superbase, witness, token_to_vertex, control_table=None):
        """Run the a* search and convert the resulting path into a token->vertex alignment.

        Side effect: rebuilds self.new_superbase from matched tokens plus the
        unmatched segments between matches.
        """
        self.tokens_witness_a = superbase
        self.tokens_witness_b = witness.tokens()
        self.length_witness_a = len(self.tokens_witness_a)
        self.length_witness_b = len(self.tokens_witness_b)
        self.control_table = control_table
        aligner = AstarEditGraphAligner(self.tokens_witness_a, self.tokens_witness_b, self.scorer)
        self.table = [[AstarEditGraphNode(aligner, y, x) for x in range(self.length_witness_a + 1)]
                      for y in range(self.length_witness_b + 1)]
        aligner.table = self.table
        start = self.table[0][0]
        path = aligner.search(start, self.control_table)
        self._debug_path = path
        # transform path into an alignment
        alignment = {}

        # segment stuff
        # note we traverse from left to right!
        self.last_x = 0
        self.last_y = 0
        self.new_superbase = []

        for element in path:
            # print(element.y, element.x)
            if element.match == True:
                # process segments
                self.newer_add_to_superbase(self.tokens_witness_a, self.tokens_witness_b,
                                            element.x, element.y)
                self.last_x = element.x
                self.last_y = element.y
                # process alignment
                token = self.tokens_witness_a[element.x - 1]
                token2 = self.tokens_witness_b[element.y - 1]
                vertex = token_to_vertex[token]
                alignment[token2] = vertex

                # add match to superbase
                self.new_superbase.append(token)

        # process additions/omissions in the begin of the superbase/witness
        self.newer_add_to_superbase(self.tokens_witness_a, self.tokens_witness_b,
                                    self.length_witness_a, self.length_witness_b)

        return alignment

    def newer_add_to_superbase(self, witness_a, witness_b, x, y):
        """Append the unmatched tokens between the previous match and (x, y) to the superbase."""
        if x - self.last_x - 1 > 0 or y - self.last_y - 1 > 0:
            # create new segment
            omitted_base = witness_a[self.last_x:x - 1]
            added_witness = witness_b[self.last_y:y - 1]
            self.new_superbase += omitted_base
            self.new_superbase += added_witness
class EditGraphAligner(CollationAlgorithm):
    """Aligner that scores a full edit-graph table diagonally and traces back the best path.

    Optionally (``astar=True``) uses the experimental a* search instead of the table.
    """

    def __init__(self, collation, near_match=False, astar=False, debug_scores=False):
        self.collation = collation
        self.debug_scores = debug_scores
        self.scorer = Scorer(collation, near_match)
        if not astar:
            self.align_function = self._align_table
        else:
            print("INFO: Aligning using a* search algorithm. BETA quality.")
            self.align_function = self._align_astar

    def collate(self, graph, collation):
        '''
        Align all witnesses into the variant graph one at a time, growing a
        "superbase" (self.new_superbase) that accumulates everything aligned so far.

        :type graph: VariantGraph
        :type collation: Collation
        '''
        # Build the variant graph for the first witness
        # this is easy: generate a vertex for every token
        first_witness = collation.witnesses[0]
        tokens = first_witness.tokens()
        token_to_vertex = self.merge(graph, first_witness.sigil, tokens)

        # let the scorer prepare the first witness
        self.scorer.prepare_witness(first_witness)

        # construct superbase
        superbase = tokens

        # align witness 2 - n
        for x in range(1, len(collation.witnesses)):
            next_witness = collation.witnesses[x]

            # let the scorer prepare the next witness
            self.scorer.prepare_witness(next_witness)

            # # FOR VERIFICATION!
            # alignment = self._align_table(superbase, next_witness, token_to_vertex)
            # self.table2 = self.table

            # alignment = token -> vertex
            alignment = self.align_function(superbase, next_witness, token_to_vertex)

            # merge
            token_to_vertex.update(
                self.merge(graph, next_witness.sigil, next_witness.tokens(), alignment))

            # print("actual")
            # self._debug_edit_graph_table(self.table)
            # print("expected")
            # self._debug_edit_graph_table(self.table2)

            # change superbase
            superbase = self.new_superbase

        if self.debug_scores:
            self._debug_edit_graph_table(self.table)

    def _align_astar(self, superbase, witness, token_to_vertex, control_table=None):
        """Run the a* search and convert the resulting path into a token->vertex alignment.

        Side effect: rebuilds self.new_superbase from matched tokens plus the
        unmatched segments between matches.
        """
        self.tokens_witness_a = superbase
        self.tokens_witness_b = witness.tokens()
        self.length_witness_a = len(self.tokens_witness_a)
        self.length_witness_b = len(self.tokens_witness_b)
        self.control_table = control_table
        aligner = AstarEditGraphAligner(self.tokens_witness_a, self.tokens_witness_b, self.scorer)
        self.table = [[AstarEditGraphNode(aligner, y, x)
                       for x in range(self.length_witness_a + 1)]
                      for y in range(self.length_witness_b + 1)]
        aligner.table = self.table
        start = self.table[0][0]
        path = aligner.search(start, self.control_table)
        # transform path into an alignment
        alignment = {}

        # segment stuff
        # note we traverse from left to right!
        self.last_x = 0
        self.last_y = 0
        self.new_superbase = []

        for element in path:
            # print(element.y, element.x)
            if element.match == True:
                # process segments
                self.newer_add_to_superbase(self.tokens_witness_a, self.tokens_witness_b,
                                            element.x, element.y)
                self.last_x = element.x
                self.last_y = element.y
                # process alignment
                token = self.tokens_witness_a[element.x - 1]
                token2 = self.tokens_witness_b[element.y - 1]
                vertex = token_to_vertex[token]
                alignment[token2] = vertex

                # add match to superbase
                self.new_superbase.append(token)

        # process additions/omissions in the begin of the superbase/witness
        self.newer_add_to_superbase(self.tokens_witness_a, self.tokens_witness_b,
                                    self.length_witness_a, self.length_witness_b)

        return alignment

    def newer_add_to_superbase(self, witness_a, witness_b, x, y):
        """Append the unmatched tokens between the previous match and (x, y) to the superbase."""
        if x - self.last_x - 1 > 0 or y - self.last_y - 1 > 0:
            # create new segment
            omitted_base = witness_a[self.last_x:x - 1]
            added_witness = witness_b[self.last_y:y - 1]
            self.new_superbase += omitted_base
            self.new_superbase += added_witness

    def _align_table(self, superbase, witness, token_to_vertex):
        """Score the full edit-graph table, then trace back from the lower-right cell
        to produce the token->vertex alignment and rebuild self.new_superbase."""
        self.tokens_witness_a = superbase
        self.tokens_witness_b = witness.tokens()
        self.length_witness_a = len(self.tokens_witness_a)
        self.length_witness_b = len(self.tokens_witness_b)
        self.table = [[EditGraphNode() for _ in range(self.length_witness_a + 1)]
                      for _ in range(self.length_witness_b + 1)]

        # per diagonal calculate the score (taking into account the three surrounding nodes)
        self.traverse_diagonally()

        alignment = {}
        # segment stuff
        # note we traverse from right to left!
        self.last_x = self.length_witness_a
        self.last_y = self.length_witness_b
        self.new_superbase = []

        # start lower right cell
        x = self.length_witness_a
        y = self.length_witness_b
        # work our way to the upper left
        while x > 0 and y > 0:
            self._process_cell(token_to_vertex, self.tokens_witness_a, self.tokens_witness_b, alignment, x, y)
            # examine neighbor nodes
            nodes_to_examine = set()
            nodes_to_examine.add(self.table[y][x - 1])
            nodes_to_examine.add(self.table[y - 1][x])
            nodes_to_examine.add(self.table[y - 1][x - 1])
            # calculate the maximum scoring parent node
            # NOTE(review): the lambda parameter shadows the outer `x` — harmless here, but confusing
            parent_node = max(nodes_to_examine, key=lambda x: x.g)
            # move position
            if self.table[y - 1][x - 1] == parent_node:
                # another match or replacement
                y = y - 1
                x = x - 1
            else:
                if self.table[y - 1][x] == parent_node:
                    #omission?
                    y = y - 1
                else:
                    if self.table[y][x - 1] == parent_node:
                        #addition?
                        x = x - 1
        # process additions/omissions in the begin of the superbase/witness
        self.add_to_superbase(self.tokens_witness_a, self.tokens_witness_b, 0, 0)
        return alignment

    def add_to_superbase(self, witness_a, witness_b, x, y):
        """Prepend the unmatched tokens between (x, y) and the previous match to the superbase
        (used by the right-to-left table traceback)."""
        # print self.last_x - x - 1, self.last_y - y - 1
        if self.last_x - x - 1 > 0 or self.last_y - y - 1 > 0:
            # print x, self.last_x, y, self.last_y
            # create new segment
            omitted_base = witness_a[x:self.last_x - 1]
            # print omitted_base
            added_witness = witness_b[y:self.last_y - 1]
            # print added_witness
            self.new_superbase = added_witness + self.new_superbase
            self.new_superbase = omitted_base + self.new_superbase

    def _process_cell(self, token_to_vertex, witness_a, witness_b, alignment, x, y):
        """If the cell at (y, x) is a match, record the alignment and update the superbase."""
        cell = self.table[y][x]
        # process segments
        if cell.match == True:
            self.add_to_superbase(witness_a, witness_b, x, y)
            self.last_x = x
            self.last_y = y
        # process alignment
        if cell.match == True:
            token = witness_a[x - 1]
            token2 = witness_b[y - 1]
            vertex = token_to_vertex[token]
            alignment[token2] = vertex
            # print("match")
            # print(token2)
            self.new_superbase.insert(0, token)
        return cell

    # This function traverses the table diagonally and scores each cell.
    # Original function from Mark Byers; translated from C into Python.
    def traverse_diagonally(self):
        m = self.length_witness_b + 1
        n = self.length_witness_a + 1
        for _slice in range(0, m + n - 1, 1):
            z1 = 0 if _slice < n else _slice - n + 1
            z2 = 0 if _slice < m else _slice - m + 1
            j = _slice - z2
            while j >= z1:
                x = _slice - j
                y = j
                self.score_cell(y, x)
                j -= 1

    def score_cell(self, y, x):
        """Score cell (y, x) from its best-scoring neighbor via the Scorer."""
        # initialize root node score to zero (no edit operations have
        # been performed)
        if y == 0 and x == 0:
            self.table[y][x].g = 0
            return
        # examine neighbor nodes
        nodes_to_examine = set()
        # fetch existing score from the left node if possible
        if x > 0:
            nodes_to_examine.add(self.table[y][x - 1])
        if y > 0:
            nodes_to_examine.add(self.table[y - 1][x])
        if x > 0 and y > 0:
            nodes_to_examine.add(self.table[y - 1][x - 1])
        # calculate the maximum scoring parent node
        parent_node = max(nodes_to_examine, key=lambda x: x.g)
        # edit_operation 0 = diagonal step (match/replacement), 1 = horizontal/vertical step
        if parent_node == self.table[y - 1][x - 1]:
            edit_operation = 0
        else:
            edit_operation = 1
        token_a = self.tokens_witness_a[x - 1]
        token_b = self.tokens_witness_b[y - 1]
        self.scorer.score_cell(self.table[y][x], parent_node, token_a, token_b, y, x, edit_operation)

    def _debug_edit_graph_table(self, table):
        """Print the edit-graph table row by row (debug helper); returns the PrettyTable."""
        # print the table horizontal
        x = PrettyTable()
        x.header = False
        for y in range(0, len(table)):
            cells = table[y]
            x.add_row(cells)
        # alignment can only be set after the field names are known.
        # since add_row sets the field names, it has to be set after x.add_row(cells)
        x.align = "l"
        print(x)
        return x
class EditGraphAligner(CollationAlgorithm):
    """Aligner that scores a full edit-graph table diagonally and traces back the best path.

    Optionally (``astar=True``) uses the experimental a* search instead of the table.
    """

    def __init__(self, collation, near_match=False, astar=False, debug_scores=False):
        self.collation = collation
        self.debug_scores = debug_scores
        self.scorer = Scorer(collation, near_match)
        if not astar:
            self.align_function = self._align_table
        else:
            print("INFO: Aligning using a* search algorithm. BETA quality.")
            self.align_function = self._align_astar

    def collate(self, graph, collation):
        '''
        Align all witnesses into the variant graph one at a time, growing a
        "superbase" (self.new_superbase) that accumulates everything aligned so far.

        :type graph: VariantGraph
        :type collation: Collation
        '''
        # Build the variant graph for the first witness
        # this is easy: generate a vertex for every token
        first_witness = collation.witnesses[0]
        tokens = first_witness.tokens()
        token_to_vertex = self.merge(graph, first_witness.sigil, tokens)

        # let the scorer prepare the first witness
        self.scorer.prepare_witness(first_witness)

        # construct superbase
        superbase = tokens

        # align witness 2 - n
        for x in range(1, len(collation.witnesses)):
            next_witness = collation.witnesses[x]

            # let the scorer prepare the next witness
            self.scorer.prepare_witness(next_witness)

            # # FOR VERIFICATION!
            # alignment = self._align_table(superbase, next_witness, token_to_vertex)
            # self.table2 = self.table

            # alignment = token -> vertex
            alignment = self.align_function(superbase, next_witness, token_to_vertex)

            # merge
            token_to_vertex.update(self.merge(graph, next_witness.sigil, next_witness.tokens(), alignment))

            # print("actual")
            # self._debug_edit_graph_table(self.table)
            # print("expected")
            # self._debug_edit_graph_table(self.table2)

            # change superbase
            superbase = self.new_superbase

        if self.debug_scores:
            self._debug_edit_graph_table(self.table)

    def _align_astar(self, superbase, witness, token_to_vertex, control_table=None):
        """Run the a* search and convert the resulting path into a token->vertex alignment.

        Side effect: rebuilds self.new_superbase from matched tokens plus the
        unmatched segments between matches.
        """
        self.tokens_witness_a = superbase
        self.tokens_witness_b = witness.tokens()
        self.length_witness_a = len(self.tokens_witness_a)
        self.length_witness_b = len(self.tokens_witness_b)
        self.control_table = control_table
        aligner = AstarEditGraphAligner(self.tokens_witness_a, self.tokens_witness_b, self.scorer)
        self.table = [[AstarEditGraphNode(aligner, y, x) for x in range(self.length_witness_a+1)]
                      for y in range(self.length_witness_b+1)]
        aligner.table = self.table
        start = self.table[0][0]
        path = aligner.search(start, self.control_table)
        # transform path into an alignment
        alignment = {}

        # segment stuff
        # note we traverse from left to right!
        self.last_x = 0
        self.last_y = 0
        self.new_superbase=[]

        for element in path:
            # print(element.y, element.x)
            if element.match == True:
                # process segments
                self.newer_add_to_superbase(self.tokens_witness_a, self.tokens_witness_b,
                                            element.x, element.y)
                self.last_x = element.x
                self.last_y = element.y
                # process alignment
                token = self.tokens_witness_a[element.x-1]
                token2 = self.tokens_witness_b[element.y-1]
                vertex = token_to_vertex[token]
                alignment[token2] = vertex

                # add match to superbase
                self.new_superbase.append(token)

        # process additions/omissions in the begin of the superbase/witness
        self.newer_add_to_superbase(self.tokens_witness_a, self.tokens_witness_b,
                                    self.length_witness_a, self.length_witness_b)

        return alignment

    def newer_add_to_superbase(self, witness_a, witness_b, x, y):
        """Append the unmatched tokens between the previous match and (x, y) to the superbase."""
        if x - self.last_x - 1 > 0 or y - self.last_y - 1 > 0:
            # create new segment
            omitted_base = witness_a[self.last_x:x - 1]
            added_witness = witness_b[self.last_y:y - 1]
            self.new_superbase += omitted_base
            self.new_superbase += added_witness

    def _align_table(self, superbase, witness, token_to_vertex):
        """Score the full edit-graph table, then trace back from the lower-right cell
        to produce the token->vertex alignment and rebuild self.new_superbase."""
        self.tokens_witness_a = superbase
        self.tokens_witness_b = witness.tokens()
        self.length_witness_a = len(self.tokens_witness_a)
        self.length_witness_b = len(self.tokens_witness_b)
        self.table = [[EditGraphNode() for _ in range(self.length_witness_a+1)]
                      for _ in range(self.length_witness_b+1)]

        # per diagonal calculate the score (taking into account the three surrounding nodes)
        self.traverse_diagonally()

        alignment = {}
        # segment stuff
        # note we traverse from right to left!
        self.last_x = self.length_witness_a
        self.last_y = self.length_witness_b
        self.new_superbase=[]

        # start lower right cell
        x = self.length_witness_a
        y = self.length_witness_b
        # work our way to the upper left
        while x > 0 and y > 0:
            self._process_cell(token_to_vertex, self.tokens_witness_a, self.tokens_witness_b, alignment, x, y)
            # examine neighbor nodes
            nodes_to_examine = set()
            nodes_to_examine.add(self.table[y][x-1])
            nodes_to_examine.add(self.table[y-1][x])
            nodes_to_examine.add(self.table[y-1][x-1])
            # calculate the maximum scoring parent node
            # NOTE(review): the lambda parameter shadows the outer `x` — harmless here, but confusing
            parent_node = max(nodes_to_examine, key=lambda x: x.g)
            # move position
            if self.table[y-1][x-1] == parent_node:
                # another match or replacement
                y = y -1
                x = x -1
            else:
                if self.table[y-1][x] == parent_node:
                    #omission?
                    y = y -1
                else:
                    if self.table[y][x-1] == parent_node:
                        #addition?
                        x = x -1
        # process additions/omissions in the begin of the superbase/witness
        self.add_to_superbase(self.tokens_witness_a, self.tokens_witness_b, 0, 0)
        return alignment

    def add_to_superbase(self, witness_a, witness_b, x, y):
        """Prepend the unmatched tokens between (x, y) and the previous match to the superbase
        (used by the right-to-left table traceback)."""
        # print self.last_x - x - 1, self.last_y - y - 1
        if self.last_x - x - 1 > 0 or self.last_y - y - 1 > 0:
            # print x, self.last_x, y, self.last_y
            # create new segment
            omitted_base = witness_a[x:self.last_x - 1]
            # print omitted_base
            added_witness = witness_b[y:self.last_y - 1]
            # print added_witness
            self.new_superbase = added_witness + self.new_superbase
            self.new_superbase = omitted_base + self.new_superbase

    def _process_cell(self, token_to_vertex, witness_a, witness_b, alignment, x, y):
        """If the cell at (y, x) is a match, record the alignment and update the superbase."""
        cell = self.table[y][x]
        # process segments
        if cell.match == True:
            self.add_to_superbase(witness_a, witness_b, x, y)
            self.last_x = x
            self.last_y = y
        # process alignment
        if cell.match == True:
            token = witness_a[x-1]
            token2 = witness_b[y-1]
            vertex = token_to_vertex[token]
            alignment[token2] = vertex
            # print("match")
            # print(token2)
            self.new_superbase.insert(0, token)
        return cell

    # This function traverses the table diagonally and scores each cell.
    # Original function from Mark Byers; translated from C into Python.
    def traverse_diagonally(self):
        m = self.length_witness_b+1
        n = self.length_witness_a+1
        for _slice in range(0, m + n - 1, 1):
            z1 = 0 if _slice < n else _slice - n + 1;
            z2 = 0 if _slice < m else _slice - m + 1;
            j = _slice - z2
            while j >= z1:
                x = _slice - j
                y = j
                self.score_cell(y, x)
                j -= 1

    def score_cell(self, y, x):
        """Score cell (y, x) from its best-scoring neighbor via the Scorer."""
        # initialize root node score to zero (no edit operations have
        # been performed)
        if y == 0 and x == 0:
            self.table[y][x].g = 0
            return
        # examine neighbor nodes
        nodes_to_examine = set()
        # fetch existing score from the left node if possible
        if x > 0:
            nodes_to_examine.add(self.table[y][x-1])
        if y > 0:
            nodes_to_examine.add(self.table[y-1][x])
        if x > 0 and y > 0:
            nodes_to_examine.add(self.table[y-1][x-1])
        # calculate the maximum scoring parent node
        parent_node = max(nodes_to_examine, key=lambda x: x.g)
        # edit_operation 0 = diagonal step (match/replacement), 1 = horizontal/vertical step
        if parent_node == self.table[y-1][x-1]:
            edit_operation = 0
        else:
            edit_operation = 1
        token_a = self.tokens_witness_a[x-1]
        token_b = self.tokens_witness_b[y-1]
        self.scorer.score_cell(self.table[y][x], parent_node, token_a, token_b, y, x, edit_operation)

    def _debug_edit_graph_table(self, table):
        """Print the edit-graph table row by row (debug helper); returns the PrettyTable."""
        # print the table horizontal
        x = PrettyTable()
        x.header=False
        for y in range(0, len(table)):
            cells = table[y]
            x.add_row(cells)
        # alignment can only be set after the field names are known.
        # since add_row sets the field names, it has to be set after x.add_row(cells)
        x.align="l"
        print(x)
        return x
class EditGraphAligner(CollationAlgorithm):
    """Align witnesses against a growing superbase.

    A Needleman-Wunsch style edit-graph table is filled diagonal by
    diagonal (so every cell's three neighbours are scored first) and then
    backtracked from the lower-right corner to produce the alignment and
    the next superbase.
    """

    def __init__(self, collation, near_match=False, debug_scores=False, detect_transpositions=False, properties_filter=None):
        self.collation = collation
        self.debug_scores = debug_scores
        self.detect_transpositions = detect_transpositions
        # token index and scorer are built once and reused for every witness
        self.token_index = TokenIndex(collation.witnesses)
        self.scorer = Scorer(self.token_index, near_match=near_match, properties_filter=properties_filter)
        self.align_function = self._align_table
        # segments gathered during backtracking; flushed by add_to_superbase
        self.added_witness = []
        self.omitted_base = []

    def collate(self, graph, collation):
        """Build the variant graph for all witnesses of the collation.

        :type graph: VariantGraph
        :type collation: Collation
        """
        # prepare the token index
        self.token_index.prepare()

        # The first witness is trivial: generate a vertex for every token.
        first_witness = collation.witnesses[0]
        tokens = first_witness.tokens()
        token_to_vertex = self.merge(graph, first_witness.sigil, tokens)
        # let the scorer prepare the first witness
        self.scorer.prepare_witness(first_witness)
        # the superbase starts as the first witness and grows with every
        # witness that is aligned against it
        superbase = tokens
        # align witness 2 - n
        for i in range(1, len(collation.witnesses)):
            next_witness = collation.witnesses[i]
            # let the scorer prepare the next witness
            self.scorer.prepare_witness(next_witness)
            # alignment maps witness tokens -> variant-graph vertices
            alignment = self.align_function(superbase, next_witness, token_to_vertex)
            # merge the aligned witness into the variant graph
            token_to_vertex.update(self.merge(graph, next_witness.sigil, next_witness.tokens(), alignment))
            # change superbase
            superbase = self.new_superbase
        if self.detect_transpositions:
            detector = TranspositionDetection(self)
            detector.detect()
        if self.debug_scores:
            self._debug_edit_graph_table(self.table)

    def _align_table(self, superbase, witness, token_to_vertex):
        """Score the edit-graph table for superbase vs. witness and backtrack.

        Returns the alignment dict (witness token -> vertex) and leaves the
        extended superbase in self.new_superbase.
        Raises Exception when the superbase is empty.
        """
        if not superbase:
            raise Exception("Superbase is empty!")
        self.tokens_witness_a = superbase
        self.tokens_witness_b = witness.tokens()
        self.length_witness_a = len(self.tokens_witness_a)
        self.length_witness_b = len(self.tokens_witness_b)
        self.table = [[EditGraphNode() for _ in range(self.length_witness_a + 1)]
                      for _ in range(self.length_witness_b + 1)]
        # per diagonal calculate the score (taking into account the three surrounding nodes)
        self.traverse_diagonally()

        alignment = {}
        self.additions = []
        self.omissions = []
        self.new_superbase = []
        # backtrack from the lower-right cell to the upper left
        x = self.length_witness_a
        y = self.length_witness_b
        while x > 0 and y > 0:
            cell = self.table[y][x]
            self._process_cell(token_to_vertex, self.tokens_witness_a, self.tokens_witness_b, alignment, x, y)
            # Pick the best-scoring parent. The neighbours are examined in a
            # fixed order (diagonal, above, left) so that score ties are broken
            # deterministically; the previous set-based max() picked an
            # arbitrary element among equal scores. The exhaustive
            # if/elif/else also guarantees progress on every iteration.
            diagonal = self.table[y - 1][x - 1]
            above = self.table[y - 1][x]
            left = self.table[y][x - 1]
            parent_node = max((diagonal, above, left), key=lambda node: node.g)
            if parent_node is diagonal:
                # match or replacement
                if not cell.match:
                    # replacement: remember both tokens as a variant segment
                    self.omitted_base.insert(0, self.tokens_witness_a[x - 1])
                    self.added_witness.insert(0, self.tokens_witness_b[y - 1])
                y -= 1
                x -= 1
            elif parent_node is above:
                # addition: token only present in the witness
                self.added_witness.insert(0, self.tokens_witness_b[y - 1])
                y -= 1
            else:
                # omission: token only present in the superbase
                self.omitted_base.insert(0, self.tokens_witness_a[x - 1])
                x -= 1
        # process additions/omissions at the beginning of the superbase/witness
        if x > 0:
            self.omitted_base = self.tokens_witness_a[0:x] + self.omitted_base
        if y > 0:
            self.added_witness = self.tokens_witness_b[0:y] + self.added_witness
        self.add_to_superbase()
        return alignment

    def add_to_superbase(self):
        """Flush the pending omitted/added segments to the front of the new superbase."""
        if self.omitted_base or self.added_witness:
            # update superbase with additions, omissions
            self.new_superbase = self.added_witness + self.new_superbase
            self.new_superbase = self.omitted_base + self.new_superbase
            self.added_witness = []
            self.omitted_base = []

    def _process_cell(self, token_to_vertex, witness_a, witness_b, alignment, x, y):
        """On a match cell: flush pending segments, align the witness token to
        the vertex of the base token, and prepend the base token to the new
        superbase. Returns the cell."""
        cell = self.table[y][x]
        if cell.match:
            # process segments
            self.add_to_superbase()
            # process alignment
            token = witness_a[x - 1]
            token2 = witness_b[y - 1]
            vertex = token_to_vertex[token]
            alignment[token2] = vertex
            self.new_superbase.insert(0, token)
        return cell

    # This function traverses the table diagonally and scores each cell.
    # Original function from Mark Byers; translated from C into Python.
    def traverse_diagonally(self):
        m = self.length_witness_b + 1
        n = self.length_witness_a + 1
        for _slice in range(0, m + n - 1):
            # z1/z2 clip the anti-diagonal to the table bounds
            z1 = 0 if _slice < n else _slice - n + 1
            z2 = 0 if _slice < m else _slice - m + 1
            for j in range(_slice - z2, z1 - 1, -1):
                self.score_cell(j, _slice - j)

    def score_cell(self, y, x):
        """Score table[y][x] from its best-scoring existing neighbour."""
        # root node: no edit operations have been performed yet
        if y == 0 and x == 0:
            self.table[y][x].g = 0
            return
        # Collect the existing neighbours in a fixed order (diagonal first)
        # so that score ties are broken deterministically.
        nodes_to_examine = []
        if x > 0 and y > 0:
            nodes_to_examine.append(self.table[y - 1][x - 1])
        if y > 0:
            nodes_to_examine.append(self.table[y - 1][x])
        if x > 0:
            nodes_to_examine.append(self.table[y][x - 1])
        parent_node = max(nodes_to_examine, key=lambda node: node.g)
        # 0 = diagonal step (match/replacement), 1 = gap (addition/omission).
        # Border cells (x == 0 or y == 0) have no diagonal neighbour and are
        # always gaps; the previous code compared against a wrapped [-1] index.
        if x > 0 and y > 0 and parent_node is self.table[y - 1][x - 1]:
            edit_operation = 0
        else:
            edit_operation = 1
        # NOTE(review): for border cells these indices wrap to -1 and hand the
        # *last* token to the scorer, as the original code did — confirm the
        # scorer ignores tokens for gap cells.
        token_a = self.tokens_witness_a[x - 1]
        token_b = self.tokens_witness_b[y - 1]
        self.scorer.score_cell(self.table[y][x], parent_node, token_a, token_b, y, x, edit_operation)

    def _debug_edit_graph_table(self, table):
        """Print the score table horizontally; returns the PrettyTable."""
        x = PrettyTable()
        x.header = False
        for row in table:
            x.add_row(row)
        # alignment can only be set after the field names are known;
        # add_row sets the field names, so align has to be set afterwards
        x.align = "l"
        print(x)
        return x
class EditGraphAligner(CollationAlgorithm):
    """Align witnesses against a growing superbase via an edit-graph table
    that is scored diagonal by diagonal and then backtracked."""

    def __init__(self, collation, near_match=False, debug_scores=False, detect_transpositions=False, properties_filter=None):
        self.collation = collation
        self.debug_scores = debug_scores
        self.detect_transpositions = detect_transpositions
        # token index and scorer are built once and reused for every witness
        self.token_index = TokenIndex(collation.witnesses)
        self.scorer = Scorer(self.token_index, near_match=near_match, properties_filter=properties_filter)
        self.align_function = self._align_table
        # segments gathered during backtracking; flushed by add_to_superbase
        self.added_witness = []
        self.omitted_base = []

    def collate(self, graph, collation):
        """
        :type graph: VariantGraph
        :type collation: Collation
        """
        # prepare the token index
        self.token_index.prepare()

        # Build the variant graph for the first witness
        # this is easy: generate a vertex for every token
        first_witness = collation.witnesses[0]
        tokens = first_witness.tokens()
        token_to_vertex = self.merge(graph, first_witness.sigil, tokens)
        # let the scorer prepare the first witness
        self.scorer.prepare_witness(first_witness)
        # construct superbase
        superbase = tokens
        # align witness 2 - n
        for x in range(1, len(collation.witnesses)):
            next_witness = collation.witnesses[x]
            # let the scorer prepare the next witness
            self.scorer.prepare_witness(next_witness)
            # # FOR VERIFICATION!
            # alignment = self._align_table(superbase, next_witness, token_to_vertex)
            # self.table2 = self.table
            # alignment = token -> vertex
            alignment = self.align_function(superbase, next_witness, token_to_vertex)
            # merge
            token_to_vertex.update(
                self.merge(graph, next_witness.sigil, next_witness.tokens(), alignment))
            # print("actual")
            # self._debug_edit_graph_table(self.table)
            # print("expected")
            # self._debug_edit_graph_table(self.table2)
            # change superbase
            superbase = self.new_superbase
        if self.detect_transpositions:
            detector = TranspositionDetection(self)
            detector.detect()
        if self.debug_scores:
            self._debug_edit_graph_table(self.table)

    def _align_table(self, superbase, witness, token_to_vertex):
        # Score the table for superbase vs. witness, then backtrack from the
        # lower-right corner; returns the alignment (witness token -> vertex)
        # and leaves the extended superbase in self.new_superbase.
        if not superbase:
            raise Exception("Superbase is empty!")
        # print(""+str(superbase)+":"+str(witness.tokens()))
        self.tokens_witness_a = superbase
        self.tokens_witness_b = witness.tokens()
        self.length_witness_a = len(self.tokens_witness_a)
        self.length_witness_b = len(self.tokens_witness_b)
        self.table = [[
            EditGraphNode() for _ in range(self.length_witness_a + 1)
        ] for _ in range(self.length_witness_b + 1)]

        # per diagonal calculate the score (taking into account the three surrounding nodes)
        self.traverse_diagonally()

        alignment = {}
        self.additions = []
        self.omissions = []
        self.new_superbase = []

        # start lower right cell
        x = self.length_witness_a
        y = self.length_witness_b
        # work our way to the upper left
        while x > 0 and y > 0:
            cell = self.table[y][x]
            self._process_cell(token_to_vertex, self.tokens_witness_a, self.tokens_witness_b, alignment, x, y)
            # examine neighbor nodes
            nodes_to_examine = set()
            nodes_to_examine.add(self.table[y][x - 1])
            nodes_to_examine.add(self.table[y - 1][x])
            nodes_to_examine.add(self.table[y - 1][x - 1])
            # calculate the maximum scoring parent node
            # NOTE(review): max() over a *set* picks an arbitrary element when
            # scores tie — tie-breaking is hash-order dependent; confirm
            # EditGraphNode equality semantics before relying on determinism.
            parent_node = max(nodes_to_examine, key=lambda x: x.g)
            # move position
            if self.table[y - 1][x - 1] == parent_node:
                # another match or replacement
                if not cell.match:
                    # replacement: remember both tokens as a variant segment
                    self.omitted_base.insert(0, self.tokens_witness_a[x - 1])
                    self.added_witness.insert(0, self.tokens_witness_b[y - 1])
                y -= 1
                x -= 1
            else:
                if self.table[y - 1][x] == parent_node:
                    # addition?
                    self.added_witness.insert(0, self.tokens_witness_b[y - 1])
                    y -= 1
                else:
                    if self.table[y][x - 1] == parent_node:
                        # omission?
                        self.omitted_base.insert(0, self.tokens_witness_a[x - 1])
                        x -= 1
        # process additions/omissions in the begin of the superbase/witness
        if x > 0:
            self.omitted_base = self.tokens_witness_a[0:x] + self.omitted_base
        if y > 0:
            self.added_witness = self.tokens_witness_b[0:y] + self.added_witness
        self.add_to_superbase()
        return alignment

    def add_to_superbase(self):
        # Flush the pending omitted/added segments to the front of the new
        # superbase and reset the pending lists.
        if self.omitted_base or self.added_witness:
            # update superbase with additions, omissions
            self.new_superbase = self.added_witness + self.new_superbase
            self.new_superbase = self.omitted_base + self.new_superbase
            self.added_witness = []
            self.omitted_base = []

    def _process_cell(self, token_to_vertex, witness_a, witness_b, alignment, x, y):
        # On a match cell: flush pending segments, align the witness token to
        # the base token's vertex, and prepend the base token to the new
        # superbase. Returns the cell in all cases.
        cell = self.table[y][x]
        if cell.match:
            # process segments
            self.add_to_superbase()
            # process alignment
            token = witness_a[x - 1]
            token2 = witness_b[y - 1]
            vertex = token_to_vertex[token]
            alignment[token2] = vertex
            self.new_superbase.insert(0, token)
        return cell

    # This function traverses the table diagonally and scores each cell.
    # Original function from Mark Byers; translated from C into Python.
    def traverse_diagonally(self):
        m = self.length_witness_b + 1
        n = self.length_witness_a + 1
        for _slice in range(0, m + n - 1, 1):
            # z1/z2 clip the anti-diagonal to the table bounds
            z1 = 0 if _slice < n else _slice - n + 1
            z2 = 0 if _slice < m else _slice - m + 1
            j = _slice - z2
            while j >= z1:
                x = _slice - j
                y = j
                self.score_cell(y, x)
                j -= 1

    def score_cell(self, y, x):
        # Score table[y][x] from its best-scoring existing neighbour and
        # delegate the actual scoring to self.scorer.
        # initialize root node score to zero (no edit operations have
        # been performed)
        if y == 0 and x == 0:
            self.table[y][x].g = 0
            return
        # examine neighbor nodes
        nodes_to_examine = set()
        # fetch existing score from the left node if possible
        if x > 0:
            nodes_to_examine.add(self.table[y][x - 1])
        if y > 0:
            nodes_to_examine.add(self.table[y - 1][x])
        if x > 0 and y > 0:
            nodes_to_examine.add(self.table[y - 1][x - 1])
        # calculate the maximum scoring parent node
        # NOTE(review): set-based max() is hash-order dependent on ties; the
        # lambda parameter also shadows the outer x.
        parent_node = max(nodes_to_examine, key=lambda x: x.g)
        # edit_operation 0 = diagonal step (match/replacement), 1 = gap.
        # NOTE(review): on border cells (x == 0 or y == 0) the y-1/x-1 indices
        # wrap to -1, comparing against the last row/column and fetching the
        # *last* token below — presumably harmless for gap cells; verify
        # against the scorer.
        if parent_node == self.table[y - 1][x - 1]:
            edit_operation = 0
        else:
            edit_operation = 1
        token_a = self.tokens_witness_a[x - 1]
        token_b = self.tokens_witness_b[y - 1]
        self.scorer.score_cell(self.table[y][x], parent_node, token_a, token_b, y, x, edit_operation)

    def _debug_edit_graph_table(self, table):
        # Render the score table with PrettyTable, print it, and return the
        # PrettyTable object for inspection.
        # print the table horizontal
        x = PrettyTable()
        x.header = False
        for y in range(0, len(table)):
            cells = table[y]
            x.add_row(cells)
        # alignment can only be set after the field names are known.
        # since add_row sets the field names, it has to be set after x.add_row(cells)
        x.align = "l"
        print(x)
        return x