コード例 #1
0
 def __init__(self, witnesses):
     """Index ``witnesses`` and create the root decision node.

     A ``TokenIndex`` is built over the witnesses and prepared right away.
     The intervals returned by ``split_lcp_array_into_intervals()`` are
     kept on the instance, and the value ``calculate_maximum`` returns for
     them is passed to the root ``DecisionNode``.

     :param witnesses: the witnesses, handed unchanged to ``TokenIndex``.
     """
     self.witnesses = witnesses
     self.tokenindex = index = TokenIndex(witnesses)
     index.prepare()
     self.lcp_intervals = intervals = index.split_lcp_array_into_intervals()
     # The root node is seeded with the maximum computed over all intervals.
     self.root = DecisionNode(calculate_maximum(intervals))
コード例 #2
0
 def test_witness_ranges_hermans_case(self):
     """Each witness must be reported with its own range of positions.

     W1 holds 15 whitespace-separated tokens and is expected at 0-14; W2
     holds 13 and is expected at 16-28. Position 15 belongs to neither
     range (presumably the witness separator -- confirm against
     TokenIndex).
     """
     collation = Collation()
     collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
     collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
     token_index = TokenIndex(collation.witnesses)
     token_index.prepare()
     # assertEqual is used instead of the deprecated assertEquals alias,
     # which no longer exists in unittest as of Python 3.12.
     self.assertEqual(RangeSet("0-14"),
                      token_index.get_range_for_witness("W1"))
     self.assertEqual(RangeSet("16-28"),
                      token_index.get_range_for_witness("W2"))
コード例 #3
0
 def testTokenArrayMarkersWithThreeWitnesses(self):
     """Three witnesses must be joined in the token array by $0 and $1."""
     witness_texts = (
         ("W1", "interesting nice huh"),
         ("W2", "very nice right"),
         ("W3", "especially interesting"),
     )
     collation = Collation()
     for sigil, text in witness_texts:
         collation.add_plain_witness(sigil, text)
     index = TokenIndex(collation.witnesses)
     index.prepare()
     # Witness contents in insertion order, with a numbered marker between
     # each pair of neighbouring witnesses.
     expected = ("interesting nice huh $0 very nice right $1 "
                 "especially interesting")
     self.assertTokenArray(expected, index)
コード例 #4
0
 def test_token_array_hermans_case(self):
     """The token array must hold W1, a $0 marker, then W2."""
     first = "a b c d F g h i ! K ! q r s t"
     second = "a b c d F g h i ! q r s t"
     collation = Collation()
     collation.add_plain_witness("W1", first)
     collation.add_plain_witness("W2", second)
     index = TokenIndex(collation.witnesses)
     index.prepare()
     # The "$0" token is the marker that separates the two witnesses.
     expected = first + " $0 " + second
     self.assertTokenArray(expected, index)
コード例 #5
0
 def test_lcp_intervals_number_of_witnesses_Hermans_case(self):
     """The second LCP interval must be reported for all three witnesses."""
     witness_texts = (
         ("W1", "a b c d F g h i ! K ! q r s t"),
         ("W2", "a b c d F g h i ! q r s t"),
         ("W3", "a b c d E g h i ! q r s t"),
     )
     collation = Collation()
     for sigil, text in witness_texts:
         collation.add_plain_witness(sigil, text)
     index = TokenIndex(collation.witnesses)
     index.prepare()
     # Interval at position 1 is the block "! q r s t".
     shared_tail = index.split_lcp_array_into_intervals()[1]
     self.assertEqual(3, shared_tail.number_of_witnesses)
コード例 #6
0
 def testCaseDanielStoekl(self):
     """Check the LCP array produced for the three-witness example."""
     collation = Collation()
     collation.add_plain_witness("W1", "a b c d e")
     collation.add_plain_witness("W2", "a e c d")
     collation.add_plain_witness("W3", "a d b")
     token_index = TokenIndex(collation.witnesses)
     token_index.prepare()
     # Note: the suffix array can take multiple forms because the outcome
     # of sorting is not guaranteed. The LCP array, however, is fixed, so
     # that is what gets asserted.
     expected_lcp = array('i', [0, 0, 0, 1, 1, 0, 1, 0, 2, 0, 1, 1, 0, 1])
     # assertEqual is used instead of the deprecated assertEquals alias,
     # which no longer exists in unittest as of Python 3.12.
     self.assertEqual(expected_lcp, token_index.get_lcp_array())
コード例 #7
0
 def test_filter_potential_blocks(self):
     """A token repeated inside one witness still counts that witness once.

     Expectation: the first interval is "a", with length 1, three
     occurrences and two witnesses.
     """
     collation = Collation()
     for sigil, text in (("W1", "a a"), ("w2", "a")):
         collation.add_plain_witness(sigil, text)
     index = TokenIndex(collation.witnesses)
     index.prepare()
     first_interval = index.split_lcp_array_into_intervals()[0]  # a
     self.assertEqual(2, first_interval.number_of_witnesses)
     self.assertEqual(1, first_interval.length)
     self.assertEqual(3, first_interval.number_of_occurrences)
コード例 #8
0
    def setUp(self):
        """Create three witnesses and a prepared token index over them.

        Witness contents:
            A: a b c d e
            B: a e c d
            C: a d b
        """
        sources = (
            ('A', "a b c d e"),
            ('B', "a e c d"),
            ('C', "a d b"),
        )
        self.witnesses = [Witness({'id': sigil, 'content': text})
                          for sigil, text in sources]
        self.tokenindex = TokenIndex(self.witnesses)
        self.tokenindex.prepare()
コード例 #9
0
 def testCaseDanielStoeklLCPIntervals(self):
     """Check the LCP intervals produced for the three-witness example.

     Exactly five intervals are expected, in this order:
     "a", "b", "c d", "d" and "e".
     """
     collation = Collation()
     collation.add_plain_witness("W1", "a b c d e")
     collation.add_plain_witness("W2", "a e c d")
     collation.add_plain_witness("W3", "a d b")
     token_index = TokenIndex(collation.witnesses)
     token_index.prepare()
     blocks = token_index.split_lcp_array_into_intervals()
     # Check the count first so that too few intervals shows up as an
     # assertion failure rather than an IndexError on the lookups below.
     # assertEqual is used instead of the deprecated assertEquals alias,
     # which no longer exists in unittest as of Python 3.12.
     self.assertEqual(5, len(blocks))
     self.assertLCP_Interval(2, 1, 3, 3, blocks[0])  # a
     self.assertLCP_Interval(5, 1, 2, 2, blocks[1])  # b
     self.assertLCP_Interval(7, 2, 2, 2, blocks[2])  # c d
     self.assertLCP_Interval(9, 1, 3, 3, blocks[3])  # d
     self.assertLCP_Interval(12, 1, 2, 2, blocks[4])  # e
コード例 #10
0
 def __init__(self,
              collation,
              near_match=False,
              debug_scores=False,
              detect_transpositions=False,
              properties_filter=None):
     """Store the collation and options, then build the index and scorer.

     :param collation: object whose ``witnesses`` are given to ``TokenIndex``.
     :param near_match: forwarded to ``Scorer`` as ``near_match``.
     :param debug_scores: stored on the instance as ``debug_scores``.
     :param detect_transpositions: stored on the instance unchanged.
     :param properties_filter: forwarded to ``Scorer`` as
         ``properties_filter``.
     """
     self.collation = collation
     self.debug_scores = debug_scores
     self.detect_transpositions = detect_transpositions
     self.token_index = index = TokenIndex(collation.witnesses)
     # The scorer shares this instance's token index.
     scorer_options = {
         'near_match': near_match,
         'properties_filter': properties_filter,
     }
     self.scorer = Scorer(index, **scorer_options)
     # Alignment goes through the table-based implementation.
     self.align_function = self._align_table
コード例 #11
0
 def __init__(self,
              collation,
              near_match=False,
              debug_scores=False,
              detect_transpositions=False,
              properties_filter=None):
     """Store the collation and options and create empty working state.

     :param collation: object whose ``witnesses`` are given to ``TokenIndex``.
     :param near_match: accepted, but not used inside this constructor.
     :param debug_scores: stored on the instance as ``debug_scores``.
     :param detect_transpositions: stored on the instance unchanged.
     :param properties_filter: stored on the instance unchanged.
     """
     self.scorer = Scorer()

     # Caller-supplied configuration.
     self.collation = collation
     self.debug_scores = debug_scores
     self.detect_transpositions = detect_transpositions
     self.properties_filter = properties_filter

     # Index over the collation's witnesses.
     witnesses = collation.witnesses
     self.token_index = TokenIndex(witnesses)

     # Working state; every container starts out empty.
     self.token_position_to_vertex = {}
     self.added_witness = []
     self.omitted_base = []
     self.vertex_array = []
     self.cells = [[]]  # a single empty row