Example #1
0
class DecisionTreeTest(TestCase):
    """Tests for the maximum score calculation and the decision tree root."""

    def setUp(self):
        """Create three witnesses and a prepared token index over them."""
        # we need to create witnesses
        # 1: a, b, c, d, e
        # 2: a, e, c, d
        # 3: a, d, b

        a = Witness({'id': 'A', 'content': "a b c d e"})
        b = Witness({'id': 'B', 'content': "a e c d"})
        c = Witness({'id': 'C', 'content': "a d b"})

        self.witnesses = [a, b, c]
        self.tokenindex = TokenIndex(self.witnesses)
        self.tokenindex.prepare()

    def test_maximum_score(self):
        """The maximum calculated from the fixture's LCP intervals is 12."""
        # from the token index we need to calculate the maximum amount of matches
        lcp_intervals = self.tokenindex.split_lcp_array_into_intervals()
        possible_matches = calculate_maximum(lcp_intervals)

        self.assertEqual(12, possible_matches)

    def test_decision_tree(self):
        """The root node has coordinates (0, 0, 0) and scores 0 / 0 / 12."""
        tree = DecisionTree(self.witnesses)
        root = tree.root
        self.assertEqual((0, 0, 0), root.coordinates)
        # we need three scores, (current score), (minimum global score, maximum global score)
        self.assertEqual(0, root.current_score)
        self.assertEqual(0, root.minimum_global_score)
        self.assertEqual(12, root.maximum_global_score)
class DecisionTreeTest(TestCase):
    """Tests for the maximum score calculation and the decision tree root."""

    def setUp(self):
        """Create three witnesses and a prepared token index over them."""
        # we need to create witnesses
        # 1: a, b, c, d, e
        # 2: a, e, c, d
        # 3: a, d, b

        a = Witness({'id': 'A', 'content': "a b c d e"})
        b = Witness({'id': 'B', 'content': "a e c d"})
        c = Witness({'id': 'C', 'content': "a d b"})

        self.witnesses = [a, b, c]
        self.tokenindex = TokenIndex(self.witnesses)
        self.tokenindex.prepare()

    def test_maximum_score(self):
        """The maximum calculated from the fixture's LCP intervals is 12."""
        # from the token index we need to calculate the maximum amount of matches
        lcp_intervals = self.tokenindex.split_lcp_array_into_intervals()
        possible_matches = calculate_maximum(lcp_intervals)

        self.assertEqual(12, possible_matches)

    def test_decision_tree(self):
        """The root node has coordinates (0, 0, 0) and scores 0 / 0 / 12."""
        tree = DecisionTree(self.witnesses)
        root = tree.root
        self.assertEqual((0, 0, 0), root.coordinates)
        # we need three scores, (current score), (minimum global score, maximum global score)
        self.assertEqual(0, root.current_score)
        self.assertEqual(0, root.minimum_global_score)
        self.assertEqual(12, root.maximum_global_score)
Example #3
0
 def test_lcp_intervals_number_of_witnesses_Hermans_case(self):
     """The second LCP interval must report three witnesses."""
     collation = Collation()
     witness_texts = (
         ("W1", "a b c d F g h i ! K ! q r s t"),
         ("W2", "a b c d F g h i ! q r s t"),
         ("W3", "a b c d E g h i ! q r s t"),
     )
     for sigil, text in witness_texts:
         collation.add_plain_witness(sigil, text)

     index = TokenIndex(collation.witnesses)
     index.prepare()

     # the interval at position 1 is expected to be "! q r s t"
     candidate = index.split_lcp_array_into_intervals()[1]
     self.assertEqual(3, candidate.number_of_witnesses)
Example #4
0
class DecisionTree(object):
    """Decision tree over a set of witnesses.

    On construction a token index is built and prepared for the witnesses,
    its LCP array is split into intervals, and the value returned by
    ``calculate_maximum`` for those intervals is passed to the root
    ``DecisionNode``.
    """

    def __init__(self, witnesses):
        """Index *witnesses* and create the root node of the tree."""
        self.witnesses = witnesses

        index = TokenIndex(witnesses)
        self.tokenindex = index
        index.prepare()

        intervals = index.split_lcp_array_into_intervals()
        self.lcp_intervals = intervals

        # the root node receives the maximum calculated from the intervals
        self.root = DecisionNode(calculate_maximum(intervals))
Example #5
0
 def test_lcp_intervals_number_of_witnesses_Hermans_case(self):
     """The second LCP interval must have a depth of three."""
     collation = Collation()
     for sigil, text in (
         ("W1", "a b c d F g h i ! K ! q r s t"),
         ("W2", "a b c d F g h i ! q r s t"),
         ("W3", "a b c d E g h i ! q r s t"),
     ):
         collation.add_plain_witness(sigil, text)

     index = TokenIndex(collation.witnesses)
     index.prepare()
     all_intervals = index.split_lcp_array_into_intervals()

     # the interval at position 1 is expected to be "! q r s t"
     candidate = all_intervals[1]
     self.assertEqual(3, candidate.get_depth())
Example #6
0
class DecisionTree(object):
    """Decision tree over a set of witnesses.

    On construction a token index is built and prepared for the witnesses,
    its LCP array is split into intervals, and the value returned by
    ``calculate_maximum`` for those intervals is passed to the root
    ``DecisionNode``.
    """

    def __init__(self, witnesses):
        """Index *witnesses* and create the root node of the tree."""
        self.witnesses = witnesses

        index = TokenIndex(witnesses)
        self.tokenindex = index
        index.prepare()

        intervals = index.split_lcp_array_into_intervals()
        self.lcp_intervals = intervals

        # the root node receives the maximum calculated from the intervals
        self.root = DecisionNode(calculate_maximum(intervals))
Example #7
0
 def test_filter_potential_blocks(self):
     """Check witness count, length and occurrence count of the first interval."""
     collation = Collation()
     for sigil, text in (("W1", "a a"), ("w2", "a")):
         collation.add_plain_witness(sigil, text)

     index = TokenIndex(collation.witnesses)
     index.prepare()

     # Expected: a single interval ("a") with length 1 that occurs
     # 3 times, spread over 2 witnesses.
     first_interval = index.split_lcp_array_into_intervals()[0]
     self.assertEqual(2, first_interval.number_of_witnesses)
     self.assertEqual(1, first_interval.length)
     self.assertEqual(3, first_interval.number_of_occurrences)
Example #8
0
 def test_filter_potential_blocks(self):
     """Check depth, length and number of instances of the first interval."""
     collation = Collation()
     for sigil, text in (("W1", "a a"), ("w2", "a")):
         collation.add_plain_witness(sigil, text)

     index = TokenIndex(collation.witnesses)
     index.prepare()

     # Expected: a single interval ("a") with length 1 that occurs
     # 3 times, spread over 2 witnesses.
     first_block = index.split_lcp_array_into_intervals()[0]
     self.assertEqual(2, first_block.get_depth())
     self.assertEqual(1, first_block.length)
     instances = first_block.get_all_instances()
     self.assertEqual(3, len(instances))
Example #9
0
 def testCaseDanielStoeklLCPIntervals(self):
     """Check the five LCP intervals produced for the three-witness example."""
     collation = Collation()
     collation.add_plain_witness("W1", "a b c d e")
     collation.add_plain_witness("W2", "a e c d")
     collation.add_plain_witness("W3", "a d b")
     token_index = TokenIndex(collation.witnesses)
     token_index.prepare()
     blocks = token_index.split_lcp_array_into_intervals()
     # each interval is checked with the assertLCP_Interval helper;
     # the trailing comment names the tokens the interval covers
     self.assertLCP_Interval(2, 1, 3, 3, blocks[0])  # a
     self.assertLCP_Interval(5, 1, 2, 2, blocks[1])  # b
     self.assertLCP_Interval(7, 2, 2, 2, blocks[2])  # c d
     self.assertLCP_Interval(9, 1, 3, 3, blocks[3])  # d
     self.assertLCP_Interval(12, 1, 2, 2, blocks[4])  # e
     # no further intervals are expected
     self.assertEqual(5, len(blocks))
Example #10
0
 def testCaseDanielStoeklLCPIntervals(self):
     """Check the five LCP intervals produced for the three-witness example."""
     collation = Collation()
     for sigil, text in (
         ("W1", "a b c d e"),
         ("W2", "a e c d"),
         ("W3", "a d b"),
     ):
         collation.add_plain_witness(sigil, text)

     index = TokenIndex(collation.witnesses)
     index.prepare()
     intervals = index.split_lcp_array_into_intervals()

     # leading arguments handed to assertLCP_Interval, one row per interval
     expected_rows = (
         (2, 1, 3, 3),   # a
         (5, 1, 2, 2),   # b
         (7, 2, 2, 2),   # c d
         (9, 1, 3, 3),   # d
         (12, 1, 2, 2),  # e
     )
     for position, (arg1, arg2, arg3, arg4) in enumerate(expected_rows):
         self.assertLCP_Interval(arg1, arg2, arg3, arg4, intervals[position])

     # no further intervals are expected
     self.assertEqual(5, len(intervals))