def __init__(self, witnesses):
    """Build the token index for *witnesses* and seed the decision tree.

    Prepares the suffix/LCP machinery, derives the LCP intervals, and
    creates the root decision node scored with the global maximum.
    """
    self.witnesses = witnesses
    # Index the witness tokens and compute the LCP intervals up front.
    self.tokenindex = TokenIndex(witnesses)
    self.tokenindex.prepare()
    self.lcp_intervals = self.tokenindex.split_lcp_array_into_intervals()
    # The root node starts with the best score any alignment could reach.
    max_score = calculate_maximum(self.lcp_intervals)
    self.root = DecisionNode(max_score)
def test_witness_ranges_hermans_case(self):
    """Each witness occupies a distinct token range (Hermans case).

    W1 has 15 tokens -> positions 0-14; the separator takes position 15,
    so W2's 13 tokens occupy positions 16-28.
    """
    collation = Collation()
    collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
    collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
    token_index = TokenIndex(collation.witnesses)
    token_index.prepare()
    # assertEquals is a deprecated alias (removed in Python 3.12);
    # use assertEqual instead.
    self.assertEqual(RangeSet("0-14"), token_index.get_range_for_witness("W1"))
    self.assertEqual(RangeSet("16-28"), token_index.get_range_for_witness("W2"))
def testTokenArrayMarkersWithThreeWitnesses(self):
    """The token array joins three witnesses with $0/$1 separator markers."""
    coll = Collation()
    # Three short witnesses; the index should concatenate their tokens.
    coll.add_plain_witness("W1", "interesting nice huh")
    coll.add_plain_witness("W2", "very nice right")
    coll.add_plain_witness("W3", "especially interesting")
    index = TokenIndex(coll.witnesses)
    index.prepare()
    expected = "interesting nice huh $0 very nice right $1 especially interesting"
    self.assertTokenArray(expected, index)
def test_token_array_hermans_case(self):
    """Two Hermans-case witnesses are concatenated with a $0 marker."""
    coll = Collation()
    coll.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
    coll.add_plain_witness("W2", "a b c d F g h i ! q r s t")
    index = TokenIndex(coll.witnesses)
    index.prepare()
    # $ is meant to separate witnesses here
    expected = "a b c d F g h i ! K ! q r s t $0 a b c d F g h i ! q r s t"
    self.assertTokenArray(expected, index)
def test_lcp_intervals_number_of_witnesses_Hermans_case(self):
    """The '! q r s t' interval spans all three Hermans-case witnesses."""
    coll = Collation()
    for sigil, content in (
        ("W1", "a b c d F g h i ! K ! q r s t"),
        ("W2", "a b c d F g h i ! q r s t"),
        ("W3", "a b c d E g h i ! q r s t"),
    ):
        coll.add_plain_witness(sigil, content)
    index = TokenIndex(coll.witnesses)
    index.prepare()
    intervals = index.split_lcp_array_into_intervals()
    # intervals[1] corresponds to the shared suffix "! q r s t".
    self.assertEqual(3, intervals[1].number_of_witnesses)
def testCaseDanielStoekl(self):
    """The LCP array for Daniel Stoekl's three witnesses is deterministic.

    The suffix array itself may come out in several valid orders, but the
    LCP array is fixed, so that is what we assert on.
    """
    collation = Collation()
    collation.add_plain_witness("W1", "a b c d e")
    collation.add_plain_witness("W2", "a e c d")
    collation.add_plain_witness("W3", "a d b")
    token_index = TokenIndex(collation.witnesses)
    token_index.prepare()
    # Note: the suffix array can have multiple forms
    # outcome of sorting is not guaranteed
    # however the LCP array is fixed we can assert that
    # assertEquals is a deprecated alias (removed in Python 3.12);
    # use assertEqual instead.
    self.assertEqual(
        array('i', [0, 0, 0, 1, 1, 0, 1, 0, 2, 0, 1, 1, 0, 1]),
        token_index.get_lcp_array())
def test_filter_potential_blocks(self):
    """A repeated token yields one interval covering both witnesses.

    Expectation: a single interval of length 1 with three occurrences
    ('a' appears twice in W1 and once in w2) across two witnesses.
    """
    coll = Collation()
    coll.add_plain_witness("W1", "a a")
    coll.add_plain_witness("w2", "a")
    index = TokenIndex(coll.witnesses)
    index.prepare()
    intervals = index.split_lcp_array_into_intervals()
    interval_a = intervals[0]  # the interval for token "a"
    self.assertEqual(2, interval_a.number_of_witnesses)
    self.assertEqual(1, interval_a.length)
    self.assertEqual(3, interval_a.number_of_occurrences)
def setUp(self):
    """Create three fixture witnesses and a prepared TokenIndex.

    Witness contents:
        A: a b c d e
        B: a e c d
        C: a d b
    """
    witness_data = [
        {'id': 'A', 'content': "a b c d e"},
        {'id': 'B', 'content': "a e c d"},
        {'id': 'C', 'content': "a d b"},
    ]
    self.witnesses = [Witness(data) for data in witness_data]
    self.tokenindex = TokenIndex(self.witnesses)
    self.tokenindex.prepare()
def testCaseDanielStoeklLCPIntervals(self):
    """The Stoekl witnesses produce exactly five LCP intervals.

    Each assertLCP_Interval call checks (start, length, occurrences,
    witnesses) for one block; the trailing comment names its token(s).
    """
    collation = Collation()
    collation.add_plain_witness("W1", "a b c d e")
    collation.add_plain_witness("W2", "a e c d")
    collation.add_plain_witness("W3", "a d b")
    token_index = TokenIndex(collation.witnesses)
    token_index.prepare()
    blocks = token_index.split_lcp_array_into_intervals()
    self.assertLCP_Interval(2, 1, 3, 3, blocks[0])   # a
    self.assertLCP_Interval(5, 1, 2, 2, blocks[1])   # b
    self.assertLCP_Interval(7, 2, 2, 2, blocks[2])   # c d
    self.assertLCP_Interval(9, 1, 3, 3, blocks[3])   # d
    self.assertLCP_Interval(12, 1, 2, 2, blocks[4])  # e
    # assertEquals is a deprecated alias (removed in Python 3.12);
    # use assertEqual instead.
    self.assertEqual(5, len(blocks))
def __init__(self, collation, near_match=False, debug_scores=False, detect_transpositions=False, properties_filter=None):
    """Set up a table-based aligner for *collation*.

    near_match and properties_filter are forwarded to the Scorer;
    debug_scores and detect_transpositions are kept as flags.
    """
    self.collation = collation
    self.debug_scores = debug_scores
    self.detect_transpositions = detect_transpositions
    # The token index feeds the scorer; alignment uses the table strategy.
    self.token_index = TokenIndex(collation.witnesses)
    self.scorer = Scorer(self.token_index,
                         near_match=near_match,
                         properties_filter=properties_filter)
    self.align_function = self._align_table
def __init__(self, collation, near_match=False, debug_scores=False, detect_transpositions=False, properties_filter=None):
    """Set up a graph-based aligner for *collation*.

    Keeps the configuration flags and initializes the empty bookkeeping
    structures filled in during alignment.
    """
    self.scorer = Scorer()
    self.collation = collation
    self.debug_scores = debug_scores
    self.detect_transpositions = detect_transpositions
    self.properties_filter = properties_filter
    self.token_index = TokenIndex(collation.witnesses)
    # Mutable state populated while aligning witnesses against the graph.
    self.token_position_to_vertex = {}
    self.added_witness = []
    self.omitted_base = []
    self.vertex_array = []
    self.cells = [[]]