def test_scoring_with_properties_filter(self):
    """Collate two token-list witnesses with and without a properties filter.

    The same JSON input is collated twice with ``segmentation=False``: once
    with default settings and once with ``self.match_properties`` passed as
    ``properties_filter``. Each rendered table is compared with a literal.
    """
    # NOTE(review): the table literals are copied verbatim from this view,
    # where they appear collapsed onto a single line -- confirm against the
    # intended multi-line rendering.
    witness_a = {"id": "A", "tokens": [{"t": "filler1"}, {"t": "token"}]}
    witness_b = {
        "id": "B",
        "tokens": [{"t": "token", "rend": "b"}, {"t": "filler2"}],
    }
    json_in = {"witnesses": [witness_a, witness_b]}

    # Default collation.
    plain_expected = """+---+---------+-------+---------+ | A | filler1 | token | - | | B | - | token | filler2 | +---+---------+-------+---------+"""
    plain_table = collate(json_in, segmentation=False)
    self.assertEqual(plain_expected, str(plain_table))

    # Collation with the user-supplied properties filter.
    filtered_expected = """+---+---------+---------+ | A | filler1 | token | | B | token | filler2 | +---+---------+---------+"""
    filtered_table = collate(
        json_in,
        properties_filter=self.match_properties,
        segmentation=False,
    )
    self.assertEqual(filtered_expected, str(filtered_table))
def test_scoring_with_properties_filter(self):
    """Collate two token-list witnesses with and without a properties filter.

    Three collations are run with ``segmentation=False``: the default one,
    one with ``self.match_properties`` as ``properties_filter``, and the
    filtered one again after the ``rend`` property has been removed from
    witness B's first token. Each table is printed and compared with a
    literal.
    """
    # NOTE(review): the table literals are copied verbatim from this view,
    # where they appear collapsed onto a single line (including the "\ "
    # right after the opening quotes) -- confirm against the intended
    # multi-line rendering.
    aligned_rendering = """\ +---+---------+-------+---------+ | A | filler1 | token | - | | B | - | token | filler2 | +---+---------+-------+---------+"""
    shifted_rendering = """\ +---+---------+---------+ | A | filler1 | token | | B | token | filler2 | +---+---------+---------+"""

    witness_a = {"id": "A", "tokens": [{"t": "filler1"}, {"t": "token"}]}
    witness_b = {
        "id": "B",
        "tokens": [{"t": "token", "rend": "b"}, {"t": "filler2"}],
    }
    json_in = {"witnesses": [witness_a, witness_b]}

    # 1. Default collation.
    table = collate(json_in, segmentation=False)
    print(str(table))
    self.assertEqual(aligned_rendering, str(table))

    # 2. Collation with the properties filter while B's token carries "rend".
    table = collate(
        json_in,
        properties_filter=self.match_properties,
        segmentation=False,
    )
    print(str(table))
    self.assertEqual(shifted_rendering, str(table))

    # 3. Drop "rend" so the 2 tokens have the same user-defined token_data,
    #    then collate with the filter again.
    del json_in["witnesses"][1]["tokens"][0]["rend"]
    table = collate(
        json_in,
        properties_filter=self.match_properties,
        segmentation=False,
    )
    print(str(table))
    self.assertEqual(aligned_rendering, str(table))
def testColumnStatusInAlignmentTable(self):
    """Check the ``variant`` flag of every column for two, then three, witnesses."""
    collation = Collation()
    collation.add_plain_witness("A", "The quick brown fox jumps over the dog.")
    collation.add_plain_witness("B", "The brown fox jumps over the lazy dog.")

    two_witness_table = collate(collation)
    variant_flags = [column.variant for column in two_witness_table.columns]
    self.assertEqual([False, True, False, True, False], variant_flags)

    collation.add_plain_witness("C", "The brown fox walks around the lazy dog.")
    # NOTE(review): this first result is discarded and the collation is run
    # again on the next line; kept as-is -- confirm whether it is intentional.
    collate(collation)
    three_witness_table = collate(collation)
    variant_flags = [column.variant for column in three_witness_table.columns]
    self.assertEqual(
        [False, True, False, True, False, True, False],
        variant_flags,
    )
def testJSONAlignmentTableRenderingNoSegmentation(self):
    """Compare the parsed JSON output of an unsegmented three-witness collation.

    The output of ``collate(..., output="json", segmentation=False)`` is
    parsed with ``json.loads`` and compared with a literal holding one row
    per witness; ``None`` marks the cells where B and C have no token.
    """
    collation = Collation()
    for sigil, text in (
        ("A", "This very quick very quick brown wombat"),
        ("B", "That very quick brown koala"),
        ("C", "That very quick brown kangaroo"),
    ):
        collation.add_plain_witness(sigil, text)

    row_a = [
        [{"_sigil": "A", "_token_array_position": 0, "n": "This", "t": "This "}],
        [{"_sigil": "A", "_token_array_position": 1, "n": "very", "t": "very "}],
        [{"_sigil": "A", "_token_array_position": 2, "n": "quick", "t": "quick "}],
        [{"_sigil": "A", "_token_array_position": 3, "n": "very", "t": "very "}],
        [{"_sigil": "A", "_token_array_position": 4, "n": "quick", "t": "quick "}],
        [{"_sigil": "A", "_token_array_position": 5, "n": "brown", "t": "brown "}],
        [{"_sigil": "A", "_token_array_position": 6, "n": "wombat", "t": "wombat"}],
    ]
    row_b = [
        [{"_sigil": "B", "_token_array_position": 8, "n": "That", "t": "That "}],
        None,
        None,
        [{"_sigil": "B", "_token_array_position": 9, "n": "very", "t": "very "}],
        [{"_sigil": "B", "_token_array_position": 10, "n": "quick", "t": "quick "}],
        [{"_sigil": "B", "_token_array_position": 11, "n": "brown", "t": "brown "}],
        [{"_sigil": "B", "_token_array_position": 12, "n": "koala", "t": "koala"}],
    ]
    row_c = [
        [{"_sigil": "C", "_token_array_position": 14, "n": "That", "t": "That "}],
        None,
        None,
        [{"_sigil": "C", "_token_array_position": 15, "n": "very", "t": "very "}],
        [{"_sigil": "C", "_token_array_position": 16, "n": "quick", "t": "quick "}],
        [{"_sigil": "C", "_token_array_position": 17, "n": "brown", "t": "brown "}],
        [{"_sigil": "C", "_token_array_position": 18, "n": "kangaroo", "t": "kangaroo"}],
    ]
    expected_output = {
        "table": [row_a, row_b, row_c],
        "witnesses": ["A", "B", "C"],
    }

    rendered = collate(collation, output="json", segmentation=False)
    self.assertEqual(expected_output, json.loads(rendered))
def testPlainTableRenderingVerticalNoSegmentation(self):
    """Compare the vertical plain-text rendering of a three-witness collation.

    The collation is run with ``layout="vertical"`` and ``segmentation=None``
    and its ``str()`` form is compared with a literal table.
    """
    # NOTE(review): the table literal is copied verbatim from this view,
    # where it appears collapsed onto a single line (including the "\ "
    # right after the opening quotes) -- confirm against the intended
    # multi-line rendering.
    expected_output = """\ +--------+-------+----------+ | A | B | C | +--------+-------+----------+ | This | That | That | +--------+-------+----------+ | very | - | - | +--------+-------+----------+ | quick | - | - | +--------+-------+----------+ | very | very | very | +--------+-------+----------+ | quick | quick | quick | +--------+-------+----------+ | brown | brown | brown | +--------+-------+----------+ | wombat | koala | kangaroo | +--------+-------+----------+"""

    collation = Collation()
    for sigil, text in (
        ("A", "This very quick very quick brown wombat"),
        ("B", "That very quick brown koala"),
        ("C", "That very quick brown kangaroo"),
    ):
        collation.add_plain_witness(sigil, text)

    vertical_table = collate(collation, layout="vertical", segmentation=None)
    self.assertEqual(expected_output, str(vertical_table))
def testPlainTableRenderingVerticalNoSegmentation(self):
    """Render a three-witness collation vertically and compare it with a literal.

    ``collate`` is called with ``layout="vertical"`` and
    ``segmentation=None``; the string form of its result must equal the
    expected table text.
    """
    witnesses = Collation()
    witnesses.add_plain_witness("A", "This very quick very quick brown wombat")
    witnesses.add_plain_witness("B", "That very quick brown koala")
    witnesses.add_plain_witness("C", "That very quick brown kangaroo")

    # NOTE(review): the table literal is copied verbatim from this view,
    # where it appears collapsed onto a single line (including the "\ "
    # right after the opening quotes) -- confirm against the intended
    # multi-line rendering.
    expected_output = """\ +--------+-------+----------+ | A | B | C | +--------+-------+----------+ | This | That | That | +--------+-------+----------+ | very | - | - | +--------+-------+----------+ | quick | - | - | +--------+-------+----------+ | very | very | very | +--------+-------+----------+ | quick | quick | quick | +--------+-------+----------+ | brown | brown | brown | +--------+-------+----------+ | wombat | koala | kangaroo | +--------+-------+----------+"""

    rendered_table = collate(witnesses, layout="vertical", segmentation=None)
    plain_text_output = str(rendered_table)
    self.assertEqual(expected_output, plain_text_output)
def testJSONAlignmentTableRendering(self):
    """Compare the parsed JSON output of a default three-witness collation.

    The output of ``collate(..., output="json")`` is printed, parsed with
    ``json.loads`` and compared with a literal in which every cell is a list
    of token dictionaries.
    """
    collation = Collation()
    for sigil, text in (
        ("A", "This very quick very quick brown wombat"),
        ("B", "That very quick brown koala"),
        ("C", "That very quick brown kangaroo"),
    ):
        collation.add_plain_witness(sigil, text)

    row_a = [
        [
            {"n": "This", "_sigil": "A", "t": "This ", "_token_array_position": 0},
            {"n": "very", "_sigil": "A", "t": "very ", "_token_array_position": 1},
            {"n": "quick", "_sigil": "A", "t": "quick ", "_token_array_position": 2},
        ],
        [
            {"n": "very", "_sigil": "A", "t": "very ", "_token_array_position": 3},
            {"n": "quick", "_sigil": "A", "t": "quick ", "_token_array_position": 4},
            {"n": "brown", "_sigil": "A", "t": "brown ", "_token_array_position": 5},
        ],
        [
            {"n": "wombat", "_sigil": "A", "t": "wombat", "_token_array_position": 6},
        ],
    ]
    row_b = [
        [
            {"n": "That", "_sigil": "B", "t": "That ", "_token_array_position": 8},
        ],
        [
            {"n": "very", "_sigil": "B", "t": "very ", "_token_array_position": 9},
            {"n": "quick", "_sigil": "B", "t": "quick ", "_token_array_position": 10},
            {"n": "brown", "_sigil": "B", "t": "brown ", "_token_array_position": 11},
        ],
        [
            {"n": "koala", "_sigil": "B", "t": "koala", "_token_array_position": 12},
        ],
    ]
    row_c = [
        [
            {"n": "That", "_sigil": "C", "t": "That ", "_token_array_position": 14},
        ],
        [
            {"n": "very", "_sigil": "C", "t": "very ", "_token_array_position": 15},
            {"n": "quick", "_sigil": "C", "t": "quick ", "_token_array_position": 16},
            {"n": "brown", "_sigil": "C", "t": "brown ", "_token_array_position": 17},
        ],
        [
            {"n": "kangaroo", "_sigil": "C", "t": "kangaroo", "_token_array_position": 18},
        ],
    ]
    expected_output = {
        "table": [row_a, row_b, row_c],
        "witnesses": ["A", "B", "C"],
    }

    rendered = collate(collation, output="json")
    print(rendered)
    self.assertEqual(expected_output, json.loads(rendered))
def test_scoring_with_properties_filter(self):
    """Exercise ``collate`` with a properties filter on token-list witnesses.

    Runs a default collation, a collation filtered by
    ``self.match_properties``, and the filtered collation once more after
    deleting the ``rend`` property from witness B's first token. All runs
    use ``segmentation=False``; each table is printed and checked against a
    literal rendering.
    """
    tokens_a = [{"t": "filler1"}, {"t": "token"}]
    tokens_b = [{"t": "token", "rend": "b"}, {"t": "filler2"}]
    json_in = {
        "witnesses": [
            {"id": "A", "tokens": tokens_a},
            {"id": "B", "tokens": tokens_b},
        ]
    }

    # NOTE(review): the table literals are copied verbatim from this view,
    # where they appear collapsed onto a single line (including the "\ "
    # right after the opening quotes) -- confirm against the intended
    # multi-line rendering.
    expected_output = """\ +---+---------+-------+---------+ | A | filler1 | token | - | | B | - | token | filler2 | +---+---------+-------+---------+"""
    rendering = str(collate(json_in, segmentation=False))
    print(rendering)
    self.assertEqual(expected_output, rendering)

    expected_output = """\ +---+---------+---------+ | A | filler1 | token | | B | token | filler2 | +---+---------+---------+"""
    rendering = str(
        collate(
            json_in,
            properties_filter=self.match_properties,
            segmentation=False,
        )
    )
    print(rendering)
    self.assertEqual(expected_output, rendering)

    # Remove "rend" so the 2 tokens have the same user-defined token_data.
    del json_in["witnesses"][1]["tokens"][0]["rend"]
    expected_output = """\ +---+---------+-------+---------+ | A | filler1 | token | - | | B | - | token | filler2 | +---+---------+-------+---------+"""
    rendering = str(
        collate(
            json_in,
            properties_filter=self.match_properties,
            segmentation=False,
        )
    )
    print(rendering)
    self.assertEqual(expected_output, rendering)
def testJSONAlignmentTableRendering(self):
    """Compare the raw JSON string of a default three-witness collation.

    The string returned by ``collate(..., output="json")`` is compared
    verbatim (not parsed) with the expected JSON text.
    """
    collation = Collation()
    collation.add_plain_witness("A", "This very quick very quick brown wombat")
    collation.add_plain_witness("B", "That very quick brown koala")
    collation.add_plain_witness("C", "That very quick brown kangaroo")
    expected_output = '{"table": [[["This very quick"], ["very quick brown"], ["wombat"]], [["That"], ["very quick brown"], ["koala"]], [["That"], ["very quick brown"], ["kangaroo"]]], "witnesses": ["A", "B", "C"]}'
    # Named json_out (not json) so the local does not shadow the stdlib
    # module name.
    json_out = collate(collation, output="json")
    # assertEqual replaces the deprecated assertEquals alias, which newer
    # unittest versions no longer provide.
    self.assertEqual(expected_output, json_out)
def testJSONAlignmentTableRenderingNoSegmentation(self):
    """Compare the parsed JSON output of an unsegmented three-witness collation.

    The output of ``collate(..., output="json", segmentation=False)`` is
    parsed with ``json.loads`` and compared with a literal holding one row
    per witness; ``None`` marks the cells where B and C have no token.
    """
    collation = Collation()
    collation.add_plain_witness("A", "This very quick very quick brown wombat")
    collation.add_plain_witness("B", "That very quick brown koala")
    collation.add_plain_witness("C", "That very quick brown kangaroo")
    expected_output = {
        "table": [
            [
                [{"n": "This", "t": "This "}],
                [{"n": "very", "t": "very "}],
                [{"n": "quick", "t": "quick "}],
                [{"n": "very", "t": "very "}],
                [{"n": "quick", "t": "quick "}],
                [{"n": "brown", "t": "brown "}],
                [{"n": "wombat", "t": "wombat"}],
            ],
            [
                [{"n": "That", "t": "That "}],
                None,
                None,
                [{"n": "very", "t": "very "}],
                [{"n": "quick", "t": "quick "}],
                [{"n": "brown", "t": "brown "}],
                [{"n": "koala", "t": "koala"}],
            ],
            [
                [{"n": "That", "t": "That "}],
                None,
                None,
                [{"n": "very", "t": "very "}],
                [{"n": "quick", "t": "quick "}],
                [{"n": "brown", "t": "brown "}],
                [{"n": "kangaroo", "t": "kangaroo"}],
            ],
        ],
        "witnesses": ["A", "B", "C"],
    }
    json_out = collate(collation, output="json", segmentation=False)
    # assertEqual replaces the deprecated assertEquals alias, which newer
    # unittest versions no longer provide.
    self.assertEqual(expected_output, json.loads(json_out))
def test_align_with_longest_match(self):
    """Collate "a g a g c t a g t" with "a g c t" and check both table rows."""
    collation = Collation()
    for sigil, text in (("A", "a g a g c t a g t"), ("B", "a g c t")):
        collation.add_plain_witness(sigil, text)

    table = collate(collation)
    print("alignment_table=\n", table)

    expected_rows = (
        ['a g ', 'a g c t ', 'a g t'],
        [None, 'a g c t', None],
    )
    for row_index, expected_cells in enumerate(expected_rows):
        self.assertEqual(
            expected_cells,
            table.rows[row_index].to_list_of_strings(),
        )
def test_non_overlapping_blocks_Hermans(self):
    """Collate witnesses W1 and W2 and check the three cells of each row."""
    collation = Collation()
    for sigil, text in (
        ("W1", "a b c d F g h i ! K ! q r s t"),
        ("W2", "a b c d F g h i ! q r s t"),
    ):
        collation.add_plain_witness(sigil, text)

    table = collate(collation)
    print("alignment_table=\n", table)

    expected_rows = (
        ['a b c d F g h i ', '! K ', '! q r s t'],
        ['a b c d F g h i ', None, '! q r s t'],
    )
    for row_index, expected_cells in enumerate(expected_rows):
        self.assertEqual(
            expected_cells,
            table.rows[row_index].to_list_of_strings(),
        )
def test_scoring_with_properties_filter(self):
    """Check table renderings with and without ``properties_filter``.

    One JSON input with two token-list witnesses is collated with
    ``segmentation=False``, first with defaults and then with
    ``self.match_properties`` as the properties filter; each ``str()``
    rendering is compared with a literal.
    """
    json_in = {
        "witnesses": [
            {"id": "A", "tokens": [{"t": "filler1"}, {"t": "token"}]},
            {
                "id": "B",
                "tokens": [{"t": "token", "rend": "b"}, {"t": "filler2"}],
            },
        ]
    }

    # NOTE(review): the table literals are copied verbatim from this view,
    # where they appear collapsed onto a single line -- confirm against the
    # intended multi-line rendering.
    expected_output = """+---+---------+-------+---------+ | A | filler1 | token | - | | B | - | token | filler2 | +---+---------+-------+---------+"""
    rendering = str(collate(json_in, segmentation=False))
    self.assertEqual(expected_output, rendering)

    expected_output = """+---+---------+---------+ | A | filler1 | token | | B | token | filler2 | +---+---------+---------+"""
    rendering = str(
        collate(
            json_in,
            properties_filter=self.match_properties,
            segmentation=False,
        )
    )
    self.assertEqual(expected_output, rendering)
def test_1(self):
    """Collate witnesses "a", "b" and "a b" and check all three table rows."""
    collation = Collation()
    for sigil, text in (("A", "a"), ("B", "b"), ("C", "a b")):
        collation.add_plain_witness(sigil, text)

    table = collate(collation)
    print("alignment_table=\n", table)

    expected_rows = (
        ['a', None],
        [None, 'b'],
        ['a ', 'b'],
    )
    for row_index, expected_cells in enumerate(expected_rows):
        self.assertEqual(
            expected_cells,
            table.rows[row_index].to_list_of_strings(),
        )
def test_2(self):
    """Collate three "in the in the bleach ..." witnesses and check each row."""
    collation = Collation()
    for sigil, text in (
        ("W1", "in the in the bleach"),
        ("W2", "in the in the bleach in the"),
        ("W3", "in the in the bleach in the"),
    ):
        collation.add_plain_witness(sigil, text)

    table = collate(collation)
    print("alignment_table=\n", table)

    expected_rows = (
        ['in the in the bleach', None],
        ['in the in the bleach ', 'in the'],
        ['in the in the bleach ', 'in the'],
    )
    for row_index, expected_cells in enumerate(expected_rows):
        self.assertEqual(
            expected_cells,
            table.rows[row_index].to_list_of_strings(),
        )
def testColumnStatusInAlignmentTable(self):
    """Verify each column's ``variant`` flag before and after adding witness C."""
    witnesses = Collation()
    witnesses.add_plain_witness("A", "The quick brown fox jumps over the dog.")
    witnesses.add_plain_witness("B", "The brown fox jumps over the lazy dog.")

    table = collate(witnesses)
    status_array = [column.variant for column in table.columns]
    self.assertEqual([False, True, False, True, False], status_array)

    witnesses.add_plain_witness("C", "The brown fox walks around the lazy dog.")
    # NOTE(review): the result of this call is thrown away and collate is
    # invoked again right after; preserved unchanged -- confirm the intent.
    collate(witnesses)
    table = collate(witnesses)
    status_array = [column.variant for column in table.columns]
    self.assertEqual(
        [False, True, False, True, False, True, False],
        status_array,
    )
def test_blocks_Hermans_case_three_witnesses(self):
    """Collate witnesses W1, W2 and W3 and check the five cells of each row."""
    collation = Collation()
    for sigil, text in (
        ("W1", "a b c d F g h i ! K ! q r s t"),
        ("W2", "a b c d F g h i ! q r s t"),
        ("W3", "a b c d E g h i ! q r s t"),
    ):
        collation.add_plain_witness(sigil, text)

    table = collate(collation)
    print("alignment_table=\n", table)

    expected_rows = (
        ['a b c d ', 'F ', 'g h i ', '! K ', '! q r s t'],
        ['a b c d ', 'F ', 'g h i ', None, '! q r s t'],
        ['a b c d ', 'E ', 'g h i ', None, '! q r s t'],
    )
    for row_index, expected_cells in enumerate(expected_rows):
        self.assertEqual(
            expected_cells,
            table.rows[row_index].to_list_of_strings(),
        )
def testPlainTableRendering(self):
    """Compare the default plain-text rendering of a three-witness collation.

    The ``str()`` form of ``collate(collation)`` is compared with a literal
    table.
    """
    collation = Collation()
    collation.add_plain_witness("A", "This very quick very quick brown wombat")
    collation.add_plain_witness("B", "That very quick brown koala")
    collation.add_plain_witness("C", "That very quick brown kangaroo")
    # NOTE(review): the table literal is copied verbatim from this view,
    # where it appears collapsed onto a single line -- confirm against the
    # intended multi-line rendering.
    expected_output = """+---+-----------------+------------------+----------+ | A | This very quick | very quick brown | wombat | | B | That | very quick brown | koala | | C | That | very quick brown | kangaroo | +---+-----------------+------------------+----------+"""
    plain_text_output = str(collate(collation))
    # assertEqual replaces the deprecated assertEquals alias, which newer
    # unittest versions no longer provide.
    self.assertEqual(expected_output, plain_text_output)
def testPlainTableRenderingNoParallelSegmentation(self):
    """Compare the plain-text rendering of an unsegmented three-witness collation.

    The ``str()`` form of ``collate(collation, segmentation=False)`` is
    compared with a literal table.
    """
    # NOTE(review): the table literal is copied verbatim from this view,
    # where it appears collapsed onto a single line (including the "\ "
    # right after the opening quotes) -- confirm against the intended
    # multi-line rendering.
    expected_output = """\ +---+------+------+-------+------+-------+-------+----------+ | A | This | very | quick | very | quick | brown | wombat | | B | That | - | - | very | quick | brown | koala | | C | That | - | - | very | quick | brown | kangaroo | +---+------+------+-------+------+-------+-------+----------+"""

    collation = Collation()
    for sigil, text in (
        ("A", "This very quick very quick brown wombat"),
        ("B", "That very quick brown koala"),
        ("C", "That very quick brown kangaroo"),
    ):
        collation.add_plain_witness(sigil, text)

    unsegmented_table = collate(collation, segmentation=False)
    self.assertEqual(expected_output, str(unsegmented_table))
def test_duplicated_tokens_in_witness(self):
    """Collate "a", "b", "c" and "a a" and check the two cells of each row."""
    collation = Collation()
    for sigil, text in (("A", "a"), ("B", "b"), ("C", "c"), ("D", "a a")):
        collation.add_plain_witness(sigil, text)

    table = collate(collation)
    print("alignment_table=\n", table)

    expected_rows = (
        [None, 'a'],
        ['b', None],
        ['c', None],
        ['a ', 'a'],
    )
    for row_index, expected_cells in enumerate(expected_rows):
        self.assertEqual(
            expected_cells,
            table.rows[row_index].to_list_of_strings(),
        )
def test_rank_adjustment(self):
    """Collate five witnesses (A-E) and check the five cells of every row."""
    collation = Collation()
    for sigil, text in (
        ('A', 'aa bb cc dd ee ff'),
        ('B', 'aa bb ex ff'),
        ('C', 'aa bb cc ee ff'),
        ('D', 'aa bb ex dd ff'),
        ('E', 'aaa aaa aaa aaa aaa'),
    ):
        collation.add_plain_witness(sigil, text)

    table = collate(collation)
    print("alignment_table=\n", table)

    expected_rows = (
        ['aa bb ', 'cc ', 'dd ', 'ee ', 'ff'],
        ['aa bb ', 'ex ', None, None, 'ff'],
        ['aa bb ', 'cc ', None, 'ee ', 'ff'],
        ['aa bb ', 'ex ', 'dd ', None, 'ff'],
        ['aaa aaa aaa aaa aaa', None, None, None, None],
    )
    for row_index, expected_cells in enumerate(expected_rows):
        self.assertEqual(
            expected_cells,
            table.rows[row_index].to_list_of_strings(),
        )
def test_duplicated_tokens_in_witness2(self):
    """Collate "a", "b", "c" and "a b c a b c" and compare the TEI output.

    The result of ``collate(..., output="tei", indent=True)`` is compared
    with an expected XML literal.
    """
    collation = Collation()
    for sigil, text in (
        ("A", "a"),
        ("B", "b"),
        ("C", "c"),
        ("D", "a b c a b c"),
    ):
        collation.add_plain_witness(sigil, text)

    # Disabled alignment-table expectations, kept for reference:
    # alignment_table = collate(collation)
    # self.assertEqual(['a', None, None, None], alignment_table.rows[0].to_list_of_strings())
    # self.assertEqual([None, 'b', None, None], alignment_table.rows[1].to_list_of_strings())
    # self.assertEqual([None, None, 'c', None], alignment_table.rows[2].to_list_of_strings())
    # self.assertEqual(['a ', 'b ', 'c ', 'a b c'], alignment_table.rows[3].to_list_of_strings())

    # NOTE(review): the XML literal is copied verbatim from this view, where
    # it appears collapsed onto a single line -- confirm against the intended
    # multi-line, indented rendering.
    expected_tei = """<?xml version="1.0" ?> <cx:apparatus xmlns="http://www.tei-c.org/ns/1.0" xmlns:cx="http://interedition.eu/collatex/ns/1.0"> <app> <rdg wit="#D">a b c</rdg> </app> <app> <rdg wit="#A">a</rdg> <rdg wit="#D">a</rdg> </app> <app> <rdg wit="#B">b</rdg> <rdg wit="#D">b</rdg> </app> <app> <rdg wit="#C #D">c</rdg> </app> </cx:apparatus> """

    # Disabled debugging aid:
    # alignment_table = collate(collation)
    # print("alignment_table=\n",alignment_table)
    tei_rendering = collate(collation, output="tei", indent=True)
    self.assertEqual(expected_tei, tei_rendering)
def testPretokenizedWitness(self):
    """Collate two witnesses supplied as token lists and check the row sizes.

    The collation is built with ``Collation.create_from_dict`` and collated
    with ``segmentation=False``; both rows must contain four cells and the
    second row must contain the text "mousedog bird".
    """
    witness_a = {
        "id": "A",
        "tokens": [
            {"t": "A", "ref": 123},
            {"t": "black", "adj": True},
            {"t": "cat", "id": "xyz"},
            {"t": "bird", "id": "abc"},
        ],
    }
    witness_b = {
        "id": "B",
        "tokens": [
            {"t": "A"},
            {"t": "white", "adj": True},
            {"t": "mousedog bird", "adj": False},
        ],
    }
    pretokenized_witness = {"witnesses": [witness_a, witness_b]}

    collation = Collation.create_from_dict(pretokenized_witness)
    result = collate(collation, segmentation=False)

    for row_index in (0, 1):
        self.assertEqual(len(result.rows[row_index].to_list()), 4)

    # The second witness should have a token that reads 'mousedog bird'.
    second_row_text = str(result.rows[1].to_list())
    self.assertIn("mousedog bird", second_row_text)
def test_near_matching(self):
    """Collate ``self.json_in`` with near matching on and segmentation off.

    Both rows of the result are compared cell by cell with expected lists.
    """
    expected_rows = (
        ["I", "bought", "this", "glass", ",", "because", "it", "matches",
         "those", "dinner", "plates", "."],
        ["I", "bought", None, None, None, None, None, None,
         "those", None, "glasses", "."],
    )
    result = collate(self.json_in, near_match=True, segmentation=False)
    for row_index, expected_cells in enumerate(expected_rows):
        self.assertEqual(
            expected_cells,
            result.rows[row_index].to_list_of_strings(),
        )
def testJSONAlignmentTableRendering(self):
    """Compare the parsed JSON output of a default three-witness collation.

    The output of ``collate(..., output="json")`` is parsed with
    ``json.loads`` and compared with a literal in which every cell is a list
    of ``{"n": ..., "t": ...}`` token dictionaries.
    """
    collation = Collation()
    collation.add_plain_witness("A", "This very quick very quick brown wombat")
    collation.add_plain_witness("B", "That very quick brown koala")
    collation.add_plain_witness("C", "That very quick brown kangaroo")
    expected_output = {
        "table": [
            [
                [
                    {"n": "This", "t": "This "},
                    {"n": "very", "t": "very "},
                    {"n": "quick", "t": "quick "},
                ],
                [
                    {"n": "very", "t": "very "},
                    {"n": "quick", "t": "quick "},
                    {"n": "brown", "t": "brown "},
                ],
                [{"n": "wombat", "t": "wombat"}],
            ],
            [
                [{"n": "That", "t": "That "}],
                [
                    {"n": "very", "t": "very "},
                    {"n": "quick", "t": "quick "},
                    {"n": "brown", "t": "brown "},
                ],
                [{"n": "koala", "t": "koala"}],
            ],
            [
                [{"n": "That", "t": "That "}],
                [
                    {"n": "very", "t": "very "},
                    {"n": "quick", "t": "quick "},
                    {"n": "brown", "t": "brown "},
                ],
                [{"n": "kangaroo", "t": "kangaroo"}],
            ],
        ],
        "witnesses": ["A", "B", "C"],
    }
    json_out = collate(collation, output="json")
    # assertEqual replaces the deprecated assertEquals alias, which newer
    # unittest versions no longer provide.
    self.assertEqual(expected_output, json.loads(json_out))
def test_near_matching_segmented(self):
    """Collate ``self.json_in`` with near matching and segmentation both on.

    Each of the two result rows is compared with its expected two-cell list.
    """
    result = collate(self.json_in, near_match=True, segmentation=True)
    # assertEqual replaces the deprecated assertEquals alias, which newer
    # unittest versions no longer provide.
    self.assertEqual(
        ["I bought", "this glass, because it matches those dinner plates."],
        result.rows[0].to_list_of_strings(),
    )
    self.assertEqual(
        ["I bought", "those glasses."],
        result.rows[1].to_list_of_strings(),
    )
def test_near_matching(self):
    """Collate ``self.json_in`` with near matching on and segmentation off.

    Each of the two result rows is compared cell by cell with its expected
    list; ``None`` marks cells where the second witness has no token.
    """
    result = collate(self.json_in, near_match=True, segmentation=False)
    # assertEqual replaces the deprecated assertEquals alias, which newer
    # unittest versions no longer provide.
    self.assertEqual(
        ["I", "bought", "this", "glass", ",", "because", "it", "matches",
         "those", "dinner", "plates", "."],
        result.rows[0].to_list_of_strings(),
    )
    self.assertEqual(
        ["I", "bought", None, None, None, None, None, None,
         "those", None, "glasses", "."],
        result.rows[1].to_list_of_strings(),
    )
def testJSONAlignmentTableRendering(self):
    """Check the JSON rendering of a default collation of three witnesses.

    ``collate`` is called with ``output="json"``; the returned text is
    printed, decoded with ``json.loads`` and compared with the expected
    table of token dictionaries and the witness list.
    """
    witnesses = Collation()
    witnesses.add_plain_witness("A", "This very quick very quick brown wombat")
    witnesses.add_plain_witness("B", "That very quick brown koala")
    witnesses.add_plain_witness("C", "That very quick brown kangaroo")

    cells_a = [
        [
            {"n": "This", "_sigil": "A", "t": "This ", "_token_array_position": 0},
            {"n": "very", "_sigil": "A", "t": "very ", "_token_array_position": 1},
            {"n": "quick", "_sigil": "A", "t": "quick ", "_token_array_position": 2},
        ],
        [
            {"n": "very", "_sigil": "A", "t": "very ", "_token_array_position": 3},
            {"n": "quick", "_sigil": "A", "t": "quick ", "_token_array_position": 4},
            {"n": "brown", "_sigil": "A", "t": "brown ", "_token_array_position": 5},
        ],
        [
            {"n": "wombat", "_sigil": "A", "t": "wombat", "_token_array_position": 6},
        ],
    ]
    cells_b = [
        [
            {"n": "That", "_sigil": "B", "t": "That ", "_token_array_position": 8},
        ],
        [
            {"n": "very", "_sigil": "B", "t": "very ", "_token_array_position": 9},
            {"n": "quick", "_sigil": "B", "t": "quick ", "_token_array_position": 10},
            {"n": "brown", "_sigil": "B", "t": "brown ", "_token_array_position": 11},
        ],
        [
            {"n": "koala", "_sigil": "B", "t": "koala", "_token_array_position": 12},
        ],
    ]
    cells_c = [
        [
            {"n": "That", "_sigil": "C", "t": "That ", "_token_array_position": 14},
        ],
        [
            {"n": "very", "_sigil": "C", "t": "very ", "_token_array_position": 15},
            {"n": "quick", "_sigil": "C", "t": "quick ", "_token_array_position": 16},
            {"n": "brown", "_sigil": "C", "t": "brown ", "_token_array_position": 17},
        ],
        [
            {"n": "kangaroo", "_sigil": "C", "t": "kangaroo", "_token_array_position": 18},
        ],
    ]
    expected_output = {
        "table": [cells_a, cells_b, cells_c],
        "witnesses": ["A", "B", "C"],
    }

    json_text = collate(witnesses, output="json")
    print(json_text)
    self.assertEqual(expected_output, json.loads(json_text))
def test_near_matching_segmented(self):
    """Collate ``self.json_in`` with near matching and segmentation both on.

    Both rows of the result are compared with expected two-cell lists.
    """
    expected_rows = (
        ["I bought", "this glass, because it matches those dinner plates."],
        ["I bought", "those glasses."],
    )
    result = collate(self.json_in, near_match=True, segmentation=True)
    for row_index, expected_cells in enumerate(expected_rows):
        self.assertEqual(
            expected_cells,
            result.rows[row_index].to_list_of_strings(),
        )
def testJSONAlignmentTableRenderingNoSegmentation(self):
    """Check the JSON rendering of an unsegmented collation of three witnesses.

    ``collate`` is called with ``output="json"`` and ``segmentation=False``;
    the returned text is decoded with ``json.loads`` and compared with the
    expected table, where ``None`` marks the cells in which B and C have no
    token.
    """
    witnesses = Collation()
    witnesses.add_plain_witness("A", "This very quick very quick brown wombat")
    witnesses.add_plain_witness("B", "That very quick brown koala")
    witnesses.add_plain_witness("C", "That very quick brown kangaroo")

    cells_a = [
        [{"_sigil": "A", "_token_array_position": 0, "n": "This", "t": "This "}],
        [{"_sigil": "A", "_token_array_position": 1, "n": "very", "t": "very "}],
        [{"_sigil": "A", "_token_array_position": 2, "n": "quick", "t": "quick "}],
        [{"_sigil": "A", "_token_array_position": 3, "n": "very", "t": "very "}],
        [{"_sigil": "A", "_token_array_position": 4, "n": "quick", "t": "quick "}],
        [{"_sigil": "A", "_token_array_position": 5, "n": "brown", "t": "brown "}],
        [{"_sigil": "A", "_token_array_position": 6, "n": "wombat", "t": "wombat"}],
    ]
    cells_b = [
        [{"_sigil": "B", "_token_array_position": 8, "n": "That", "t": "That "}],
        None,
        None,
        [{"_sigil": "B", "_token_array_position": 9, "n": "very", "t": "very "}],
        [{"_sigil": "B", "_token_array_position": 10, "n": "quick", "t": "quick "}],
        [{"_sigil": "B", "_token_array_position": 11, "n": "brown", "t": "brown "}],
        [{"_sigil": "B", "_token_array_position": 12, "n": "koala", "t": "koala"}],
    ]
    cells_c = [
        [{"_sigil": "C", "_token_array_position": 14, "n": "That", "t": "That "}],
        None,
        None,
        [{"_sigil": "C", "_token_array_position": 15, "n": "very", "t": "very "}],
        [{"_sigil": "C", "_token_array_position": 16, "n": "quick", "t": "quick "}],
        [{"_sigil": "C", "_token_array_position": 17, "n": "brown", "t": "brown "}],
        [{"_sigil": "C", "_token_array_position": 18, "n": "kangaroo", "t": "kangaroo"}],
    ]
    expected_output = {
        "table": [cells_a, cells_b, cells_c],
        "witnesses": ["A", "B", "C"],
    }

    json_text = collate(witnesses, output="json", segmentation=False)
    self.assertEqual(expected_output, json.loads(json_text))