def test_dot_export_including_annotation(self):
    """Check the DOT export of an aligned text graph with annotations enabled.

    Two small XML witnesses are tokenized, aligned, converted to a text
    graph and exported with ``annotations=True``; the result is compared
    to a fixed expected string.
    """
    # NOTE(review): the comparison is exact, so the whitespace inside this
    # literal has to match the exporter's output character for character.
    expected_out = """strict digraph TextGraph { 1 [label="x"] 2 [label="y"] 3 [label="z"] 1 -> 2 2 -> 3 { rank=same; 1; 2; 3 } a1 [label="tei"] a2 [label="s1"] a3 [label="s2"] a4 [label="s3"] a2 -> a1 a3 -> a1 1 -> a2 1 -> a3 a2 -> a1 2 -> a2 a2 -> a1 a4 -> a1 3 -> a2 3 -> a4 }"""
    # TODO: The expected output contains duplicate annotation edges
    #       (a2 -> a1 appears several times); these should be removed.
    # NOTE: For now the problem is worked around by adding the "strict"
    #       keyword to the DOT export.
    first_tokens = convert_xml_string_into_tokens("<tei><s1>x y z</s1></tei>")
    second_tokens = convert_xml_string_into_tokens(
        "<tei><s2>x</s2>y<s3>z</s3></tei>"
    )
    textgraph = convert_superwitness_to_textgraph(
        align_tokens_and_return_superwitness(first_tokens, second_tokens)
    )
    self.assertEqual(expected_out, export_as_dot(textgraph, annotations=True))
def test_3_textgraph_text(self):
    """Check the text tokens of a graph built from two differing witnesses.

    Witness A contains a nested ``<s>c</s>`` that witness B lacks, and
    witness B wraps its content in an extra ``<div>``. The string form of
    ``textgraph.text_tokens`` is expected to be ``"[a, b, -c, d]"``.
    """
    witness_a = "<tei><p><s>a b<s>c</s>d</s></p></tei>"
    witness_b = "<tei><div><p><s>a b d</s></p></div></tei>"
    tokens_a = convert_xml_string_into_tokens(witness_a)
    tokens_b = convert_xml_string_into_tokens(witness_b)
    superwitness = align_tokens_and_return_superwitness(tokens_a, tokens_b)
    textgraph = convert_superwitness_to_textgraph(superwitness)
    text_tokens = textgraph.text_tokens
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual("[a, b, -c, d]", str(text_tokens))
def test_textgraph_text(self):
    """Check the text tokens of a graph built from two witnesses with equal text.

    Both witnesses carry the words ``x y z`` but mark them up differently;
    the string form of ``textgraph.text_tokens`` is expected to be
    ``"[x, y, z]"``.
    """
    witness_a = "<tei><s>x y z</s></tei>"
    witness_b = "<tei><s>x</s>y<s>z</s></tei>"
    tokens_a = convert_xml_string_into_tokens(witness_a)
    tokens_b = convert_xml_string_into_tokens(witness_b)
    superwitness = align_tokens_and_return_superwitness(tokens_a, tokens_b)
    textgraph = convert_superwitness_to_textgraph(superwitness)
    text_tokens = textgraph.text_tokens
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual("[x, y, z]", str(text_tokens))
def test_textgraph_milestones_annotations(self):
    """Check that two identical witnesses holding only ``<lb/>`` yield no annotations.

    Both inputs are ``<tei><lb/></tei>``; after tokenizing, aligning and
    building the text graph, ``textgraph.annotations`` is expected to have
    length 0.
    """
    milestone_only = "<tei><lb/></tei>"
    first_tokens = convert_xml_string_into_tokens(milestone_only)
    second_tokens = convert_xml_string_into_tokens(milestone_only)
    textgraph = convert_superwitness_to_textgraph(
        align_tokens_and_return_superwitness(first_tokens, second_tokens)
    )
    self.assertEqual(0, len(textgraph.annotations))
def collate_xml(witness_a, witness_b):
    """Collate two XML witness strings and display the resulting graph as SVG.

    The witnesses are tokenized, aligned into a superwitness, converted to
    a text graph and exported as DOT with annotations. The DOT text is
    wrapped in a ``Source`` with ``format="svg"``, rendered, and the render
    result is handed to ``SVG`` and ``display``.

    Args:
        witness_a: XML string of the first witness.
        witness_b: XML string of the second witness.

    Returns:
        Whatever ``display(...)`` returns.
    """
    superwitness = align_tokens_and_return_superwitness(
        convert_xml_string_into_tokens(witness_a),
        convert_xml_string_into_tokens(witness_b),
    )
    dot_text = export_as_dot(
        convert_superwitness_to_textgraph(superwitness), annotations=True
    )
    # NOTE(review): presumably graphviz's Source, whose render() writes the
    # output to disk and returns its path -- confirm against the imports.
    rendered = Source(dot_text, format="svg").render()
    return display(SVG(rendered))
def test_dot1_textnodes_only(self):
    """Check the XML serialization of a text graph built from two witnesses.

    The witnesses share the words ``x y z`` but mark them up differently.
    The output of ``convert_textgraph_to_xml`` is compared to a fixed
    string.

    NOTE(review): despite its name, this test exercises the XML export
    rather than the DOT export -- consider renaming it.
    """
    # NOTE(review): the comparison is exact, so the whitespace inside this
    # literal has to match the serializer's output character for character.
    expected = '''<xml><tei><s cx:witness="a"><s cx:witness="b">x</s>y<s cx:witness="b">z</s></s></tei> '''
    first_tokens = convert_xml_string_into_tokens("<tei><s>x y z</s></tei>")
    second_tokens = convert_xml_string_into_tokens("<tei><s>x</s>y<s>z</s></tei>")
    textgraph = convert_superwitness_to_textgraph(
        align_tokens_and_return_superwitness(first_tokens, second_tokens)
    )
    self.assertEqual(expected, convert_textgraph_to_xml(textgraph))
def test_2_textgraph_annotations(self):
    """Check the annotations of a graph where witness A has an extra nested ``<s>``.

    Exactly four annotations are expected: the nested ``s`` found only in
    witness A, plus ``s``, ``p`` and ``tei`` shared by both witnesses.
    """
    tokens_a = convert_xml_string_into_tokens(
        "<tei><p><s>a b<s>c</s>d</s></p></tei>"
    )
    tokens_b = convert_xml_string_into_tokens("<tei><p><s>a b d</s></p></tei>")
    superwitness = align_tokens_and_return_superwitness(tokens_a, tokens_b)
    found = convert_superwitness_to_textgraph(superwitness).annotations

    # Constructor arguments of each expected Annotation.
    # NOTE(review): presumably (tag, witnesses, range start, range end,
    # level) -- confirm against the Annotation class.
    expected_args = (
        ("s", ["A"], 2, 2, 3),
        ("s", ["A", "B"], 0, 3, 2),
        ("p", ["A", "B"], 0, 3, 1),
        ("tei", ["A", "B"], 0, 3, 0),
    )
    for args in expected_args:
        self.assertIn(Annotation(*args), found)
    self.assertEqual(4, len(found))
def collate_xml_svg(limit=1000):
    """Collate the two small test transcriptions and display the graph as SVG.

    Reads two fixed XML files, aligns their tokens, prints the first 20
    entries of the superwitness, exports the text graph as DOT with
    annotations, and renders it to SVG for display.

    Args:
        limit: Passed through to ``export_as_dot`` as its ``limit`` keyword.

    Returns:
        Whatever ``display(...)`` returns.
    """
    # Convert both XML files into tokens.
    first_tokens = convert_xml_file_into_tokens(
        "xml_source_transcriptions/ts-fol-test-small.xml"
    )
    second_tokens = convert_xml_file_into_tokens(
        "xml_source_transcriptions/tsq-test-small.xml"
    )

    superwitness = align_tokens_and_return_superwitness(first_tokens, second_tokens)
    print(superwitness[0:20])

    dot_text = export_as_dot(
        convert_superwitness_to_textgraph(superwitness),
        annotations=True,
        limit=limit,
    )

    # Render the DOT export as SVG.
    rendered = Source(dot_text, format="svg").render()
    return display(SVG(rendered))
def test_sort_annotations_based_on_positions_and_level(self):
    """Check the order of ``textgraph.annotations_sorted`` for two small witnesses.

    The sorted annotations are compared, as a list, to four hand-built
    ``Annotation`` objects in the expected order.
    """
    first_tokens = convert_xml_string_into_tokens("<tei><s>x y z</s></tei>")
    second_tokens = convert_xml_string_into_tokens("<tei><s>x</s>y<s>z</s></tei>")
    textgraph = convert_superwitness_to_textgraph(
        align_tokens_and_return_superwitness(first_tokens, second_tokens)
    )

    # Sorted on range start, then on range end, then on level.
    expected_annotations = [
        Annotation("tei", ["A", "B"], 0, 2, 0),
        Annotation("s", ["A"], 0, 2, 1),
        Annotation("s", ["B"], 0, 0, 1),
        Annotation("s", ["B"], 2, 2, 1),
    ]
    self.assertEqual(expected_annotations, textgraph.annotations_sorted)
def collate_xml_example():
    """Collate the two small test transcriptions and print every intermediate result.

    Prints the tokens of both files, the aligned superwitness, and finally
    the DOT export (with annotations) of the resulting text graph.
    """
    # Convert both XML files into tokens.
    first_tokens = convert_xml_file_into_tokens(
        "../xml_source_transcriptions/ts-fol-test-small.xml"
    )
    second_tokens = convert_xml_file_into_tokens(
        "../xml_source_transcriptions/tsq-test-small.xml"
    )
    # Log the tokens of each witness.
    print(first_tokens)
    print(second_tokens)

    superwitness = align_tokens_and_return_superwitness(first_tokens, second_tokens)
    # Log the alignment result.
    print(superwitness)

    print(
        export_as_dot(
            convert_superwitness_to_textgraph(superwitness), annotations=True
        )
    )
def test_dot1_textnodes_only(self):
    """Check the DOT export of a text graph when ``export_as_dot`` gets only the graph.

    ``export_as_dot`` is called without the ``annotations`` keyword, and
    the expected output contains only the three text nodes, their edges
    and the rank constraint.
    """
    # NOTE(review): the comparison is exact, so the whitespace inside this
    # literal has to match the exporter's output character for character.
    expected_out = """strict digraph TextGraph { 1 [label="x"] 2 [label="y"] 3 [label="z"] 1 -> 2 2 -> 3 { rank=same; 1; 2; 3 } }"""
    first_tokens = convert_xml_string_into_tokens("<tei><s>x y z</s></tei>")
    second_tokens = convert_xml_string_into_tokens("<tei><s>x</s>y<s>z</s></tei>")
    textgraph = convert_superwitness_to_textgraph(
        align_tokens_and_return_superwitness(first_tokens, second_tokens)
    )
    self.assertEqual(expected_out, export_as_dot(textgraph))