def test_dot_export_including_annotation(self):
        """Check the DOT export of a two-witness text graph with annotations on.

        Both witnesses are tokenized, aligned into a superwitness, turned into
        a text graph and exported with ``annotations=True``; the result must
        equal the DOT document below exactly.
        """
        source_a = "<tei><s1>x y z</s1></tei>"
        source_b = "<tei><s2>x</s2>y<s3>z</s3></tei>"
        aligned = align_tokens_and_return_superwitness(
            convert_xml_string_into_tokens(source_a),
            convert_xml_string_into_tokens(source_b),
        )
        graph = convert_superwitness_to_textgraph(aligned)
        actual = export_as_dot(graph, annotations=True)
        # TODO: The expected output contains duplicate annotation edges
        # (a2 -> a1 appears three times) that should be removed.
        # NOTE: For now the duplicates are tolerated because the DOT export
        # carries the "strict" keyword.
        expected_out = """strict digraph TextGraph {
    1 [label="x"]
    2 [label="y"]
    3 [label="z"]
    1 -> 2
    2 -> 3
{ rank=same; 1; 2; 3 }
    a1 [label="tei"]
    a2 [label="s1"]
    a3 [label="s2"]
    a4 [label="s3"]
    a2 -> a1
    a3 -> a1
    1 -> a2
    1 -> a3
    a2 -> a1
    2 -> a2
    a2 -> a1
    a4 -> a1
    3 -> a2
    3 -> a4
}"""
        self.assertEqual(expected_out, actual)
Esempio n. 2
0
 def test_3_textgraph_text(self):
     """Check the text tokens of a graph built from two differing witnesses.

     Witness A wraps "c" in a nested ``<s>``; witness B omits "c" and adds a
     ``<div>`` level. The string form of ``textgraph.text_tokens`` must be
     ``"[a, b, -c, d]"``.
     """
     witness_a = "<tei><p><s>a b<s>c</s>d</s></p></tei>"
     witness_b = "<tei><div><p><s>a b d</s></p></div></tei>"
     tokens_a = convert_xml_string_into_tokens(witness_a)
     tokens_b = convert_xml_string_into_tokens(witness_b)
     superwitness = align_tokens_and_return_superwitness(tokens_a, tokens_b)
     textgraph = convert_superwitness_to_textgraph(superwitness)
     text_tokens = textgraph.text_tokens
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual("[a, b, -c, d]", str(text_tokens))
Esempio n. 3
0
 def test_textgraph_text(self):
     """Check the text tokens of a graph whose witnesses share the same words.

     The witnesses differ only in how ``<s>`` elements wrap "x y z"; the
     string form of ``textgraph.text_tokens`` must be ``"[x, y, z]"``.
     """
     witness_a = "<tei><s>x y z</s></tei>"
     witness_b = "<tei><s>x</s>y<s>z</s></tei>"
     tokens_a = convert_xml_string_into_tokens(witness_a)
     tokens_b = convert_xml_string_into_tokens(witness_b)
     superwitness = align_tokens_and_return_superwitness(tokens_a, tokens_b)
     textgraph = convert_superwitness_to_textgraph(superwitness)
     text_tokens = textgraph.text_tokens
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual("[x, y, z]", str(text_tokens))
Esempio n. 4
0
 def test_textgraph_milestones_annotations(self):
     """Check that two witnesses holding only an empty ``<lb/>`` give no annotations."""
     milestone_only = "<tei><lb/></tei>"
     source_a = milestone_only
     source_b = milestone_only
     aligned = align_tokens_and_return_superwitness(
         convert_xml_string_into_tokens(source_a),
         convert_xml_string_into_tokens(source_b),
     )
     graph = convert_superwitness_to_textgraph(aligned)
     self.assertEqual(0, len(graph.annotations))
Esempio n. 5
0
def collate_xml(witness_a, witness_b):
    """Collate two XML witness strings and display the result as an SVG graph.

    The witnesses are tokenized, aligned into a superwitness, converted to a
    text graph and exported as DOT with annotations. The DOT text is rendered
    to SVG through ``Source(...).render()`` and the result of ``render()`` is
    handed to ``SVG`` and ``display``.

    Returns whatever ``display`` returns.
    """
    aligned = align_tokens_and_return_superwitness(
        convert_xml_string_into_tokens(witness_a),
        convert_xml_string_into_tokens(witness_b),
    )
    graph = convert_superwitness_to_textgraph(aligned)
    dot_text = export_as_dot(graph, annotations=True)
    rendered = Source(dot_text, format="svg").render()
    return display(SVG(rendered))
 def test_dot1_textnodes_only(self):
     """Check the XML produced by ``convert_textgraph_to_xml`` for two witnesses.

     NOTE(review): despite its name, this test exercises the XML conversion,
     not the DOT export. The expected value opens ``<xml>`` without a closing
     tag -- confirm this is the intended output.
     """
     source_a = "<tei><s>x y z</s></tei>"
     source_b = "<tei><s>x</s>y<s>z</s></tei>"
     aligned = align_tokens_and_return_superwitness(
         convert_xml_string_into_tokens(source_a),
         convert_xml_string_into_tokens(source_b),
     )
     graph = convert_superwitness_to_textgraph(aligned)
     actual_xml = convert_textgraph_to_xml(graph)
     # The expected text ends with a newline followed by five spaces.
     expected = (
         '<xml><tei><s cx:witness="a"><s cx:witness="b">x</s>y<s cx:witness="b">z</s></s></tei>\n'
         '     '
     )
     self.assertEqual(expected, actual_xml)
Esempio n. 7
0
 def test_2_textgraph_annotations(self):
     """Check the annotations of a graph where witness A nests an extra ``<s>``.

     Exactly four annotations are expected; each is built from the argument
     tuples listed below and must be present in ``textgraph.annotations``.
     """
     source_a = "<tei><p><s>a b<s>c</s>d</s></p></tei>"
     source_b = "<tei><p><s>a b d</s></p></tei>"
     aligned = align_tokens_and_return_superwitness(
         convert_xml_string_into_tokens(source_a),
         convert_xml_string_into_tokens(source_b),
     )
     found = convert_superwitness_to_textgraph(aligned).annotations
     # Positional arguments are passed to Annotation unchanged.
     expected_args = (
         ("s", ["A"], 2, 2, 3),
         ("s", ["A", "B"], 0, 3, 2),
         ("p", ["A", "B"], 0, 3, 1),
         ("tei", ["A", "B"], 0, 3, 0),
     )
     for args in expected_args:
         self.assertIn(Annotation(*args), found)
     self.assertEqual(4, len(found))
Esempio n. 8
0
def collate_xml_svg(limit=1000):
    """Collate the two small sample transcriptions and display them as SVG.

    Reads ``ts-fol-test-small.xml`` and ``tsq-test-small.xml`` from
    ``xml_source_transcriptions/``, aligns their tokens, prints the first 20
    entries of the superwitness, and exports the text graph as DOT with
    annotations, forwarding ``limit`` to ``export_as_dot``.

    Returns whatever ``display`` returns.
    """
    # Tokenize both XML source files.
    first_tokens = convert_xml_file_into_tokens(
        "xml_source_transcriptions/ts-fol-test-small.xml"
    )
    second_tokens = convert_xml_file_into_tokens(
        "xml_source_transcriptions/tsq-test-small.xml"
    )
    aligned = align_tokens_and_return_superwitness(first_tokens, second_tokens)
    print(aligned[0:20])
    graph = convert_superwitness_to_textgraph(aligned)
    dot_text = export_as_dot(graph, annotations=True, limit=limit)
    # Render the DOT text as SVG and show it.
    rendered = Source(dot_text, format="svg").render()
    return display(SVG(rendered))
Esempio n. 9
0
 def test_sort_annotations_based_on_positions_and_level(self):
     """Check the order of ``textgraph.annotations_sorted`` for two witnesses.

     NOTE(review): the original comment said "sort on range start, then on
     range end, then on level". The expected list places the annotation with
     arguments (0, 2, 1) before the one with (0, 0, 1), so confirm the exact
     sort rule against the Annotation class.
     """
     source_a = "<tei><s>x y z</s></tei>"
     source_b = "<tei><s>x</s>y<s>z</s></tei>"
     aligned = align_tokens_and_return_superwitness(
         convert_xml_string_into_tokens(source_a),
         convert_xml_string_into_tokens(source_b),
     )
     graph = convert_superwitness_to_textgraph(aligned)
     expected_annotations = [
         Annotation("tei", ["A", "B"], 0, 2, 0),
         Annotation("s", ["A"], 0, 2, 1),
         Annotation("s", ["B"], 0, 0, 1),
         Annotation("s", ["B"], 2, 2, 1),
     ]
     self.assertEqual(expected_annotations, graph.annotations_sorted)
Esempio n. 10
0
def collate_xml_example():
    """Collate the two small sample transcriptions and print each stage.

    Reads ``ts-fol-test-small.xml`` and ``tsq-test-small.xml`` from
    ``../xml_source_transcriptions/``, then prints both token sequences, the
    superwitness produced by aligning them, and the DOT export (with
    annotations) of the resulting text graph.
    """
    # Tokenize both XML source files before logging either result.
    first_tokens = convert_xml_file_into_tokens(
        "../xml_source_transcriptions/ts-fol-test-small.xml"
    )
    second_tokens = convert_xml_file_into_tokens(
        "../xml_source_transcriptions/tsq-test-small.xml"
    )
    for token_sequence in (first_tokens, second_tokens):
        print(token_sequence)
    aligned = align_tokens_and_return_superwitness(first_tokens, second_tokens)
    print(aligned)
    graph = convert_superwitness_to_textgraph(aligned)
    print(export_as_dot(graph, annotations=True))
    def test_dot1_textnodes_only(self):
        witness_a = "<tei><s>x y z</s></tei>"
        witness_b = "<tei><s>x</s>y<s>z</s></tei>"
        tokens_a = convert_xml_string_into_tokens(witness_a)
        tokens_b = convert_xml_string_into_tokens(witness_b)
        superwitness = align_tokens_and_return_superwitness(tokens_a, tokens_b)
        textgraph = convert_superwitness_to_textgraph(superwitness)
        dot_export = export_as_dot(textgraph)
        expected_out = """strict digraph TextGraph {
    1 [label="x"]
    2 [label="y"]
    3 [label="z"]
    1 -> 2
    2 -> 3
{ rank=same; 1; 2; 3 }
}"""
        self.assertEqual(expected_out, dot_export)