Ejemplo n.º 1
0
 def test_blocks(self):
     """Test that the right block element is used in the right context.

     ``self.testdoc`` must contain exactly one ``ab`` and no ``p`` anywhere
     below ``text``.  The 'm3519' test file, converted with the glyph
     structure and ``helpers.tpen_filter``, must contain exactly two ``p``
     and no ``ab`` on the ``text/body`` path.
     """
     def hits(tree, template, *tags):
         # Pass each tag name through self.ns, fill the path template and
         # count the elements findall returns for the resulting path.
         return len(tree.findall(template % tuple(self.ns(t) for t in tags)))

     root = self.testdoc.getroot()
     self.assertEqual(1, hits(root, './/%s//%s', 'text', 'ab'))
     self.assertEqual(0, hits(root, './/%s//%s', 'text', 'p'))

     d_json = helpers.load_JSON_file(self.testfiles['m3519'])
     d_root = from_sc(
         d_json,
         special_chars=self.glyphs,
         text_filter=helpers.tpen_filter,
     )
     self.assertEqual(0, hits(d_root, './/%s/%s/%s', 'text', 'body', 'ab'))
     self.assertEqual(2, hits(d_root, './/%s/%s/%s', 'text', 'body', 'p'))
Ejemplo n.º 2
0
    def setUp(self):
        """Load the settings and build the documents shared by the tests.

        Stores the settings, the TEI and XML namespace values, the glyph
        structure and the test-file table on the instance, then converts
        the 'json' test file (with and without glyphs) and the 'm3519'
        test file with ``from_sc``.
        """
        self.settings = config()

        namespaces = self.settings['namespaces']
        self.tei_ns = namespaces['tei']
        self.xml_ns = namespaces['xml']

        self.glyphs = helpers.glyph_struct(self.settings['armenian_glyphs'])
        self.testfiles = self.settings['testfiles']

        # The same parsed JSON is converted twice: once bare, once with the
        # glyph structure supplied.
        msdata = helpers.load_JSON_file(self.testfiles['json'])
        self.testdoc_noglyphs = from_sc(msdata)
        self.testdoc = from_sc(msdata, special_chars=self.glyphs)

        m3519_data = helpers.load_JSON_file(self.testfiles['m3519'])
        m3519_options = {
            'special_chars': self.glyphs,
            'numeric_parser': helpers.armenian_numbers,
            'text_filter': helpers.tpen_filter,
        }
        self.doc3519 = from_sc(m3519_data, **m3519_options)
Ejemplo n.º 3
0
 def test_blocks(self):
     """Test that the right block element is used in the right context.

     Expects one ``ab`` and zero ``p`` below ``text`` in ``self.testdoc``,
     and zero ``ab`` and two ``p`` on the ``text/body`` path of the
     converted 'm3519' test file.
     """
     root = self.testdoc.getroot()
     for expected, tag in ((1, 'ab'), (0, 'p')):
         path = './/%s//%s' % (self.ns('text'), self.ns(tag))
         self.assertEqual(expected, len(root.findall(path)))

     d_json = helpers.load_JSON_file(self.testfiles['m3519'])
     d_root = from_sc(
         d_json,
         special_chars=self.glyphs,
         text_filter=helpers.tpen_filter,
     )
     for expected, tag in ((0, 'ab'), (2, 'p')):
         path = './/%s/%s/%s' % (
             self.ns('text'), self.ns('body'), self.ns(tag))
         self.assertEqual(expected, len(d_root.findall(path)))
Ejemplo n.º 4
0
 def test_postprocess(self):
     """Check the result of converting with a ``postprocess`` callback.

     The 'm3519' test file is converted with ``helpers.postprocess`` passed
     as ``postprocess``.  Every ``pb`` element in the result must then have
     ``ana`` equal to ``'interesting'``, and at least one ``pb`` element
     must exist.
     """
     d_json = helpers.load_JSON_file(self.testfiles['m3519'])
     d_root = from_sc(d_json,
                      special_chars=self.glyphs,
                      text_filter=helpers.tpen_filter,
                      postprocess=helpers.postprocess)
     visited = False
     for tag in d_root.iter(self.ns('pb')):
         visited = True
         # assertEqual rather than the deprecated assertEquals alias, which
         # was removed from unittest in Python 3.12.
         self.assertEqual('interesting', tag.get('ana'))
     # Guard against a vacuous pass when the document has no pb elements.
     self.assertTrue(visited)
Ejemplo n.º 5
0
 def test_parse_error(self):
     """Check that a reasonable error message is returned from a JSON file that
     contains badly-formed XML.

     The conversion is run with stderr redirected into a buffer; the
     captured text must mention the parsing error, be followed by exactly
     55 further lines, and the first of those must name the affected
     portion of XML.
     """
     md = {'short_error': True}
     with io.StringIO() as buf, redirect_stderr(buf):
         # Only the stderr output matters here; the return value is unused.
         from_sc(self.brokendata, md)
         # Read the buffer before the with-block closes it.
         errormsg = buf.getvalue()
     self.assertRegex(errormsg, 'Parsing error in the JSON')
     errorlines = errormsg.splitlines()[1:]
     self.assertEqual(len(errorlines), 55)
     # Raw string: '\<' is not a valid string escape and triggers a
     # warning in a normal literal; the pattern itself is unchanged.
     self.assertRegex(errorlines[0], r'Affected portion of XML is 493: \<pb')
Ejemplo n.º 6
0
 def test_postprocess(self):
     """Verify the ``ana`` attribute on page breaks after postprocessing.

     Converts the 'm3519' test file with ``postprocess=helpers.postprocess``
     and requires that each ``pb`` element found carries
     ``ana='interesting'`` and that the loop saw at least one such element.
     """
     d_json = helpers.load_JSON_file(self.testfiles['m3519'])
     d_root = from_sc(d_json,
                      special_chars=self.glyphs,
                      text_filter=helpers.tpen_filter,
                      postprocess=helpers.postprocess)
     visited = False
     for tag in d_root.iter(self.ns('pb')):
         visited = True
         # The assertEquals alias is deprecated and absent from
         # Python 3.12 onwards; assertEqual is the supported spelling.
         self.assertEqual('interesting', tag.get('ana'))
     # Fails if the loop body never ran, i.e. no pb element was found.
     self.assertTrue(visited)
Ejemplo n.º 7
0
 def test_parse_error(self):
     """Check that a reasonable error message is returned from a JSON file that
     contains badly-formed XML.

     stderr is captured while ``from_sc`` processes ``self.brokendata``.
     The captured output must match 'Parsing error in the JSON', have 55
     lines after its first line, and the first of those lines must report
     the affected portion of XML.
     """
     md = {'short_error': True}
     with io.StringIO() as buf, redirect_stderr(buf):
         # The result of the conversion is not inspected, so it is not bound.
         from_sc(self.brokendata, md)
         # getvalue() must be called while the StringIO is still open.
         errormsg = buf.getvalue()
     self.assertRegex(errormsg, 'Parsing error in the JSON')
     errorlines = errormsg.splitlines()[1:]
     self.assertEqual(len(errorlines), 55)
     # Written as a raw string so the regex escape '\<' is not parsed as an
     # (invalid) string escape; the runtime pattern is identical.
     self.assertRegex(errorlines[0], r'Affected portion of XML is 493: \<pb')
Ejemplo n.º 8
0
 def test_members(self):
     """Check the output of a conversion that is given a ``members`` argument.

     The 'json' test file is converted with ``helpers.test_members()`` as
     ``members``.  The result must contain exactly one ``respStmt`` under
     ``fileDesc/editionStmt`` whose id attribute is 'u281' and whose
     ``name`` child has the text 'Me M. and I'; every ``lb`` element must
     have ``resp`` equal to '#u281'.
     """
     msdata = helpers.load_JSON_file(self.testfiles['json'])
     testdoc = from_sc(msdata,
                       members=helpers.test_members(),
                       special_chars=self.glyphs)
     respstmt = testdoc.xpath('//tei:fileDesc/tei:editionStmt/tei:respStmt',
                              namespaces=self.namespaces)
     self.assertEqual(1, len(respstmt))
     self.assertEqual('u281', respstmt[0].get(self.ns('id')))
     self.assertEqual('Me M. and I', respstmt[0].find(self.ns('name')).text)
     for line in testdoc.iter(self.ns('lb')):
         # assertEqual replaces the deprecated assertEquals alias, which no
         # longer exists in Python 3.12 and later.
         self.assertEqual('#u281', line.get('resp'))
Ejemplo n.º 9
0
    def setUp(self):
        """Prepare the settings and converted documents used by the tests.

        Keeps the settings, the TEI and XML namespace values, the glyph
        structure and the test-file table on the instance, and converts
        the 'json' and 'm3519' test files with ``from_sc``.
        """
        self.settings = config()

        ns_table = self.settings['namespaces']
        self.tei_ns = ns_table['tei']
        self.xml_ns = ns_table['xml']

        glyph_source = self.settings['armenian_glyphs']
        self.glyphs = helpers.glyph_struct(glyph_source)

        self.testfiles = self.settings['testfiles']
        load = helpers.load_JSON_file

        # One parsed JSON structure feeds both the plain conversion and the
        # conversion that is given the glyph structure.
        msdata = load(self.testfiles['json'])
        self.testdoc_noglyphs = from_sc(msdata)
        self.testdoc = from_sc(msdata, special_chars=self.glyphs)

        data3519 = load(self.testfiles['m3519'])
        self.doc3519 = from_sc(
            data3519,
            special_chars=self.glyphs,
            numeric_parser=helpers.armenian_numbers,
            text_filter=helpers.tpen_filter,
        )
Ejemplo n.º 10
0
 def test_members(self):
     """Verify responsibility markup produced when ``members`` is supplied.

     After converting the 'json' test file with
     ``members=helpers.test_members()``, the document must hold a single
     ``respStmt`` under ``fileDesc/editionStmt`` with id 'u281' and a
     ``name`` whose text is 'Me M. and I', and each ``lb`` element must
     have its ``resp`` attribute set to '#u281'.
     """
     msdata = helpers.load_JSON_file(self.testfiles['json'])
     testdoc = from_sc(
         msdata,
         members=helpers.test_members(),
         special_chars=self.glyphs
     )
     respstmt = testdoc.xpath('//tei:fileDesc/tei:editionStmt/tei:respStmt', namespaces=self.namespaces)
     self.assertEqual(1, len(respstmt))
     self.assertEqual('u281', respstmt[0].get(self.ns('id')))
     self.assertEqual('Me M. and I', respstmt[0].find(self.ns('name')).text)
     for line in testdoc.iter(self.ns('lb')):
         # The deprecated assertEquals alias was removed in Python 3.12;
         # assertEqual performs the same comparison.
         self.assertEqual('#u281', line.get('resp'))
Ejemplo n.º 11
0
    def setUp(self):
        """Build the shared fixtures: namespaces, glyphs and documents.

        Stores the namespace table (plus its 'tei' and 'xml' entries), the
        glyph structure and the test-file table on the instance.  Converts
        the 'json' test file and, with user-defined title/author metadata,
        the 'legacy' test file; also loads the 'broken' test file without
        converting it.
        """
        settings = config()

        namespaces = settings['namespaces']
        self.namespaces = namespaces
        self.tei_ns = namespaces['tei']
        self.xml_ns = namespaces['xml']

        self.glyphs = helpers.glyph_struct(settings['armenian_glyphs'])

        self.testfiles = settings['testfiles']
        msdata = helpers.load_JSON_file(self.testfiles['json'])
        self.testdoc = from_sc(msdata, special_chars=self.glyphs)

        user_defined = {
            'title': 'Ժամանակագրութիւն',
            'author': 'Մատթէոս Ուռհայեցի',
        }
        legacydata = helpers.load_JSON_file(self.testfiles['legacy'])
        legacy_options = {
            'metadata': user_defined,
            'special_chars': self.glyphs,
            'numeric_parser': helpers.armenian_numbers,
            'text_filter': helpers.tpen_filter,
        }
        self.legacydoc = from_sc(legacydata, **legacy_options)

        # Kept as raw parsed JSON; it is not converted here.
        self.brokendata = helpers.load_JSON_file(self.testfiles['broken'])
Ejemplo n.º 12
0
    def setUp(self):
        """Create the namespaces, glyph structure and documents for each test.

        Converts the 'json' test file with the glyph structure, converts
        the 'legacy' test file with explicit title/author metadata, a
        numeric parser and a text filter, and loads the 'broken' test file
        as unconverted JSON data.
        """
        settings = config()
        load = helpers.load_JSON_file

        ns_table = settings['namespaces']
        self.namespaces = ns_table
        self.tei_ns = ns_table['tei']
        self.xml_ns = ns_table['xml']

        self.glyphs = helpers.glyph_struct(settings['armenian_glyphs'])
        self.testfiles = settings['testfiles']

        msdata = load(self.testfiles['json'])
        self.testdoc = from_sc(msdata, special_chars=self.glyphs)

        user_defined = dict(
            title='Ժամանակագրութիւն',
            author='Մատթէոս Ուռհայեցի',
        )
        legacydata = load(self.testfiles['legacy'])
        self.legacydoc = from_sc(
            legacydata,
            metadata=user_defined,
            special_chars=self.glyphs,
            numeric_parser=helpers.armenian_numbers,
            text_filter=helpers.tpen_filter,
        )

        # Loaded only; no conversion is attempted on this data here.
        self.brokendata = load(self.testfiles['broken'])
Ejemplo n.º 13
0
import json
import sys

sys.path.append('transcription')
import config
from lxml import etree
from tpen2tei.parse import from_sc
from tpen2tei.wordtokenize import from_etree

# Parse the JSON file named by the first command-line argument.
with open(sys.argv[1], encoding='utf-8') as source_file:
    msdata = json.load(source_file)

# All conversion options are taken from the imported config module.
conversion_options = {
    'metadata': config.metadata,
    'special_chars': config.special_chars,
    'numeric_parser': config.numeric_parser,
    'text_filter': config.transcription_filter,
}
xmltree = from_sc(msdata, **conversion_options)

# Serialize the tree and write the result to the binary stdout buffer.
serialized = etree.tostring(
    xmltree,
    encoding='utf-8',
    pretty_print=True,
    xml_declaration=True,
)
sys.stdout.buffer.write(serialized)