Beispiel #1
0
def parseftml(fnameorstr, feats=None):
    """parse an FTML document into a list ftmlstrings

    Args:
        root : ElementTree Element object representing FTML document
    Returns:
        list of ftmlstring objects, each representing one <test>, in document order

    Within <string> elements it removes <em> markup and converts backslash-u notation to Unicode characters.
    <testgroup> divisions are ignored and tests from all <testgroups> are collected together.
    """

    strs = []
    if os.path.exists(fnameorstr):
        root = et.parse(fnameorstr)
    else:
        root = et.fromstring(fnameorstr)
    for test in root.findall('.//test'):
        s = "".join(test.find('string').itertext())
        s = re.sub(r'\\u([a-fA-F0-9]{4,6})',
                   lambda m: chr(int(m.group(1), 16)), s)
        stylename = test.get('stylename', None)
        if stylename is None:
            lfeats = None
            lang = None
        else:
            style = root.find(f'./head/styles/style[@name="{stylename}"]')
            lfeats = style.get('feats', None)
            if lfeats is not None:
                lfeats = dict(parseFeat(t.strip()) for t in lfeats.split(','))
                if feats is not None:
                    lfeats.update(feats)
            elif feats is not None:
                lfeats = feats
            else:
                lfeats = None
            lang = style.get('lang', None)
        rtl = test.get('rtl', "").lower() in ("true", "1")
        for w in s.split():
            s = UserString(w)
            s.feats = lfeats
            s.lang = lang
            s.rtl = rtl
            strs.append(s)
    return strs