Beispiel #1
0
 def test_find_referent(self):
     """Exercise the ``referent()`` path function on tiers and items of ``xc3``.

     With no argument or with ``"segmentation"`` the words tier (and its
     items) are expected to resolve to ``xc3[0][0]`` (and its first item);
     with ``"alignment"`` nothing is expected to match.
     """
     target_tier = xc3[0][0]
     target_item = target_tier[0]

     # Tier-level lookups.
     assert xp.find(xc3, '//tier[@type="words"]/referent()') == target_tier
     # find() is expected to give None when there is no match; compare by
     # identity rather than equality for the singleton.
     assert xp.find(xc3, '//tier[@type="words"]/referent("alignment")') is None
     assert xp.find(xc3, '//tier[@type="words"]/referent("segmentation")') == target_tier

     # Item-level lookups: every matched words item yields the same referent,
     # so findall() is expected to repeat it once per item.
     assert xp.find(xc3, '//item[../@type="words"]/referent()') == target_item
     assert xp.findall(xc3, '//item[../@type="words"]/referent()') == [target_item, target_item, target_item]
     assert xp.findall(xc3, '//item[../@type="words"]/referent("alignment")') == []
     assert xp.findall(xc3, '//item[../@type="words"]/referent("segmentation")') == [target_item, target_item, target_item]
Beispiel #2
0
 def test_find_referrer(self):
     """Exercise the ``referrer()`` path function on tiers and items of ``xc3``.

     The expectations below pin both which nodes are returned and the order
     in which they come back when no reference attribute is named.
     """
     igt = xc3[0]
     by_segmentation = igt[1]
     by_alignment = igt[5]

     # Tier-level lookups.
     assert xp.find(xc3, '//tier[@type="phrases"]/referrer()') == by_alignment  # because "alignment" comes before "segmentation"
     assert xp.findall(xc3, '//tier[@type="phrases"]/referrer()') == [by_alignment, by_segmentation]
     assert xp.find(xc3, '//tier[@type="phrases"]/referrer("segmentation")') == by_segmentation
     assert xp.find(xc3, '//tier[@type="phrases"]/referrer("alignment")') == by_alignment

     # Item-level lookups.
     assert xp.find(xc3, '//item[../@type="phrases"]/referrer()') == by_alignment[0]
     assert xp.findall(xc3, '//item[../@type="phrases"]/referrer()') == (
         [by_alignment[0]] + [by_segmentation[i] for i in range(3)]
     )
     assert xp.findall(xc3, '//item[../@type="phrases"]/referrer("alignment")') == [by_alignment[0]]
     assert xp.findall(xc3, '//item[../@type="words"]/referrer("segmentation")') == (
         [igt[2][i] for i in range(6)]
     )
Beispiel #3
0
def tally_pattern(xc, agendum):
    """Count how often each subquery result group occurs among query matches.

    Every node matched by ``agendum['query']`` in ``xc`` is searched with
    ``agendum['subquery']``; the results are wrapped in a ``CSTuple`` and
    tallied.

    Returns a list of ``(count, description)`` pairs in
    ``Counter.most_common()`` order, where each description is
    ``agendum['description']`` formatted with ``match`` set to the group.
    """
    tallies = Counter(
        CSTuple(xp.findall(hit, agendum['subquery']))
        for hit in xp.findall(xc, agendum['query'])
    )
    report = []
    for group, occurrences in tallies.most_common():
        label = agendum['description'].format(match=group)
        report.append((occurrences, label))
    return report
Beispiel #4
0
 def test_find_metadata(self):
     """Check path lookups into the metadata of the first igt of ``xc1m``.

     Covers metadata and meta nodes, wildcard and namespaced children,
     descendant search, attribute access and ``text()``.
     """
     metadata = xc1m[0].metadata[0]
     meta = metadata[0]
     subject = meta[0]

     # Metadata and meta containers.
     assert xp.find(xc1m, 'igt/metadata') == metadata
     assert xp.findall(xc1m, 'igt/metadata') == [metadata]
     assert xp.find(xc1m, 'igt/metadata/meta') == meta
     assert xp.findall(xc1m, 'igt/metadata/meta') == [meta]

     # Children of meta, by wildcard and by namespaced name.
     assert xp.find(xc1m, 'igt/metadata/meta/*') == subject
     assert xp.findall(xc1m, 'igt/metadata/meta/*') == [subject, meta[1]]
     assert xp.find(xc1m, 'igt/metadata/meta/dc:subject') == subject
     assert xp.find(xc1m, 'igt/metadata//dc:subject') == subject

     # Attribute and text values.
     assert xp.find(xc1m, 'igt/metadata/meta/dc:subject/@olac:code') == 'jpn'
     assert xp.find(xc1m, 'igt/metadata/meta/dc:subject/text()') == 'Japanese'
     assert xp.findall(xc1m, 'igt/metadata/meta/dc:*/@olac:code') == ['jpn', 'eng']
Beispiel #5
0
 def test_findall(self):
     """Check ``findall()`` for root, child, descendant and nested paths."""
     igt = xc1[0]
     first_items = [igt[0][0], igt[1][0]]

     # Root and direct children of the corpus.
     assert xp.findall(xc1, '/.') == [xc1]
     assert xp.findall(xc1, 'igt') == [igt]
     assert xp.findall(xc1, 'tier') == []

     # Tiers, reached relative to the igt, by full path, or by descendant search.
     assert xp.findall(igt, 'tier') == igt.tiers
     assert xp.findall(xc1, 'igt/tier') == igt.tiers
     assert xp.findall(xc1, '//tier') == igt.tiers

     # Items and metadata entries.
     assert xp.findall(xc1, 'igt/tier/item') == first_items
     assert xp.findall(xc1, '//item') == first_items
     assert xp.findall(xc1m, '//meta') == list(xc1m[0].metadata[0])
Beispiel #6
0
 def test_star(self):
     """Check the ``*`` wildcard step at several depths."""
     igt = xc1[0]
     assert xp.findall(xc1, '/*') == [igt]
     assert xp.findall(xc1, '/*/*') == [igt[0], igt[1]]
     assert xp.findall(xc1, '//tier/*') == [igt[0][0], igt[1][0]]

     # star includes metadata
     igt_m = xc1m[0]
     assert xp.findall(xc1m, '/igt/*') == list(igt_m.metadata) + [igt_m[0], igt_m[1]]
Beispiel #7
0
 def test_disjunction(self):
     """Check ``(a | b)`` disjunctions as a whole path and as a single step."""
     first_tier = xc1[0][0]
     second_tier = xc1[0][1]
     whole_path = '(/igt/tier[@type="phrases"] | /igt/tier[@type="translations"])'
     single_step = 'igt/(tier[@type="phrases"] | tier[@type="translations"])'

     # Disjunction wrapping two complete paths.
     assert xp.find(xc1, whole_path) == first_tier
     assert xp.findall(xc1, whole_path) == [first_tier, second_tier]

     # Disjunction used as one step inside a path.
     assert xp.find(xc1, single_step) == first_tier
     assert xp.findall(xc1, single_step) == [first_tier, second_tier]
     assert xp.findall(xc1, 'igt/(tier[@type="phrases"] | tier[@type="translations"])/item') == [first_tier[0], second_tier[0]]
Beispiel #8
0
if __name__ == '__main__':
    # Script entry point: for every input file, load the corpus, look at each
    # item of the tiers whose type matches ODIN_TIER_TYPE, and store the
    # judgment returned by get_judgment() on the item when there is one.
    p = ArgumentParser()
    p.add_argument('FILE', nargs='+')
    p.add_argument('-d', '--dest', required=True, help='Output directory for modified files.')
    # --force is an on/off switch; without action='store_true' argparse would
    # require a value after -f, which contradicts the help text.
    p.add_argument('-f', '--force', action='store_true', help='Force overwrite existing files.')

    args = p.parse_args()

    for path in args.FILE:
        with open(path, 'r', encoding='utf-8') as f:
            # The corpus is consumed inside the `with` block; presumably the
            # INCREMENTAL mode reads lazily from `f` -- TODO confirm.
            xc = xigtxml.load(f, mode=INCREMENTAL)

            for inst in xc:
                JUDG_LOG.info('Processing instance "{}"'.format(inst.id))
                # NOTE(review): the type value is spliced into the predicate
                # without quotes; confirm ODIN_TIER_TYPE carries its own.
                for item in xigtpath.findall(inst, 'tier[@type='+ODIN_TIER_TYPE+ ']/item'):

                    # Skip blank lines
                    if item.value() is None:
                        continue

                    # Get the judgment and add it if it is non-null.
                    j = get_judgment(item.value())
                    if j is not None:
                        item.attributes[ODIN_JUDGMENT_ATTRIBUTE] = j
                        JUDG_LOG.debug('Judgment found on item "{}"'.format(item.id))

            # Make the output directory if it doesn't exist.
            makedirs(args.dest, exist_ok=True)
            # Output keeps the input's file name, placed under the destination.
            outpath = os.path.join(args.dest, os.path.basename(path))
Beispiel #9
0
def count_pattern(xc, agendum):
    """Return ``(number of matches, description)`` for an agendum.

    The number is the length of the result of ``agendum['query']`` run
    against ``xc``; the description is ``agendum['description']`` formatted
    with an empty ``match``.
    """
    hits = xp.findall(xc, agendum['query'])
    return (len(hits), agendum['description'].format(match=''))
Beispiel #10
0
def unique_pattern(xc, agendum):
    """Return ``(number of distinct matches, description)`` for an agendum.

    Matches of ``agendum['query']`` in ``xc`` that compare (and hash) equal
    are counted once; the description is ``agendum['description']``
    formatted with an empty ``match``.
    """
    distinct = set(xp.findall(xc, agendum['query']))
    return (len(distinct), agendum['description'].format(match=''))
Beispiel #11
0
def find_pattern(xc, agendum):
    """Print one formatted description line per match of the agendum's query.

    Each match of ``agendum['query']`` in ``xc`` is substituted for ``match``
    in ``agendum['description']`` and printed after a leading ``' '``
    argument. Returns nothing.
    """
    hits = xp.findall(xc, agendum['query'])
    for hit in hits:
        line = agendum['description'].format(match=hit)
        print(' ', line)