def test_find_referent(self):
    """Check referent() on the "words" tier and on its items in xc3.

    With no argument and with "segmentation" the step is expected to
    resolve to xc3[0][0] (or its first item); with "alignment" it is
    expected to resolve to nothing.
    """
    target_tier = xc3[0][0]
    target_item = target_tier[0]

    # Tier-level lookups.
    assert xp.find(xc3, '//tier[@type="words"]/referent()') == target_tier
    # find() signals "no match" with None; compare by identity, not equality.
    assert xp.find(xc3, '//tier[@type="words"]/referent("alignment")') is None
    assert xp.find(xc3, '//tier[@type="words"]/referent("segmentation")') == target_tier

    # Item-level lookups: each of the three matches resolves to the same item.
    assert xp.find(xc3, '//item[../@type="words"]/referent()') == target_item
    assert xp.findall(xc3, '//item[../@type="words"]/referent()') == [
        target_item, target_item, target_item
    ]
    assert xp.findall(xc3, '//item[../@type="words"]/referent("alignment")') == []
    assert xp.findall(xc3, '//item[../@type="words"]/referent("segmentation")') == [
        target_item, target_item, target_item
    ]
def test_find_referrer(self):
    """Check referrer() on the "phrases" tier, its items, and the "words" items."""
    igt = xc3[0]
    by_alignment = igt[5]
    by_segmentation = igt[1]
    phrases_tier = '//tier[@type="phrases"]'
    phrases_items = '//item[../@type="phrases"]'

    # Tier-level lookups.
    # because "alignment" comes before "segmentation"
    assert xp.find(xc3, phrases_tier + '/referrer()') == by_alignment
    assert xp.findall(xc3, phrases_tier + '/referrer()') == [
        by_alignment, by_segmentation
    ]
    assert xp.find(xc3, phrases_tier + '/referrer("segmentation")') == by_segmentation
    assert xp.find(xc3, phrases_tier + '/referrer("alignment")') == by_alignment

    # Item-level lookups.
    assert xp.find(xc3, phrases_items + '/referrer()') == by_alignment[0]
    assert xp.findall(xc3, phrases_items + '/referrer()') == [
        by_alignment[0],
        by_segmentation[0],
        by_segmentation[1],
        by_segmentation[2],
    ]
    assert xp.findall(xc3, phrases_items + '/referrer("alignment")') == [
        by_alignment[0]
    ]
    assert xp.findall(xc3, '//item[../@type="words"]/referrer("segmentation")') == [
        igt[2][i] for i in range(6)
    ]
def tally_pattern(xc, agendum):
    """Tally the sub-query results found under each match of the main query.

    For every match of ``agendum['query']`` in *xc*, the results of
    ``agendum['subquery']`` on that match are wrapped in a ``CSTuple`` and
    counted. Returns a list of ``(count, description)`` pairs, most common
    first, where the description is ``agendum['description']`` formatted
    with ``match`` set to the grouped value.
    """
    groups = (
        CSTuple(xp.findall(hit, agendum['subquery']))
        for hit in xp.findall(xc, agendum['query'])
    )
    tally = Counter(groups)

    report = []
    for group, n in tally.most_common():
        report.append((n, agendum['description'].format(match=group)))
    return report
def test_find_metadata(self):
    """Check metadata paths in xc1m: containers, children, attributes and text."""
    metadata = xc1m[0].metadata[0]
    meta = metadata[0]
    subject = meta[0]

    # Metadata containers and their meta children.
    assert xp.find(xc1m, 'igt/metadata') == metadata
    assert xp.findall(xc1m, 'igt/metadata') == [metadata]
    assert xp.find(xc1m, 'igt/metadata/meta') == meta
    assert xp.findall(xc1m, 'igt/metadata/meta') == [meta]

    # Children of a meta element, by wildcard and by namespaced name.
    assert xp.find(xc1m, 'igt/metadata/meta/*') == subject
    assert xp.findall(xc1m, 'igt/metadata/meta/*') == [subject, meta[1]]
    assert xp.find(xc1m, 'igt/metadata/meta/dc:subject') == subject
    assert xp.find(xc1m, 'igt/metadata//dc:subject') == subject

    # Attribute and text access on those children.
    assert xp.find(xc1m, 'igt/metadata/meta/dc:subject/@olac:code') == 'jpn'
    assert xp.find(xc1m, 'igt/metadata/meta/dc:subject/text()') == 'Japanese'
    assert xp.findall(xc1m, 'igt/metadata/meta/dc:*/@olac:code') == ['jpn', 'eng']
def test_findall(self):
    """Check findall() with relative, absolute and descendant paths on xc1."""
    igt = xc1[0]
    tiers = igt.tiers
    items = [igt[0][0], igt[1][0]]

    assert xp.findall(xc1, '/.') == [xc1]
    assert xp.findall(xc1, 'igt') == [igt]

    # 'tier' only matches when the context object is the igt itself.
    assert xp.findall(xc1, 'tier') == []
    assert xp.findall(igt, 'tier') == tiers
    assert xp.findall(xc1, 'igt/tier') == tiers
    assert xp.findall(xc1, '//tier') == tiers

    assert xp.findall(xc1, 'igt/tier/item') == items
    assert xp.findall(xc1, '//item') == items

    assert xp.findall(xc1m, '//meta') == list(xc1m[0].metadata[0])
def test_star(self):
    """Check that the * wildcard matches every child at a given level."""
    igt = xc1[0]
    assert xp.findall(xc1, '/*') == [igt]
    assert xp.findall(xc1, '/*/*') == [igt[0], igt[1]]
    assert xp.findall(xc1, '//tier/*') == [igt[0][0], igt[1][0]]

    # star includes metadata
    igt_with_meta = xc1m[0]
    expected = list(igt_with_meta.metadata)
    expected.extend([igt_with_meta[0], igt_with_meta[1]])
    assert xp.findall(xc1m, '/igt/*') == expected
def test_disjunction(self):
    """Check `(a | b)` disjunctions, both at the path root and nested in a step."""
    rooted = '(/igt/tier[@type="phrases"] | /igt/tier[@type="translations"])'
    nested = 'igt/(tier[@type="phrases"] | tier[@type="translations"])'
    first_tier, second_tier = xc1[0][0], xc1[0][1]

    # Both spellings are expected to select the same two tiers, in order.
    for query in (rooted, nested):
        assert xp.find(xc1, query) == first_tier
        assert xp.findall(xc1, query) == [first_tier, second_tier]

    # A further step may follow the disjunction.
    assert xp.findall(xc1, nested + '/item') == [first_tier[0], second_tier[0]]
if __name__ == '__main__':
    # Script entry point: load each input file, attach a judgment attribute
    # to every item that get_judgment() recognizes, and compute the output
    # path under --dest.
    p = ArgumentParser()
    p.add_argument('FILE', nargs='+')
    p.add_argument('-d', '--dest', required=True,
                   help='Output directory for modified files.')
    # --force is an on/off switch; without action='store_true' argparse
    # would require a value after -f and reject a bare "-f".
    p.add_argument('-f', '--force', action='store_true',
                   help='Force overwrite existing files.')
    args = p.parse_args()

    for path in args.FILE:
        with open(path, 'r', encoding='utf-8') as f:
            xc = xigtxml.load(f, mode=INCREMENTAL)
            # Iterate while the file is still open, since the corpus is
            # loaded in INCREMENTAL mode.
            for inst in xc:
                JUDG_LOG.info('Processing instance "{}"'.format(inst.id))
                # NOTE(review): ODIN_TIER_TYPE is spliced into the query
                # unquoted; presumably the constant carries its own quotes
                # or the path syntax accepts bare values -- confirm.
                for item in xigtpath.findall(inst, 'tier[@type='+ODIN_TIER_TYPE+ ']/item'):
                    # Skip blank lines
                    if item.value() is None:
                        continue

                    # Get the judgment and add it if it is non-null.
                    j = get_judgment(item.value())
                    if j is not None:
                        item.attributes[ODIN_JUDGMENT_ATTRIBUTE] = j
                        JUDG_LOG.debug('Judgment found on item "{}"'.format(item.id))

        # Make the output directory if it doesn't exist.
        makedirs(args.dest, exist_ok=True)
        # NOTE(review): outpath (and args.force) are not used in the visible
        # portion of this script; the write step is presumably elsewhere.
        outpath = os.path.join(args.dest, os.path.basename(path))
def count_pattern(xc, agendum):
    """Count the matches of ``agendum['query']`` in *xc*.

    Returns a ``(count, description)`` pair, where the description is
    ``agendum['description']`` formatted with an empty ``match``.
    """
    matches = xp.findall(xc, agendum['query'])
    label = agendum['description'].format(match='')
    return (len(matches), label)
def unique_pattern(xc, agendum):
    """Count the distinct matches of ``agendum['query']`` in *xc*.

    Returns a ``(count, description)`` pair, where the description is
    ``agendum['description']`` formatted with an empty ``match``.
    """
    # Duplicates collapse by hash and equality, so the size of the set is
    # the number of distinct matches.
    distinct = set(xp.findall(xc, agendum['query']))
    label = agendum['description'].format(match='')
    return (len(distinct), label)
def find_pattern(xc, agendum):
    """Print one formatted description line per match of ``agendum['query']``.

    Each line is ``agendum['description']`` formatted with ``match`` set to
    the matched object. Nothing is returned.
    """
    hits = xp.findall(xc, agendum['query'])
    for hit in hits:
        line = agendum['description'].format(match=hit)
        print(' ', line)