Example #1
0
def corpus_to_xigt(corp: Corpus):
    """
    Given an INTENT2 Corpus object,
    return its representation in xigtxml format.

    Every instance is converted and then serialized on its own as a trial
    run. An instance whose trial serialization (or insertion into the
    output corpus) raises ``TypeError`` or ``XigtError`` is logged and left
    out, so that one bad instance does not make the final serialization of
    the whole corpus fail.

    :param corp: the corpus to convert; it is iterated for its instances.
    :return: the result of ``dumps`` on the assembled ``XigtCorpus``.
    """
    xc = XigtCorpus()
    EXPORT_LOG.info('Preparing to export INTENT2 Corpus to Xigt')
    skipped = 0
    for inst in corp:
        xigt_inst = instance_to_xigt(inst)
        try:
            # Trial serialization of a throwaway single-instance corpus;
            # only instances that survive it are added to the real output.
            dumps(XigtCorpus(igts=[xigt_inst]))
            xc.append(xigt_inst)
        except (TypeError, XigtError) as te:
            skipped += 1
            EXPORT_LOG.error('Error in serializing instance "{}": {}'.format(
                inst.id, te))
    if skipped:
        # Make it visible that the output is incomplete, since the message
        # below is emitted regardless of how many instances were dropped.
        EXPORT_LOG.warning(
            '{} instance(s) could not be serialized and were skipped.'.format(
                skipped))
    EXPORT_LOG.info(
        'Corpus successfully converted. Returning string for writing.')
    return dumps(xc)
Example #2
0
def run(args):
    """
    Clean the corpora named in ``args.infiles``, or the one on stdin.

    Each named input is loaded in 'full' mode, passed to ``clean_corpus``
    and dumped back under the same name. When no inputs are named, the
    corpus is loaded from standard input and the cleaned serialization is
    printed instead.
    """
    if not args.infiles:
        # No files given: act as a stdin -> stdout filter.
        corpus = xigtxml.load(sys.stdin, mode='full')
        clean_corpus(corpus)
        print(xigtxml.dumps(corpus))
        return

    for path in args.infiles:
        logging.info('Cleaning {}'.format(path))
        corpus = xigtxml.load(path, mode='full')
        clean_corpus(corpus)
        xigtxml.dump(path, corpus)
Example #3
0
def run(args):
    """
    Normalize the corpora named in ``args.infiles``, or the one on stdin.

    Each named input is loaded in 'full' mode, passed to
    ``normalize_corpus`` and dumped back under the same name. When no
    inputs are named, the corpus is loaded from standard input and the
    normalized serialization is printed instead.
    """
    if not args.infiles:
        # No files given: act as a stdin -> stdout filter.
        corpus = xigtxml.load(sys.stdin, mode='full')
        normalize_corpus(corpus)
        print(xigtxml.dumps(corpus))
        return

    for path in args.infiles:
        logging.info('Normalizing {}'.format(path))
        corpus = xigtxml.load(path, mode='full')
        normalize_corpus(corpus)
        xigtxml.dump(path, corpus)
Example #4
0
def run(args):
    """
    Sort the corpus in ``args.infile`` at the levels requested in ``args``.

    * ``args.igt_key``  -- sort the IGTs of the corpus by that key.
    * ``args.tier_key`` -- sort the tiers of every IGT by that key;
      otherwise, if ``args.tier_deps`` is set, sort the tiers with
      ``sort_tiers`` using the comma-separated reference attributes.
    * ``args.item_key`` -- sort the items of every tier by that key.

    The result is dumped back to ``args.infile`` when ``args.in_place`` is
    set, and printed otherwise.
    """
    corpus = xigtxml.load(args.infile)

    if args.igt_key:
        logging.info('Sorting %s IGTs' % args.infile)
        corpus.sort(key=make_sortkey(args.igt_key))

    # Key-based tier sorting wins over dependency-based sorting when both
    # options are supplied.
    if args.tier_key:
        logging.info('Sorting %s tiers by key' % args.infile)
        for igt in corpus:
            igt.sort(key=make_sortkey(args.tier_key))
    elif args.tier_deps:
        logging.info('Sorting %s tiers by ref-dependencies' % args.infile)
        refattrs = []
        for name in args.tier_deps.split(','):
            refattrs.append(name.strip())
        for igt in corpus:
            igt.sort_tiers(refattrs=refattrs)

    if args.item_key:
        logging.info('Sorting %s items by key' % args.infile)
        for tier in (t for igt in corpus for t in igt):
            tier.sort(key=make_sortkey(args.item_key))

    if not args.in_place:
        print(xigtxml.dumps(corpus))
        return
    xigtxml.dump(args.infile, corpus)
Example #5
0
)

# cycle 1
# The tier's segmentation names the tier's own id ("w"), and its single
# item's segmentation names the item's own id ("w1").
xc4 = XigtCorpus(
    id="xc1",
    igts=[
        Igt(
            id="i1",
            tiers=[
                Tier(
                    id="w",
                    type="words",
                    segmentation="w",
                    items=[
                        Item(id="w1", segmentation="w1"),
                    ],
                ),
            ],
        ),
    ],
)

# cycle 2
# Both items carry the same segmentation value, which lists both item ids
# ("w1,w2"); the tier's segmentation again names the tier itself.
xc5 = XigtCorpus(
    id="xc1",
    igts=[
        Igt(id="i1", tiers=[
            Tier(
                id="w", type="words", segmentation="w",
                items=[
                    Item(id=item_id, segmentation="w1,w2")
                    for item_id in ("w1", "w2")
                ],
            ),
        ]),
    ],
)

if __name__ == "__main__":
    # When run as a script, print the xigtxml serialization of xc1m.
    from xigt.codecs import xigtxml

    serialized = xigtxml.dumps(xc1m)
    print(serialized)
Example #6
0
    else:
        return xigtxml.default_decode_meta(elem)

### Encoding ###

def encode_meta(meta):
    """
    Encode a Meta object as an XML element.

    Metas whose type (compared case-insensitively) is 'judgment', 'vetted'
    or 'phenomena' are encoded here as a ``<meta>`` element carrying the
    original type plus the meta's attributes. For 'phenomena', each entry
    of ``meta.content`` additionally becomes a ``<phenomenon>`` child whose
    text is that entry. Every other Meta, including one with no type at
    all, is delegated to ``xigtxml.default_encode_meta``.

    :param meta: object exposing ``type``, ``attributes`` and ``content``.
    :return: the encoded element.
    """
    # An untyped Meta cannot be one of the special types; fall through to
    # the default encoder instead of failing on ``None.lower()``.
    metatype = (meta.type or '').lower()
    if metatype not in ('judgment', 'vetted', 'phenomena'):
        return xigtxml.default_encode_meta(meta)

    attributes = dict(type=meta.type, **(meta.attributes or {}))
    e = etree.Element('meta', attrib=attributes)
    if metatype == 'phenomena':
        # Missing content is treated as "no phenomena listed".
        for phenomenon in meta.content or ():
            p = etree.SubElement(e, 'phenomenon')
            p.text = phenomenon
    return e

### Function maps ###

# Install this module's meta handlers as the hooks used by the xigtxml codec.
# NOTE(review): the decoder's definition is not visible from this section;
# confirm that a function named matrix_decode_meta actually exists, otherwise
# the next line raises NameError.
xigtxml.decode_meta = matrix_decode_meta
# The encoder defined in this module is named encode_meta.
xigtxml.encode_meta = encode_meta

if __name__ == '__main__':
    # Round-trip check: load the corpus named on the command line, print it,
    # and write it to a fixed output file.
    import sys
    f = sys.argv[1]
    # Context managers ensure both file handles are closed (and the output
    # flushed) even if loading or serialization fails. The input stays open
    # until all work on the corpus is done.
    with open(f, 'r') as infile:
        xc = xigtxml.load(infile)
        print(xigtxml.dumps(xc, pretty_print=True))
        with open('abkhaz-out.xigt', 'w') as outfile:
            xigtxml.dump(outfile, xc, pretty_print=True)