'nmod_comp': 'nmod:comp', 'nmod_pass': '******', 'dobj_cau': 'dobj:cau', 'ccomp_cau': 'ccomp:cau', 'aux_q': 'aux:q', 'xcomp_sc': 'xcomp', 'Person_psor': 'Person[psor]', 'Number_psor': 'Number[psor]', } for f in sys.argv[1:]: basename = f[:-4] with open(f, "r") as fp: ann = brat.read_ann(fp) conllu = [{}] # dummy first token ensures that the indexes match for i in sorted(ann.tokens.keys()): tok = ann.tokens[i] token = {'id' : tok.id, 'form' : tok.token, 'cpos' : tok.pos, 'pos' : tok.pos, 'head' : None, 'deprel' : None, 'deps': "_"} feats = "" for (label, val) in tok.attrs: if len(feats): delim = "|"
provided with the one in morph/ directory. It also copies the .txt file over from the same directory. """ import sys, shutil from collections import namedtuple from brat import read_ann, write_ann, Rel, Ann # oldfp = open(sys.argv[1], 'r') # newfp = open(sys.argv[2], 'r') basename = sys.argv[1].replace('.ann', '') oldfname = basename + '.ann' oldfp = open(oldfname, 'r') newfp = open('morph/' + oldfname, 'r') old = read_ann(oldfp) new = read_ann(newfp) oldfp.close() newfp.close() tok_out = {} rel_out = {} tok_map = {} from difflib import SequenceMatcher s = SequenceMatcher(isjunk=lambda x: x in "_.?!", a=[v.token for k, v in old.tokens.items()], b=[v.token for k, v in new.tokens.items()], autojunk=True)
provided with the one in morph/ directory. It also copies the .txt file over from the same directory. """ import sys, shutil from collections import namedtuple from brat import read_ann,write_ann,Rel,Ann # oldfp = open(sys.argv[1], 'r') # newfp = open(sys.argv[2], 'r') basename = sys.argv[1].replace('.ann', '') oldfname = basename + '.ann' oldfp = open(oldfname, 'r') newfp = open('morph/' + oldfname, 'r') old = read_ann(oldfp) new = read_ann(newfp) oldfp.close() newfp.close() tok_out = {} rel_out = {} tok_map = {} from difflib import SequenceMatcher s = SequenceMatcher(isjunk=lambda x: x in "_.?!", a=[v.token for k, v in old.tokens.items()], b=[v.token for k, v in new.tokens.items()], autojunk=True)
'nmod_cau': 'nmod:cau', 'nmod_comp': 'nmod:comp', 'nmod_pass': '******', 'dobj_cau': 'dobj:cau', 'ccomp_cau': 'ccomp:cau', 'aux_q': 'aux:q', 'xcomp_sc': 'xcomp', 'Person_psor': 'Person[psor]', 'Number_psor': 'Number[psor]', } for f in sys.argv[1:]: basename = f[:-4] with open(f, "r") as fp: ann = brat.read_ann(fp) conllu = [{}] # dummy first token ensures that the indexes match for i in sorted(ann.tokens.keys()): tok = ann.tokens[i] token = { 'id': tok.id, 'form': tok.token, 'cpos': tok.pos, 'pos': tok.pos, 'head': None, 'deprel': None, 'deps': "_" } feats = ""