def pos(word, out, in_pickle):
    """Write a tag for every token, looked up from a pickled lexicon.

    Args:
        word: Annotation handed to ``util.read_annotation``; the result is
            iterated by token id and indexed to get each token's word form.
        out: Annotation that is cleared and then rewritten with the tags.
        in_pickle: Path to a pickle file holding an object with a
            ``dict``-style ``get`` that maps a word form to its tag.

    Words missing from the lexicon are tagged ``'OTHER'``. Trailing digits,
    ``*`` and lowercase ASCII letters are stripped from each looked-up tag
    (presumably to drop sub-class suffixes -- TODO confirm).
    """
    WORD = util.read_annotation(word)

    # NOTE(security): unpickling can execute arbitrary code, so in_pickle
    # must come from a trusted source.
    # The context manager closes the file deterministically instead of
    # leaving the handle to the garbage collector.
    with open(in_pickle, 'rb') as lexicon_file:
        lexicon = pickle.load(lexicon_file)

    suffix_chars = '0123456789*abcdefghijklmnopqrstuvwxyz'
    OUT = {}
    for tokid in WORD:
        OUT[tokid] = lexicon.get(WORD[tokid], 'OTHER').rstrip(suffix_chars)

    util.clear_annotation(out)
    util.write_annotation(out, OUT)
def make_longrafsi(rafsi, out):
    """Expand each token's ``|``-separated rafsi with ``expand``.

    Reads the ``rafsi`` annotation, splits every value on ``"|"``, passes
    each piece through ``expand`` and writes the ``"|"``-joined results to
    ``out`` (which is cleared first). Tokens whose first piece is empty get
    no entry in the output.
    """
    annotations = util.read_annotation(rafsi)
    expanded = {}
    for tokid in annotations:
        pieces = annotations[tokid].split("|")
        if not pieces or not pieces[0]:
            # Nothing to expand for this token (e.g. an empty value).
            continue
        expanded[tokid] = "|".join(map(expand, pieces))

    util.clear_annotation(out)
    util.write_annotation(out, expanded)
def make_longrafsi(rafsi, out, in_pickle):
    """Map each token's ``|``-separated rafsi through a pickled dictionary.

    Args:
        rafsi: Annotation handed to ``util.read_annotation``; each value is
            a ``"|"``-separated string of rafsi.
        out: Annotation that is cleared and then rewritten with the results.
        in_pickle: Path to a pickle file holding an object with a
            ``dict``-style ``get`` keyed by rafsi.

    Every rafsi is replaced by its dictionary entry, or ``'UNDEF'`` when it
    has none, and the results are re-joined with ``"|"``. Tokens whose first
    rafsi is empty get no entry in the output.
    """
    RAFSI = util.read_annotation(rafsi)

    # NOTE(security): unpickling can execute arbitrary code, so in_pickle
    # must come from a trusted source.
    # The context manager guarantees the file handle is closed.
    with open(in_pickle, 'rb') as dict_file:
        rafsi_dict = pickle.load(dict_file)

    OUT = {}
    for tokid in RAFSI:
        # A separate local name keeps the ``rafsi`` parameter intact.
        parts = RAFSI[tokid].split("|")
        if parts and parts[0]:
            OUT[tokid] = "|".join(rafsi_dict.get(r, 'UNDEF') for r in parts)

    util.clear_annotation(out)
    util.write_annotation(out, OUT)
def pos(word, out, in_pickle):
    """Write a tag for every token, looked up from a pickled lexicon.

    Args:
        word: Annotation handed to ``util.read_annotation``; the result is
            iterated by token id and indexed to get each token's word form.
        out: Annotation that is cleared and then rewritten with the tags.
        in_pickle: Path to a pickle file holding an object with a
            ``dict``-style ``get`` that maps a word form to its tag.

    Words missing from the lexicon are tagged ``'OTHER'``. Trailing digits,
    ``*`` and lowercase ASCII letters are stripped from each looked-up tag
    (presumably to drop sub-class suffixes -- TODO confirm).
    """
    WORD = util.read_annotation(word)

    # NOTE(security): unpickling can execute arbitrary code, so in_pickle
    # must come from a trusted source.
    # The context manager closes the file deterministically instead of
    # leaving the handle to the garbage collector.
    with open(in_pickle, 'rb') as lexicon_file:
        lexicon = pickle.load(lexicon_file)

    suffix_chars = '0123456789*abcdefghijklmnopqrstuvwxyz'
    OUT = {}
    for tokid in WORD:
        OUT[tokid] = lexicon.get(WORD[tokid], 'OTHER').rstrip(suffix_chars)

    util.clear_annotation(out)
    util.write_annotation(out, OUT)
def make_rafsi(word, pos, out):
    """Split tokens tagged ``"OTHER"`` into affixes and write them to ``out``.

    Reads the ``word`` and ``pos`` annotations. For every token whose ``pos``
    value is ``"OTHER"``, the word is passed to ``compound_to_affixes`` and
    the returned pieces are stored joined with ``"|"``. Other tokens get no
    entry. ``out`` is cleared before the result is written.
    """
    words = util.read_annotation(word)
    tags = util.read_annotation(pos)
    affixes_by_token = {}
    for tokid in words:
        form = words[tokid]
        if tags[tokid] != "OTHER":
            continue
        affixes_by_token[tokid] = "|".join(compound_to_affixes(form))

    util.clear_annotation(out)
    util.write_annotation(out, affixes_by_token)
def experimental(word, tai, out, in_pickle):
    """Flag cmavo/gismu tokens that are absent from a pickled collection.

    Args:
        word: Annotation handed to ``util.read_annotation`` giving each
            token's word form.
        tai: Annotation giving each token's word type; only ``'cmavo'`` and
            ``'gismu'`` tokens are checked.
        out: Annotation that is cleared and then rewritten with the flags.
        in_pickle: Path to a pickle file holding a container that supports
            ``in`` tests on word forms.

    For a checked token the output is ``'True'`` when the word (with every
    ``'.'`` removed) is not in the container and ``'False'`` when it is.
    All other tokens get ``'UNDEF'``.
    """
    WORD = util.read_annotation(word)
    TAI = util.read_annotation(tai)

    # NOTE(security): unpickling can execute arbitrary code, so in_pickle
    # must come from a trusted source.
    # The context manager guarantees the file handle is closed.
    with open(in_pickle, 'rb') as catni_file:
        catni = pickle.load(catni_file)

    checked_types = ('cmavo', 'gismu')  # built once, not per token
    OUT = {}
    for tokid in WORD:
        if TAI[tokid] in checked_types:
            OUT[tokid] = str(WORD[tokid].replace('.', '') not in catni)
        else:
            OUT[tokid] = "UNDEF"

    util.clear_annotation(out)
    util.write_annotation(out, OUT)
def experimental(word, tai, out, in_pickle):
    """Flag cmavo/gismu tokens that are absent from a pickled collection.

    Args:
        word: Annotation handed to ``util.read_annotation`` giving each
            token's word form.
        tai: Annotation giving each token's word type; only ``'cmavo'`` and
            ``'gismu'`` tokens are checked.
        out: Annotation that is cleared and then rewritten with the flags.
        in_pickle: Path to a pickle file holding a container that supports
            ``in`` tests on word forms.

    For a checked token the output is ``'True'`` when the word (with every
    ``'.'`` removed) is not in the container and ``'False'`` when it is.
    All other tokens get ``'UNDEF'``.
    """
    WORD = util.read_annotation(word)
    TAI = util.read_annotation(tai)

    # NOTE(security): unpickling can execute arbitrary code, so in_pickle
    # must come from a trusted source.
    # The context manager guarantees the file handle is closed.
    with open(in_pickle, 'rb') as catni_file:
        catni = pickle.load(catni_file)

    checked_types = ('cmavo', 'gismu')  # built once, not per token
    OUT = {}
    for tokid in WORD:
        if TAI[tokid] in checked_types:
            OUT[tokid] = str(WORD[tokid].replace('.', '') not in catni)
        else:
            OUT[tokid] = "UNDEF"

    util.clear_annotation(out)
    util.write_annotation(out, OUT)
def vlatai(word, pos, out):
    """Classify every token with the external ``vlatai`` program.

    Args:
        word: Annotation handed to ``util.read_annotation`` giving each
            token's word form.
        pos: Not used by this function; kept so existing callers that pass
            it keep working.
        out: Annotation that is cleared and then rewritten with the
            classifications.

    Each word, with every ``'.'`` removed, is sent to ``vlatai`` on its own
    line. Output lines are paired with tokens in order. The stored value is
    the first whitespace-separated word after the first ``':'`` on the line,
    with ``'cmavo(s)'`` normalised to ``'cmavo'``.

    Raises:
        IndexError: If an output line has no ``':'`` or nothing after it.
    """
    WORD = util.read_annotation(word)

    tokids = []
    inp = []
    for tokid in WORD:
        tokids.append(tokid)
        inp.append(WORD[tokid].replace('.', ''))

    # universal_newlines=True puts the pipes in text mode, so str input and
    # str splitting work on Python 3 as well as Python 2.
    p = Popen(['vlatai'], stdin=PIPE, stdout=PIPE, stderr=PIPE,
              universal_newlines=True)
    # communicate() waits for the process to exit, so no wait() is needed.
    lines, _ = p.communicate(input='\n'.join(inp))

    OUT = {}
    for tokid, line in zip(tokids, lines.split('\n')):
        v = line.split(':')[1].strip().split()[0]
        if v == 'cmavo(s)':
            v = 'cmavo'
        OUT[tokid] = v

    util.clear_annotation(out)
    util.write_annotation(out, OUT)
def vlatai(word, pos, out):
    """Classify every token with the external ``vlatai`` program.

    Args:
        word: Annotation handed to ``util.read_annotation`` giving each
            token's word form.
        pos: Not used by this function; kept so existing callers that pass
            it keep working.
        out: Annotation that is cleared and then rewritten with the
            classifications.

    Each word, with every ``'.'`` removed, is sent to ``vlatai`` on its own
    line. Output lines are paired with tokens in order. The stored value is
    the first whitespace-separated word after the first ``':'`` on the line,
    with ``'cmavo(s)'`` normalised to ``'cmavo'``.

    Raises:
        IndexError: If an output line has no ``':'`` or nothing after it.
    """
    WORD = util.read_annotation(word)

    tokids = []
    inp = []
    for tokid in WORD:
        tokids.append(tokid)
        inp.append(WORD[tokid].replace('.', ''))

    # universal_newlines=True puts the pipes in text mode, so str input and
    # str splitting work on Python 3 as well as Python 2.
    p = Popen(['vlatai'], stdin=PIPE, stdout=PIPE, stderr=PIPE,
              universal_newlines=True)
    # communicate() waits for the process to exit, so no wait() is needed.
    lines, _ = p.communicate(input='\n'.join(inp))

    OUT = {}
    for tokid, line in zip(tokids, lines.split('\n')):
        v = line.split(':')[1].strip().split()[0]
        if v == 'cmavo(s)':
            v = 'cmavo'
        OUT[tokid] = v

    util.clear_annotation(out)
    util.write_annotation(out, OUT)