Example #1
0
def load_datasets(fnames, lowercase=True):
    """Load one dataset per CoNLL file, optionally lowercasing the 'word' field.

    Args:
        fnames: iterable of file paths; each is passed to ``Dataset.load_conll``.
        lowercase: when true, every non-None entry of each dataset's 'word'
            field is lowercased in place through ``Dataset.convert``.

    Returns:
        A list with the loaded datasets, in the same order as ``fnames``.
    """
    def lower_words(word_list):
        # None entries are passed through unchanged rather than lowercased.
        return [w.lower() if w is not None else None for w in word_list]

    datasets = []
    for fn in fnames:
        d = Dataset.load_conll(fn)
        # Parenthesised single-argument print is valid (and prints the same
        # text) on both Python 2 and Python 3; the bare statement form is a
        # SyntaxError on Python 3.
        print("\t%d examples in %s" % (len(d), fn))
        if lowercase:
            d.convert({'word': lower_words}, in_place=True)
        datasets.append(d)
    return datasets
Example #2
0
def get_counter_for_field(filelist, field):
    """Count the values of ``field`` across all CoNLL files in ``filelist``.

    Each file is loaded with ``Dataset.load_conll``. For every row, the
    positions 0 .. len(row['word']) - 1 of ``row[field]`` are visited; values
    equal to ``SKIP_TOKEN`` or that are ``None`` are not counted.

    Returns:
        A ``Counter`` mapping each counted value to its number of occurrences.
    """
    counts = Counter()
    for path in filelist:
        print('loading {}'.format(path))
        dataset = Dataset.load_conll(path)
        for row in dataset:
            values = row[field]
            # The number of positions is taken from the 'word' column, not
            # from the requested field itself.
            for pos in range(len(row['word'])):
                value = values[pos]
                if value == SKIP_TOKEN or value is None:
                    continue
                counts[value] += 1
    return counts
Example #3
0
from stanza.text.dataset import Dataset

# for fin_name in ['train.conll', 'dev.conll', 'test.conll']:
#     fout_name = fin_name.replace('.conll', '.anon.conll')
#     print('loading {}'.format(fin_name))
#     d = Dataset.load_conll(fin_name)
#     print(d)
#     for i, row in enumerate(d):
#         if row['subj'] == 'SUBJECT':
#             d.fields['word'][i] = row['subj_ner']
#         if row['obj'] == 'OBJECT':
#             d.fields['word'][i] = row['obj_ner']
#     d.write_conll(fout_name)

# For each split, replace every token marked as subject/object with
# 'NER-' + its NER tag and write the result next to the input as *.anon.conll.
for fin_name in ('train.conll', 'dev.conll', 'test.conll'):
    fout_name = fin_name.replace('.conll', '.anon.conll')
    print('loading {}'.format(fin_name))
    d = Dataset.load_conll(fin_name)
    print(d)
    for i, row in enumerate(d):
        for j in range(len(row['word'])):
            # Subject is checked before object, so when a position matches
            # both, the object replacement is the one that remains.
            for role_key, marker, ner_key in (
                ('subj', 'SUBJECT', 'subj_ner'),
                ('obj', 'OBJECT', 'obj_ner'),
            ):
                if row[role_key][j] == marker:
                    d.fields['word'][i][j] = 'NER-' + row[ner_key][j]
    d.write_conll(fout_name)
 def test_load_conll(self):
     # Round-trip check: write the CONLL fixture to a temporary file, load it
     # back by file name and compare the resulting fields with CONLL_MOCK.
     with NamedTemporaryFile() as tmp:
         tmp.write(self.CONLL)
         # Flush before handing the file name to the loader
         # (presumably load_conll re-opens the file by path -- confirm).
         tmp.flush()
         loaded = Dataset.load_conll(tmp.name)
         self.assertDictEqual(self.CONLL_MOCK, loaded.fields)