コード例 #1
0
def main():
    """Interactively rate Collins noun entries and record the ratings.

    Existing ``key<TAB>score`` pairs are read from the file named by
    ``sys.argv[1]``. The loop then repeatedly asks
    ``col.random_entry(POSTags.Noun)`` for an entry whose key has neither
    been rated nor skipped, prints it together with its senses, and prompts
    for a rating from 0 to 5. Each accepted rating is written, as
    ``score / 5``, to ``sys.argv[1] + '.new'`` and flushed immediately so
    that an interrupted session keeps its progress.

    Entering ``Q`` ends the session; any input that is not an integer in
    the range 0-5 skips the entry for the rest of the session.
    """
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only load a dictionary pickle that comes from a trusted source.
    with open("/home/beka/thesis/resources/collins/collins.pickle", "rb") as f:
        col = collins.CollinsDictionary(pickle.load(f))
    with open(sys.argv[1]) as f:
        data = [line.strip().split('\t') for line in f if line.strip()]
        nouns = {k: float(v) for k, v in data}
    with open(sys.argv[1] + '.new', 'wt') as f:
        skipped = set()
        while True:
            # Keep drawing until we get an entry not seen before.
            entry = col.random_entry(POSTags.Noun)
            while entry.key in nouns or entry.key in skipped:
                entry = col.random_entry(POSTags.Noun)
            print("\n\n\n{}#{}".format(entry.key, entry.context))
            print("\n===\n".join(str(s) for s in entry.senses))
            user = input("select 0-5, Q or anything else to skip: ")
            if user == 'Q':
                break
            # Validate explicitly: an assert would be stripped under -O and
            # a bare except would hide unrelated failures.
            try:
                score = int(user)
            except ValueError:
                score = None
            if score is None or not 0 <= score <= 5:
                skipped.add(entry.key)
                continue
            nouns[entry.key] = score
            f.write("{}\t{}\n".format(entry.key, score / 5))
            f.flush()
コード例 #2
0
ファイル: tag_types.py プロジェクト: viksit/ucca
def main():
    """Interactively tag words from a list with a 0-5 rating.

    Non-blank lines of the file named by ``sys.argv[1]`` are read as words,
    and the first ``int(sys.argv[2])`` of them are skipped. For each
    remaining word the Collins entries returned by ``col.by_form`` are
    looked up:

    * no entries: the word is recorded as ``'NOT FOUND'``;
    * no sense of any entry has ``pos == POSTags.Noun``: recorded as
      ``'NOT NOUN'``;
    * otherwise every entry is printed and the user is prompted. An
      integer answer in the range 0-5 is recorded as typed; ``Q`` stops
      immediately; anything else is recorded as ``'NOT NOUN'``.

    Returns:
        A list of ``(word, tag)`` tuples for the words processed so far.
    """
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only load a dictionary pickle that comes from a trusted source.
    with open("/home/beka/thesis/resources/collins/collins.pickle", "rb") as f:
        col = collins.CollinsDictionary(pickle.load(f))
    with open(sys.argv[1]) as f:
        words = [line.strip() for line in f if line.strip()]
        words = words[int(sys.argv[2]):]
    output = []
    for word in words:
        entries = col.by_form(word)
        if not entries:
            output.append((word, 'NOT FOUND'))
            continue
        if all(s.pos != POSTags.Noun for e in entries for s in e.senses):
            output.append((word, 'NOT NOUN'))
            continue
        for entry in entries:
            print("\n\n\n{}#{}".format(entry.key, entry.context))
            print("\n===\n".join(str(s) for s in entry.senses))
        user = input(
            "{}\nselect 0-5, Q or anything else to skip: ".format(word))
        if user == 'Q':
            return output
        # Validate explicitly: an assert would be stripped under -O and a
        # bare except would hide unrelated failures.
        try:
            score = int(user)
        except ValueError:
            score = None
        if score is not None and 0 <= score <= 5:
            # The raw answer string is stored, not the parsed integer.
            output.append((word, user))
        else:
            output.append((word, 'NOT NOUN'))
    return output
コード例 #3
0
 def __init__(self, dixon_path, collins_path, wikt_path):
     """Load the Dixon, Collins and Wiktionary resources and build a stemmer.

     Args:
         dixon_path: path of the file parsed with
             ``ETree.ElementTree().parse`` and handed to ``DixonVerbs``.
         collins_path: path of the pickle whose content is handed to
             ``collins.CollinsDictionary``.
         wikt_path: path of a text file whose lines are handed to
             ``wikt.Wiktionary``.
     """
     with open(dixon_path) as f:
         self.dixon = DixonVerbs(ETree.ElementTree().parse(f))
     # NOTE(review): pickle.load can execute arbitrary code from the file;
     # only load a dictionary pickle that comes from a trusted source.
     with open(collins_path, 'rb') as f:
         self.collins = collins.CollinsDictionary(pickle.load(f))
     with open(wikt_path) as f:
         raw_defs = f.read().split('\n')
         # A final newline leaves one empty trailing element; drop only
         # that, so a file without a trailing newline keeps its last line.
         if raw_defs[-1] == '':
             raw_defs.pop()
         self.wikt = wikt.Wiktionary(raw_defs)
     self.stemmer = nltk.stem.snowball.EnglishStemmer()
コード例 #4
0
 def test_basic_usage(self):
     """Check key, form and stem lookups against the sample dictionary."""
     with open("test_files/collins-sample.pickle", "rb") as sample:
         unpickled = pickle.load(sample)
     dictionary = collins.CollinsDictionary(unpickled)

     # An unknown key yields an empty sequence.
     self.assertSequenceEqual(dictionary.by_key('aaaaaa'), [])

     # (lookup method, query, expected number of results)
     expectations = (
         (dictionary.by_key, 'apart', 2),
         (dictionary.by_form, 'droughts', 1),
         (dictionary.by_form, 'drove', 2),
         (dictionary.by_stem, 'abort', 3),
     )
     for lookup, query, expected_count in expectations:
         self.assertEqual(len(lookup(query)), expected_count)
コード例 #5
0
 def __init__(self, collins_path, wikt_path):
     """Load the Collins and Wiktionary resources.

     Args:
         collins_path: path of the pickle whose content is handed to
             ``collins.CollinsDictionary``.
         wikt_path: path of a text file whose lines are handed to
             ``wikt.Wiktionary``.
     """
     # NOTE(review): pickle.load can execute arbitrary code from the file;
     # only load a dictionary pickle that comes from a trusted source.
     with open(collins_path, 'rb') as f:
         self.collins = collins.CollinsDictionary(pickle.load(f))
     with open(wikt_path) as f:
         raw_defs = f.read().split('\n')
         # A final newline leaves one empty trailing element; drop only
         # that, so a file without a trailing newline keeps its last line.
         if raw_defs[-1] == '':
             raw_defs.pop()
         self.wikt = wikt.Wiktionary(raw_defs)