def main():
    """Interactively score randomly drawn Collins noun entries.

    Loads the pickled Collins dictionary, then reads already-scored nouns
    from the tab-separated ``key<TAB>value`` file named by ``sys.argv[1]``.
    Random noun entries that are neither already scored nor skipped in this
    session are shown one at a time. An integer answer in ``0..5`` is
    normalised to ``score / 5`` and written immediately to
    ``sys.argv[1] + '.new'``; ``Q`` ends the session; any other answer skips
    the entry for the rest of the session.

    Note that the ``.new`` file is opened in write mode, so a previous
    ``.new`` file is truncated on every run.
    """
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only point this at a trusted dictionary dump.
    with open("/home/beka/thesis/resources/collins/collins.pickle", "rb") as f:
        col = collins.CollinsDictionary(pickle.load(f))

    with open(sys.argv[1]) as f:
        data = [line.strip().split('\t') for line in f if line.strip()]
    nouns = {k: float(v) for k, v in data}

    with open(sys.argv[1] + '.new', 'wt') as f:
        skipped = set()
        while True:
            # Rejection-sample until an entry not yet seen comes up.
            # NOTE(review): this never terminates if every key that
            # random_entry can return is already scored or skipped.
            entry = col.random_entry(POSTags.Noun)
            while entry.key in nouns or entry.key in skipped:
                entry = col.random_entry(POSTags.Noun)

            print("\n\n\n{}#{}".format(entry.key, entry.context))
            print("\n===\n".join(str(s) for s in entry.senses))
            user = input("select 0-5, Q or anything else to skip: ")
            if user == 'Q':
                break

            # Validate explicitly: an `assert` would be stripped under
            # `python -O`, and a bare `except` would hide unrelated errors.
            try:
                score = int(user)
            except ValueError:
                skipped.add(entry.key)
                continue
            if not 0 <= score <= 5:
                skipped.add(entry.key)
                continue

            # Keep the in-memory map on the same 0..1 scale as the values
            # loaded from the input file (it is only used for membership).
            normalised = score / 5
            nouns[entry.key] = normalised
            f.write("{}\t{}\n".format(entry.key, normalised))
            # Flush after every answer so an interrupted session loses nothing.
            f.flush()
def main():
    """Interactively annotate a word list against the Collins dictionary.

    Loads the pickled Collins dictionary, reads one word per non-blank line
    from the file named by ``sys.argv[1]`` and skips the first
    ``int(sys.argv[2])`` words. For each remaining word:

    * no dictionary entry for that form -> ``(word, 'NOT FOUND')``;
    * no sense of any entry is a noun -> ``(word, 'NOT NOUN')``;
    * otherwise every entry is shown and the user is prompted once per
      entry, so a word with several entries yields several tuples. An
      integer answer in ``0..5`` is recorded as typed; any other answer
      records ``'NOT NOUN'``; ``Q`` stops immediately.

    Returns:
        A list of ``(word, label)`` tuples collected up to the point where
        the words ran out or the user entered ``Q``.
    """
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only point this at a trusted dictionary dump.
    with open("/home/beka/thesis/resources/collins/collins.pickle", "rb") as f:
        col = collins.CollinsDictionary(pickle.load(f))

    with open(sys.argv[1]) as f:
        words = [line.strip() for line in f if line.strip()]
    # The second argument is the number of leading words to skip.
    words = words[int(sys.argv[2]):]

    output = []
    for word in words:
        entries = col.by_form(word)
        if not entries:
            output.append((word, 'NOT FOUND'))
            continue
        if all(s.pos != POSTags.Noun for e in entries for s in e.senses):
            output.append((word, 'NOT NOUN'))
            continue

        for entry in entries:
            print("\n\n\n{}#{}".format(entry.key, entry.context))
            print("\n===\n".join(str(s) for s in entry.senses))
            user = input(
                "{}\nselect 0-5, Q or anything else to skip: ".format(word))
            if user == 'Q':
                return output

            # Validate explicitly: an `assert` would be stripped under
            # `python -O`, and a bare `except` would hide unrelated errors.
            try:
                score = int(user)
            except ValueError:
                score = None
            if score is not None and 0 <= score <= 5:
                # The answer is stored exactly as the user typed it.
                output.append((word, user))
            else:
                output.append((word, 'NOT NOUN'))
    return output
def __init__(self, dixon_path, collins_path, wikt_path):
    """Load the Dixon, Collins and Wiktionary resources and create a stemmer.

    Args:
        dixon_path: Path of the file parsed with ``ETree.ElementTree().parse``;
            the parse result is handed to ``DixonVerbs``.
        collins_path: Path of the pickle whose content is handed to
            ``collins.CollinsDictionary``.
        wikt_path: Path of a text file whose lines are handed to
            ``wikt.Wiktionary``.
    """
    with open(dixon_path) as f:
        self.dixon = DixonVerbs(ETree.ElementTree().parse(f))

    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only point this at a trusted dictionary dump.
    with open(collins_path, 'rb') as f:
        self.collins = collins.CollinsDictionary(pickle.load(f))

    with open(wikt_path) as f:
        raw_defs = f.read().split('\n')
    # A file ending in a newline yields one trailing empty element; drop it.
    # Only drop it when it really is empty, so a file without a final
    # newline does not silently lose its last definition.
    if raw_defs and not raw_defs[-1]:
        raw_defs.pop()
    self.wikt = wikt.Wiktionary(raw_defs)

    self.stemmer = nltk.stem.snowball.EnglishStemmer()
def test_basic_usage(self):
    """Check key, form and stem lookups against the sample Collins pickle."""
    with open("test_files/collins-sample.pickle", "rb") as sample_file:
        dictionary = collins.CollinsDictionary(pickle.load(sample_file))

    # An unknown key yields an empty sequence.
    missing = dictionary.by_key('aaaaaa')
    self.assertSequenceEqual(missing, [])

    # Lookup by exact key.
    apart_entries = dictionary.by_key('apart')
    self.assertEqual(len(apart_entries), 2)

    # Lookup by word form.
    droughts_entries = dictionary.by_form('droughts')
    self.assertEqual(len(droughts_entries), 1)
    drove_entries = dictionary.by_form('drove')
    self.assertEqual(len(drove_entries), 2)

    # Lookup by stem.
    abort_entries = dictionary.by_stem('abort')
    self.assertEqual(len(abort_entries), 3)
def __init__(self, collins_path, wikt_path):
    """Load the Collins and Wiktionary resources.

    Args:
        collins_path: Path of the pickle whose content is handed to
            ``collins.CollinsDictionary``.
        wikt_path: Path of a text file whose lines are handed to
            ``wikt.Wiktionary``.
    """
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only point this at a trusted dictionary dump.
    with open(collins_path, 'rb') as f:
        self.collins = collins.CollinsDictionary(pickle.load(f))

    with open(wikt_path) as f:
        raw_defs = f.read().split('\n')
    # A file ending in a newline yields one trailing empty element; drop it.
    # Only drop it when it really is empty, so a file without a final
    # newline does not silently lose its last definition.
    if raw_defs and not raw_defs[-1]:
        raw_defs.pop()
    self.wikt = wikt.Wiktionary(raw_defs)