def load_coded_dictionary(file_or_stream):
    """
    Load a dictionary which is split by the codes within it.

    Args:
        file_or_stream: Either a filename (``str`` or ``unicode``), which is
            opened here for reading, or any other object, which is handed to
            ``iter_entries`` unchanged.

    Returns:
        A ``defaultdict(dict)`` mapping each code to a ``{word: entry}``
        dict containing every entry that has at least one sense carrying
        that code. Entries sharing the same ``word`` are merged into a
        single entry (via ``update``) before being indexed.
    """
    # Only close what we opened ourselves; a caller-supplied stream stays
    # under the caller's control.
    opened_here = isinstance(file_or_stream, (str, unicode))
    if opened_here:
        file_or_stream = open(file_or_stream)

    try:
        word_to_entry = {}
        coded_dict = defaultdict(dict)
        for entry in iter_entries(file_or_stream):
            word = entry.word

            # Merge any word entries with the same graphical form.
            if word in word_to_entry:
                merged = word_to_entry[word]
                merged.update(entry)
                entry = merged
            else:
                word_to_entry[word] = entry

            # Collect codes from the (possibly merged) entry, so codes seen
            # on earlier occurrences of this word are included too.
            codes = set()
            for sense in entry.senses:
                codes.update(get_codes(sense))

            # Create a subset of the dictionary for each code.
            for code in codes:
                # Note that overwriting is ok, since we already merged any
                # coincidental forms.
                coded_dict[code][word] = entry
    finally:
        # The loop above consumes the entries completely, so the file is no
        # longer needed once we get here (normally or via an exception).
        if opened_here:
            file_or_stream.close()

    return coded_dict
def iter_coded_entries(filename):
    """
    Iterate over the dictionary entries produced by ``iter_entries`` for
    the given filename, pairing each one with its codes.

    Yields ``(entry, codes)`` tuples, where ``codes`` is the set of all
    codes returned by ``get_codes`` across the entry's senses.
    """
    for dict_entry in iter_entries(filename):
        # Flatten the per-sense codes into one de-duplicated set.
        sense_codes = set(
            code
            for sense in dict_entry.senses
            for code in get_codes(sense)
        )
        yield dict_entry, sense_codes