class Vocab(object):
    """Key <-> index mapping backed by a ``Trie``, with a configurable index offset.

    Indices exposed by this class are the trie's own key ids shifted by
    ``start_index``.
    """

    def __init__(self, dic, start_index=0):
        """Wrap *dic* and remember the index offset.

        Args:
            dic: Either an existing ``Trie`` instance, which is used as-is,
                or any other object, which is passed to the ``Trie``
                constructor.
            start_index: Offset added to every key id returned by
                :meth:`get_index` and subtracted again in
                :meth:`get_key_by_index`.
        """
        self._dic = dic if isinstance(dic, Trie) else Trie(dic)
        self._start_index = start_index

    @property
    def size(self):
        """Number of entries; same value as ``len(self)``."""
        return len(self)

    def __len__(self):
        """Return the length of the underlying trie."""
        return len(self._dic)

    def __iter__(self):
        """Iterate over whatever the underlying trie yields."""
        return iter(self._dic)

    def __contains__(self, key):
        """Report whether *key* is in the underlying trie."""
        return key in self._dic

    def get_index(self, key, default=None):
        """Return the offset index of *key*, or *default* if it is unknown.

        A ``KeyError`` raised by the trie lookup is treated as "key not
        present" and turned into *default*.
        """
        try:
            key_id = self._dic.key_id(key)
        except KeyError:
            return default
        return key_id + self._start_index

    def get_key_by_index(self, index):
        """Return the key for an offset *index* (inverse of :meth:`get_index`)."""
        key_id = index - self._start_index
        return self._dic.restore_key(key_id)

    def save(self, out_file):
        """Write the result of :meth:`serialize` to *out_file* with ``joblib.dump``."""
        payload = self.serialize()
        joblib.dump(payload, out_file)

    def serialize(self):
        """Return a dict holding the trie's bytes and the start index."""
        return {
            "dic": self._dic.tobytes(),
            "start_index": self._start_index,
        }