Ejemplo n.º 1
0
class Vocab(object):
    """Vocabulary mapping keys to integer indices through a backing ``Trie``.

    Indices handed out by this class are the trie's own key ids shifted by
    ``start_index``; the reverse lookup undoes that shift.
    """

    def __init__(self, dic, start_index=0):
        """Set up the backing trie and the index offset.

        Args:
            dic: A ``Trie`` instance, which is stored as-is, or any other
                value, which is passed to ``Trie(...)`` to build one.
            start_index: Offset added to key ids in ``get_index`` and
                subtracted from indices in ``get_key_by_index``.
        """
        self._start_index = start_index
        self._dic = dic if isinstance(dic, Trie) else Trie(dic)

    @property
    def size(self):
        """Number of entries; same value as ``len(self)``."""
        return len(self)

    def __len__(self):
        """Return the length reported by the backing trie."""
        backing = self._dic
        return len(backing)

    def __iter__(self):
        """Return an iterator over the backing trie."""
        backing = self._dic
        return iter(backing)

    def __contains__(self, key):
        """Return whether *key* is contained in the backing trie."""
        backing = self._dic
        return key in backing

    def get_index(self, key, default=None):
        """Return the offset index of *key*.

        Args:
            key: Key to look up in the backing trie.
            default: Value returned when the lookup raises ``KeyError``.

        Returns:
            The trie's key id plus ``start_index``, or *default* when a
            ``KeyError`` is raised during the lookup.
        """
        try:
            shifted_id = self._dic.key_id(key) + self._start_index
        except KeyError:
            return default
        return shifted_id

    def get_key_by_index(self, index):
        """Return the key for *index*, an index as produced by ``get_index``.

        The ``start_index`` offset is removed before asking the trie to
        restore the key.
        """
        trie_id = index - self._start_index
        return self._dic.restore_key(trie_id)

    def save(self, out_file):
        """Dump the ``serialize()`` dictionary to *out_file* via ``joblib.dump``."""
        payload = self.serialize()
        joblib.dump(payload, out_file)

    def serialize(self):
        """Return a dict holding the trie's ``tobytes()`` output and the offset.

        Returns:
            A dict with keys ``"dic"`` and ``"start_index"``.
        """
        return {
            "dic": self._dic.tobytes(),
            "start_index": self._start_index,
        }