Example #1
 def load(self) -> None:
     self.inverted_index = load_pickle(self.load_path /
                                       self.inverted_index_filename)
     self.entities_list = load_pickle(self.load_path /
                                      self.entities_list_filename)
     self.q2name = load_pickle(self.load_path / self.q2name_filename)
     if self.who_entities_filename:
         self.who_entities = load_pickle(self.load_path /
                                         self.who_entities_filename)
     if self.freq_dict_filename:
         self.load_freq_dict(self.freq_dict_filename)
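All of the snippets in this listing lean on DeepPavlov-style helpers such as load_pickle, save_pickle and expand_path. Below is a minimal sketch of what these helpers are assumed to do (thin wrappers around pickle and pathlib); the real DeepPavlov implementations may differ in details such as path resolution and logging, so treat this as illustrative only.

    # Hedged sketch of the helpers used throughout these examples.
    import pickle
    from pathlib import Path


    def save_pickle(data, fname):
        # Serialize `data` to the file at `fname` with pickle.
        with open(fname, 'wb') as f:
            pickle.dump(data, f)


    def load_pickle(fname):
        # Deserialize and return whatever object was pickled at `fname`.
        with open(fname, 'rb') as f:
            return pickle.load(f)


    def expand_path(path):
        # Stand-in for DeepPavlov's expand_path: resolve a str or Path to an
        # absolute Path (the real helper also handles '~' and config roots).
        return Path(path).expanduser().resolve()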
Example #2
 def load(self) -> None:
     self.word_to_idlist = load_pickle(self.load_path / self.word_to_idlist_filename)
     self.entities_list = load_pickle(self.load_path / self.entities_list_filename)
     self.word_list = list(self.word_to_idlist.keys())
     self.entities_ranking_dict = load_pickle(self.load_path / self.entities_ranking_filename)
     if not self.fit_vectorizer:
         self.vectorizer = load_pickle(self.load_path / self.vectorizer_filename)
         self.faiss_index = faiss.read_index(str(expand_path(self.faiss_index_filename)))
         if self.use_gpu:
             res = faiss.StandardGpuResources()
             self.faiss_index = faiss.index_cpu_to_gpu(res, 0, self.faiss_index)
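Example #2 restores a FAISS index with faiss.read_index and optionally moves it to the GPU with faiss.index_cpu_to_gpu. As a companion, here is a hedged sketch of the write side that would produce such an index file; the dimensionality, the entity_vectors array and the output filename are illustrative placeholders, not names from the original component.

    # Hedged sketch of building and saving a FAISS index that a loader like
    # the one above could read back with faiss.read_index().
    import numpy as np
    import faiss

    dim = 300                                           # embedding dimensionality (assumed)
    entity_vectors = np.random.rand(1000, dim).astype('float32')

    index = faiss.IndexFlatIP(dim)                      # exact inner-product index
    index.add(entity_vectors)                           # one vector per entity
    faiss.write_index(index, 'faiss_index.bin')         # file later passed to faiss.read_index()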
Example #3
    def __init__(self, data_dir=None, *args, **kwargs):
        if data_dir is None:
            data_dir = paths.USR_PATH
        data_dir = Path(data_dir)
        if self.dict_name is None:
            self.dict_name = args[0] if args else kwargs.get(
                'dictionary_name', 'dictionary')

        data_dir = data_dir / self.dict_name

        alphabet_path = data_dir / 'alphabet.pkl'
        words_path = data_dir / 'words.pkl'
        words_trie_path = data_dir / 'words_trie.pkl'

        if not is_done(data_dir):
            print('Trying to build a dictionary in {}'.format(data_dir),
                  file=sys.stderr)
            if data_dir.is_dir():
                shutil.rmtree(data_dir)
            data_dir.mkdir(parents=True)

            words = self._get_source(data_dir, *args, **kwargs)
            words = {self._normalize(word) for word in words}

            alphabet = {c for w in words for c in w}
            alphabet.remove('⟬')
            alphabet.remove('⟭')

            save_pickle(alphabet, alphabet_path)
            save_pickle(words, words_path)

            words_trie = defaultdict(set)
            for word in words:
                for i in range(len(word)):
                    words_trie[word[:i]].add(word[:i + 1])
                words_trie[word] = set()
            words_trie = {k: sorted(v) for k, v in words_trie.items()}

            save_pickle(words_trie, words_trie_path)

            mark_done(data_dir)
            print('built', file=sys.stderr)
        else:
            print('Loading a dictionary from {}'.format(data_dir),
                  file=sys.stderr)

        self.alphabet = load_pickle(alphabet_path)
        self.words_set = load_pickle(words_path)
        self.words_trie = load_pickle(words_trie_path)
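Examples #3, #4, #6 and #10 all build the same prefix "trie" stored as a plain dict: every prefix maps to the sorted set of prefixes one character longer, and complete words map to an empty list. A tiny worked example on an illustrative two-word vocabulary makes the resulting structure concrete.

    # Worked example of the prefix-trie construction used above, on an
    # illustrative two-word vocabulary.
    from collections import defaultdict

    words = {'cat', 'car'}

    words_trie = defaultdict(set)
    for word in words:
        for i in range(len(word)):
            words_trie[word[:i]].add(word[:i + 1])
        words_trie[word] = set()
    words_trie = {k: sorted(v) for k, v in words_trie.items()}

    print(words_trie)
    # e.g. {'': ['c'], 'c': ['ca'], 'ca': ['car', 'cat'], 'car': [], 'cat': []}
    # (key order may vary, since it follows set iteration order)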
Example #4
    def __init__(self, data_dir=None, *args, **kwargs):
        if data_dir is None:
            data_dir = paths.USR_PATH
        data_dir = Path(data_dir)
        if self.dict_name is None:
            self.dict_name = args[0] if args else kwargs.get('dictionary_name', 'dictionary')

        data_dir = data_dir / self.dict_name

        alphabet_path = data_dir / 'alphabet.pkl'
        words_path = data_dir / 'words.pkl'
        words_trie_path = data_dir / 'words_trie.pkl'

        if not is_done(data_dir):
            print('Trying to build a dictionary in {}'.format(data_dir), file=sys.stderr)
            if data_dir.is_dir():
                shutil.rmtree(data_dir)
            data_dir.mkdir(parents=True)

            words = self._get_source(data_dir, *args, **kwargs)
            words = {self._normalize(word) for word in words}

            alphabet = {c for w in words for c in w}
            alphabet.remove('⟬')
            alphabet.remove('⟭')

            save_pickle(alphabet, alphabet_path)
            save_pickle(words, words_path)

            words_trie = defaultdict(set)
            for word in words:
                for i in range(len(word)):
                    words_trie[word[:i]].add(word[:i+1])
                words_trie[word] = set()
            words_trie = {k: sorted(v) for k, v in words_trie.items()}

            save_pickle(words_trie, words_trie_path)

            mark_done(data_dir)
            print('built', file=sys.stderr)
        else:
            print('Loading a dictionary from {}'.format(data_dir), file=sys.stderr)

        self.alphabet = load_pickle(alphabet_path)
        self.words_set = load_pickle(words_path)
        self.words_trie = load_pickle(words_trie_path)
Example #5
 def load(self):
     """
     Load model from file.
     """
     try:
         return load_pickle(self.ser_path)
     except FileNotFoundError as e:
         # Surface the missing path in the error message.
         raise FileNotFoundError("There is no model in the specified path: {}".format(
             self.ser_path)) from e
Example #6
    def __init__(self,
                 data_dir: Union[Path, str] = '',
                 *args,
                 dictionary_name: str = 'dictionary',
                 **kwargs):
        data_dir = expand_path(data_dir) / dictionary_name

        alphabet_path = data_dir / 'alphabet.pkl'
        words_path = data_dir / 'words.pkl'
        words_trie_path = data_dir / 'words_trie.pkl'

        if not is_done(data_dir):
            log.info('Trying to build a dictionary in {}'.format(data_dir))
            if data_dir.is_dir():
                shutil.rmtree(str(data_dir))
            data_dir.mkdir(parents=True)

            words = self._get_source(data_dir, *args, **kwargs)
            words = {self._normalize(word) for word in words}

            alphabet = {c for w in words for c in w}
            alphabet.remove('⟬')
            alphabet.remove('⟭')

            save_pickle(alphabet, alphabet_path)
            save_pickle(words, words_path)

            words_trie = defaultdict(set)
            for word in words:
                for i in range(len(word)):
                    words_trie[word[:i]].add(word[:i + 1])
                words_trie[word] = set()
            words_trie = {k: sorted(v) for k, v in words_trie.items()}

            save_pickle(words_trie, words_trie_path)

            mark_done(data_dir)
            log.info('built')
        else:
            log.info('Loading a dictionary from {}'.format(data_dir))

        self.alphabet = load_pickle(alphabet_path)
        self.words_set = load_pickle(words_path)
        self.words_trie = load_pickle(words_trie_path)
Example #7
    def load(self, **kwargs) -> None:
        """Load classifier parameters"""
        log.info(f"Loading model from {self.load_path}")
        for path in self.load_path:
            if path.is_file():
                self.ec_data += load_pickle(path)
            else:
                raise FileNotFoundError(path)

        log.info(f"Loaded items {len(self.ec_data)}")
Example #8
    def load(self, **kwargs) -> None:
        """Load classifier parameters"""
        log.info(f"Loading model from {self.load_path}")
        for path in self.load_path:
            if is_file_exist(path):
                self.ec_data += load_pickle(path)
            else:
                log.info(f"File {path} does not exist")

        log.info(f"Loaded items {len(self.ec_data)}")
Example #9
    def load(self, **kwargs) -> None:
        """Load classifier parameters"""
        log.info(f"Loading model from {self.load_path}")
        for path in self.load_path:
            if path.is_file():
                self.ec_data += load_pickle(path)
            else:
                raise FileNotFoundError(path)

        log.info(f"Loaded items {len(self.ec_data)}")
Example #10
    def __init__(self, data_dir: Union[Path, str] = '', *args, dictionary_name: str = 'dictionary', **kwargs):
        data_dir = expand_path(data_dir) / dictionary_name

        alphabet_path = data_dir / 'alphabet.pkl'
        words_path = data_dir / 'words.pkl'
        words_trie_path = data_dir / 'words_trie.pkl'

        if not is_done(data_dir):
            log.info('Trying to build a dictionary in {}'.format(data_dir))
            if data_dir.is_dir():
                shutil.rmtree(str(data_dir))
            data_dir.mkdir(parents=True)

            words = self._get_source(data_dir, *args, **kwargs)
            words = {self._normalize(word) for word in words}

            alphabet = {c for w in words for c in w}
            alphabet.remove('⟬')
            alphabet.remove('⟭')

            save_pickle(alphabet, alphabet_path)
            save_pickle(words, words_path)

            words_trie = defaultdict(set)
            for word in words:
                for i in range(len(word)):
                    words_trie[word[:i]].add(word[:i+1])
                words_trie[word] = set()
            words_trie = {k: sorted(v) for k, v in words_trie.items()}

            save_pickle(words_trie, words_trie_path)

            mark_done(data_dir)
            log.info('built')
        else:
            log.info('Loading a dictionary from {}'.format(data_dir))

        self.alphabet = load_pickle(alphabet_path)
        self.words_set = load_pickle(words_path)
        self.words_trie = load_pickle(words_trie_path)
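The dictionary builders above guard the build step with is_done and mark_done. Here is a hedged sketch of that convention, assuming it boils down to a marker file inside the target directory; the marker name used by the real DeepPavlov helpers may differ.

    # Hedged sketch of the is_done/mark_done convention assumed above.
    from pathlib import Path

    DONE_MARKER = '.done'     # assumed marker file name


    def is_done(path):
        # True if a previous build completed in `path`.
        return (Path(path) / DONE_MARKER).exists()


    def mark_done(path):
        # Record that the build in `path` finished successfully.
        (Path(path) / DONE_MARKER).touch()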
Example #11
    def __init__(self, wiki_filename: str, file_format: str = "hdt", lang: str = "@en", **kwargs) -> None:
        """

        Args:
            wiki_filename: file with Wikidata
            file_format: format of Wikidata file
            lang: Russian or English language
            **kwargs:
        """
        self.description_rel = "http://schema.org/description"
        self.file_format = file_format
        self.wiki_filename = str(expand_path(wiki_filename))
        if self.file_format == "hdt":
            self.document = HDTDocument(self.wiki_filename)
        elif self.file_format == "pickle":
            self.document = load_pickle(self.wiki_filename)
        else:
            raise ValueError("Unsupported file format")
        self.lang = lang
Example #12
 def load(self) -> None:
     logger.info("Loading tfidf_vectorizer from {}".format(self.load_path))
     self.vectorizer = load_pickle(expand_path(self.load_path))
Example #13
 def load(self) -> None:
     """Load classifier parameters"""
     log.info("Loading from {}".format(self.load_path))
     self.ec_data, self.x_train_features = load_pickle(
         expand_path(self.load_path))
Example #14
 def load(self) -> None:
     """Load model"""
     logger.info("Loading tfidf_vectorizer from {}".format(self.load_path))
     self.vectorizer = load_pickle(expand_path(self.load_path))
     self.token2idx = self.vectorizer.vocabulary_
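Example #14 expects a pickled, already-fitted TF-IDF vectorizer whose vocabulary_ attribute doubles as a token-to-index mapping. Below is a hedged sketch of the save side using scikit-learn's TfidfVectorizer; the training corpus and the output filename are illustrative, not taken from the original component.

    # Hedged sketch of producing the artifact that Example #14 loads.
    import pickle
    from sklearn.feature_extraction.text import TfidfVectorizer

    train_texts = ["how do i reset my password", "where is the nearest office"]   # illustrative corpus

    vectorizer = TfidfVectorizer()
    vectorizer.fit(train_texts)                       # populates vectorizer.vocabulary_

    with open('tfidf_vectorizer.pkl', 'wb') as f:     # file a load_pickle()-style loader reads back
        pickle.dump(vectorizer, f)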
Example #15
 def load(self) -> None:
     logger.info("Loading classifier from {}".format(self.load_path))
     self.clf = load_pickle(expand_path(self.load_path))
Example #16
 def __init__(self, wiki_first_par_filename, entities_num=2, **kwargs):
     self.wiki_first_par = load_pickle(str(expand_path(wiki_first_par_filename)))
     self.entities_num = entities_num
Example #17
 def __init__(self, q_to_page_filename, entities_num=5, **kwargs):
     self.q_to_page = load_pickle(str(expand_path(q_to_page_filename)))
     self.entities_num = entities_num
Example #18
 def load(self) -> None:
     self.q_to_name = load_pickle(self.load_path / self.q2name_filename)
     if self._relations_filename is not None:
         self._relations_mapping = load_pickle(self.load_path /
                                               self._relations_filename)
     self.wikidata = load_pickle(self.load_path / self.wiki_filename)
Example #19
 def load(self) -> None:
     """Load model"""
     logger.info("Loading tfidf_vectorizer from {}".format(self.load_path))
     self.vectorizer = load_pickle(expand_path(self.load_path))
     self.token2idx = self.vectorizer.vocabulary_
Example #20
 def load(self) -> None:
     """Load classifier parameters"""
     logger.info("Loading faq_logreg_model from {}".format(self.load_path))
     self.logreg = load_pickle(expand_path(self.load_path))
Example #21
 def load(self) -> None:
     """Load classifier parameters"""
     logger.info("Loading faq_model from {}".format(self.load_path))
     self.x_train_features, self.y_train = load_pickle(expand_path(self.load_path))
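Examples #21, #22 and #24 unpack an (x_train_features, y_train) pair from a single pickle. The sketch below shows, under stated assumptions, how such a pair could be produced and stored; the FAQ questions, answers and filename are illustrative placeholders.

    # Hedged sketch of saving the (x_train_features, y_train) pair those loaders unpack.
    import pickle
    from sklearn.feature_extraction.text import TfidfVectorizer

    faq_questions = ["how do i reset my password", "what are your opening hours"]   # illustrative
    faq_answers = ["Use the 'Forgot password' link.", "We are open 9am-6pm."]        # illustrative

    vectorizer = TfidfVectorizer()
    x_train_features = vectorizer.fit_transform(faq_questions)   # sparse TF-IDF matrix
    y_train = faq_answers

    with open('faq_model.pkl', 'wb') as f:                       # the single file loaded above
        pickle.dump((x_train_features, y_train), f)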
Example #22
 def load(self) -> None:
     """Load classifier parameters"""
     logger.info("Loading faq_model from {}".format(self.load_path))
     self.x_train_features, self.y_train = load_pickle(
         expand_path(self.load_path))
Example #23
 def load(self) -> None:
     self.rel_q2name = load_pickle(self.load_path /
                                   self.rel_q2name_filename)
Example #24
 def load(self) -> None:
     logger.info("Loading faq_model from {}".format(self.load_path))
     self.x_train_features, self.y_train = load_pickle(
         expand_path(self.load_path))
Example #25
 def load(self) -> None:
     self.inverted_index = load_pickle(self.load_path / self.inverted_index_filename)
     self.entities_list = load_pickle(self.load_path / self.entities_list_filename)
     self.q2name = load_pickle(self.load_path / self.q2name_filename)
Example #26
 def load(self) -> None:
     """Load TF-IDF vectorizer"""
     logger.info("Loading tfidf_vectorizer from {}".format(expand_path(self.load_path)))
     self.vectorizer = load_pickle(expand_path(self.load_path))