Esempio n. 1
0
    def buildTreesAndDics(self, text):
        """Index every n-gram of ``text`` for n = 1 .. ``self.max_len``.

        For each n-gram length ``n`` this method:

        * stores the total n-gram count (duplicates included) in
          ``self.len_dict[n]``;
        * appends the distinct n-grams to ``self.vocabulary`` -- only for
          ``n >= 2``; 1-grams are not added to the vocabulary;
        * inserts every occurrence into ``self.prefixTree`` and
          ``self.suffixTree`` together with its length ``n``.

        A one-line progress indicator is written to stdout after each
        length has been processed.

        Args:
            text: Iterable of items accepted by ``Cleaner.n_gram(item, n)``
                (presumably sentences or token sequences -- confirm against
                the caller). It is traversed once per n-gram length, so it
                must be re-iterable (e.g. a list, not a generator).
        """
        for n in range(1, self.max_len + 1):
            # A comprehension flattens in linear time; ``sum(lists, [])``
            # would re-copy the accumulator on every addition (quadratic).
            n_gram_list = [
                gram for item in text for gram in Cleaner.n_gram(item, n)
            ]
            self.len_dict[n] = len(n_gram_list)
            if n >= 2:
                # ``extend`` accepts any iterable, so the set is passed as is.
                self.vocabulary.extend(set(n_gram_list))
            for word in n_gram_list:
                self.prefixTree.insert(word, n)
                self.suffixTree.insert(word, n)
            # Report the number of lengths completed (n, not n - 1) so the
            # final message reads max_len/max_len.
            sys.stdout.write('build tree done %d/%d\r' % (n, self.max_len))
            # No newline is written, so flush explicitly or the progress
            # line may stay in the buffer.
            sys.stdout.flush()
Esempio n. 2
0
    def buildTreesAndDics(self, text):
        """Index every n-gram of ``text`` for n = 1 .. ``self.max_len``.

        For each n-gram length ``n`` this method:

        * stores the total n-gram count (duplicates included) in
          ``self.len_dict[n]``;
        * appends the distinct n-grams to ``self.vocabulary`` -- only for
          ``n >= 2``; 1-grams are not added to the vocabulary;
        * inserts every occurrence into ``self.prefixTree`` and
          ``self.suffixTree`` together with its length ``n``.

        Progress is shown with a ``tqdm`` bar and the total elapsed time is
        printed once all lengths are done.

        Args:
            text: Iterable of items accepted by ``Cleaner.n_gram(item, n)``
                (presumably sentences or token sequences -- confirm against
                the caller). It is traversed once per n-gram length, so it
                must be re-iterable (e.g. a list, not a generator).
        """
        tic = time()

        pbar = tqdm(range(1, self.max_len + 1))
        for n in pbar:
            pbar.set_description("buildTreesAndDics, %d-gram \n" % n)
            # A comprehension flattens in linear time; ``sum(lists, [])``
            # would re-copy the accumulator on every addition (quadratic).
            n_gram_list = [
                gram for item in text for gram in Cleaner.n_gram(item, n)
            ]
            self.len_dict[n] = len(n_gram_list)
            if n >= 2:
                # ``extend`` accepts any iterable, so the set is passed as is.
                self.vocabulary.extend(set(n_gram_list))
            for word in n_gram_list:
                self.prefixTree.insert(word, n)
                self.suffixTree.insert(word, n)
        print("build tree done! %.2fs" % (time() - tic))