Exemple #1
0
    def tokenize(self, text):
        """Count the n-grams found inside each token of *text*.

        *text* is split with ``self.tokenizer.tokenize``; falsy (e.g. empty)
        tokens are skipped.  Every remaining token is expanded to a list of
        its elements (presumably characters, if tokens are strings -- confirm
        against the tokenizer) and passed to ``ngrams.ngram`` together with
        ``self.n``.

        Returns:
            A ``defaultdict(int)`` mapping each gram to the number of times it
            was produced across all tokens.
        """
        counts = defaultdict(int)
        for token in self.tokenizer.tokenize(text):
            if not token:
                # Nothing to expand for an empty token.
                continue
            # n-grams are taken per token, so they never span two tokens.
            for piece in ngrams.ngram(list(token), self.n):
                counts[piece] += 1
        return counts
Exemple #2
0
    def tokenize(self, text, lang):
        """Return a presence map of the n-grams over the tokens of *text*.

        *text* is tokenized with ``self.tokenizer.tokenize(text, lang, True)``
        (NOTE(review): the meaning of the trailing ``True`` flag is defined by
        the tokenizer and is not visible here -- confirm).  The resulting
        token sequence is passed as a whole to ``ngrams.ngram`` with
        ``self.n``.

        Returns:
            A plain ``dict`` whose keys are the distinct grams and whose
            values are always ``1``; repeated grams are not counted.
        """
        tokens = self.tokenizer.tokenize(text, lang, True)
        # Each gram is only flagged as present; duplicates collapse to one key.
        return {piece: 1 for piece in ngrams.ngram(tokens, self.n)}