def tokenize(self, text, lang):
    """Return a presence map over character/word n-grams of *text*.

    The text is tokenized for *lang* (third tokenizer arg appears to be a
    flag — semantics defined by the project tokenizer), then every n-gram
    of the token stream is mapped to the constant 1 (set-like membership,
    not a frequency count).
    """
    tokens = self.tokenizer.tokenize(text, lang, True)
    # dict.fromkeys collapses duplicate grams exactly as repeated
    # `grams[gram] = 1` assignments did.
    return dict.fromkeys(ngrams.ngram(tokens, self.n), 1)
def tokenize(self, text):
    """Return a frequency map of n-grams drawn from each non-empty token.

    Each token is exploded into a character list before n-gram extraction,
    so the grams are character-level per token. Missing keys read as 0
    (defaultdict semantics preserved for callers).
    """
    counts = defaultdict(int)
    for token in self.tokenizer.tokenize(text):
        if not token:
            # Skip empty tokens — they yield no grams.
            continue
        for gram in ngrams.ngram(list(token), self.n):
            counts[gram] += 1
    return counts
def tokenize(self, text):
    """Count character-level n-grams across all non-empty tokens of *text*.

    Returns a defaultdict(int) mapping each n-gram to its occurrence
    count; absent grams read as 0.
    """
    tally = defaultdict(int)
    # Only non-empty tokens contribute grams.
    nonempty = (tok for tok in self.tokenizer.tokenize(text) if tok)
    for tok in nonempty:
        for gram in ngrams.ngram(list(tok), self.n):
            tally[gram] += 1
    return tally
def tokenize(self, text, lang):
    """Map every n-gram of the *lang*-tokenized text to 1.

    Produces a presence (membership) dict rather than a frequency count;
    the third tokenizer argument is a project-defined flag — semantics
    not visible from here.
    """
    token_stream = self.tokenizer.tokenize(text, lang, True)
    # Comprehension deduplicates grams just like repeated assignment did.
    return {gram: 1 for gram in ngrams.ngram(token_stream, self.n)}