def build_dictionary(self, dataset_id):
    """Create a dictionary from the tokenized texts of ``self.queryset``.

    Args:
        dataset_id: Primary key used to look up the ``Dataset`` that is
            handed to the dictionary factory.

    Returns:
        The result of ``Dictionary._create_from_texts`` for the tokenized
        texts, combined with this object's name, minimum frequency and
        dictionary settings.
    """
    # Wrap the queryset in a DbTextIterator and run it through the
    # tokenizer together with every configured filter.
    token_stream = self.tokenizer(DbTextIterator(self.queryset), *self.filters)

    # NOTE(review): presumably raises if no Dataset has this primary key
    # (Django-style manager) -- confirm against the model definition.
    target_dataset = Dataset.objects.get(pk=dataset_id)

    factory_kwargs = {
        "tokenized_texts": token_stream,
        "name": self.name,
        "minimum_frequency": self.minimum_frequency,
        "dataset": target_dataset,
        "settings": self.get_dict_settings(),
    }
    return Dictionary._create_from_texts(**factory_kwargs)
def build_dictionary(self, dataset_id):
    """Create a dictionary from the tokenized texts of ``self.queryset``.

    The tokenizer receives the text iterator, ``self.lemmatizer`` and then
    every entry of ``self.filters`` as positional arguments.

    Args:
        dataset_id: Primary key used to look up the ``Dataset`` that is
            handed to the dictionary factory.

    Returns:
        The result of ``Dictionary._create_from_texts`` for the tokenized
        texts, combined with this object's name, minimum frequency and
        dictionary settings.
    """
    # The lemmatizer goes first, followed by the configured filters.
    token_stream = self.tokenizer(
        DbTextIterator(self.queryset),
        self.lemmatizer,
        *self.filters,
    )

    # NOTE(review): presumably raises if no Dataset has this primary key
    # (Django-style manager) -- confirm against the model definition.
    target_dataset = Dataset.objects.get(pk=dataset_id)

    factory_kwargs = {
        "tokenized_texts": token_stream,
        "name": self.name,
        "minimum_frequency": self.minimum_frequency,
        "dataset": target_dataset,
        "settings": self.get_dict_settings(),
    }
    return Dictionary._create_from_texts(**factory_kwargs)
def build_dictionary(self):
    """Create a dictionary from the tokenized texts of ``self.queryset``.

    Texts are read through a ``DbTextIterator`` configured with
    ``self.textfield`` and tokenized with ``self.stoplist`` passed as the
    ``stoplist`` keyword argument.

    Returns:
        The result of ``Dictionary._create_from_texts`` for the tokenized
        texts, combined with this object's name, minimum frequency and
        dictionary settings. The ``dataset`` argument is the class name of
        the queryset's model.
    """
    source_texts = DbTextIterator(self.queryset, textfield=self.textfield)
    token_stream = self.tokenizer(source_texts, stoplist=self.stoplist)

    factory_kwargs = {
        "tokenized_texts": token_stream,
        "name": self.name,
        "minimum_frequency": self.minimum_frequency,
        # The dataset is identified by the model's class name here, not by
        # a database object.
        "dataset": self.queryset.model.__name__,
        "settings": self.get_dict_settings(),
    }
    return Dictionary._create_from_texts(**factory_kwargs)