예제 #1
0
    def build_dictionary(self, dataset_id):
        """Create a ``Dictionary`` from the texts behind ``self.queryset``.

        ``self.queryset`` is wrapped in a ``DbTextIterator`` and handed to
        ``self.tokenizer`` together with every entry of ``self.filters`` as
        extra positional arguments. The tokenizer output is then passed to
        ``Dictionary._create_from_texts`` along with this object's name,
        minimum frequency and dictionary settings.

        Args:
            dataset_id: Primary key used to fetch the ``Dataset`` that is
                passed on as the ``dataset`` argument.

        Returns:
            Whatever ``Dictionary._create_from_texts`` returns.
        """
        source_texts = DbTextIterator(self.queryset)
        token_stream = self.tokenizer(source_texts, *self.filters)

        # NOTE(review): presumably a Django model lookup that raises when no
        # row matches -- confirm against the Dataset definition.
        target_dataset = Dataset.objects.get(pk=dataset_id)

        create = Dictionary._create_from_texts
        return create(
            tokenized_texts=token_stream,
            name=self.name,
            minimum_frequency=self.minimum_frequency,
            dataset=target_dataset,
            settings=self.get_dict_settings(),
        )
예제 #2
0
    def build_dictionary(self, dataset_id):
        """Create a ``Dictionary`` from the texts behind ``self.queryset``.

        ``self.queryset`` is wrapped in a ``DbTextIterator`` and handed to
        ``self.tokenizer``; ``self.lemmatizer`` is supplied as the second
        positional argument, followed by every entry of ``self.filters``.
        The tokenizer output is forwarded to
        ``Dictionary._create_from_texts``.

        Args:
            dataset_id: Primary key used to fetch the ``Dataset`` that is
                passed on as the ``dataset`` argument.

        Returns:
            Whatever ``Dictionary._create_from_texts`` returns.
        """
        raw_texts = DbTextIterator(self.queryset)
        tokens = self.tokenizer(raw_texts, self.lemmatizer, *self.filters)
        dataset_record = Dataset.objects.get(pk=dataset_id)

        # Resolve the factory first, then collect its keyword arguments in
        # the order they are declared below.
        factory = Dictionary._create_from_texts
        creation_kwargs = {
            "tokenized_texts": tokens,
            "name": self.name,
            "minimum_frequency": self.minimum_frequency,
            "dataset": dataset_record,
            "settings": self.get_dict_settings(),
        }
        return factory(**creation_kwargs)
예제 #3
0
    def build_dictionary(self):
        """Create a ``Dictionary`` from the texts behind ``self.queryset``.

        ``self.queryset`` is wrapped in a ``DbTextIterator`` configured with
        ``self.textfield``, and the iterator is tokenized by
        ``self.tokenizer`` with ``self.stoplist`` passed as ``stoplist``.
        The ``dataset`` argument given to ``Dictionary._create_from_texts``
        is the class name of the queryset's model, not a model instance.

        Returns:
            Whatever ``Dictionary._create_from_texts`` returns.
        """
        text_source = DbTextIterator(self.queryset, textfield=self.textfield)
        token_stream = self.tokenizer(text_source, stoplist=self.stoplist)

        create = Dictionary._create_from_texts
        return create(
            tokenized_texts=token_stream,
            name=self.name,
            minimum_frequency=self.minimum_frequency,
            # The dataset is identified by the model's class name here.
            dataset=self.queryset.model.__name__,
            settings=self.get_dict_settings(),
        )