def update_index(self, pk=None):
    """Rebuild the full-text index for one, several, or all objects.

    pk: a single primary key, a list/tuple of primary keys, or None to
        reindex every object of this model.

    Side effects: deletes stale Index rows for the affected objects,
    then recreates Index/IndexWord rows for every non-stopword, stemmed
    word found in the indexed fields (self._fields maps field name to a
    weight key in WEIGHTS).
    """
    if pk is not None:
        if isinstance(pk, (list, tuple)):
            items = self.filter(pk__in=pk)
        else:
            items = self.filter(pk=pk)
        # Delete stale index entries for *every* selected item.
        # (Bug fix: the original only cleared items[0]._index, leaving
        # stale rows for the rest of a pk list, and raised IndexError
        # when the queryset was empty.)
        for item in items:
            item._index.all().delete()
    else:
        items = self.all()
        model_type = ContentType.objects.get_for_model(self.model)
        Index.objects.filter(content_type__pk=model_type.id).delete()
    # Loop invariants hoisted: the stemmer and stopword set do not
    # change per word, so build/look them up once.
    stem = Stemmer(self.language_code)
    stopwords = FTS_STOPWORDS[self.language_code]
    IW = {}  # stemmed word -> IndexWord cache, avoids repeated get_or_create
    for item in items:
        for field, weight in self._fields.items():
            for w in set(getattr(item, field).lower().split(' ')):
                if w and w not in stopwords:
                    w = stem(w)
                    try:
                        iw = IW[w]
                    except KeyError:
                        iw = IndexWord.objects.get_or_create(word=w)[0]
                        IW[w] = iw
                    i = Index(content_object=item, word=iw, weight=WEIGHTS[weight])
                    i.save()
def search(self, query, **kwargs):
    """Naive LIKE-based search over the indexed fields.

    Every non-stopword query term is stemmed and must appear
    (icontains) in every field listed in self._fields — the Q objects
    are AND-ed, which is deliberately restrictive.

    Returns a filtered queryset.
    """
    params = Q()
    # Hoisted out of the loop: the original rebuilt the Stemmer for
    # every matching word.
    stem = Stemmer(self.language_code)
    stopwords = FTS_STOPWORDS[self.language_code]
    for w in set(query.lower().split(' ')):
        if w and w not in stopwords:
            w = stem(w)
            for field in self._fields.keys():
                params &= Q(**{'%s__icontains' % field: w})
    return self.filter(params)
def _get_words(self, line, minlen=0):
    """Tokenize *line* into a set of normalized index words.

    Accents are stripped (NFD decomposition, combining marks dropped),
    the text is lowercased and split on SEP; stop words and words of
    length <= minlen are discarded; remaining words are stemmed when
    self.stem_words is enabled.
    """
    # Decompose, then drop combining marks (category 'Mn') to remove accents.
    decomposed = unicodedata.normalize('NFD', unicode(line))
    stripped = ''.join(c for c in decomposed if unicodedata.category(c) != 'Mn')
    # Lowercase and split into a set of candidate tokens.
    tokens = set(SEP.split(stripped.lower()))
    # Stemming is optional; fall back to the identity function.
    if self.stem_words:
        stem = Stemmer(self.language_code)
    else:
        stem = lambda w: w
    stopwords = FTS_STOPWORDS[self.language_code]
    result = set()
    for token in tokens:
        if token and token not in stopwords and len(token) > minlen:
            result.add(stem(token))
    return result
def search(self, query, **kwargs):
    """Index-backed search: match objects whose full-text index
    contains any of the stemmed, non-stopword query terms (OR
    semantics via the reverse _index relation).

    The SQL below documents the intended AND semantics (HAVING
    count(DISTINCT word_id) = <number of terms>), currently disabled.
    """
    # SELECT core_blog.*, count(DISTINCT word_id), sum(weight)
    # FROM core_blog
    #   INNER JOIN fts_index ON (core_blog.id = fts_index.object_id)
    #   INNER JOIN fts_indexword ON (fts_index.word_id = fts_indexword.id)
    # WHERE fts_index.content_type_id = 18
    #   AND (fts_indexword.word='titl' OR fts_indexword.word='simpl')
    # GROUP BY core_blog.id, core_blog.title, core_blog.body
    # HAVING count(DISTINCT word_id) = 2;
    params = Q()
    # Hoisted out of the loop: the original rebuilt the Stemmer for
    # every matching word.
    stem = Stemmer(self.language_code)
    stopwords = FTS_STOPWORDS[self.language_code]
    words = 0  # term count, used by the disabled HAVING clause below
    for w in set(query.lower().split(' ')):
        if w and w not in stopwords:
            words += 1
            params |= Q(_index__word__word=stem(w))
    qs = self.filter(params)
    # if words > 1:
    #     qs.query.group_by = ['core_blog.id, core_blog.title, core_blog.body']
    #     qs.query.having = ['(COUNT(DISTINCT fts_index.word_id)) = %d' % words]
    # distinct() is required: an object matching several terms would
    # otherwise appear once per matching index row.
    return qs.distinct()