def get_contextual_summaries(self, raw_texts, query, **options):
    """Return one highlighted summary per entry of ``raw_texts``.

    Each summary is produced by the PostgreSQL function 'ts_headline',
    which returns snippets of the text with the words of ``query``
    highlighted using HTML tags.  The returned list has the same length
    as ``raw_texts``; an empty ``raw_texts`` gives ``[]`` without
    touching the database.

    Keyword ``options`` are joined into a ``key=value,key=value`` string
    and passed as the options argument of 'ts_headline'.  See the
    PostgreSQL documentation for the options it accepts.
    """
    if not raw_texts:
        return []

    ts_query = convert_query(query)

    # Serialize the keyword options into the string ts_headline expects.
    option_pairs = []
    for name, value in options.items():
        option_pairs.append('%s=%s' % (name, value))
    option_string = ','.join(option_pairs)

    # One "(%s)" row per raw text; each row becomes doc.text in the query.
    row_placeholders = ', '.join(['(%s)'] * len(raw_texts))
    headline_sql = (
        ' SELECT ts_headline(%%s, doc.text, to_tsquery(%%s, %%s), %%s)'
        ' FROM (VALUES %s) AS doc (text) '
    ) % row_placeholders

    cursor = self.cursor
    # The fixed parameters come first, followed by one value per row.
    bind_values = (
        self.ts_config,
        self.ts_config,
        ts_query,
        option_string,
    ) + tuple(raw_texts)
    cursor.execute(headline_sql, bind_values)

    summaries = []
    for row in cursor.fetchall():
        (summary,) = row
        # Decode using the encoding reported by the connection.
        summaries.append(summary.decode(self.connection.encoding))
    return summaries
def get_contextual_summaries(self, raw_texts, query, **options):
    """Return one highlighted summary per entry of ``raw_texts``.

    Each summary is produced by the PostgreSQL function 'ts_headline',
    which returns snippets of the text with the words of ``query``
    highlighted using HTML tags.  The returned list has the same length
    as ``raw_texts``; an empty ``raw_texts`` gives ``[]`` without
    touching the database.

    Keyword ``options`` are joined into a ``key=value,key=value`` string
    and passed as the options argument of 'ts_headline'.  See the
    PostgreSQL documentation for the options it accepts.
    """
    if not raw_texts:
        return []

    ts_query = convert_query(query)

    # Serialize the keyword options into the string ts_headline expects.
    option_pairs = []
    for name, value in options.items():
        option_pairs.append("%s=%s" % (name, value))
    option_string = ",".join(option_pairs)

    # One "(%s)" row per raw text; each row becomes doc.text in the query.
    row_placeholders = ", ".join(["(%s)"] * len(raw_texts))
    headline_sql = (
        " SELECT ts_headline(%%s, doc.text, to_tsquery(%%s, %%s), %%s)"
        " FROM (VALUES %s) AS doc (text) "
    ) % row_placeholders

    cursor = self.cursor
    # The fixed parameters come first, followed by one value per row.
    bind_values = (
        self.ts_config,
        self.ts_config,
        ts_query,
        option_string,
    ) + tuple(raw_texts)
    cursor.execute(headline_sql, bind_values)

    summaries = []
    for row in cursor.fetchall():
        (summary,) = row
        # Decode using the encoding reported by the connection.
        summaries.append(summary.decode(self.connection.encoding))
    return summaries
def _call(self, query):
    """Return ``query`` as converted by repoze.pgtextindex's convert_query."""
    # The import happens at call time, inside the function body.
    from repoze.pgtextindex.queryconvert import convert_query as _convert

    converted = _convert(query)
    return converted
def _run_query(self, query, invert=False, docids=None):
    """Run a text search against ``self.table`` and return ranked docids.

    ``query`` is either an object providing ``IWeightedQuery`` or a value
    handed directly to ``convert_query``.  For a weighted query the
    following optional attributes are read:

    - ``text``: the query text (falls back to ``'%s' % query``).
    - ``D``, ``C``, ``B``, ``A``: weights passed, in that order, to
      ``ts_rank_cd`` (defaults 0.1, 0.2, 0.4, 1.0).
    - ``marker``: a string or a list; only rows whose ``marker`` array
      overlaps it are kept.
    - ``limit`` / ``offset``: added as LIMIT / OFFSET when truthy.
    - ``cache_enabled`` / ``cache``: when enabled, the result is stored
      in ``query.cache`` under the key ``(invert, docids)`` and reused.

    ``invert`` wraps the text match in NOT.  ``docids``, when not None,
    restricts the search to those docids; an empty ``docids`` yields an
    empty result without querying the database.

    The rank of a row is its ``coefficient`` multiplied by ``ts_rank_cd``,
    or by 1 when more than ``self.max_ranked`` rows pass the filter.

    Returns a ``self.family.IF.BTree()`` filled with the (docid, rank)
    rows returned by the query.
    """
    kw = {
        'table': self.table,
        'weight': '',
        'not': '',
        'filter': '',
        'limit': '',
        'offset': '',
        'max_ranked': self.max_ranked,
    }
    if invert:
        kw['not'] = 'NOT'

    cache = None

    if IWeightedQuery.providedBy(query):
        if getattr(query, 'cache_enabled', False):
            cache_key = (invert, docids)
            cache = getattr(query, 'cache', None)
            if cache is None:
                query.cache = cache = {}
            result = cache.get(cache_key)
            if result is not None:
                # Cache hit.
                return result

        kw['weight'] = "'{%s, %s, %s, %s}', "
        text = getattr(query, 'text', None)
        if text is None:
            text = '%s' % query  # Use __str__()
        cq = convert_query(text)
        # Order matters: parameters are positional and must follow the
        # order of the %s placeholders in the final statement.
        params = [
            self.ts_config,
            cq,
            getattr(query, 'D', 0.1),
            getattr(query, 'C', 0.2),
            getattr(query, 'B', 0.4),
            getattr(query, 'A', 1.0),
            self.ts_config,
            cq,
        ]

        marker = getattr(query, 'marker', None)
        if marker:
            # Match any marker value.
            if isinstance(marker, basestring):
                marker = [marker]
            kw['filter'] += " AND marker && %s::character varying[]"
            # The filter placeholder sits right after the first
            # to_tsquery(%s, %s), hence position 2.
            params.insert(2, marker)

        limit = getattr(query, 'limit', None)
        if limit:
            kw['limit'] = "LIMIT %s"
            params.append(limit)

        offset = getattr(query, 'offset', None)
        if offset:
            kw['offset'] = "OFFSET %s"
            params.append(offset)
    else:
        cq = convert_query(query)
        params = (self.ts_config, cq, self.ts_config, cq)

    if docids is not None:
        # NOTE(review): docids are formatted straight into the SQL text;
        # presumably they are always integers -- confirm against callers.
        docidstr = ','.join(str(docid) for docid in docids)
        if not docidstr:
            # "docid IN ()" is a syntax error in PostgreSQL.  Restricting
            # the search to no documents can only match nothing, so
            # return an empty result without running the query.
            result = self.family.IF.BTree()
            if cache is not None:
                cache[cache_key] = result
            return result
        kw['filter'] += ' AND docid IN (%s)' % docidstr

    stmt = (
        " WITH _filtered AS ("
        " SELECT docid, coefficient, text_vector"
        " FROM %(table)s"
        " WHERE %(not)s(text_vector @@ to_tsquery(%%s, %%s))"
        " %(filter)s),"
        " _counter AS (SELECT count(1) AS n FROM _filtered),"
        " _ranked AS ("
        " SELECT docid, coefficient * ("
        " CASE WHEN n <= %(max_ranked)s"
        " THEN ts_rank_cd(%(weight)stext_vector, to_tsquery(%%s, %%s))"
        " ELSE 1 END) AS rank"
        " FROM _filtered, _counter)"
        " SELECT docid, rank FROM _ranked"
        " ORDER BY rank DESC"
        " %(limit)s"
        " %(offset)s "
    ) % kw

    cursor = self.cursor
    cursor.execute(stmt, tuple(params))
    result = self.family.IF.BTree()
    result.update(cursor.fetchall())

    if cache is not None:
        cache[cache_key] = result
    return result
def _run_query(self, query, invert=False, docids=None):
    """Run a text search against ``self.table`` and return ranked docids.

    ``query`` is either an object providing ``IWeightedQuery`` or a value
    handed directly to ``convert_query``.  For a weighted query the
    following optional attributes are read:

    - ``text``: the query text (falls back to ``"%s" % query``).
    - ``D``, ``C``, ``B``, ``A``: weights passed, in that order, to
      ``ts_rank_cd`` (defaults 0.1, 0.2, 0.4, 1.0).
    - ``marker``: a string or a list; only rows whose ``marker`` array
      overlaps it are kept.
    - ``limit`` / ``offset``: added as LIMIT / OFFSET when truthy.
    - ``cache_enabled`` / ``cache``: when enabled, the result is stored
      in ``query.cache`` under the key ``(invert, docids)`` and reused.

    ``invert`` wraps the text match in NOT.  ``docids``, when not None,
    restricts the search to those docids; an empty ``docids`` yields an
    empty result without querying the database.

    The rank of a row is its ``coefficient`` multiplied by ``ts_rank_cd``,
    or by 1 when more than ``self.max_ranked`` rows pass the filter.

    Returns a ``self.family.IF.BTree()`` filled with the (docid, rank)
    rows returned by the query.
    """
    kw = {
        "table": self.table,
        "weight": "",
        "not": "",
        "filter": "",
        "limit": "",
        "offset": "",
        "max_ranked": self.max_ranked,
    }
    if invert:
        kw["not"] = "NOT"

    cache = None

    if IWeightedQuery.providedBy(query):
        if getattr(query, "cache_enabled", False):
            cache_key = (invert, docids)
            cache = getattr(query, "cache", None)
            if cache is None:
                query.cache = cache = {}
            result = cache.get(cache_key)
            if result is not None:
                # Cache hit.
                return result

        kw["weight"] = "'{%s, %s, %s, %s}', "
        text = getattr(query, "text", None)
        if text is None:
            text = "%s" % query  # Use __str__()
        cq = convert_query(text)
        # Order matters: parameters are positional and must follow the
        # order of the %s placeholders in the final statement.
        params = [
            self.ts_config,
            cq,
            getattr(query, "D", 0.1),
            getattr(query, "C", 0.2),
            getattr(query, "B", 0.4),
            getattr(query, "A", 1.0),
            self.ts_config,
            cq,
        ]

        marker = getattr(query, "marker", None)
        if marker:
            # Match any marker value.
            if isinstance(marker, basestring):
                marker = [marker]
            kw["filter"] += " AND marker && %s::character varying[]"
            # The filter placeholder sits right after the first
            # to_tsquery(%s, %s), hence position 2.
            params.insert(2, marker)

        limit = getattr(query, "limit", None)
        if limit:
            kw["limit"] = "LIMIT %s"
            params.append(limit)

        offset = getattr(query, "offset", None)
        if offset:
            kw["offset"] = "OFFSET %s"
            params.append(offset)
    else:
        cq = convert_query(query)
        params = (self.ts_config, cq, self.ts_config, cq)

    if docids is not None:
        # NOTE(review): docids are formatted straight into the SQL text;
        # presumably they are always integers -- confirm against callers.
        docidstr = ",".join(str(docid) for docid in docids)
        if not docidstr:
            # "docid IN ()" is a syntax error in PostgreSQL.  Restricting
            # the search to no documents can only match nothing, so
            # return an empty result without running the query.
            result = self.family.IF.BTree()
            if cache is not None:
                cache[cache_key] = result
            return result
        kw["filter"] += " AND docid IN (%s)" % docidstr

    stmt = (
        " WITH _filtered AS ("
        " SELECT docid, coefficient, text_vector"
        " FROM %(table)s"
        " WHERE %(not)s(text_vector @@ to_tsquery(%%s, %%s))"
        " %(filter)s),"
        " _counter AS (SELECT count(1) AS n FROM _filtered),"
        " _ranked AS ("
        " SELECT docid, coefficient * ("
        " CASE WHEN n <= %(max_ranked)s"
        " THEN ts_rank_cd(%(weight)stext_vector, to_tsquery(%%s, %%s))"
        " ELSE 1 END) AS rank"
        " FROM _filtered, _counter)"
        " SELECT docid, rank FROM _ranked"
        " ORDER BY rank DESC"
        " %(limit)s"
        " %(offset)s "
    ) % kw

    cursor = self.cursor
    cursor.execute(stmt, tuple(params))
    result = self.family.IF.BTree()
    result.update(cursor.fetchall())

    if cache is not None:
        cache[cache_key] = result
    return result