Esempio n. 1
0
    def get_contextual_summaries(self, raw_texts, query, **options):
        """Get a contextual summary for each search result.

        Produces a list of the same length as the raw_texts sequence.
        For each raw_text, returns snippets of text with the words in
        the query highlighted using HTML tags. Calls the
        PostgreSQL function 'ts_headline'. Options are turned into an
        options string passed to 'ts_headline'. See the documentation
        for PostgreSQL for more information on the options that can be
        passed to 'ts_headline'.

        Summaries fetched as byte strings are decoded with
        ``self.connection.encoding``. Values that are not byte strings
        (for example already-decoded text) are returned unchanged
        instead of being decoded a second time.

        An empty ``raw_texts`` returns ``[]`` without touching the
        database.
        """
        if not raw_texts:
            return []
        s = convert_query(query)
        # Rendered as "key=value,key=value" and bound as a single
        # parameter, the fourth argument of ts_headline.
        options = ','.join(['%s=%s' % (k, v) for k, v in options.items()])

        # One "(%s)" row per text; the texts themselves are bound as
        # parameters rather than interpolated into the SQL.
        value_clauses = ', '.join(('(%s)',) * len(raw_texts))
        stmt = """
        SELECT ts_headline(%%s, doc.text, to_tsquery(%%s, %%s), %%s)
        FROM (VALUES %s) AS doc (text)
        """ % value_clauses
        cursor = self.cursor
        # Parameter order follows placeholder order: the four fixed
        # arguments in the SELECT list first, then one value per row.
        params = (self.ts_config, self.ts_config, s, options)
        cursor.execute(stmt, params + tuple(raw_texts))
        encoding = self.connection.encoding
        return [
            summary.decode(encoding) if isinstance(summary, bytes) else summary
            for (summary,) in cursor.fetchall()]
Esempio n. 2
0
    def get_contextual_summaries(self, raw_texts, query, **options):
        """Get a contextual summary for each search result.

        Produces a list of the same length as the raw_texts sequence.
        For each raw_text, returns snippets of text with the words in
        the query highlighted using HTML tags. Calls the
        PostgreSQL function 'ts_headline'. Options are turned into an
        options string passed to 'ts_headline'. See the documentation
        for PostgreSQL for more information on the options that can be
        passed to 'ts_headline'.

        Byte-string summaries are decoded using
        ``self.connection.encoding``; any other value coming back from
        the cursor is passed through untouched, so text that is already
        decoded is not decoded again.

        When ``raw_texts`` is empty, ``[]`` is returned and no
        statement is executed.
        """
        if not raw_texts:
            return []
        s = convert_query(query)
        # "key=value" pairs joined by commas; sent as one bound parameter.
        options = ",".join(["%s=%s" % (k, v) for k, v in options.items()])

        # Build a VALUES list with one single-column placeholder row per text.
        value_clauses = ", ".join(("(%s)",) * len(raw_texts))
        stmt = (
            """
        SELECT ts_headline(%%s, doc.text, to_tsquery(%%s, %%s), %%s)
        FROM (VALUES %s) AS doc (text)
        """
            % value_clauses
        )
        cursor = self.cursor
        # Fixed ts_headline arguments come first, then the row values,
        # matching the order of the placeholders in the statement.
        params = (self.ts_config, self.ts_config, s, options)
        cursor.execute(stmt, params + tuple(raw_texts))

        encoding = self.connection.encoding
        summaries = []
        for (summary,) in cursor.fetchall():
            if isinstance(summary, bytes):
                summary = summary.decode(encoding)
            summaries.append(summary)
        return summaries
 def _call(self, query):
     """Return ``query`` converted by ``queryconvert.convert_query``."""
     # The import is performed when the method is called, not at module load.
     from repoze.pgtextindex.queryconvert import convert_query as _convert
     converted = _convert(query)
     return converted
Esempio n. 4
0
    def _run_query(self, query, invert=False, docids=None):
        """Run a full-text query and return the matching docids with ranks.

        ``query`` is either an object providing ``IWeightedQuery`` or a
        value that is handed directly to ``convert_query``. For a
        weighted query the following optional attributes are read:
        ``text`` (falls back to ``str(query)``), the weights ``D``,
        ``C``, ``B`` and ``A`` (defaults 0.1, 0.2, 0.4, 1.0),
        ``marker``, ``limit``, ``offset``, and ``cache_enabled`` /
        ``cache`` for per-query result caching.

        ``invert`` negates the text match. ``docids``, when not None,
        restricts the result to those docids; an empty ``docids``
        yields an empty result.

        Returns a ``self.family.IF.BTree()`` filled with the
        ``(docid, rank)`` rows produced by the statement.
        """
        kw = {
            'table': self.table,
            'weight': '',
            'not': '',
            'filter': '',
            'limit': '',
            'offset': '',
            'max_ranked': self.max_ranked,
        }

        if invert:
            kw['not'] = 'NOT'

        cache = None

        if IWeightedQuery.providedBy(query):

            if getattr(query, 'cache_enabled', False):
                # The key is used in a dict, so docids has to be hashable
                # whenever caching is enabled.
                cache_key = (invert, docids)
                cache = getattr(query, 'cache', None)
                if cache is None:
                    query.cache = cache = {}
                result = cache.get(cache_key)
                if result is not None:
                    # Cache hit.
                    return result

            # The %s markers inside this fragment survive the dict
            # formatting below and are filled by cursor.execute() with
            # the D, C, B, A weights.
            kw['weight'] = "'{%s, %s, %s, %s}', "
            text = getattr(query, 'text', None)
            if text is None:
                text = '%s' % query  # Use __str__()
            cq = convert_query(text)
            # Parameters are listed in placeholder order: the WHERE
            # clause's to_tsquery, the four weights, then the
            # to_tsquery inside ts_rank_cd.
            params = [
                self.ts_config,
                cq,
                getattr(query, 'D', 0.1),
                getattr(query, 'C', 0.2),
                getattr(query, 'B', 0.4),
                getattr(query, 'A', 1.0),
                self.ts_config,
                cq,
            ]
            marker = getattr(query, 'marker', None)
            if marker:
                # Match any marker value.
                if isinstance(marker, basestring):
                    marker = [marker]
                kw['filter'] += " AND marker && %s::character varying[]"
                # The filter sits right after the first to_tsquery in the
                # WHERE clause, hence position 2 in the parameter list.
                params.insert(2, marker)
            limit = getattr(query, 'limit', None)
            if limit:
                kw['limit'] = "LIMIT %s"
                params.append(limit)
            offset = getattr(query, 'offset', None)
            if offset:
                kw['offset'] = "OFFSET %s"
                params.append(offset)
        else:
            cq = convert_query(query)
            params = (self.ts_config, cq, self.ts_config, cq)

        if docids is not None:
            docidstr = ','.join(str(docid) for docid in docids)
            if docidstr:
                kw['filter'] += ' AND docid IN (%s)' % docidstr
            else:
                # "docid IN ()" is a syntax error in PostgreSQL. An empty
                # docid set cannot match anything, so filter out every
                # row with a constant false condition instead.
                kw['filter'] += ' AND false'

        # When more than max_ranked rows pass the filter, ts_rank_cd is
        # skipped and every row is ranked by its coefficient alone.
        stmt = """
        WITH _filtered AS (
            SELECT docid, coefficient, text_vector
            FROM %(table)s
            WHERE %(not)s(text_vector @@ to_tsquery(%%s, %%s)) %(filter)s),
        _counter AS (SELECT count(1) AS n FROM _filtered),
        _ranked AS (
            SELECT docid, coefficient * (
                CASE WHEN n <= %(max_ranked)s THEN
                    ts_rank_cd(%(weight)stext_vector, to_tsquery(%%s, %%s))
                ELSE 1 END) AS rank
            FROM _filtered, _counter)
        SELECT docid, rank
        FROM _ranked
        ORDER BY rank DESC
        %(limit)s
        %(offset)s
        """ % kw

        cursor = self.cursor
        cursor.execute(stmt, tuple(params))
        result = self.family.IF.BTree()
        result.update(cursor.fetchall())

        if cache is not None:
            cache[cache_key] = result

        return result
Esempio n. 5
0
    def _run_query(self, query, invert=False, docids=None):
        """Execute a text search and return a BTree mapping docid to rank.

        ``query`` may provide ``IWeightedQuery``; in that case its
        optional attributes ``text``, ``D``/``C``/``B``/``A`` (weights,
        defaulting to 0.1/0.2/0.4/1.0), ``marker``, ``limit``,
        ``offset`` and ``cache_enabled``/``cache`` are honoured.
        Otherwise ``query`` is passed straight to ``convert_query``.

        ``invert`` wraps the text match in ``NOT``. ``docids``, if
        given, limits the result to those docids; an empty ``docids``
        produces an empty result.

        The return value is a ``self.family.IF.BTree()`` updated with
        the ``(docid, rank)`` rows returned by the statement.
        """
        kw = {
            "table": self.table,
            "weight": "",
            "not": "",
            "filter": "",
            "limit": "",
            "offset": "",
            "max_ranked": self.max_ranked,
        }

        if invert:
            kw["not"] = "NOT"

        cache = None

        if IWeightedQuery.providedBy(query):

            if getattr(query, "cache_enabled", False):
                # docids is part of a dict key here, so it must be
                # hashable when the query enables caching.
                cache_key = (invert, docids)
                cache = getattr(query, "cache", None)
                if cache is None:
                    query.cache = cache = {}
                result = cache.get(cache_key)
                if result is not None:
                    # Cache hit.
                    return result

            # These %s markers are left intact by the "% kw" formatting
            # and later receive the D, C, B, A weights from execute().
            kw["weight"] = "'{%s, %s, %s, %s}', "
            text = getattr(query, "text", None)
            if text is None:
                text = "%s" % query  # Use __str__()
            cq = convert_query(text)
            # Same order as the placeholders in the statement: WHERE
            # to_tsquery, weights, ts_rank_cd to_tsquery.
            params = [
                self.ts_config,
                cq,
                getattr(query, "D", 0.1),
                getattr(query, "C", 0.2),
                getattr(query, "B", 0.4),
                getattr(query, "A", 1.0),
                self.ts_config,
                cq,
            ]
            marker = getattr(query, "marker", None)
            if marker:
                # Match any marker value.
                if isinstance(marker, basestring):
                    marker = [marker]
                kw["filter"] += " AND marker && %s::character varying[]"
                # The marker placeholder follows the first to_tsquery
                # pair, so its value belongs at index 2.
                params.insert(2, marker)
            limit = getattr(query, "limit", None)
            if limit:
                kw["limit"] = "LIMIT %s"
                params.append(limit)
            offset = getattr(query, "offset", None)
            if offset:
                kw["offset"] = "OFFSET %s"
                params.append(offset)
        else:
            cq = convert_query(query)
            params = (self.ts_config, cq, self.ts_config, cq)

        if docids is not None:
            docidstr = ",".join(str(docid) for docid in docids)
            if docidstr:
                kw["filter"] += " AND docid IN (%s)" % docidstr
            else:
                # An empty list would render as "docid IN ()", which
                # PostgreSQL rejects. Nothing can match an empty docid
                # set, so use a constant false condition.
                kw["filter"] += " AND false"

        # Ranking with ts_rank_cd only happens while the filtered row
        # count is at most max_ranked; beyond that the rank is just the
        # row's coefficient.
        stmt = (
            """
        WITH _filtered AS (
            SELECT docid, coefficient, text_vector
            FROM %(table)s
            WHERE %(not)s(text_vector @@ to_tsquery(%%s, %%s)) %(filter)s),
        _counter AS (SELECT count(1) AS n FROM _filtered),
        _ranked AS (
            SELECT docid, coefficient * (
                CASE WHEN n <= %(max_ranked)s THEN
                    ts_rank_cd(%(weight)stext_vector, to_tsquery(%%s, %%s))
                ELSE 1 END) AS rank
            FROM _filtered, _counter)
        SELECT docid, rank
        FROM _ranked
        ORDER BY rank DESC
        %(limit)s
        %(offset)s
        """
            % kw
        )

        cursor = self.cursor
        cursor.execute(stmt, tuple(params))
        result = self.family.IF.BTree()
        result.update(cursor.fetchall())

        if cache is not None:
            cache[cache_key] = result

        return result
    def _call(self, query):
        """Return ``query`` converted by ``queryconvert.convert_query``."""
        # The import is performed when the method is called, not at module load.
        from repoze.pgtextindex.queryconvert import convert_query as _convert

        converted = _convert(query)
        return converted