Python to_tsqueryの例、sqlalchemy.sql.expression.func.to_tsquery Pythonの例

コード例 #1

0

ファイルを表示

def fetch_content(query_text, column, text_column, page, sources=None):
    session = g.session
    tsq = build_tsquery(query_text)
    search = None
    if column == db.WebPages.title:
        query = session                                                                                         \
          .query(db.WebPages, func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq))) \
          .filter(                                                                                        \
           func.to_tsvector("english", column).match(tsq, postgresql_regconfig='english')              \
           )                                                                                           \
          .order_by(func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq)).desc())

    elif column == db.WebPages.tsv_content:
        query = session                                                                                         \
          .query(db.WebPages, func.ts_rank_cd(column, func.to_tsquery(tsq)))                              \
          .filter( column.match(tsq) )

        if "'" in query_text or '"' in query_text:
            search = query_text.replace("!", " ").replace("?", " ").replace(
                "'", " ").replace('"', " ").replace(',',
                                                    " ").replace('.',
                                                                 " ").strip()
            while "  " in search:
                search = search.replace("  ", " ")
            search = search.strip()
            search = '%{}%'.format(search.lower())
            query = query.filter(func.lower(text_column).like(search))

        query = query.order_by(
            func.ts_rank_cd(column, func.to_tsquery(tsq)).desc())

        if sources:
            query = query.filter(db.WebPages.netloc.in_(sources))

    else:
        raise ValueError("Wat?")

    print(str(query.statement.compile(dialect=postgresql.dialect())))
    print("param: '%s', '%s', '%s'" % (tsq, sources, search))

    try:
        entries = paginate(query, page, per_page=50)

    except sqlalchemy.exc.ProgrammingError:
        traceback.print_exc()
        print("ProgrammingError - Rolling back!")
        g.session.rollback()
        raise
    except sqlalchemy.exc.InternalError:
        traceback.print_exc()
        print("InternalError - Rolling back!")
        g.session.rollback()
        raise
    except sqlalchemy.exc.OperationalError:
        traceback.print_exc()
        print("InternalError - Rolling back!")
        g.session.rollback()
        raise

    return entries

コード例 #2

0

ファイルを表示

ファイル: search_views.py プロジェクト: MyAnimeDays/ReadableWebProxy

def fetch_content(query_text, column, text_column, page, sources=None):
	session = g.session
	tsq = build_tsquery(query_text)
	search = None
	if column == db.WebPages.title:
		query = session                                                                                         \
				.query(db.WebPages, func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq))) \
				.filter(                                                                                        \
					func.to_tsvector("english", column).match(tsq, postgresql_regconfig='english')              \
					)                                                                                           \
				.order_by(func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq)).desc())

	elif column == db.WebPages.tsv_content:
		query = session                                                                                         \
				.query(db.WebPages, func.ts_rank_cd(column, func.to_tsquery(tsq)))                              \
				.filter( column.match(tsq) )

		if "'" in query_text or '"' in query_text:
			search = query_text.replace("!", " ").replace("?", " ").replace("'", " ").replace('"', " ").replace(',', " ").replace('.', " ").strip()
			while "  " in search:
				search = search.replace("  ", " ")
			search = search.strip()
			search = '%{}%'.format(search.lower())
			query = query.filter( func.lower(text_column).like(search) )

		query = query.order_by(func.ts_rank_cd(column, func.to_tsquery(tsq)).desc())

		if sources:
			query = query.filter(db.WebPages.netloc.in_(sources))

	else:
		raise ValueError("Wat?")

	print(str(query.statement.compile(dialect=postgresql.dialect())))
	print("param: '%s', '%s', '%s'" % (tsq, sources, search))

	try:
		entries = paginate(query, page, per_page=50)

	except sqlalchemy.exc.ProgrammingError:
		traceback.print_exc()
		print("ProgrammingError - Rolling back!")
		g.session.rollback()
		raise
	except sqlalchemy.exc.InternalError:
		traceback.print_exc()
		print("InternalError - Rolling back!")
		g.session.rollback()
		raise
	except sqlalchemy.exc.OperationalError:
		traceback.print_exc()
		print("InternalError - Rolling back!")
		g.session.rollback()
		raise

	return entries

コード例 #3

0

ファイルを表示

ファイル: search_views.py プロジェクト: GodOfConquest/ReadableWebProxy

def fetch_content(query_text, column, page, sources=None):
	session = db.get_session()
	tsq = build_tsquery(query_text)

	if column == db.WebPages.title:
		query = session                                                                                         \
				.query(db.WebPages, func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq))) \
				.filter(                                                                                        \
					func.to_tsvector("english", column).match(tsq, postgresql_regconfig='english')              \
					)                                                                                           \
				.order_by(func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq)).desc())

	elif column == db.WebPages.tsv_content:
		query = session                                                                                         \
				.query(db.WebPages, func.ts_rank_cd(column, func.to_tsquery(tsq)))                              \
				.filter( column.match(tsq) )                                                                    \
				.order_by(func.ts_rank_cd(column, func.to_tsquery(tsq)).desc())

		if sources:
			query = query.filter(db.WebPages.netloc.in_(sources))

	else:
		raise ValueError("Wat?")

	# print(str(query.statement.compile(dialect=postgresql.dialect())))
	# print("param: '%s'" % tsq)

	try:
		entries = paginate(query, page, per_page=50)

	except sqlalchemy.exc.ProgrammingError:
		traceback.print_exc()
		print("ProgrammingError - Rolling back!")
		db.get_session().rollback()
		raise
	except sqlalchemy.exc.InternalError:
		traceback.print_exc()
		print("InternalError - Rolling back!")
		db.get_session().rollback()
		raise
	except sqlalchemy.exc.OperationalError:
		traceback.print_exc()
		print("InternalError - Rolling back!")
		db.get_session().rollback()
		raise

	return entries

コード例 #4

0

ファイルを表示

ファイル: structureadaptor.py プロジェクト: tlb-lab/credoscript

    def fetch_all_by_tsquery(self, tsquery, *expr, **kwargs):
        """
        Returns all structures whose abstract matches the keywords given in the
        keyword string.

        Parameters
        ----------
        tsquery : str
            A string containing the lexme that will be used for searching. The
            string will be automatically casted into a tsquery through the to_tsquery()
            function.
        weights : list, default=[0.1, 0.2, 0.4, 1.0]
            The weights specify how heavily to weigh each category of word, in the
            order [D-weight, C-weight, B-weight, A-weight].
        normalization : int, default=32
            Option that specifies whether and how a document's length should impact
            its rank. The integer option controls several behaviors, so it is a
            bit mask: you can specify one or more behaviors using | (for example, 2|4).
            -  0 ignores the document length
            -  1 divides the rank by 1 + the logarithm of the document length
            -  2 divides the rank by the document length
            -  4 divides the rank by the mean harmonic distance between extents
                 (this is implemented only by ts_rank_cd)
            -  8 divides the rank by the number of unique words in document
            - 16 divides the rank by 1 + the logarithm of the number of unique
                 words in document
            - 32 divides the rank by itself + 1
        min_words : int, default=10
        max_words : int, default=25

        Returns
        -------
        resultset : list
            List containing tuples in the form (Structure, snippet, rank).
            The snippet is the part of the abstract that matches the query.
            Ordered by rank.

        Notes
        -----
        - http://www.postgresql.org/docs/current/interactive/datatype-textsearch.html

        Examples
        --------
        >>> StructureAdaptor().fetch_all_by_tsquery('imatinib')
        [(<Structure(3FW1)>,  u'[Imatinib] represents the first in a class', 0.54545500000000002),
        (<Structure(2GQG)>, u'[imatinib], an inhibitor of BCR-ABL. Although', 0.375),
        (<Structure(3CS9)>,  u'[imatinib] against [imatinib]-resistant Bcr-Abl. Consistent', 0.33333299999999999),
        (<Structure(2HYY)>, u'[imatinib] as first-line therapy', 0.230769),
        (<Structure(3G0F)>, u'[imatinib] mesylate and sunitinib malate', 0.230769),
        (<Structure(3G0E)>, u'[imatinib] mesylate and sunitinib malate', 0.230769),
        (<Structure(1T46)>, u'[Imatinib] or Gleevec) demonstrates that', 0.090909100000000007)]
        """
        weights = kwargs.get('weights', [0.1, 0.2, 0.4, 1.0])
        normalization = kwargs.get('normalization', 32)
        min_words, max_words = kwargs.get('min_words',
                                          10), kwargs.get('max_words', 25)

        if kwargs.get('plain'):
            tsquery = func.plainto_tsquery('english', tsquery)
        else:
            tsquery = func.to_tsquery('english', tsquery)

        # basic query construct
        query = self.query.join('XRefs').filter(and_(*expr))
        query = query.join(
            citations,
            and_(citations.c.pubmed_id == cast(XRef.xref, Integer),
                 XRef.source == 'PubMed'))

        # GIST index that is used for searching
        index = func.to_tsvector('english',
                                 citations.c.abstract).op('@@')(tsquery)
        query = query.filter(index).order_by("rank DESC")

        # calculated rank of the hit
        rank = func.ts_rank_cd(
            weights, func.to_tsvector('english', citations.c.abstract),
            tsquery, normalization).label('rank')
        query = query.add_column(rank)

        if kwargs.get('snippet'):

            # configuration of the preview snippet
            headline_conf = 'StartSel=[, StopSel=], MaxWords={0}, MinWords={1}'.format(
                max_words, min_words)

            # function to create a preview snippet of the matched area
            snippet = func.ts_headline('english', citations.c.abstract,
                                       tsquery, headline_conf).label('snippet')

            # add snippet column to query
            query = query.add_column(snippet)

        return query