Example #1
0
def fetch_content(query_text, column, text_column, page, sources=None):
    session = g.session
    tsq = build_tsquery(query_text)
    search = None
    if column == db.WebPages.title:
        query = session                                                                                         \
          .query(db.WebPages, func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq))) \
          .filter(                                                                                        \
           func.to_tsvector("english", column).match(tsq, postgresql_regconfig='english')              \
           )                                                                                           \
          .order_by(func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq)).desc())

    elif column == db.WebPages.tsv_content:
        query = session                                                                                         \
          .query(db.WebPages, func.ts_rank_cd(column, func.to_tsquery(tsq)))                              \
          .filter( column.match(tsq) )

        if "'" in query_text or '"' in query_text:
            search = query_text.replace("!", " ").replace("?", " ").replace(
                "'", " ").replace('"', " ").replace(',',
                                                    " ").replace('.',
                                                                 " ").strip()
            while "  " in search:
                search = search.replace("  ", " ")
            search = search.strip()
            search = '%{}%'.format(search.lower())
            query = query.filter(func.lower(text_column).like(search))

        query = query.order_by(
            func.ts_rank_cd(column, func.to_tsquery(tsq)).desc())

        if sources:
            query = query.filter(db.WebPages.netloc.in_(sources))

    else:
        raise ValueError("Wat?")

    print(str(query.statement.compile(dialect=postgresql.dialect())))
    print("param: '%s', '%s', '%s'" % (tsq, sources, search))

    try:
        entries = paginate(query, page, per_page=50)

    except sqlalchemy.exc.ProgrammingError:
        traceback.print_exc()
        print("ProgrammingError - Rolling back!")
        g.session.rollback()
        raise
    except sqlalchemy.exc.InternalError:
        traceback.print_exc()
        print("InternalError - Rolling back!")
        g.session.rollback()
        raise
    except sqlalchemy.exc.OperationalError:
        traceback.print_exc()
        print("InternalError - Rolling back!")
        g.session.rollback()
        raise

    return entries
def fetch_content(query_text, column, text_column, page, sources=None):
	session = g.session
	tsq = build_tsquery(query_text)
	search = None
	if column == db.WebPages.title:
		query = session                                                                                         \
				.query(db.WebPages, func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq))) \
				.filter(                                                                                        \
					func.to_tsvector("english", column).match(tsq, postgresql_regconfig='english')              \
					)                                                                                           \
				.order_by(func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq)).desc())

	elif column == db.WebPages.tsv_content:
		query = session                                                                                         \
				.query(db.WebPages, func.ts_rank_cd(column, func.to_tsquery(tsq)))                              \
				.filter( column.match(tsq) )

		if "'" in query_text or '"' in query_text:
			search = query_text.replace("!", " ").replace("?", " ").replace("'", " ").replace('"', " ").replace(',', " ").replace('.', " ").strip()
			while "  " in search:
				search = search.replace("  ", " ")
			search = search.strip()
			search = '%{}%'.format(search.lower())
			query = query.filter( func.lower(text_column).like(search) )

		query = query.order_by(func.ts_rank_cd(column, func.to_tsquery(tsq)).desc())

		if sources:
			query = query.filter(db.WebPages.netloc.in_(sources))

	else:
		raise ValueError("Wat?")

	print(str(query.statement.compile(dialect=postgresql.dialect())))
	print("param: '%s', '%s', '%s'" % (tsq, sources, search))

	try:
		entries = paginate(query, page, per_page=50)

	except sqlalchemy.exc.ProgrammingError:
		traceback.print_exc()
		print("ProgrammingError - Rolling back!")
		g.session.rollback()
		raise
	except sqlalchemy.exc.InternalError:
		traceback.print_exc()
		print("InternalError - Rolling back!")
		g.session.rollback()
		raise
	except sqlalchemy.exc.OperationalError:
		traceback.print_exc()
		print("InternalError - Rolling back!")
		g.session.rollback()
		raise

	return entries
Example #3
0
def search():
    with ScopedSession() as session:
        q = flask.request.args.get('query')

        query = session.query(Comment, User, Post, func.ts_headline('russian', Comment.text, func.plainto_tsquery('russian', q), 'HighlightAll=true').label('highlighted')).filter(Comment.user_id == User.user_id).filter(Comment.post_id == Post.post_id)

        query = query.filter(func.to_tsvector('russian', Comment.text).op('@@')(func.plainto_tsquery('russian', q)))

        comments = []

        for comment, user, post, highlighted in query.order_by(func.ts_rank_cd(func.to_tsvector('russian', Comment.text), func.plainto_tsquery('russian', q)).desc(), Comment.posted.desc()).limit(100).all():
            comments.append({
                "id": comment.comment_id,
                "parent_id": comment.parent_id,
                "post_id": comment.post_id,
                "text": normalize_text(highlighted),
                "posted": comment.posted.strftime(DATE_FORMAT),
                "user_id": user.user_id,
                "user_name": user.name,
                "user_avatar": user.avatar_hash,
                "comment_list_id": post.comment_list_id
            })

    resp = app.make_response(json.dumps(comments, ensure_ascii=False))
    resp.mimetype = 'application/json; charset=utf-8'
    resp.headers['Access-Control-Allow-Origin'] = '*'

    return resp
def fetch_content(query_text, column, page, sources=None):
	session = db.get_session()
	tsq = build_tsquery(query_text)

	if column == db.WebPages.title:
		query = session                                                                                         \
				.query(db.WebPages, func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq))) \
				.filter(                                                                                        \
					func.to_tsvector("english", column).match(tsq, postgresql_regconfig='english')              \
					)                                                                                           \
				.order_by(func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq)).desc())

	elif column == db.WebPages.tsv_content:
		query = session                                                                                         \
				.query(db.WebPages, func.ts_rank_cd(column, func.to_tsquery(tsq)))                              \
				.filter( column.match(tsq) )                                                                    \
				.order_by(func.ts_rank_cd(column, func.to_tsquery(tsq)).desc())

		if sources:
			query = query.filter(db.WebPages.netloc.in_(sources))

	else:
		raise ValueError("Wat?")

	# print(str(query.statement.compile(dialect=postgresql.dialect())))
	# print("param: '%s'" % tsq)

	try:
		entries = paginate(query, page, per_page=50)

	except sqlalchemy.exc.ProgrammingError:
		traceback.print_exc()
		print("ProgrammingError - Rolling back!")
		db.get_session().rollback()
		raise
	except sqlalchemy.exc.InternalError:
		traceback.print_exc()
		print("InternalError - Rolling back!")
		db.get_session().rollback()
		raise
	except sqlalchemy.exc.OperationalError:
		traceback.print_exc()
		print("InternalError - Rolling back!")
		db.get_session().rollback()
		raise

	return entries
Example #5
0
def search():
    search_query = request.args.get('search')
    search_query = search_query.replace(" ", "&")
    search_query = search_query.replace("+", "&")
    shows = db.session.query(
        Show.id,
        Show.title,
        Show.description,
        func.ts_rank_cd(Show.tsv_searchable_text, func.plainto_tsquery(search_query, postgresql_regconfig='english'), 32).label('rank')
    ).filter(
        Show.tsv_searchable_text.match(search_query, postgresql_regconfig='english')
    ).order_by(db.text('rank DESC')).limit(10)

    data = []
    for show in shows:
        data.append({
            'id': show.id,
            'title': show.title,
            'description': show.description,
            'rank': show.rank
        })
    return jsonify(data)
Example #6
0
    def fetch_all_by_tsquery(self, tsquery, *expr, **kwargs):
        """
        Returns all structures whose abstract matches the keywords given in the
        keyword string.

        Parameters
        ----------
        tsquery : str
            A string containing the lexme that will be used for searching. The
            string will be automatically casted into a tsquery through the to_tsquery()
            function.
        weights : list, default=[0.1, 0.2, 0.4, 1.0]
            The weights specify how heavily to weigh each category of word, in the
            order [D-weight, C-weight, B-weight, A-weight].
        normalization : int, default=32
            Option that specifies whether and how a document's length should impact
            its rank. The integer option controls several behaviors, so it is a
            bit mask: you can specify one or more behaviors using | (for example, 2|4).
            -  0 ignores the document length
            -  1 divides the rank by 1 + the logarithm of the document length
            -  2 divides the rank by the document length
            -  4 divides the rank by the mean harmonic distance between extents
                 (this is implemented only by ts_rank_cd)
            -  8 divides the rank by the number of unique words in document
            - 16 divides the rank by 1 + the logarithm of the number of unique
                 words in document
            - 32 divides the rank by itself + 1
        min_words : int, default=10
        max_words : int, default=25

        Returns
        -------
        resultset : list
            List containing tuples in the form (Structure, snippet, rank).
            The snippet is the part of the abstract that matches the query.
            Ordered by rank.

        Notes
        -----
        - http://www.postgresql.org/docs/current/interactive/datatype-textsearch.html

        Examples
        --------
        >>> StructureAdaptor().fetch_all_by_tsquery('imatinib')
        [(<Structure(3FW1)>,  u'[Imatinib] represents the first in a class', 0.54545500000000002),
        (<Structure(2GQG)>, u'[imatinib], an inhibitor of BCR-ABL. Although', 0.375),
        (<Structure(3CS9)>,  u'[imatinib] against [imatinib]-resistant Bcr-Abl. Consistent', 0.33333299999999999),
        (<Structure(2HYY)>, u'[imatinib] as first-line therapy', 0.230769),
        (<Structure(3G0F)>, u'[imatinib] mesylate and sunitinib malate', 0.230769),
        (<Structure(3G0E)>, u'[imatinib] mesylate and sunitinib malate', 0.230769),
        (<Structure(1T46)>, u'[Imatinib] or Gleevec) demonstrates that', 0.090909100000000007)]
        """
        weights = kwargs.get('weights', [0.1, 0.2, 0.4, 1.0])
        normalization = kwargs.get('normalization', 32)
        min_words, max_words = kwargs.get('min_words',
                                          10), kwargs.get('max_words', 25)

        if kwargs.get('plain'):
            tsquery = func.plainto_tsquery('english', tsquery)
        else:
            tsquery = func.to_tsquery('english', tsquery)

        # basic query construct
        query = self.query.join('XRefs').filter(and_(*expr))
        query = query.join(
            citations,
            and_(citations.c.pubmed_id == cast(XRef.xref, Integer),
                 XRef.source == 'PubMed'))

        # GIST index that is used for searching
        index = func.to_tsvector('english',
                                 citations.c.abstract).op('@@')(tsquery)
        query = query.filter(index).order_by("rank DESC")

        # calculated rank of the hit
        rank = func.ts_rank_cd(
            weights, func.to_tsvector('english', citations.c.abstract),
            tsquery, normalization).label('rank')
        query = query.add_column(rank)

        if kwargs.get('snippet'):

            # configuration of the preview snippet
            headline_conf = 'StartSel=[, StopSel=], MaxWords={0}, MinWords={1}'.format(
                max_words, min_words)

            # function to create a preview snippet of the matched area
            snippet = func.ts_headline('english', citations.c.abstract,
                                       tsquery, headline_conf).label('snippet')

            # add snippet column to query
            query = query.add_column(snippet)

        return query