def fetch_content(query_text, column, text_column, page, sources=None): session = g.session tsq = build_tsquery(query_text) search = None if column == db.WebPages.title: query = session \ .query(db.WebPages, func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq))) \ .filter( \ func.to_tsvector("english", column).match(tsq, postgresql_regconfig='english') \ ) \ .order_by(func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq)).desc()) elif column == db.WebPages.tsv_content: query = session \ .query(db.WebPages, func.ts_rank_cd(column, func.to_tsquery(tsq))) \ .filter( column.match(tsq) ) if "'" in query_text or '"' in query_text: search = query_text.replace("!", " ").replace("?", " ").replace( "'", " ").replace('"', " ").replace(',', " ").replace('.', " ").strip() while " " in search: search = search.replace(" ", " ") search = search.strip() search = '%{}%'.format(search.lower()) query = query.filter(func.lower(text_column).like(search)) query = query.order_by( func.ts_rank_cd(column, func.to_tsquery(tsq)).desc()) if sources: query = query.filter(db.WebPages.netloc.in_(sources)) else: raise ValueError("Wat?") print(str(query.statement.compile(dialect=postgresql.dialect()))) print("param: '%s', '%s', '%s'" % (tsq, sources, search)) try: entries = paginate(query, page, per_page=50) except sqlalchemy.exc.ProgrammingError: traceback.print_exc() print("ProgrammingError - Rolling back!") g.session.rollback() raise except sqlalchemy.exc.InternalError: traceback.print_exc() print("InternalError - Rolling back!") g.session.rollback() raise except sqlalchemy.exc.OperationalError: traceback.print_exc() print("InternalError - Rolling back!") g.session.rollback() raise return entries
def fetch_content(query_text, column, text_column, page, sources=None): session = g.session tsq = build_tsquery(query_text) search = None if column == db.WebPages.title: query = session \ .query(db.WebPages, func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq))) \ .filter( \ func.to_tsvector("english", column).match(tsq, postgresql_regconfig='english') \ ) \ .order_by(func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq)).desc()) elif column == db.WebPages.tsv_content: query = session \ .query(db.WebPages, func.ts_rank_cd(column, func.to_tsquery(tsq))) \ .filter( column.match(tsq) ) if "'" in query_text or '"' in query_text: search = query_text.replace("!", " ").replace("?", " ").replace("'", " ").replace('"', " ").replace(',', " ").replace('.', " ").strip() while " " in search: search = search.replace(" ", " ") search = search.strip() search = '%{}%'.format(search.lower()) query = query.filter( func.lower(text_column).like(search) ) query = query.order_by(func.ts_rank_cd(column, func.to_tsquery(tsq)).desc()) if sources: query = query.filter(db.WebPages.netloc.in_(sources)) else: raise ValueError("Wat?") print(str(query.statement.compile(dialect=postgresql.dialect()))) print("param: '%s', '%s', '%s'" % (tsq, sources, search)) try: entries = paginate(query, page, per_page=50) except sqlalchemy.exc.ProgrammingError: traceback.print_exc() print("ProgrammingError - Rolling back!") g.session.rollback() raise except sqlalchemy.exc.InternalError: traceback.print_exc() print("InternalError - Rolling back!") g.session.rollback() raise except sqlalchemy.exc.OperationalError: traceback.print_exc() print("InternalError - Rolling back!") g.session.rollback() raise return entries
def search(): with ScopedSession() as session: q = flask.request.args.get('query') query = session.query(Comment, User, Post, func.ts_headline('russian', Comment.text, func.plainto_tsquery('russian', q), 'HighlightAll=true').label('highlighted')).filter(Comment.user_id == User.user_id).filter(Comment.post_id == Post.post_id) query = query.filter(func.to_tsvector('russian', Comment.text).op('@@')(func.plainto_tsquery('russian', q))) comments = [] for comment, user, post, highlighted in query.order_by(func.ts_rank_cd(func.to_tsvector('russian', Comment.text), func.plainto_tsquery('russian', q)).desc(), Comment.posted.desc()).limit(100).all(): comments.append({ "id": comment.comment_id, "parent_id": comment.parent_id, "post_id": comment.post_id, "text": normalize_text(highlighted), "posted": comment.posted.strftime(DATE_FORMAT), "user_id": user.user_id, "user_name": user.name, "user_avatar": user.avatar_hash, "comment_list_id": post.comment_list_id }) resp = app.make_response(json.dumps(comments, ensure_ascii=False)) resp.mimetype = 'application/json; charset=utf-8' resp.headers['Access-Control-Allow-Origin'] = '*' return resp
def fetch_content(query_text, column, page, sources=None): session = db.get_session() tsq = build_tsquery(query_text) if column == db.WebPages.title: query = session \ .query(db.WebPages, func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq))) \ .filter( \ func.to_tsvector("english", column).match(tsq, postgresql_regconfig='english') \ ) \ .order_by(func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq)).desc()) elif column == db.WebPages.tsv_content: query = session \ .query(db.WebPages, func.ts_rank_cd(column, func.to_tsquery(tsq))) \ .filter( column.match(tsq) ) \ .order_by(func.ts_rank_cd(column, func.to_tsquery(tsq)).desc()) if sources: query = query.filter(db.WebPages.netloc.in_(sources)) else: raise ValueError("Wat?") # print(str(query.statement.compile(dialect=postgresql.dialect()))) # print("param: '%s'" % tsq) try: entries = paginate(query, page, per_page=50) except sqlalchemy.exc.ProgrammingError: traceback.print_exc() print("ProgrammingError - Rolling back!") db.get_session().rollback() raise except sqlalchemy.exc.InternalError: traceback.print_exc() print("InternalError - Rolling back!") db.get_session().rollback() raise except sqlalchemy.exc.OperationalError: traceback.print_exc() print("InternalError - Rolling back!") db.get_session().rollback() raise return entries
def search(): search_query = request.args.get('search') search_query = search_query.replace(" ", "&") search_query = search_query.replace("+", "&") shows = db.session.query( Show.id, Show.title, Show.description, func.ts_rank_cd(Show.tsv_searchable_text, func.plainto_tsquery(search_query, postgresql_regconfig='english'), 32).label('rank') ).filter( Show.tsv_searchable_text.match(search_query, postgresql_regconfig='english') ).order_by(db.text('rank DESC')).limit(10) data = [] for show in shows: data.append({ 'id': show.id, 'title': show.title, 'description': show.description, 'rank': show.rank }) return jsonify(data)
def fetch_all_by_tsquery(self, tsquery, *expr, **kwargs): """ Returns all structures whose abstract matches the keywords given in the keyword string. Parameters ---------- tsquery : str A string containing the lexme that will be used for searching. The string will be automatically casted into a tsquery through the to_tsquery() function. weights : list, default=[0.1, 0.2, 0.4, 1.0] The weights specify how heavily to weigh each category of word, in the order [D-weight, C-weight, B-weight, A-weight]. normalization : int, default=32 Option that specifies whether and how a document's length should impact its rank. The integer option controls several behaviors, so it is a bit mask: you can specify one or more behaviors using | (for example, 2|4). - 0 ignores the document length - 1 divides the rank by 1 + the logarithm of the document length - 2 divides the rank by the document length - 4 divides the rank by the mean harmonic distance between extents (this is implemented only by ts_rank_cd) - 8 divides the rank by the number of unique words in document - 16 divides the rank by 1 + the logarithm of the number of unique words in document - 32 divides the rank by itself + 1 min_words : int, default=10 max_words : int, default=25 Returns ------- resultset : list List containing tuples in the form (Structure, snippet, rank). The snippet is the part of the abstract that matches the query. Ordered by rank. Notes ----- - http://www.postgresql.org/docs/current/interactive/datatype-textsearch.html Examples -------- >>> StructureAdaptor().fetch_all_by_tsquery('imatinib') [(<Structure(3FW1)>, u'[Imatinib] represents the first in a class', 0.54545500000000002), (<Structure(2GQG)>, u'[imatinib], an inhibitor of BCR-ABL. Although', 0.375), (<Structure(3CS9)>, u'[imatinib] against [imatinib]-resistant Bcr-Abl. Consistent', 0.33333299999999999), (<Structure(2HYY)>, u'[imatinib] as first-line therapy', 0.230769), (<Structure(3G0F)>, u'[imatinib] mesylate and sunitinib malate', 0.230769), (<Structure(3G0E)>, u'[imatinib] mesylate and sunitinib malate', 0.230769), (<Structure(1T46)>, u'[Imatinib] or Gleevec) demonstrates that', 0.090909100000000007)] """ weights = kwargs.get('weights', [0.1, 0.2, 0.4, 1.0]) normalization = kwargs.get('normalization', 32) min_words, max_words = kwargs.get('min_words', 10), kwargs.get('max_words', 25) if kwargs.get('plain'): tsquery = func.plainto_tsquery('english', tsquery) else: tsquery = func.to_tsquery('english', tsquery) # basic query construct query = self.query.join('XRefs').filter(and_(*expr)) query = query.join( citations, and_(citations.c.pubmed_id == cast(XRef.xref, Integer), XRef.source == 'PubMed')) # GIST index that is used for searching index = func.to_tsvector('english', citations.c.abstract).op('@@')(tsquery) query = query.filter(index).order_by("rank DESC") # calculated rank of the hit rank = func.ts_rank_cd( weights, func.to_tsvector('english', citations.c.abstract), tsquery, normalization).label('rank') query = query.add_column(rank) if kwargs.get('snippet'): # configuration of the preview snippet headline_conf = 'StartSel=[, StopSel=], MaxWords={0}, MinWords={1}'.format( max_words, min_words) # function to create a preview snippet of the matched area snippet = func.ts_headline('english', citations.c.abstract, tsquery, headline_conf).label('snippet') # add snippet column to query query = query.add_column(snippet) return query