예제 #1
0
def search():
    with ScopedSession() as session:
        q = flask.request.args.get('query')

        query = session.query(Comment, User, Post, func.ts_headline('russian', Comment.text, func.plainto_tsquery('russian', q), 'HighlightAll=true').label('highlighted')).filter(Comment.user_id == User.user_id).filter(Comment.post_id == Post.post_id)

        query = query.filter(func.to_tsvector('russian', Comment.text).op('@@')(func.plainto_tsquery('russian', q)))

        comments = []

        for comment, user, post, highlighted in query.order_by(func.ts_rank_cd(func.to_tsvector('russian', Comment.text), func.plainto_tsquery('russian', q)).desc(), Comment.posted.desc()).limit(100).all():
            comments.append({
                "id": comment.comment_id,
                "parent_id": comment.parent_id,
                "post_id": comment.post_id,
                "text": normalize_text(highlighted),
                "posted": comment.posted.strftime(DATE_FORMAT),
                "user_id": user.user_id,
                "user_name": user.name,
                "user_avatar": user.avatar_hash,
                "comment_list_id": post.comment_list_id
            })

    resp = app.make_response(json.dumps(comments, ensure_ascii=False))
    resp.mimetype = 'application/json; charset=utf-8'
    resp.headers['Access-Control-Allow-Origin'] = '*'

    return resp
예제 #2
0
def fetch_content(query_text, column, text_column, page, sources=None):
    session = g.session
    tsq = build_tsquery(query_text)
    search = None
    if column == db.WebPages.title:
        query = session                                                                                         \
          .query(db.WebPages, func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq))) \
          .filter(                                                                                        \
           func.to_tsvector("english", column).match(tsq, postgresql_regconfig='english')              \
           )                                                                                           \
          .order_by(func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq)).desc())

    elif column == db.WebPages.tsv_content:
        query = session                                                                                         \
          .query(db.WebPages, func.ts_rank_cd(column, func.to_tsquery(tsq)))                              \
          .filter( column.match(tsq) )

        if "'" in query_text or '"' in query_text:
            search = query_text.replace("!", " ").replace("?", " ").replace(
                "'", " ").replace('"', " ").replace(',',
                                                    " ").replace('.',
                                                                 " ").strip()
            while "  " in search:
                search = search.replace("  ", " ")
            search = search.strip()
            search = '%{}%'.format(search.lower())
            query = query.filter(func.lower(text_column).like(search))

        query = query.order_by(
            func.ts_rank_cd(column, func.to_tsquery(tsq)).desc())

        if sources:
            query = query.filter(db.WebPages.netloc.in_(sources))

    else:
        raise ValueError("Wat?")

    print(str(query.statement.compile(dialect=postgresql.dialect())))
    print("param: '%s', '%s', '%s'" % (tsq, sources, search))

    try:
        entries = paginate(query, page, per_page=50)

    except sqlalchemy.exc.ProgrammingError:
        traceback.print_exc()
        print("ProgrammingError - Rolling back!")
        g.session.rollback()
        raise
    except sqlalchemy.exc.InternalError:
        traceback.print_exc()
        print("InternalError - Rolling back!")
        g.session.rollback()
        raise
    except sqlalchemy.exc.OperationalError:
        traceback.print_exc()
        print("InternalError - Rolling back!")
        g.session.rollback()
        raise

    return entries
예제 #3
0
def fetch_content(query_text, column, text_column, page, sources=None):
	session = g.session
	tsq = build_tsquery(query_text)
	search = None
	if column == db.WebPages.title:
		query = session                                                                                         \
				.query(db.WebPages, func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq))) \
				.filter(                                                                                        \
					func.to_tsvector("english", column).match(tsq, postgresql_regconfig='english')              \
					)                                                                                           \
				.order_by(func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq)).desc())

	elif column == db.WebPages.tsv_content:
		query = session                                                                                         \
				.query(db.WebPages, func.ts_rank_cd(column, func.to_tsquery(tsq)))                              \
				.filter( column.match(tsq) )

		if "'" in query_text or '"' in query_text:
			search = query_text.replace("!", " ").replace("?", " ").replace("'", " ").replace('"', " ").replace(',', " ").replace('.', " ").strip()
			while "  " in search:
				search = search.replace("  ", " ")
			search = search.strip()
			search = '%{}%'.format(search.lower())
			query = query.filter( func.lower(text_column).like(search) )

		query = query.order_by(func.ts_rank_cd(column, func.to_tsquery(tsq)).desc())

		if sources:
			query = query.filter(db.WebPages.netloc.in_(sources))

	else:
		raise ValueError("Wat?")

	print(str(query.statement.compile(dialect=postgresql.dialect())))
	print("param: '%s', '%s', '%s'" % (tsq, sources, search))

	try:
		entries = paginate(query, page, per_page=50)

	except sqlalchemy.exc.ProgrammingError:
		traceback.print_exc()
		print("ProgrammingError - Rolling back!")
		g.session.rollback()
		raise
	except sqlalchemy.exc.InternalError:
		traceback.print_exc()
		print("InternalError - Rolling back!")
		g.session.rollback()
		raise
	except sqlalchemy.exc.OperationalError:
		traceback.print_exc()
		print("InternalError - Rolling back!")
		g.session.rollback()
		raise

	return entries
def fetch_content(query_text, column, page, sources=None):
	session = db.get_session()
	tsq = build_tsquery(query_text)

	if column == db.WebPages.title:
		query = session                                                                                         \
				.query(db.WebPages, func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq))) \
				.filter(                                                                                        \
					func.to_tsvector("english", column).match(tsq, postgresql_regconfig='english')              \
					)                                                                                           \
				.order_by(func.ts_rank_cd(func.to_tsvector("english", column), func.to_tsquery(tsq)).desc())

	elif column == db.WebPages.tsv_content:
		query = session                                                                                         \
				.query(db.WebPages, func.ts_rank_cd(column, func.to_tsquery(tsq)))                              \
				.filter( column.match(tsq) )                                                                    \
				.order_by(func.ts_rank_cd(column, func.to_tsquery(tsq)).desc())

		if sources:
			query = query.filter(db.WebPages.netloc.in_(sources))

	else:
		raise ValueError("Wat?")

	# print(str(query.statement.compile(dialect=postgresql.dialect())))
	# print("param: '%s'" % tsq)

	try:
		entries = paginate(query, page, per_page=50)

	except sqlalchemy.exc.ProgrammingError:
		traceback.print_exc()
		print("ProgrammingError - Rolling back!")
		db.get_session().rollback()
		raise
	except sqlalchemy.exc.InternalError:
		traceback.print_exc()
		print("InternalError - Rolling back!")
		db.get_session().rollback()
		raise
	except sqlalchemy.exc.OperationalError:
		traceback.print_exc()
		print("InternalError - Rolling back!")
		db.get_session().rollback()
		raise

	return entries
예제 #5
0
    def fetch_all_by_tsquery(self, tsquery, *expr, **kwargs):
        """
        Returns all structures whose abstract matches the keywords given in the
        keyword string.

        Parameters
        ----------
        tsquery : str
            A string containing the lexme that will be used for searching. The
            string will be automatically casted into a tsquery through the to_tsquery()
            function.
        weights : list, default=[0.1, 0.2, 0.4, 1.0]
            The weights specify how heavily to weigh each category of word, in the
            order [D-weight, C-weight, B-weight, A-weight].
        normalization : int, default=32
            Option that specifies whether and how a document's length should impact
            its rank. The integer option controls several behaviors, so it is a
            bit mask: you can specify one or more behaviors using | (for example, 2|4).
            -  0 ignores the document length
            -  1 divides the rank by 1 + the logarithm of the document length
            -  2 divides the rank by the document length
            -  4 divides the rank by the mean harmonic distance between extents
                 (this is implemented only by ts_rank_cd)
            -  8 divides the rank by the number of unique words in document
            - 16 divides the rank by 1 + the logarithm of the number of unique
                 words in document
            - 32 divides the rank by itself + 1
        min_words : int, default=10
        max_words : int, default=25

        Returns
        -------
        resultset : list
            List containing tuples in the form (Structure, snippet, rank).
            The snippet is the part of the abstract that matches the query.
            Ordered by rank.

        Notes
        -----
        - http://www.postgresql.org/docs/current/interactive/datatype-textsearch.html

        Examples
        --------
        >>> StructureAdaptor().fetch_all_by_tsquery('imatinib')
        [(<Structure(3FW1)>,  u'[Imatinib] represents the first in a class', 0.54545500000000002),
        (<Structure(2GQG)>, u'[imatinib], an inhibitor of BCR-ABL. Although', 0.375),
        (<Structure(3CS9)>,  u'[imatinib] against [imatinib]-resistant Bcr-Abl. Consistent', 0.33333299999999999),
        (<Structure(2HYY)>, u'[imatinib] as first-line therapy', 0.230769),
        (<Structure(3G0F)>, u'[imatinib] mesylate and sunitinib malate', 0.230769),
        (<Structure(3G0E)>, u'[imatinib] mesylate and sunitinib malate', 0.230769),
        (<Structure(1T46)>, u'[Imatinib] or Gleevec) demonstrates that', 0.090909100000000007)]
        """
        weights = kwargs.get('weights', [0.1, 0.2, 0.4, 1.0])
        normalization = kwargs.get('normalization', 32)
        min_words, max_words = kwargs.get('min_words',
                                          10), kwargs.get('max_words', 25)

        if kwargs.get('plain'):
            tsquery = func.plainto_tsquery('english', tsquery)
        else:
            tsquery = func.to_tsquery('english', tsquery)

        # basic query construct
        query = self.query.join('XRefs').filter(and_(*expr))
        query = query.join(
            citations,
            and_(citations.c.pubmed_id == cast(XRef.xref, Integer),
                 XRef.source == 'PubMed'))

        # GIST index that is used for searching
        index = func.to_tsvector('english',
                                 citations.c.abstract).op('@@')(tsquery)
        query = query.filter(index).order_by("rank DESC")

        # calculated rank of the hit
        rank = func.ts_rank_cd(
            weights, func.to_tsvector('english', citations.c.abstract),
            tsquery, normalization).label('rank')
        query = query.add_column(rank)

        if kwargs.get('snippet'):

            # configuration of the preview snippet
            headline_conf = 'StartSel=[, StopSel=], MaxWords={0}, MinWords={1}'.format(
                max_words, min_words)

            # function to create a preview snippet of the matched area
            snippet = func.ts_headline('english', citations.c.abstract,
                                       tsquery, headline_conf).label('snippet')

            # add snippet column to query
            query = query.add_column(snippet)

        return query
예제 #6
0
    def filter_field(self, field, op, value):
        """
        Applies a filter on a field.

        Notes on 'ne' op:

        Example data: [None, 'john', 'roger']
        ne:john would return only roger (i.e. nulls excluded)
        ne:     would return john and roger


        Notes on  'search' op:

        For some reason, SQLAlchemy uses to_tsquery rather than
        plainto_tsquery for the match operator

        to_tsquery uses operators (&, |, ! etc.) while
        plainto_tsquery tokenises the input string and uses AND between
        tokens, hence plainto_tsquery is what we want here

        For other database back ends, the behaviour of the match
        operator is completely different - see:
        http://docs.sqlalchemy.org/en/rel_1_0/core/sqlelement.html

        :param field:       field name
        :param op:          'eq', 'ne', 'gt', 'lt', 'ge', 'le' or 'search'
        :param value:       comparison value, string or list/tuple
        :return:
        """
        app = self.app
        odm = app.odm()
        query = self.sql_query

        if field.model:
            field_model = self.app.models.get(field.model)
            if not field_model:
                return
            db_model = field_model.db_model()
            field = getattr(db_model, field.name, None)
            if field is None:
                field = getattr(db_model, field_model.id_field, None)
            if not field:
                return
            if field_model.identifier not in self.joins:
                self.joins.add(field_model.identifier)
                query = query.join(db_model)
        elif isinstance(field.field, str):
            field = getattr(self.model.db_model(), field.name, None)
        else:
            field = None

        if not field:
            return

        multiple = isinstance(value, (list, tuple))

        if value == '':
            value = None

        if multiple and op in ('eq', 'ne'):
            if op == 'eq':
                query = query.filter(field.in_(value))
            elif op == 'ne':
                query = query.filter(~field.in_(value))
        else:
            if multiple:
                assert len(value) > 0
                value = value[0]

            if op == 'eq':
                query = query.filter(field == value)
            elif op == 'ne':
                query = query.filter(field != value)
            elif op == 'search':
                dialect_name = odm.binds[odm[self.name].__table__].dialect.name
                if dialect_name == 'postgresql':
                    ts_config = field.info.get(
                        'text_search_config',
                        app.config['DEFAULT_TEXT_SEARCH_CONFIG'])
                    query = query.filter(
                        func.to_tsvector(ts_config,
                                         cast(field, String)).op('@@')(
                                             func.plainto_tsquery(value)))
                else:
                    query = query.filter(field.match(value))
            elif op == 'gt':
                query = query.filter(field > value)
            elif op == 'ge':
                query = query.filter(field >= value)
            elif op == 'lt':
                query = query.filter(field < value)
            elif op == 'le':
                query = query.filter(field <= value)
        self.sql_query = query
        return self
예제 #7
0
    def filter_field(self, field, op, value):
        """
        Applies a filter on a field.

        Notes on 'ne' op:

        Example data: [None, 'john', 'roger']
        ne:john would return only roger (i.e. nulls excluded)
        ne:     would return john and roger


        Notes on  'search' op:

        For some reason, SQLAlchemy uses to_tsquery rather than
        plainto_tsquery for the match operator

        to_tsquery uses operators (&, |, ! etc.) while
        plainto_tsquery tokenises the input string and uses AND between
        tokens, hence plainto_tsquery is what we want here

        For other database back ends, the behaviour of the match
        operator is completely different - see:
        http://docs.sqlalchemy.org/en/rel_1_0/core/sqlelement.html

        :param field:       field name
        :param op:          'eq', 'ne', 'gt', 'lt', 'ge', 'le' or 'search'
        :param value:       comparison value, string or list/tuple
        :return:
        """
        app = self.app
        odm = app.odm()
        query = self.sql_query

        if field.model:
            field_model = self.app.models.get(field.model)
            if not field_model:
                return
            db_model = field_model.db_model()
            field = getattr(db_model, field.name, None)
            if field is None:
                field = getattr(db_model, field_model.id_field, None)
            if not field:
                return
            if field_model.identifier not in self.joins:
                self.joins.add(field_model.identifier)
                query = query.join(db_model)
        elif isinstance(field.field, str):
            field = getattr(self.model.db_model(), field.name, None)
        else:
            field = None

        if not field:
            return

        multiple = isinstance(value, (list, tuple))

        if value == '':
            value = None

        if multiple and op in ('eq', 'ne'):
            if op == 'eq':
                query = query.filter(field.in_(value))
            elif op == 'ne':
                query = query.filter(~field.in_(value))
        else:
            if multiple:
                assert len(value) > 0
                value = value[0]

            if op == 'eq':
                query = query.filter(field == value)
            elif op == 'ne':
                query = query.filter(field != value)
            elif op == 'search':
                dialect_name = odm.binds[odm[self.name].__table__].dialect.name
                if dialect_name == 'postgresql':
                    ts_config = field.info.get(
                        'text_search_config',
                        app.config['DEFAULT_TEXT_SEARCH_CONFIG']
                    )
                    query = query.filter(
                        func.to_tsvector(ts_config, cast(field, String)).op(
                            '@@')(func.plainto_tsquery(value))
                    )
                else:
                    query = query.filter(field.match(value))
            elif op == 'gt':
                query = query.filter(field > value)
            elif op == 'ge':
                query = query.filter(field >= value)
            elif op == 'lt':
                query = query.filter(field < value)
            elif op == 'le':
                query = query.filter(field <= value)
        self.sql_query = query
        return self