def _search_query_text(query, **kwargs): """Prepare the initial query, searching by text and ranking by the proximity.""" language = kwargs.get('language') text = kwargs.get('text') if text: if language is not None: to_tsquery = func.to_tsquery(SearchItem.getDictForLanguage(language), text) else: to_tsquery = func.to_tsquery(text) query = query.filter(SearchItem.terms.op('@@')(to_tsquery)) return query
def build_query(params, full=False): columns = [users.c.id, users.c.name, users.c.email_address] if full \ else [users.c.id] query = select(columns) query = query.order_by(users.c.id) if 'email' in params: query = query.where(users.c.email_address == params['email']) if 'name' in params: search_string = " & ".join( "{}:*".format(word) for word in params['name'].split(' ') ) name_vector = func.to_tsvector(users.c.name) query = query.where(name_vector.match(search_string)) query = query.order_by( desc(func.ts_rank(name_vector, func.to_tsquery(search_string))) ) # if 'page_size' in params: # query = query.limit(params['page_size']) return query
def by_prefix(cls, prefix): ''' Find search terms with a given prefix. ''' # In our SQLAlchemy version the language cannot be specified in the # `match` function, so we construct the query explicitly. See # https://bitbucket.org/zzzeek/sqlalchemy/issues/3078 tsquery = func.to_tsquery('pg_catalog.simple', "'{}':*".format(prefix)) return cls.filter(cls.term_tsvector.op('@@')(tsquery))
def _search_query_rank(query, **kwargs): """ Rank query results, sorting by search rank for most content types and integrating the rating for subjects. """ rank_func = None text = kwargs.get('text') language = kwargs.get('language') rank_cutoff = kwargs.get('rank_cutoff') if language is not None: rank_func = func.ts_rank_cd(SearchItem.terms, func.to_tsquery(SearchItem.getDictForLanguage(language), text)) else: rank_func = func.ts_rank_cd(SearchItem.terms, func.to_tsquery(text)) if rank_cutoff is not None: query = query.filter(rank_func >= rank_cutoff) if kwargs.get('use_rating'): query = query.order_by((SearchItem.rating * rank_func).desc()) else: query = query.order_by(rank_func.desc()) return query
def construct_fulltext_query_and_rank(searched): # Text version of the text search query directly constructed from # the searched string simple_ts_query_text = cast(func.plainto_tsquery('english', searched), TEXT) # Text version of the search query using prefix search for the last # word in the base query # Case expression is necessary to capture the case of empty base query # (arising e.g. when the query consists entirely of special characters # and stop words) prefix_ts_query_text = case([(simple_ts_query_text == "", cast("", TEXT))], else_=simple_ts_query_text.op('||')(cast( ":*", TEXT))) # Final text search query ts_query = func.to_tsquery('english', prefix_ts_query_text) # Rank for each search result ts_rank = func.ts_rank_cd(Post.__ts_vector__, ts_query).label("rank") return (ts_query, ts_rank)
def add_simple_text_search(query, text_columns, keywords, include_rank=True): rank = None keywords_j = ' & '.join(keywords) fts_config = 'simple' filters = [ func.to_tsvector(fts_config, column).match(keywords_j) for column in text_columns ] if len(filters) > 1: filter = or_(*filters) else: filter = filters[0] query = query.filter(filter) if include_rank: ranks = [ func.ts_rank(func.to_tsvector(fts_config, column), func.to_tsquery(fts_config, keywords_j)) for column in text_columns ] rank = reduce(lambda a, b: a + b, ranks) query = query.add_column(rank.label('score')) return query, rank
def search_query(q, search): # works only for pg backend search = preprocess_search_query(search) return q.filter(model.Ebook.full_text.match(search))\ .order_by(desc(func.ts_rank_cd(model.Ebook.full_text, func.to_tsquery(text("'custom'"), search))))
def search_locations(search_string): q = db.session.query(Location).filter(db.text('location.search_column @@ to_tsquery(:terms)')) q = q.params(terms=search_string) q = q.order_by(db.text('ts_rank_cd(location.search_column, to_tsquery(:terms)) DESC')) q = q.add_column(func.ts_headline('pg_catalog.english',db.text('location.body_text'),func.to_tsquery(search_string),'MaxFragments=1, StartSel = <strong>, StopSel = </strong>')) return [{'object':location_object_to_dict(location), 'fragment':fragment} for location, fragment in q]
def add_text_search(query, join_columns, keywords, locales, include_rank=True, lse=None): from assembl.models.langstrings import LangStringEntry rank = None keywords_j = ' & '.join(keywords) lse = lse or aliased(LangStringEntry) join_conds = [ lse.langstring_id == join_column for join_column in join_columns ] if len(join_conds) > 1: join_cond = or_(*join_conds) else: join_cond = join_conds[0] query = query.join(lse, join_cond) if locales: active_text_indices = get('active_text_indices', 'en') locales_by_config = defaultdict(list) any_locale = 'any' in locales for locale in locales: fts_config = postgres_language_configurations.get(locale, 'simple') if fts_config not in active_text_indices: fts_config = 'simple' locales_by_config[fts_config].append(locale) conds = {} # TODO: to_tsquery vs plainto_tsquery vs phraseto_tsquery for fts_config, locales in locales_by_config.items(): conds[fts_config] = (or_( *[((lse.locale == locale) | lse.locale.like(locale + "_%")) for locale in locales]) if 'any' not in locales else None, func.to_tsvector(fts_config, lse.value)) filter = [ cond & v.match(keywords_j, postgresql_regconfig=conf) for (conf, (cond, v)) in conds.items() if cond is not None ] if any_locale: (_, v) = conds['simple'] filter.append(v.match(keywords_j, postgresql_regconfig='simple')) query = query.filter(or_(*filter)) if include_rank: if len(conds) > 1: if any_locale: (_, v) = conds['simple'] else_case = func.ts_rank( v, func.to_tsquery('simple', keywords_j)) else: else_case = 0 rank = case( [(cond, func.ts_rank(v, func.to_tsquery(conf, keywords_j))) for (conf, (cond, v)) in conds.items() if cond is not None], else_=else_case).label('score') else: (conf, (cond, v)) = next(iter(conds.items())) rank = func.ts_rank(v, func.to_tsquery(conf, keywords_j)).label('score') query = query.add_column(rank) else: fts_config = 'simple' query = query.filter( func.to_tsvector(fts_config, lse.value).match(keywords_j, postgresql_regconfig=fts_config)) if include_rank: rank = func.ts_rank(func.to_tsvector(fts_config, lse.value), func.to_tsquery(fts_config, keywords_j)).label('score') query = query.add_column(rank) return query, rank
def do_search(search_query, offset, result_count, new_domains, framework_slug): try: sort_dir = list(search_query['sort'][0].values())[0]['order'] except (KeyError, IndexError): sort_dir = 'asc' try: sort_by = list(search_query['sort'][0].values())[0]['sort_by'] except (KeyError, IndexError): sort_by = None try: terms = search_query['query']['filtered']['filter']['terms'] except (KeyError, IndexError): terms = {} roles_list = None seller_types_list = None if terms: new_domains = 'prices.serviceRole.role' not in terms try: if new_domains: roles_list = terms['domains.assessed'] else: roles = terms['prices.serviceRole.role'] roles_list = set(_['role'][7:] for _ in roles) except KeyError: pass try: seller_types_list = terms['seller_types'] except: # noqa pass try: search_term = search_query['query']['match_phrase_prefix']['name'] except KeyError: search_term = '' EXCLUDE_LEGACY_ROLES = not current_app.config['LEGACY_ROLE_MAPPING'] if new_domains: q = db.session.query(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \ .outerjoin(SupplierFramework).outerjoin(Framework) else: q = db.session.query(Supplier).outerjoin(PriceSchedule).outerjoin(ServiceRole) \ .outerjoin(SupplierFramework).outerjoin(Framework) q = q.filter( Supplier.status != 'deleted', Supplier.abn != Supplier.DUMMY_ABN, or_(Framework.slug == framework_slug, ~Supplier.frameworks.any())) tsquery = None if search_term: if any(c in search_term for c in ['#', '-', '_', '/', '\\']): tsquery = func.phraseto_tsquery(search_term) elif ' ' in search_term: tsquery = func.plainto_tsquery(search_term) else: tsquery = func.to_tsquery(search_term + ":*") q = q.add_column( func.ts_headline( 'english', func.concat(Supplier.summary, ' ', Supplier.data['tools'].astext, ' ', Supplier.data['methodologies'].astext, ' ', Supplier.data['technologies'].astext, ''), tsquery, 'MaxWords=25, MinWords=20, ShortWord=3, HighlightAll=FALSE, MaxFragments=1' )) q = q.group_by(Supplier.id) try: code = search_query['query']['term']['code'] q = q.filter(Supplier.code == code) except KeyError: pass if roles_list is not None: if new_domains: if EXCLUDE_LEGACY_ROLES: d_agg = postgres.array_agg(cast(Domain.name, TEXT)) q = q.filter(SupplierDomain.status == 'assessed') q = q.having(d_agg.contains(array(roles_list))) else: sr_agg = postgres.array_agg( cast(func.substring(ServiceRole.name, 8), TEXT)) q = q.having(sr_agg.contains(array(roles_list))) if seller_types_list is not None and 'recruiter' in seller_types_list: q = q.filter(Supplier.is_recruiter == 'true') seller_types_list.remove('recruiter') if len(seller_types_list) == 0: seller_types_list = None if seller_types_list is not None: selected_seller_types = select( [postgres.array_agg(column('key'))], from_obj=func.json_each_text(Supplier.data[('seller_type', )]), whereclause=cast(column('value'), Boolean)).as_scalar() q = q.filter(selected_seller_types.contains(array(seller_types_list))) if sort_by: if sort_by == 'latest': ob = [desc(Supplier.last_update_time)] else: ob = [asc(Supplier.name)] else: if sort_dir == 'desc': ob = [desc(Supplier.name)] else: ob = [asc(Supplier.name)] if search_term: ob = [desc(func.ts_rank_cd(Supplier.text_vector, tsquery))] + ob q = q.filter(Supplier.text_vector.op('@@')(tsquery)) q = q.order_by(*ob) raw_results = list(q) results = [] for x in range(len(raw_results)): if type(raw_results[x]) is Supplier: result = raw_results[x] else: result = raw_results[x][0] if raw_results[x][1] is not None and raw_results[x][1] != '': result.summary = raw_results[x][1] results.append(result) sliced_results = results[offset:(offset + result_count)] q = db.session.query(Supplier.code, Supplier.name, Supplier.summary, Supplier.is_recruiter, Supplier.data, Domain.name.label('domain_name'), SupplierDomain.status.label('domain_status'))\ .outerjoin(SupplierDomain, Domain)\ .filter(Supplier.id.in_([sr.id for sr in sliced_results]))\ .order_by(Supplier.name) suppliers = [r._asdict() for r in q] sliced_results = [] for key, group in groupby(suppliers, key=itemgetter('code')): supplier = group.next() supplier['seller_type'] = supplier.get( 'data') and supplier['data'].get('seller_type') supplier['domains'] = {'assessed': [], 'unassessed': []} for s in chain([supplier], group): domain, status = s['domain_name'], s['domain_status'] if domain: if status == 'assessed': supplier['domains']['assessed'].append(domain) else: supplier['domains']['unassessed'].append(domain) for e in ['domain_name', 'domain_status', 'data']: supplier.pop(e, None) sliced_results.append(supplier) return sliced_results, len(results)
def casestudies_search(): search_query = get_json_from_request() offset = get_nonnegative_int_or_400(request.args, 'from', 0) result_count = get_positive_int_or_400( request.args, 'size', current_app.config['DM_API_SUPPLIERS_PAGE_SIZE']) sort_dir = search_query.get('sort_dir', 'asc') sort_by = search_query.get('sort_by', None) domains = search_query.get('domains', None) seller_types = search_query.get('seller_types', None) search_term = search_query.get('search_term', None) framework_slug = request.args.get('framework', 'digital-marketplace') q = db.session.query(CaseStudy).join(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \ .outerjoin(SupplierFramework).outerjoin(Framework) q = q.filter( Supplier.status != 'deleted', or_(Framework.slug == framework_slug, ~Supplier.frameworks.any())) tsquery = None if search_term: if ' ' in search_term: tsquery = func.plainto_tsquery(search_term) else: tsquery = func.to_tsquery(search_term + ":*") q = q.add_column( func.ts_headline( 'english', func.concat(CaseStudy.data['approach'].astext, ' ', CaseStudy.data['role'].astext), tsquery, 'MaxWords=150, MinWords=75, ShortWord=3, HighlightAll=FALSE, FragmentDelimiter=" ... " ' )) else: q = q.add_column("''") q = q.add_column(Supplier.name) q = q.add_column(postgres.array_agg(Supplier.data)) q = q.group_by(CaseStudy.id, Supplier.name) if domains: d_agg = postgres.array_agg(cast(Domain.name, TEXT)) q = q.having(d_agg.contains(array(domains))) if seller_types: selected_seller_types = select( [postgres.array_agg(column('key'))], from_obj=func.json_each_text(Supplier.data[('seller_type', )]), whereclause=cast(column('value'), Boolean)).as_scalar() q = q.filter(selected_seller_types.contains(array(seller_types))) if sort_dir in ('desc', 'z-a'): ob = [desc(CaseStudy.data['title'].astext)] else: ob = [asc(CaseStudy.data['title'].astext)] if search_term: ob = [ desc( func.ts_rank_cd( func.to_tsvector( func.concat( Supplier.name, CaseStudy.data['title'].astext, CaseStudy.data['approach'].astext)), tsquery)) ] + ob condition = func.to_tsvector( func.concat(Supplier.name, CaseStudy.data['title'].astext, CaseStudy.data['approach'].astext)).op('@@')(tsquery) q = q.filter(condition) q = q.order_by(*ob) raw_results = list(q) results = [] for x in range(len(raw_results)): result = raw_results[x][0].serialize() if raw_results[x][1] is not None and raw_results[x][1] != '': result['approach'] = raw_results[x][1] if raw_results[x][2] is not None: result['supplierName'] = raw_results[x][2] if raw_results[x][3] is not None and raw_results[x][3][0] is not None: result['seller_type'] = raw_results[x][3][0].get('seller_type') results.append(result) total_results = len(results) sliced_results = results[offset:(offset + result_count)] result = { 'hits': { 'total': total_results, 'hits': [{ '_source': r } for r in sliced_results] } } try: response = jsonify(result), 200 except Exception as e: response = jsonify(message=str(e)), 500 return response
def do_search(search_query, offset, result_count, new_domains, framework_slug): try: sort_dir = list(search_query['sort'][0].values())[0]['order'] except (KeyError, IndexError): sort_dir = 'asc' try: sort_by = list(search_query['sort'][0].values())[0]['sort_by'] except (KeyError, IndexError): sort_by = None try: terms = search_query['query']['filtered']['filter']['terms'] except (KeyError, IndexError): terms = {} roles_list = None seller_types_list = None if terms: new_domains = 'prices.serviceRole.role' not in terms try: if new_domains: roles_list = terms['domains.assessed'] else: roles = terms['prices.serviceRole.role'] roles_list = set(_['role'][7:] for _ in roles) except KeyError: pass try: seller_types_list = terms['seller_types'] except: # noqa pass try: search_term = search_query['query']['match_phrase_prefix']['name'] except KeyError: search_term = '' EXCLUDE_LEGACY_ROLES = not current_app.config['LEGACY_ROLE_MAPPING'] if new_domains: q = db.session.query(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \ .outerjoin(SupplierFramework).outerjoin(Framework) else: q = db.session.query(Supplier).outerjoin(PriceSchedule).outerjoin(ServiceRole) \ .outerjoin(SupplierFramework).outerjoin(Framework) q = q.filter(Supplier.status != 'deleted', Supplier.abn != Supplier.DUMMY_ABN, or_(Framework.slug == framework_slug, ~Supplier.frameworks.any())) tsquery = None if search_term: if any(c in search_term for c in ['#', '-', '_', '/', '\\']): tsquery = func.phraseto_tsquery(search_term) elif ' ' in search_term: tsquery = func.plainto_tsquery(search_term) else: tsquery = func.to_tsquery(search_term + ":*") q = q.add_column(func.ts_headline( 'english', func.concat(Supplier.summary, ' ', Supplier.data['tools'].astext, ' ', Supplier.data['methodologies'].astext, ' ', Supplier.data['technologies'].astext, ''), tsquery, 'MaxWords=25, MinWords=20, ShortWord=3, HighlightAll=FALSE, MaxFragments=1' )) q = q.group_by(Supplier.id) try: code = search_query['query']['term']['code'] q = q.filter(Supplier.code == code) except KeyError: pass if roles_list is not None: if new_domains: if EXCLUDE_LEGACY_ROLES: d_agg = postgres.array_agg(cast(Domain.name, TEXT)) q = q.having(d_agg.contains(array(roles_list))) else: sr_agg = postgres.array_agg(cast(func.substring(ServiceRole.name, 8), TEXT)) q = q.having(sr_agg.contains(array(roles_list))) if seller_types_list is not None and 'recruiter' in seller_types_list: q = q.filter(Supplier.is_recruiter == 'true') seller_types_list.remove('recruiter') if len(seller_types_list) == 0: seller_types_list = None if seller_types_list is not None: selected_seller_types = select( [postgres.array_agg(column('key'))], from_obj=func.json_each_text(Supplier.data[('seller_type',)]), whereclause=cast(column('value'), Boolean) ).as_scalar() q = q.filter(selected_seller_types.contains(array(seller_types_list))) if sort_by: if sort_by == 'latest': ob = [desc(Supplier.last_update_time)] else: ob = [asc(Supplier.name)] else: if sort_dir == 'desc': ob = [desc(Supplier.name)] else: ob = [asc(Supplier.name)] if search_term: ob = [desc(func.ts_rank_cd(Supplier.text_vector, tsquery))] + ob q = q.filter(Supplier.text_vector.op('@@')(tsquery)) q = q.order_by(*ob) raw_results = list(q) results = [] for x in range(len(raw_results)): if type(raw_results[x]) is Supplier: result = raw_results[x] else: result = raw_results[x][0] if raw_results[x][1] is not None and raw_results[x][1] != '': result.summary = raw_results[x][1] results.append(result) sliced_results = results[offset:(offset + result_count)] q = db.session.query(Supplier.code, Supplier.name, Supplier.summary, Supplier.is_recruiter, Supplier.data, Domain.name.label('domain_name'), SupplierDomain.status.label('domain_status'))\ .outerjoin(SupplierDomain, Domain)\ .filter(Supplier.id.in_([sr.id for sr in sliced_results]))\ .order_by(Supplier.name) suppliers = [r._asdict() for r in q] sliced_results = [] for key, group in groupby(suppliers, key=itemgetter('code')): supplier = group.next() supplier['seller_type'] = supplier.get('data') and supplier['data'].get('seller_type') supplier['domains'] = {'assessed': [], 'unassessed': []} for s in chain([supplier], group): domain, status = s['domain_name'], s['domain_status'] if domain: if status == 'assessed': supplier['domains']['assessed'].append(domain) else: supplier['domains']['unassessed'].append(domain) for e in ['domain_name', 'domain_status', 'data']: supplier.pop(e, None) sliced_results.append(supplier) return sliced_results, len(results)
def casestudies_search(): search_query = get_json_from_request() offset = get_nonnegative_int_or_400(request.args, 'from', 0) result_count = get_positive_int_or_400(request.args, 'size', current_app.config['DM_API_SUPPLIERS_PAGE_SIZE']) sort_dir = search_query.get('sort_dir', 'asc') sort_by = search_query.get('sort_by', None) domains = search_query.get('domains', None) seller_types = search_query.get('seller_types', None) search_term = search_query.get('search_term', None) framework_slug = request.args.get('framework', 'digital-marketplace') q = db.session.query(CaseStudy).join(Supplier).outerjoin(SupplierDomain).outerjoin(Domain) \ .outerjoin(SupplierFramework).outerjoin(Framework) q = q.filter(Supplier.status != 'deleted', or_(Framework.slug == framework_slug, ~Supplier.frameworks.any())) tsquery = None if search_term: if ' ' in search_term: tsquery = func.plainto_tsquery(search_term) else: tsquery = func.to_tsquery(search_term + ":*") q = q.add_column(func.ts_headline( 'english', func.concat( CaseStudy.data['approach'].astext, ' ', CaseStudy.data['role'].astext), tsquery, 'MaxWords=150, MinWords=75, ShortWord=3, HighlightAll=FALSE, FragmentDelimiter=" ... " ' )) else: q = q.add_column("''") q = q.add_column(Supplier.name) q = q.add_column(postgres.array_agg(Supplier.data)) q = q.group_by(CaseStudy.id, Supplier.name) if domains: d_agg = postgres.array_agg(cast(Domain.name, TEXT)) q = q.having(d_agg.contains(array(domains))) if seller_types: selected_seller_types = select( [postgres.array_agg(column('key'))], from_obj=func.json_each_text(Supplier.data[('seller_type',)]), whereclause=cast(column('value'), Boolean) ).as_scalar() q = q.filter(selected_seller_types.contains(array(seller_types))) if sort_dir in ('desc', 'z-a'): ob = [desc(CaseStudy.data['title'].astext)] else: ob = [asc(CaseStudy.data['title'].astext)] if search_term: ob = [desc(func.ts_rank_cd(func.to_tsvector( func.concat(Supplier.name, CaseStudy.data['title'].astext, CaseStudy.data['approach'].astext)), tsquery))] + ob condition = func.to_tsvector(func.concat(Supplier.name, CaseStudy.data['title'].astext, CaseStudy.data['approach'].astext)).op('@@')(tsquery) q = q.filter(condition) q = q.order_by(*ob) raw_results = list(q) results = [] for x in range(len(raw_results)): result = raw_results[x][0].serialize() if raw_results[x][1] is not None and raw_results[x][1] != '': result['approach'] = raw_results[x][1] if raw_results[x][2] is not None: result['supplierName'] = raw_results[x][2] if raw_results[x][3] is not None and raw_results[x][3][0] is not None: result['seller_type'] = raw_results[x][3][0].get('seller_type') results.append(result) total_results = len(results) sliced_results = results[offset:(offset + result_count)] result = { 'hits': { 'total': total_results, 'hits': [{'_source': r} for r in sliced_results] } } try: response = jsonify(result), 200 except Exception as e: response = jsonify(message=str(e)), 500 return response