def test_not_between_words(self):
    """A hyphen directly before the second word negates that word."""
    cases = (
        ('wars -star', 'wars:* & ! star:*'),
        (u'äää -ööö', u'äää:* & ! ööö:*'),
    )
    for raw, expected in cases:
        assert parse_search_query(raw) == expected
def find_businesses(query, limit=None):
    """Return businesses whose combined vector matches *query*.

    The business query is joined to promotions and to the promotions' tags
    and categories, then filtered with ``biz_and_promo_vector`` against the
    parsed search query.

    :param query: raw, user-supplied search string.
    :param limit: optional maximum number of rows to fetch. ``None`` (the
        default) fetches every match. Previously this argument was accepted
        but silently ignored.
    :returns: list produced by ``.all()`` on the filtered query.
    """
    matches = (
        db.business.query()
        .join(db.promotion.Model)
        .join(db.promotion.Model.tags, db.promotion.Model.categories)
        .filter(biz_and_promo_vector.match(parse_search_query(query)))
    )
    if limit is not None:
        # NOTE(review): the limit is applied to joined rows in SQL; if the
        # joins fan out, fewer than `limit` distinct businesses may come
        # back -- confirm this is acceptable for callers.
        matches = matches.limit(limit)
    return matches.all()
def test_emails_with_email_tokens(self):
    """With ``emails_as_tokens=True`` an email address stays one token."""
    # The fixture address had been replaced by scrubbed placeholders, which
    # made the assertion unsatisfiable; a concrete address is used instead.
    email_parser = SearchQueryParser(emails_as_tokens=True)
    parsed = parse_search_query('john@fastmonkeys.com', parser=email_parser)
    assert parsed == 'john@fastmonkeys.com:*'
def simple_search(query):
    """Return articles whose search vector matches *query*, newest first.

    Only the full-text match is applied here; matching on author last
    names is not part of this query.
    """
    tsquery = parse_search_query(query)
    matching = Article.query.filter(
        Article.search_vector.match_tsquery(tsquery)
    )
    # Sort on ``created`` plus a zero interval, descending.
    newest_first = (Article.created + cast("0", Interval)).desc()
    return matching.order_by(newest_first)
def test_supports_custom_parsers(self):
    """A parser built with ``wildcard='*'`` suffixes each term with ``*``."""
    custom_parser = SearchQueryParser(wildcard='*')
    parsed = parse_search_query('star wars', parser=custom_parser)
    assert parsed == 'star* & wars*'
def simple_search(query):
    """Full-text search over articles, ordered from newest to oldest.

    The result is limited to articles whose ``search_vector`` matches the
    parsed *query*; author names are not consulted.
    """
    vector_match = Article.search_vector.match_tsquery(
        parse_search_query(query)
    )
    hits = Article.query.filter(vector_match)
    # The sort key is ``created`` with a zero interval added, descending.
    sort_key = (Article.created + cast("0", Interval)).desc()
    return hits.order_by(sort_key)
def query_venues(query):
    """Search venues by their own vector, then by venue + work vectors.

    Venues found by the sorted search on ``Venue`` come first; venues that
    only match the combined Venue/Work vector are appended afterwards,
    skipping any already present in the first list.
    """
    combined_sv = Venue.search_vector | Work.search_vector

    primary_search = Venue.query.search(query, sort=True).all()
    secondary_search = Venue.query.join(Work).filter(
        combined_sv.match(parse_search_query(query))
    )

    results = list(primary_search)
    for venue in secondary_search:
        if venue not in primary_search:
            results.append(venue)
    return results
def base_search(query):
    """Search people, honouring category flags embedded in *query*.

    The words ``phd``, ``staff``, ``postdoc`` and ``associate`` are treated
    as category flags. A bare flag adds the matching ``People`` attribute to
    an OR-ed filter list; a flag prefixed with ``-`` adds an
    ``attribute == None`` condition to an AND-ed list. Flags are detected
    *and* removed case-insensitively (previously removal was case-sensitive,
    so e.g. ``-PhD`` was recognised but left in the text that was then
    searched).

    If the remaining query contains ``*`` or is blank, every person passing
    the category filters is returned. Otherwise the text is searched across
    several per-category joins whose results are unioned and then filtered
    by the category conditions.

    :param query: raw search string, possibly containing category flags.
    :returns: a query object over ``People``.
    """
    import re

    filter_list = []
    remove_list = []

    # Flag word -> People attribute it refers to; order matches the order in
    # which flags are examined and stripped from the query.
    category_flags = (
        ('phd', People.phd),
        ('staff', People.staff),
        ('postdoc', People.postdoc),
        ('associate', People.associate),
    )
    for flag, relation in category_flags:
        lowered = query.lower()
        negated = '-' + flag
        if negated in lowered:
            # `== None` is deliberate: it builds an expression through the
            # attribute's overloaded operator (presumably SQLAlchemy).
            remove_list.append(relation == None)  # noqa: E711
            query = re.sub(re.escape(negated), '', query, flags=re.IGNORECASE)
        elif flag in lowered:
            filter_list.append(relation)
            query = re.sub(re.escape(flag), '', query, flags=re.IGNORECASE)

    # Simple "query all" path: wildcard or nothing left to search for.
    if '*' in query or not query.strip(' '):
        return People.query.filter(and_(*remove_list)).filter(
            or_(*filter_list))

    # Parse once and reuse for every vector match below.
    tsquery = parse_search_query(query)

    # For each category, query on the join using a combined search vector.
    phd_vec = People.search_vector | PhD.search_vector
    pos_vec = People.search_vector | Positions.search_vector

    phd = db.session.query(People).join(PhD).filter(phd_vec.match(tsquery))
    staff = (db.session.query(People).join(Staff).join(Positions)
             .filter(pos_vec.match(tsquery)))
    associates = (db.session.query(People).join(Associates).join(Positions)
                  .filter(pos_vec.match(tsquery)))

    # Staff/associates without any position, and people in no category,
    # are searched on the People query directly.
    staff_nopos = (People.query.join(Staff)
                   .filter(~(Staff.position.any())).search(query))
    assoc_nopos = (People.query.join(Associates)
                   .filter(~(Associates.position.any())).search(query))
    others = People.query.filter(
        People.staff == None,  # noqa: E711
        People.associate == None,  # noqa: E711
        People.phd == None,  # noqa: E711
    ).search(query)

    # Union all categories, then filter by the category flags.
    union = phd.union(staff, staff_nopos, associates, assoc_nopos, others)
    union = union.filter(and_(*remove_list)).filter(or_(*filter_list))
    return union
def feed_articles(self):
    """Build the query for this feed's articles, newest first.

    An article is selected when its id appears in the union of two
    sub-selects: articles linked to one of ``self.authors`` and articles
    whose search vector matches any of ``self.keywords`` (the keywords are
    joined with ``or`` before being parsed).
    """
    # Hand-written SQL that was explored for this feed, kept for reference:
    #   select * from article INNER JOIN (
    #       select id from article as blah
    #         where search_vector @@ to_tsquery('circuit:* & qed:* | qubit:*')
    #       union
    #       select article_id from articlesauthors as blah
    #         where author_id in (...)
    #   ) on id=blah order by created desc;
    keyword_terms = [kw.keyword for kw in self.keywords]
    search_query = parse_search_query(' or '.join(keyword_terms))
    author_ids = [a.id for a in self.authors]

    by_author = select([ArticleAuthor.article_id]).where(
        ArticleAuthor.author_id.in_(author_ids)
    )
    by_keyword = select([Article.id]).where(
        Article.search_vector.match_tsquery(search_query)
    )
    wanted_ids = by_author.union(by_keyword)

    # Per the original author's note: adding the zero interval matters
    # because it changes how the query plan is computed and made the query
    # run about 100x faster.
    newest_first = (Article.created + cast("0", Interval)).desc()
    return Article.query.filter(Article.id.in_(wanted_ids)).order_by(
        newest_first
    )
def query_art_types(query):
    """Search art types directly, then via related works and media.

    Results of the sorted search on ``ArtType`` are listed first. Art types
    that only match the combined ArtType/Work/Medium vector follow, without
    repeating any entry from the first list.
    """
    combined_sv = (
        ArtType.search_vector | Work.search_vector | Medium.search_vector
    )

    primary_search = ArtType.query.search(query, sort=True).all()
    secondary_search = ArtType.query.join(Work).join(Medium).filter(
        combined_sv.match(parse_search_query(query))
    )

    results = list(primary_search)
    for art_type in secondary_search:
        if art_type not in primary_search:
            results.append(art_type)
    return results
def search(search):
    """Print a table of applications matching the given search words.

    *search* is an iterable of words that is joined with spaces. Each
    matching application contributes a row of (title, comma-separated
    collection names, author name, hearts); rows are sorted by the last
    column before being echoed as a ``fancy_grid`` table.
    """
    session, db = get_db()
    search = ' '.join(search)
    tsquery = parse_search_query(search)
    apps = session.query(Application).join(User).filter(
        ft_search_vector.match(tsquery)
    )

    rows = [
        (
            app.title,
            ', '.join([collection.name for collection in app.collections]),
            app.author.name,
            app.hearts,
        )
        for app in apps
    ]
    rows.sort(key=operator.itemgetter(-1))
    click.echo(tabulate(rows, tablefmt='fancy_grid'))
def search():
    """Render search results for the ``query`` and ``table`` request args.

    The ``table`` argument selects the model, its item mapper and its search
    vector; any other value aborts with status 500. Two searches are run:
    one with the query as typed and one with the space-separated terms
    joined by ``or``. Items from the second search that are already in the
    first are dropped, and both lists are passed to ``search.html``.
    """
    search_query = request.args.get('query').strip()
    search_terms = search_query.split(' ')
    search_query_or = ' or '.join(search_terms)
    table_query = request.args.get('table')

    # table name -> (model, item mapper, search vector)
    targets = {
        'Celebrity': (Celebrity, celeb_item_mapper, Celebrity.search_vector),
        'Crime': (Crime, crime_item_mapper, Crime.search_vector),
        'Charge': (Charge, charge_item_mapper, Charge.search_vector),
    }
    target = targets.get(table_query)
    if target is None:
        abort(500)
    table, item_mapper, search_vector = target

    and_results = table.query.filter(
        search_vector.match(parse_search_query(search_query))
    ).all()
    or_matches = table.query.filter(
        search_vector.match(parse_search_query(search_query_or))
    ).all()
    # Keep only the OR hits that the stricter search did not already find.
    or_results = [item for item in or_matches if item not in and_results]

    and_items = [item_mapper(item, search_terms) for item in and_results]
    or_items = [item_mapper(item, search_terms) for item in or_results]
    return render_template(
        'search.html',
        and_items=and_items,
        or_items=or_items,
        query=search_query,
        table=table_query,
    )
def query_works(query):
    """Search works directly, then via artist, art type and venue.

    Works returned by the sorted search on ``Work`` come first. Works that
    only match the combined Work/Artist/ArtType/Venue vector are appended
    after them, excluding anything already in the first list.
    """
    combined_sv = (
        Work.search_vector
        | Artist.search_vector
        | ArtType.search_vector
        | Venue.search_vector
    )

    primary_search = Work.query.search(query, sort=True).all()
    secondary_search = (
        Work.query.join(Artist).join(ArtType).join(Venue).filter(
            combined_sv.match(parse_search_query(query))
        )
    )

    results = list(primary_search)
    for work in secondary_search:
        if work not in primary_search:
            results.append(work)
    return results
def feed_articles(self):
    """Return a query for the articles in this feed, newest first.

    The article ids come from the union of two selects: ids of articles
    linked to any author in ``self.authors``, and ids of articles whose
    search vector matches the feed keywords (``self.keywords`` joined with
    ``or`` and parsed).
    """
    # One of the hand-written SQL variants that was tried for this feed:
    #   select article.*, (article.id+0) as dummy_article_id from article
    #     where search_vector @@ to_tsquery('circuit:* & qed:* | qubit:*')
    #   union
    #   select a.*, (a.id+0) as dummy_article_id from article a
    #     join articlesauthors aa on a.id=aa.article_id
    #     where author_id in (...)
    #   order by created desc;
    joined_keywords = ' or '.join([kw.keyword for kw in self.keywords])
    search_query = parse_search_query(joined_keywords)
    alist = [author.id for author in self.authors]

    author_article_ids = select([ArticleAuthor.article_id]).where(
        ArticleAuthor.author_id.in_(alist)
    )
    keyword_article_ids = select([Article.id]).where(
        Article.search_vector.match_tsquery(search_query)
    )
    id_filter = Article.id.in_(author_article_ids.union(keyword_article_ids))

    # The original author's note: the extra interval is important because
    # it changes the way the query plan is computed and makes the query run
    # 100x faster.
    ordering = (Article.created + cast("0", Interval)).desc()
    return Article.query.filter(id_filter).order_by(ordering)
def query_artists(query):
    """Search artists directly, then via related works and art types.

    The artist's own search vector is consulted first (sorted search), so
    those matches lead the result list; artists that only match through the
    combined Artist/Work/ArtType vector are appended afterwards.
    """
    combined_sv = (
        Artist.search_vector | Work.search_vector | ArtType.search_vector
    )

    # Direct matches are collected before related-model matches to keep the
    # results ordered by relevancy.
    primary_search = Artist.query.search(query, sort=True).all()
    secondary_search = Artist.query.join(Work).join(ArtType).filter(
        combined_sv.match(parse_search_query(query))
    )

    results = list(primary_search)
    for artist in secondary_search:
        if artist not in primary_search:
            results.append(artist)
    return results
def finalize_query(self, query, fltr, session, qstring=None, order_by=None):
    """Build the final session query, optionally with full-text ranking.

    :param query: filter expression to apply.
    :param fltr: mapping; when it contains ``'limit'`` that value is applied
        as the row limit.
    :param session: session used to create the query.
    :param qstring: optional free-text search string. When given, it is
        parsed and combined with *query* into a full-text filter on
        ``SearchObjectIndex``.
    :param order_by: optional explicit ordering. When absent and the query
        is ranked, results are ordered by descending rank.
    :returns: ``(query_result, ranked)`` where ``ranked`` tells whether a
        ``rank`` column was added to the query.
    """
    if qstring is None:
        search_query = None
        ft_query = query
    else:
        search_query = parse_search_query(qstring)
        vector_match = SearchObjectIndex.search_vector.match(
            search_query,
            sort=order_by is None,
            postgresql_regconfig='simple',
        )
        ft_query = and_(
            vector_match,
            SearchObjectIndex.so_uuid == ObjectInfoIndex.uuid,
            query,
        )

    def rank_expression():
        # A fresh expression is built on each call, as in the original code.
        return func.ts_rank_cd(
            SearchObjectIndex.search_vector,
            func.to_tsquery(search_query),
        )

    ranked = search_query is not None
    if ranked:
        query_result = (
            session.query(ObjectInfoIndex, rank_expression().label('rank'))
            .options(joinedload(ObjectInfoIndex.search_object))
            .options(joinedload(ObjectInfoIndex.properties))
            .filter(ft_query)
        )
    else:
        query_result = (
            session.query(ObjectInfoIndex)
            .options(joinedload(ObjectInfoIndex.properties))
            .filter(ft_query)
        )

    if order_by is not None:
        query_result = query_result.order_by(order_by)
    elif ranked:
        query_result = query_result.order_by(desc(rank_expression()))

    if 'limit' in fltr:
        query_result = query_result.limit(fltr['limit'])

    return query_result, ranked
def test_escapes_special_chars(self):
    """Trailing ``!#`` is dropped from the parsed term."""
    parsed = parse_search_query('star!#')
    assert parsed == 'star:*'
def test_single_quotes(self):
    """Single quotes surrounding a word do not appear in the result."""
    parsed = parse_search_query("'star'")
    assert parsed == 'star:*'
def test_special_chars(self):
    """A run of special characters after a word is stripped."""
    parsed = parse_search_query("star!:*@@?`")
    assert parsed == 'star:*'
def query_parsed(self):
    """Return ``self.query`` run through ``searchable.parse_search_query``."""
    parse = searchable.parse_search_query
    return parse(self.query)
def test_single_quotes(self):
    """An unmatched leading single quote is ignored."""
    expected = 'star:*'
    assert parse_search_query("'star") == expected
def test_double_quotes(self):
    """An unmatched leading double quote is ignored."""
    expected = 'star:*'
    assert parse_search_query('"star') == expected
def test_and_within_a_token_preceded_by_space(self):
    """``and`` at the start of a word is part of the word, not an operator."""
    parsed = parse_search_query('star andromeda')
    assert parsed == 'star:* & andromeda:*'
def test_multiple_and(self):
    """Repeated ``and`` keywords collapse into a single ``&``."""
    parsed = parse_search_query('star and and wars')
    assert parsed == 'star:* & wars:*'
def test_multiple_spaces_as_and(self):
    """Runs of several spaces between words act as a single AND."""
    # The input previously contained only single spaces, so the behaviour
    # named by this test was never exercised.
    parsed = parse_search_query('star   wars    luke')
    assert parsed == 'star:* & wars:* & luke:*'
def test_multiple_ors(self):
    """Repeated ``or`` keywords collapse into a single ``|``."""
    parsed = parse_search_query('star or or or wars')
    assert parsed == 'star:* | wars:*'
def test_hyphen_between_words(self):
    """A hyphen joining two words yields an AND of both words."""
    parsed = parse_search_query('star-wars')
    assert parsed == 'star:* & wars:*'
def test_numbers(self):
    """Numeric and alphanumeric terms are kept as terms."""
    parsed = parse_search_query('12331 or 12a12')
    assert parsed == '12331:* | 12a12:*'
def test_operator_parsing(self):
    """The ``or`` keyword between two words becomes ``|``."""
    parsed = parse_search_query('star or wars')
    assert parsed == 'star:* | wars:*'
def test_or_within_a_token(self):
    """A word that merely starts with ``or`` is not split on it."""
    parsed = parse_search_query('organs')
    assert parsed == 'organs:*'
def test_or_within_a_token_preceded_by_space(self):
    """``or`` at the start of a second word is not treated as an operator."""
    parsed = parse_search_query('star organs')
    assert parsed == 'star:* & organs:*'
def test_empty_parenthesis(self):
    """An empty pair of parentheses parses to the empty string."""
    parsed = parse_search_query('()')
    assert parsed == ''
def test_not(self):
    """A leading hyphen negates the word that follows it."""
    parsed = parse_search_query('-star')
    assert parsed == '! star:*'
def test_and_within_a_token_preceded_by_space(self):
    """A second word beginning with ``and`` stays a single term."""
    expected = 'star:* & andromeda:*'
    assert parse_search_query('star andromeda') == expected
def test_search_supports_non_english_characters(self):
    """Non-ASCII letters are preserved in the parsed term."""
    # The comparison was previously evaluated without `assert`, so this
    # test could never fail.
    assert parse_search_query(u'ähtäri') == u'ähtäri:*'
def test_or_within_a_token_preceded_by_space(self):
    """A second word beginning with ``or`` stays a single term."""
    expected = 'star:* & organs:*'
    assert parse_search_query('star organs') == expected
def test_multiple_spaces_as_and(self):
    """Several consecutive spaces between words behave like one AND."""
    # Single-space input did not cover the multi-space case this test is
    # named for; the words are now separated by runs of spaces.
    expected = 'star:* & wars:* & luke:*'
    assert parse_search_query('star   wars    luke') == expected
def test_emails_without_email_tokens(self):
    """By default an email address is split into its word parts."""
    # The input had been replaced by a scrubbed placeholder that cannot
    # yield the expected tokens; the address matching them is restored.
    parsed = parse_search_query('john@fastmonkeys.com')
    assert parsed == 'john:* & fastmonkeys:* & com:*'
def test_empty_string(self):
    """An empty query parses to the empty string."""
    # The comparison was previously evaluated without `assert`, so this
    # test could never fail.
    assert parse_search_query('') == ''
def test_and(self):
    """Two space-separated words are combined with ``&``."""
    parsed = parse_search_query('star wars')
    assert parsed == 'star:* & wars:*'
def test_emails(self):
    """An email address is kept as a single prefix-matched token."""
    # Both literals had been replaced by scrubbed placeholders, making the
    # assertion unsatisfiable; a concrete address is used instead.
    parsed = parse_search_query('john@fastmonkeys.com')
    assert parsed == 'john@fastmonkeys.com:*'
def test_or_and(self):
    """``or`` keywords and an implicit AND can be mixed in one query."""
    expected = 'star:* | wars:* & luke:* | solo:*'
    assert parse_search_query('star or wars luke or solo') == expected
def test_or(self):
    """Two words separated by ``or`` are combined with ``|``."""
    expected = 'star:* | wars:*'
    assert parse_search_query('star or wars') == expected
def test_uses_pgsql_wildcard_by_default(self):
    """Without a custom parser each term gets the ``:*`` suffix."""
    expected = 'star:* & wars:*'
    assert parse_search_query('star wars') == expected
def test_space_as_and(self):
    """A single space between words acts as AND."""
    expected = 'star:* & wars:*'
    assert parse_search_query('star wars') == expected
def test_nested_parenthesis(self):
    """Doubly nested parentheses reduce to one group in the output."""
    parsed = parse_search_query('((star wars)) or luke')
    assert parsed == '(star:* & wars:*) | luke:*'
def test_or_and(self):
    """Explicit ``and`` and ``or`` keywords can be mixed in one query."""
    parsed = parse_search_query('star or wars and luke or solo')
    assert parsed == 'star:* | wars:* & luke:* | solo:*'
def test_not(self):
    """A hyphen prefix turns the word into a negated term."""
    expected = '! star:*'
    assert parse_search_query('-star') == expected
def test_nested_parenthesis(self):
    """Redundant nesting of parentheses is flattened to a single group."""
    expected = '(star:* & wars:*) | luke:*'
    assert parse_search_query('((star wars)) or luke') == expected
def test_not_with_parenthesis(self):
    """A hyphen before a parenthesised group negates the whole group."""
    parsed = parse_search_query('-(star wars)')
    assert parsed == '! (star:* & wars:*)'
def test_double_quotes(self):
    """A stray leading double quote does not appear in the result."""
    parsed = parse_search_query('"star')
    assert parsed == 'star:*'
def test_search_supports_non_english_characters(self):
    """Words containing non-English letters are parsed unchanged."""
    # Without `assert` the comparison result was discarded and the test
    # passed unconditionally.
    expected = u'ähtäri:*'
    assert parse_search_query(u'ähtäri') == expected