def query_options():
    """Run a sample piano search on the 'products' index and print the hits.

    The search asks for pianos priced below 5000, ordered by price and then
    by brand (both descending). Each scored document is returned with the
    'model', 'price' and 'description' fields plus two computed expressions:
    'price_per_note' and an 'ivory' snippet of the summary field.
    """
    # Primary sort key is price, secondary is brand.
    ordering = search.SortOptions(expressions=[
        search.SortExpression(
            expression='price',
            direction=search.SortExpression.DESCENDING,
            default_value=0),
        search.SortExpression(
            expression='brand',
            direction=search.SortExpression.DESCENDING,
            default_value=""),
    ])

    # Extra computed fields attached to every scored document.
    computed_fields = [
        search.FieldExpression(name='price_per_note', expression='price/88'),
        search.FieldExpression(
            name='ivory', expression='snippet("ivory", summary, 120)'),
    ]

    options = search.QueryOptions(
        limit=25,
        returned_fields=['model', 'price', 'description'],
        returned_expressions=computed_fields,
        sort_options=ordering)

    piano_query = search.Query(
        query_string="product: piano AND price < 5000", options=options)

    # Execute the search and dump every scored document.
    for scored_document in search.Index('products').search(piano_query):
        print(scored_document)
def get(self):
    """Render the search results page for the ``q`` request parameter.

    Redirects to ``/`` when no query was supplied. Otherwise the 'donation'
    index is searched with the query and ``serp/serp.html`` is rendered with
    the matching documents (template key ``donation``) and the original
    query text (template key ``query``). Every result carries a ``snippet``
    expression computed from its ``comments`` field.
    """
    query = self.request.get('q')
    if not query:
        self.redirect('/')
        return

    # The query text is embedded in a double-quoted string inside the snippet
    # expression. Escape backslashes and double quotes so that input such as
    # a phrase search ("red piano") cannot terminate that string early and
    # make the expression unparseable.
    quoted_query = query.replace('\\', '\\\\').replace('"', '\\"')
    snippet = 'snippet("%s",comments,100)' % quoted_query

    options = search.QueryOptions(
        returned_expressions=[
            search.FieldExpression(name='snippet', expression=snippet),
        ])
    index = search.Index('donation')
    results = index.search(
        query=search.Query(query_string=query, options=options))

    # Fall back to an empty list when the result object is falsy.
    docs = results.results if results else []

    tpl_values = {
        'donation': docs,
        'query': query,
    }
    self.render('serp/serp.html', **tpl_values)
def get_snippet_expressions(self, snippet_words):
    """Construct the `FieldExpression` objects for the fields that should
    be snippeted when this query is run.

    `snippet_words` is the text to build snippets around. It is embedded in
    a double-quoted string inside each `snippet(...)` expression, so
    backslashes and double quotes in it are escaped first; otherwise such
    characters would end the string early and yield an unparseable
    expression.

    Returns a list with one `FieldExpression` per entry of
    `self._snippeted_fields`, each named after the field it snippets. The
    list is empty when no fields are marked for snippeting.
    """
    quoted_words = snippet_words.replace(u'\\', u'\\\\').replace(u'"', u'\\"')
    return [
        search_api.FieldExpression(
            name=field,
            expression=u'snippet("{}", {})'.format(quoted_words, field))
        for field in self._snippeted_fields
    ]
def _buildQuery(self, query, sortq, sort_dict, doc_limit, offsetval):
    """Assemble the search.Query object for a product search.

    `query` is the raw query text (stripped before use), `sortq` names the
    requested ordering, `sort_dict` maps sortable field names to their sort
    expressions, and `doc_limit` / `offsetval` select the result page.
    """
    # Example of a computed field; the application does not depend on it.
    adjusted_price = search.FieldExpression(
        name='adjusted_price', expression='price * 1.08')
    wanted_fields = [
        docs.Product.PID,
        docs.Product.DESCRIPTION,
        docs.Product.CATEGORY,
        docs.Product.AVG_RATING,
        docs.Product.PRICE,
        docs.Product.PRODUCT_NAME,
    ]

    if sortq == 'relevance':
        # Relevance ordering is delegated to the match scorer.
        sortopts = search.SortOptions(match_scorer=search.MatchScorer())
    else:
        # The chosen field is the primary key. Average rating breaks ties,
        # except when rating itself is the primary key, in which case price
        # does. Direction and defaults come from the entries of sort_dict.
        if sortq == docs.Product.AVG_RATING:
            tie_breaker = docs.Product.PRICE
        else:
            tie_breaker = docs.Product.AVG_RATING
        sortopts = search.SortOptions(
            expressions=[sort_dict.get(sortq), sort_dict.get(tie_breaker)])

    stripped_query = query.strip()
    options = search.QueryOptions(
        limit=doc_limit,
        offset=offsetval,
        sort_options=sortopts,
        snippeted_fields=[docs.Product.DESCRIPTION],
        returned_expressions=[adjusted_price],
        returned_fields=wanted_fields)
    return search.Query(query_string=stripped_query, options=options)
def search_user(name, admin=False):
    """Search the user index by name and return the search results.

    `name` is interpolated verbatim into both the query string and the
    snippet expression. When `admin` is true the query is additionally
    restricted to documents whose group matches ADMIN. Results are sorted by
    name in ascending order and carry a `name_snippet` expression.
    """
    query_str = 'name = %s' % name
    if admin:
        query_str += ' AND group: ADMIN'

    # Single sort key: name, ascending. The direction constant is reached
    # through the `search` module like every other Search API name used here;
    # a bare `SortExpression` is only defined if it was imported separately.
    name_ascending = search.SortExpression(
        expression='name',
        direction=search.SortExpression.ASCENDING)
    sort_opts = search.SortOptions(expressions=[name_ascending])

    # Computed field holding a short snippet of the name around the match.
    name_snippet = search.FieldExpression(
        name='name_snippet',
        expression='snippet("%s", name, 20)' % name)

    options = search.QueryOptions(
        sort_options=sort_opts,
        returned_expressions=[name_snippet])
    query = search.Query(query_str, options)
    return user_index.search(query)
def get(self):
    """Let users search the indexed messages via the ``q`` request parameter.

    Redirects to ``/`` when the query is empty. Otherwise the 'messages'
    index is searched and ``search_results/search_results.html`` is rendered
    with the matching documents (template key ``messages``) and the query
    text (template key ``query``). Every result carries a ``snippet``
    expression computed from its ``content`` field.
    """
    search_query = self.request.get('q')

    # Nothing to search for: go back to the front page.
    if search_query == '':
        self.redirect('/')
        return

    # The query text is embedded in a double-quoted string inside the snippet
    # expression. Escape backslashes and double quotes so that input such as
    # a phrase search ("hello world") cannot terminate that string early and
    # make the expression unparseable.
    quoted_query = search_query.replace('\\', '\\\\').replace('"', '\\"')
    snippet = 'snippet("%s", content, 140)' % quoted_query

    query_options = search.QueryOptions(returned_expressions=[
        search.FieldExpression(name='snippet', expression=snippet),
    ])
    index = search.Index('messages')
    results = index.search(query=search.Query(
        query_string=search_query, options=query_options))

    # Fall back to an empty list when the result object is falsy.
    messages = results.results if results else []

    display_values = {'messages': messages, 'query': search_query}
    self.render('search_results/search_results.html', **display_values)
def add_expression(self, name, expression):
    """Return a clone of this object with an extra returned expression.

    The clone comes from `self._clone()`; a `FieldExpression` built from
    `name` and `expression` is appended to the clone's
    `_returned_expressions` list before the clone is handed back.
    """
    duplicate = self._clone()
    field_expression = search_api.FieldExpression(
        name=name,
        expression=expression,
    )
    duplicate._returned_expressions.append(field_expression)
    return duplicate
def _render(self):
    """Render the advanced team search page.

    When no year is set, or none of the award / seed / playoff / CAD filters
    is set, the page is rendered as a fresh search with no results.
    Otherwise a query is built from the active filters, run against the team
    awards index, and the page is rendered with the matching Team models and
    the per-team expression values returned by the search.
    """
    new_search = not self._year or not (
        self._award_types or self._seed or self._playoff_level
        or self._cad_model)

    # Values shown for a fresh search; overwritten below when a query runs.
    result_models = []
    num_results = 0
    result_expressions = None

    if not new_search:
        search_index = search.Index(name=SearchHelper.TEAM_AWARDS_INDEX)

        # Filter clauses, later joined with AND.
        clauses = ['year={}'.format(self._year)]
        award_clause = ' OR '.join(
            'award={}'.format(award_type) for award_type in self._award_types)
        if award_clause:
            clauses.append(award_clause)

        # Each active "count" filter is (sort field it answers to, name of
        # the returned expression, indexed field holding the count).
        count_filters = []
        if self._seed:
            count_filters.append(
                ('seed', 'seed_count', 'seed_{}'.format(self._seed)))
        if self._playoff_level:
            count_filters.append((
                'playoff_level',
                'comp_level_count',
                'comp_level_{}'.format(self.PLAYOFF_MAP[self._playoff_level]),
            ))

        sort_expressions = []
        returned_expressions = []
        for sort_key, expression_name, field_name in count_filters:
            clauses.append('{}>0'.format(field_name))
            returned_expressions.append(search.FieldExpression(
                name=expression_name, expression=field_name))
            if self._sort_field == sort_key:
                sort_expressions.append(search.SortExpression(
                    expression=field_name,
                    direction=search.SortExpression.DESCENDING))

        if self._cad_model:
            clauses.append('has_cad=1')

        # Ties are broken by 'number', ascending.
        sort_expressions.append(search.SortExpression(
            expression='number',
            direction=search.SortExpression.ASCENDING))

        options = search.QueryOptions(
            limit=self.PAGE_SIZE,
            number_found_accuracy=10000,  # Larger than the number of possible results
            offset=self.PAGE_SIZE * self._page,
            sort_options=search.SortOptions(expressions=sort_expressions),
            returned_expressions=returned_expressions)
        found = search_index.search(search.Query(
            query_string=' AND '.join(clauses), options=options))
        num_results = found.number_found

        # The team key is the part of the doc id before the first underscore.
        team_model_keys = []
        result_expressions = defaultdict(lambda: defaultdict(float))
        for hit in found.results:
            team_key = hit.doc_id.split('_')[0]
            team_model_keys.append(ndb.Key('Team', team_key))
            for returned in hit.expressions:
                result_expressions[team_key][returned.name] = returned.value

        pending = ndb.get_multi_async(team_model_keys)
        result_models = [future.get_result() for future in pending]

    self.template_values.update({
        'valid_years': self.VALID_YEARS,
        'valid_award_types': self.VALID_AWARD_TYPES,
        'num_special_awards': len(SORT_ORDER),
        'valid_seeds': self.VALID_SEEDS,
        'seed': self._seed,
        'playoff_level': self._playoff_level,
        'page_size': self.PAGE_SIZE,
        'max_results': self.MAX_RESULTS,
        'page': self._page,
        'year': self._year,
        'award_types': self._award_types,
        'cad_model': self._cad_model,
        'new_search': new_search,
        'num_results': num_results,
        'capped_num_results': min(self.MAX_RESULTS, num_results),
        'result_models': result_models,
        'result_expressions': result_expressions,
        'sort_field': self._sort_field,
    })
    return jinja2_engine.render('advanced_search.html', self.template_values)
def _internal_search(cls, query_string, explicit_query_string_overrides=None, cursor_support=False,
                     existing_cursor=None, limit=20, number_found_accuracy=None, offset=None, sort_options=None,
                     returned_fields=None, ids_only=False, snippeted_fields=None, returned_expressions=None,
                     sort_limit=1000, *args, **kwargs):
    """
    Query search records in the search index. Essentially the params are the same as for GAE Search API.
    The exceptions are cursor, returned_expressions and sort_options.

    'explicit_query_string_overrides' is an iterable of tuples of the form ('property', 'value') which can be
    used to explicitly overwrite values from the supplied query string. This is useful if you have some custom
    filters that must only have certain values. It can also be used to prevent searches occurring with
    restricted values; useful as part of permission systems. Each pair is appended to the query string as
    property="value" (commas and plus signs in the value are backslash-escaped); the supplied query string
    itself is not rewritten.

    Cursor is replaced by two args - cursor_support and existing_cursor. Existing cursor is the websafe version
    of a cursor returned by a previous query. Obviously if cursor_support is False then we don't process the
    cursor.

    Both returned_expressions and sort_options are lists of tuples instead of passing in
    search.FieldExpressions or search.SortOptions (as this would leak implementation to the client).

    returned_expression = ('name_of_expression', 'expression')
    sort_option = ('sort_expression', 'direction', 'default_value')

    See https://cloud.google.com/appengine/docs/python/search/options for more detailed explanations.

    Sort limit should be overridden if possible matches exceeds 1000. It should be set to a value higher, or
    equal to, the maximum number of results that could be found for a given search.

    :param query_string: base query text
    :param explicit_query_string_overrides: iterable of ('property', 'value') pairs appended to the query
    :param cursor_support: when true, a cursor is attached to the query options
    :param existing_cursor: websafe cursor string from a previous query (only used with cursor_support)
    :param limit:
    :param number_found_accuracy:
    :param offset:
    :param sort_options: list of ('sort_expression', 'direction', 'default_value') tuples
    :param returned_fields:
    :param ids_only:
    :param snippeted_fields:
    :param returned_expressions: list of ('name_of_expression', 'expression') tuples
    :param sort_limit: limit passed to search.SortOptions
    :param args: unused
    :param kwargs: unused
    :returns: the value returned by cls.index.search_async for the compiled query
    :raises search.Error:
    :raises TypeError:
    :raises ValueError:
    """
    cursor = None
    compiled_sort_options = None
    compiled_field_expressions = None

    if explicit_query_string_overrides:
        # TODO: use regex to split up the query string and swap out/append the explicit params. At the moment
        # multiple values could be passed for the same category, leading to possible data leaks
        query_fragments = []
        for explicit_param in explicit_query_string_overrides:
            # '\\,' and '\\+' spell out the backslash explicitly; the resulting text is a backslash followed
            # by the character, without relying on an unrecognised escape sequence in the literal.
            escaped_value = explicit_param[1].replace(',', '\\,').replace('+', '\\+').strip()
            query_fragments.append(u'{}="{}"'.format(explicit_param[0], escaped_value))

        explicit_string = u' AND '.join(query_fragments)
        if explicit_string:
            query_string = u'{} {}'.format(query_string, explicit_string)

    # The Search API objects validate their arguments on construction, so they are built inside the try block:
    # a malformed cursor, sort option, expression or query string is then logged as well, not only a failure
    # of the search_async call itself.
    try:
        if cursor_support:
            if existing_cursor:
                cursor = search.Cursor(web_safe_string=existing_cursor)
            else:
                cursor = search.Cursor()

        if sort_options:
            parsed_options = [search.SortExpression(expression=sort_option[0],
                                                    direction=sort_option[1],
                                                    default_value=sort_option[2]) for sort_option in sort_options]
            compiled_sort_options = search.SortOptions(expressions=parsed_options, limit=sort_limit)

        if returned_expressions:
            compiled_field_expressions = [search.FieldExpression(name=field_exp[0], expression=field_exp[1])
                                          for field_exp in returned_expressions]

        options = search.QueryOptions(
            ids_only=ids_only,
            limit=limit,
            snippeted_fields=snippeted_fields,
            number_found_accuracy=number_found_accuracy,
            returned_fields=returned_fields,
            returned_expressions=compiled_field_expressions,
            sort_options=compiled_sort_options,
            offset=offset,
            cursor=cursor,
        )
        query = search.Query(query_string=query_string, options=options)
        return cls.index.search_async(query=query)
    except (search.Error, TypeError, ValueError):
        logging.exception(u"Query {0} in {1} failed.".format(query_string, cls.index.name))
        raise
def _render(self):
    """Render the "nearby" page for the requested location and range.

    When a location is given and can be geocoded, the team or event location
    index (depending on the search type) is searched for documents within
    the requested range, ordered by distance. The matching models and their
    distances are passed to the template. Without a usable location the page
    is rendered with no results.
    """
    year, location, range_limit, search_type, page = self._get_params()
    searching_teams = search_type == 'teams'

    # Values used when no search is run. Note that `distances` is a list
    # here but a dict keyed by doc id once a search has been performed.
    num_results = 0
    results = []
    distances = []

    lat_lon = None
    if location:
        lat_lon, _ = LocationHelper.get_lat_lon(location, geocode=True)

    if lat_lon:
        lat, lon = lat_lon
        dist_expr = 'distance(location, geopoint({}, {}))'.format(lat, lon)

        # Events are additionally restricted to the requested year.
        query_string = '{} < {}'.format(
            dist_expr, range_limit * self.METERS_PER_MILE)
        if not searching_teams:
            query_string += ' AND year={}'.format(year)

        nearest_first = search.SortOptions(expressions=[
            search.SortExpression(
                expression=dist_expr,
                direction=search.SortExpression.ASCENDING),
        ])
        query = search.Query(
            query_string=query_string,
            options=search.QueryOptions(
                limit=self.PAGE_SIZE,
                offset=self.PAGE_SIZE * page,
                sort_options=nearest_first,
                returned_expressions=[
                    search.FieldExpression(
                        name='distance', expression=dist_expr),
                ],
            ))

        index_name = "teamLocation" if searching_teams else "eventLocation"
        found = search.Index(name=index_name).search(query)
        num_results = found.number_found

        distances = {}
        model_keys = []
        participation_counts = {}
        for hit in found.results:
            # The only returned expression is the distance; it is divided by
            # METERS_PER_MILE (presumably converting meters to miles).
            distances[hit.doc_id] = (
                hit.expressions[0].value / self.METERS_PER_MILE)
            if searching_teams:
                # Start a count (capped at 1) of the team's EventTeam rows
                # for the requested year; used below to filter the teams.
                participation_counts[hit.doc_id] = EventTeam.query(
                    EventTeam.team == ndb.Key('Team', hit.doc_id),
                    EventTeam.year == year).count_async(
                        limit=1, keys_only=True)
                model_keys.append(ndb.Key('Team', hit.doc_id))
            else:
                model_keys.append(ndb.Key('Event', hit.doc_id))

        model_futures = ndb.get_multi_async(model_keys)
        if searching_teams:
            # Keep only teams with at least one EventTeam row for the year.
            results = [
                model_future.get_result()
                for model_future, team_key in zip(model_futures, model_keys)
                if participation_counts[team_key.id()].get_result() != 0
            ]
        else:
            results = [
                model_future.get_result() for model_future in model_futures
            ]

    self.template_values.update({
        'valid_years': self.VALID_YEARS,
        'valid_ranges': self.VALID_RANGES,
        'page_size': self.PAGE_SIZE,
        'page': page,
        'year': year,
        'location': location,
        'range_limit': range_limit,
        'search_type': search_type,
        'num_results': num_results,
        'results': results,
        'distances': distances,
    })
    return jinja2_engine.render('nearby.html', self.template_values)
def add_expression(self, name, expression):
    """Append a returned expression to this object and return the object.

    A `FieldExpression` built from `name` and `expression` is added to
    `self._returned_expressions` in place. Returning `self` allows calls to
    be chained.
    """
    field_expression = search_api.FieldExpression(
        name=name,
        expression=expression,
    )
    self._returned_expressions.append(field_expression)
    return self
def _last_docs_first(search_results, count=4):
    """Return up to `count` documents from the end of the result list, last one first."""
    return list(reversed(search_results.results))[:count]


def _average_score(scores):
    """Return the mean of `scores`, or a large negative sentinel when there are none."""
    if not scores:
        return -9999999
    return sum(scores) / float(len(scores))


def search_line(line, index):
    """Find the best-scoring query derived from `line` and return its hits.

    Every candidate query produced by `generate_queries_for_line` is run
    against `index`. The candidate with the highest average relevance score
    (taken over at most four of its documents) is run again with snippets of
    the "doc_text" field.

    Returns `[avg_score, [(doc_id, snippet), ...], start, end]` for the best
    candidate, or None when there are no candidate queries, when a search
    raises `search.Error`, or when the final average score is negative.

    Raises AssertionError when a candidate query matches more documents than
    the search returned.
    """
    line = line + " I"
    queries_with_text = generate_queries_for_line(line)
    if not queries_with_text:
        return None

    # Sort by match score and expose that score as a returned expression so
    # it can be read back from each document.
    sort_opts = search.SortOptions(match_scorer=search.MatchScorer())
    relevance_field_expr = search.FieldExpression(
        name="relevance_score", expression="_score")
    query_opts = search.QueryOptions(
        sort_options=sort_opts,
        returned_fields=["doc_id_text"],
        returned_expressions=[relevance_field_expr])

    # Run every candidate query and record its average relevance score.
    query_results = []
    for query_text, start, end in queries_with_text:
        query = search.Query(
            query_string=query_text.strip(), options=query_opts)
        try:
            search_results = index.search(query)
        except search.Error:
            return None

        # Raised explicitly (instead of via `assert`) so that the check is
        # not stripped when Python runs with -O.
        if search_results.number_found != len(search_results.results):
            raise AssertionError("Too many documents")

        associated_docs = _last_docs_first(search_results)
        doc_scores = []
        for doc in associated_docs:
            for expr in doc.expressions:
                if expr.name != "relevance_score":
                    continue
                try:
                    doc_scores.append(int(expr.value))
                except (TypeError, ValueError, OverflowError):
                    # Unusable score value: keep looking at the remaining
                    # expressions of this document.
                    continue
                break

        logging.info(doc_scores)
        query_results.append((
            _average_score(doc_scores),
            [doc.doc_id for doc in associated_docs],
            query_text,
            start,
            end,
        ))

    # Choose the candidate with the highest average score.
    # NOTE(review): debug output kept from the original implementation.
    print(query_results)
    best_query = max(query_results, key=lambda x: x[0])

    # Re-run the best candidate, this time asking for doc_text snippets.
    final_query = search.Query(
        query_string=best_query[2].strip(),
        options=search.QueryOptions(
            sort_options=sort_opts,
            returned_fields=["doc_id_text"],
            snippeted_fields=["doc_text"]))
    try:
        best_results = index.search(final_query)
    except search.Error:
        return None

    associated_docs = _last_docs_first(best_results)
    avg_score = _average_score(
        [doc.sort_scores[0] for doc in associated_docs])

    # Pair each document id with its doc_text snippet; documents without
    # such an expression are left out.
    ids_and_blurbs = []
    for doc in associated_docs:
        for expr in doc.expressions:
            if expr.name == "doc_text":
                ids_and_blurbs.append((doc.doc_id, expr.value))
                break

    # TODO: still need to add KhanAcademy here
    if avg_score < 0:
        return None
    return [avg_score, ids_and_blurbs, best_query[3], best_query[4]]