예제 #1
0
파일: snippets.py 프로젝트: tkvclub01/seo
def query_options():
    """Run a sample search on the 'products' index and print every hit.

    The query looks for pianos with a price below 5000, sorts on price and
    then brand (both descending), limits the result to 25 documents and asks
    for two computed fields ('price_per_note' and 'ivory') in addition to the
    'model', 'price' and 'description' fields.
    """
    products_index = search.Index('products')

    # Sort on price first, then brand; both descending.
    descending = search.SortExpression.DESCENDING
    ordering = search.SortOptions(expressions=[
        search.SortExpression(
            expression='price', direction=descending, default_value=0),
        search.SortExpression(
            expression='brand', direction=descending, default_value=""),
    ])

    # Field expressions that add new fields to the scored documents.
    computed_fields = [
        search.FieldExpression(name='price_per_note', expression='price/88'),
        search.FieldExpression(
            name='ivory', expression='snippet("ivory", summary, 120)'),
    ]

    # Bundle the sort options and expressions into the query options.
    options = search.QueryOptions(
        limit=25,
        returned_fields=['model', 'price', 'description'],
        returned_expressions=computed_fields,
        sort_options=ordering)

    # Build the query, run it and dump each scored document.
    piano_query = search.Query(
        query_string="product: piano AND price < 5000", options=options)
    for scored_document in products_index.search(piano_query):
        print(scored_document)
예제 #2
0
 def get(self):
     """Search the 'donation' index for the ``q`` request parameter.

     Redirects to '/' when ``q`` is missing or empty; otherwise renders
     'serp/serp.html' with the matching documents and the query text.
     """
     query = self.request.get('q')
     if not query:
         # Nothing to search for.
         self.redirect('/')
         return

     donation_index = search.Index('donation')

     # Ask for a 'snippet' expression built from the 'comments' field.
     snippet_expression = search.FieldExpression(
         name='snippet',
         expression='snippet("%s",comments,100)' % query)
     options = search.QueryOptions(
         returned_expressions=[snippet_expression])
     donation_query = search.Query(query_string=query, options=options)
     results = donation_index.search(query=donation_query)

     # Fall back to an empty list when the result object is falsy.
     docs = results.results if results else []
     self.render('serp/serp.html', donation=docs, query=query)
예제 #3
0
파일: query.py 프로젝트: tobyhammond/search
 def get_snippet_expressions(self, snippet_words):
     """Build one snippet `FieldExpression` per field in
     `self._snippeted_fields`.

     Each expression is named after its field and has the form
     `snippet("<snippet_words>", <field>)`. Returns the expressions as a
     list, in the iteration order of `self._snippeted_fields`.
     """
     return [
         search_api.FieldExpression(
             name=field_name,
             expression=u'snippet("{}", {})'.format(snippet_words, field_name))
         for field_name in self._snippeted_fields
     ]
예제 #4
0
    def _buildQuery(self, query, sortq, sort_dict, doc_limit, offsetval):
        """Assemble and return the search.Query for a product search.

        `query` is the raw query string (stripped before use), `sortq` is
        either 'relevance' or a key of `sort_dict`, `doc_limit` and
        `offsetval` become the query's limit and offset.
        """
        # Computed and returned fields are examples only; the application
        # does not need them to function correctly.
        adjusted_price = search.FieldExpression(name='adjusted_price',
                                                expression='price * 1.08')
        fields_to_return = [
            docs.Product.PID, docs.Product.DESCRIPTION, docs.Product.CATEGORY,
            docs.Product.AVG_RATING, docs.Product.PRICE,
            docs.Product.PRODUCT_NAME
        ]

        if sortq == 'relevance':
            # Relevance sorting uses the Match scorer.
            sortopts = search.SortOptions(match_scorer=search.MatchScorer())
        else:
            # The selected field is the first sort dimension.  The second is
            # the average rating, except when sorting on rating itself, in
            # which case price is used.  Direction and default come from the
            # entries of 'sort_dict'.
            if sortq == docs.Product.AVG_RATING:
                secondary_key = docs.Product.PRICE
            else:
                secondary_key = docs.Product.AVG_RATING
            sortopts = search.SortOptions(expressions=[
                sort_dict.get(sortq),
                sort_dict.get(secondary_key),
            ])

        # Everything except the sort options is shared by both branches.
        stripped_query = query.strip()
        options = search.QueryOptions(
            limit=doc_limit,
            offset=offsetval,
            sort_options=sortopts,
            snippeted_fields=[docs.Product.DESCRIPTION],
            returned_expressions=[adjusted_price],
            returned_fields=fields_to_return)
        return search.Query(query_string=stripped_query, options=options)
예제 #5
0
def search_user(name, admin=False):
    """Search ``user_index`` for users by name, sorted by name ascending.

    Args:
        name: Text interpolated into the ``name = ...`` query string and
            into the ``snippet(...)`` expression.
        admin: When true, the query is additionally restricted with
            ``group: ADMIN``.

    Returns:
        Whatever ``user_index.search`` returns for the built query.
    """
    # NOTE(review): ``name`` is interpolated into the query and the snippet
    # expression without quoting or escaping -- confirm callers sanitise it.
    query_str = 'name = %s' % name
    if admin:
        query_str += ' AND group: ADMIN'

    # Single sort key: name, ascending.  The constant is reached through the
    # ``search`` module, like every other Search API name in this function.
    sort_by_name = search.SortExpression(
        expression='name',
        direction=search.SortExpression.ASCENDING)
    sort_opts = search.SortOptions(expressions=[sort_by_name])

    # Return a snippet of the 'name' field under the name 'name_snippet'.
    name_snippet = search.FieldExpression(
        name='name_snippet',
        expression='snippet("%s", name, 20)' % name)
    options = search.QueryOptions(sort_options=sort_opts,
                                  returned_expressions=[name_snippet])

    query = search.Query(query_string=query_str, options=options)
    return user_index.search(query)
예제 #6
0
    def get(self):
        """Search the 'messages' index for the ``q`` request parameter.

        Redirects to '/' when ``q`` is the empty string; otherwise renders
        'search_results/search_results.html' with the matching messages and
        the query text.
        """
        search_query = self.request.get('q')
        if search_query == '':
            # No search query was supplied.
            self.redirect('/')
            return

        # The search runs against the messages index.
        messages_index = search.Index('messages')

        # Request a 'snippet' expression built from the 'content' field.
        snippet_expression = search.FieldExpression(
            name='snippet',
            expression='snippet("%s", content, 140)' % search_query)
        query_options = search.QueryOptions(
            returned_expressions=[snippet_expression])
        message_query = search.Query(
            query_string=search_query, options=query_options)
        results = messages_index.search(query=message_query)

        # Use an empty list when the result object is falsy.
        messages = results.results if results else []

        # Show whatever was found on the search results page.
        self.render('search_results/search_results.html',
                    messages=messages, query=search_query)
예제 #7
0
파일: query.py 프로젝트: tobyhammond/search
 def add_expression(self, name, expression):
     """Return a clone of this object with one more returned expression.

     The clone comes from `self._clone()`; a `FieldExpression` built from
     `name` and `expression` is appended to its `_returned_expressions`.
     """
     clone = self._clone()
     clone._returned_expressions.append(
         search_api.FieldExpression(name=name, expression=expression))
     return clone
예제 #8
0
    def _render(self):
        """Render the 'advanced_search.html' page.

        When the request carries no usable filters (see ``new_search``) an
        empty result set is rendered. Otherwise a query is built from the
        year, award types, seed, playoff level and CAD-model filters, run
        against the ``SearchHelper.TEAM_AWARDS_INDEX`` index, and the matching
        Team entities are fetched through ndb.

        Returns:
            The output of ``jinja2_engine.render`` for 'advanced_search.html'.
        """
        # A search needs a year plus at least one of the other filters.
        new_search = not self._year or (not self._award_types and not self._seed and not self._playoff_level and not self._cad_model)
        if new_search:
            result_models = []
            num_results = 0
            result_expressions = None
        else:
            # Construct query string
            sort_options_expressions = []
            returned_expressions = []
            partial_queries = []

            search_index = search.Index(name=SearchHelper.TEAM_AWARDS_INDEX)

            partial_queries.append('year={}'.format(self._year))
            # Any of the selected award types may match (joined with OR).
            award_filter = ' OR '.join(['award={}'.format(award_type) for award_type in self._award_types])
            if award_filter:
                partial_queries.append(award_filter)

            if self._seed:
                # Require a positive value in the per-seed field and return
                # that field's value as 'seed_count'.
                seed_field_name = 'seed_{}'.format(self._seed)
                partial_queries.append('{}>0'.format(seed_field_name))
                returned_expressions.append(search.FieldExpression(
                    name='seed_count', expression=seed_field_name))

                if self._sort_field == 'seed':
                    sort_options_expressions.append(
                        search.SortExpression(
                            expression=seed_field_name,
                            direction=search.SortExpression.DESCENDING))

            if self._playoff_level:
                # Same pattern as the seed filter, using the field name
                # derived from PLAYOFF_MAP.
                comp_level_name = 'comp_level_{}'.format(self.PLAYOFF_MAP[self._playoff_level])
                partial_queries.append('{}>0'.format(comp_level_name))
                returned_expressions.append(search.FieldExpression(
                    name='comp_level_count', expression=comp_level_name))

                if self._sort_field == 'playoff_level':
                    sort_options_expressions.append(
                        search.SortExpression(
                            expression=comp_level_name,
                            direction=search.SortExpression.DESCENDING))

            if self._cad_model:
                partial_queries.append('has_cad=1')

            # All partial filters must hold at once.
            query_string = ' AND ' .join(partial_queries)

            # Tiebreak sorting by number
            sort_options_expressions.append(
                search.SortExpression(
                    expression='number',
                    direction=search.SortExpression.ASCENDING))

            # Perform query
            query = search.Query(
                query_string=query_string,
                options=search.QueryOptions(
                    limit=self.PAGE_SIZE,
                    number_found_accuracy=10000,  # Larger than the number of possible results
                    offset=self.PAGE_SIZE * self._page,
                    sort_options=search.SortOptions(
                        expressions=sort_options_expressions
                    ),
                    returned_expressions=returned_expressions
                )
            )

            docs = search_index.search(query)
            num_results = docs.number_found
            model_keys = []
            # Maps team key -> expression name -> expression value; missing
            # entries default to 0.0 through the nested defaultdict(float).
            result_expressions = defaultdict(lambda: defaultdict(float))
            for result in docs.results:
                # The part of the doc id before the first '_' is used as the
                # Team key id.
                team_key = result.doc_id.split('_')[0]
                model_keys.append(ndb.Key('Team', team_key))
                for expression in result.expressions:
                    result_expressions[team_key][expression.name] = expression.value

            # Issue all entity fetches first, then collect their results.
            model_futures = ndb.get_multi_async(model_keys)

            result_models = [model_future.get_result() for model_future in model_futures]

        self.template_values.update({
            'valid_years': self.VALID_YEARS,
            'valid_award_types': self.VALID_AWARD_TYPES,
            'num_special_awards': len(SORT_ORDER),
            'valid_seeds': self.VALID_SEEDS,
            'seed': self._seed,
            'playoff_level': self._playoff_level,
            'page_size': self.PAGE_SIZE,
            'max_results': self.MAX_RESULTS,
            'page': self._page,
            'year': self._year,
            'award_types': self._award_types,
            'cad_model': self._cad_model,
            'new_search': new_search,
            'num_results': num_results,
            # The count shown is capped at MAX_RESULTS.
            'capped_num_results': min(self.MAX_RESULTS, num_results),
            'result_models': result_models,
            'result_expressions': result_expressions,
            'sort_field': self._sort_field,
        })

        return jinja2_engine.render('advanced_search.html', self.template_values)
예제 #9
0
        def _internal_search(cls, query_string, explicit_query_string_overrides=None, cursor_support=False,
                             existing_cursor=None, limit=20, number_found_accuracy=None, offset=None, sort_options=None,
                             returned_fields=None, ids_only=False, snippeted_fields=None, returned_expressions=None,
                             sort_limit=1000, *args, **kwargs):
            """
            Query search records in the search index. Essentially the params are the same as for GAE Search API.
            The exceptions are cursor, returned_expressions and sort_options.

            'explicit_query_string_overrides' is an iterable of tuples of the form ('property', 'value') which can be
            used to explicitly overwrite values from the supplied query string. This is useful if you have some custom
            filters that must only have certain values. It can also be used to prevent searches occurring with
            restricted values; useful as part of permission systems.

            Cursor is replaced by two args - cursor_support and existing_cursor. Existing cursor is the websafe version
            of a cursor returned by a previous query. Obviously if cursor_support is False then we don't process the
            cursor.

            Both returned_expressions and sort_options are lists of tuples instead of passing in search.FieldExpressions
            or search.SortOptions (as this would leak implementation to the client).

            returned_expression = ('name_of_expression', 'expression')
            sort_option = ('sort_expression', 'direction', 'default_value')

            See https://cloud.google.com/appengine/docs/python/search/options for more detailed explanations.

            Sort limit should be overridden if possible matches exceeds 1000. It should be set to a value higher, or
            equal to, the maximum number of results that could be found for a given search.

            :param query_string: base query string; override fragments are appended to it after a space.
            :param explicit_query_string_overrides: iterable of ('property', 'value') tuples, each rendered as
                property="value" and joined with ' AND '.
            :param cursor_support: when true a search.Cursor is attached to the query options.
            :param existing_cursor: websafe cursor string; only used when cursor_support is true.
            :param limit: passed through to search.QueryOptions.
            :param number_found_accuracy: passed through to search.QueryOptions.
            :param offset: passed through to search.QueryOptions.
            :param sort_options: list of ('sort_expression', 'direction', 'default_value') tuples.
            :param returned_fields: passed through to search.QueryOptions.
            :param ids_only: passed through to search.QueryOptions.
            :param snippeted_fields: passed through to search.QueryOptions.
            :param returned_expressions: list of ('name_of_expression', 'expression') tuples.
            :param sort_limit: limit given to search.SortOptions when sort_options is supplied.
            :param args: accepted but not used by this method.
            :param kwargs: accepted but not used by this method.
            :returns: whatever cls.index.search_async returns for the built query.
            :raises search.Error:
            :raises TypeError:
            :raises ValueError:
            """

            cursor = None
            compiled_sort_options = None
            compiled_field_expressions = None

            if explicit_query_string_overrides:
                # TODO: use regex to split up the query string and swap out/append the explicit params. At the moment
                # multiple values could be passed for the same category, leading to possible data leaks
                query_fragments = []

                for explicit_param in explicit_query_string_overrides:
                    # Backslash-escape commas and plus signs in the value. The backslashes are written as '\\'
                    # so the literals are valid escape sequences on every Python version.
                    escaped_value = explicit_param[1].replace(',', '\\,').replace('+', '\\+').strip()
                    query_fragments.append(u'{}="{}"'.format(explicit_param[0], escaped_value))

                explicit_string = u' AND '.join(query_fragments)
                if explicit_string:
                    query_string = u'{} {}'.format(query_string, explicit_string)

            if cursor_support:
                if existing_cursor:
                    # Continue from the position encoded in the websafe string.
                    cursor = search.Cursor(web_safe_string=existing_cursor)
                else:
                    cursor = search.Cursor()

            if sort_options:
                # Translate the plain tuples into Search API sort expressions.
                parsed_options = [search.SortExpression(expression=sort_option[0],
                                                        direction=sort_option[1],
                                                        default_value=sort_option[2]) for sort_option in sort_options]
                compiled_sort_options = search.SortOptions(expressions=parsed_options, limit=sort_limit)

            if returned_expressions:
                # Translate the plain tuples into Search API field expressions.
                compiled_field_expressions = [search.FieldExpression(name=field_exp[0], expression=field_exp[1]) for
                                              field_exp in returned_expressions]

            options = search.QueryOptions(
                ids_only=ids_only,
                limit=limit,
                snippeted_fields=snippeted_fields,
                number_found_accuracy=number_found_accuracy,
                returned_fields=returned_fields,
                returned_expressions=compiled_field_expressions,
                sort_options=compiled_sort_options,
                offset=offset,
                cursor=cursor,
            )

            query = search.Query(query_string=query_string, options=options)
            try:
                # NOTE(review): only exceptions raised by the search_async call itself are logged here; errors
                # that surface when the returned object is resolved are presumably the caller's to handle.
                return cls.index.search_async(query=query)
            except (search.Error, TypeError, ValueError):
                logging.exception(u"Query {0} in {1} failed.".format(query_string, cls.index.name))
                raise
예제 #10
0
    def _render(self):
        """Render the 'nearby.html' page for a location search.

        Reads year, location, range limit, search type and page from
        ``self._get_params()``. When the location can be geocoded, the
        'teamLocation' or 'eventLocation' index is searched for documents
        within the range limit, sorted by distance, and the matching Team or
        Event entities are fetched through ndb.

        Returns:
            The output of ``jinja2_engine.render`` for 'nearby.html'.
        """
        year, location, range_limit, search_type, page = self._get_params()

        num_results = 0
        results = []
        # NOTE: starts out as a list but is rebound to a dict (doc id ->
        # distance) below once a search actually runs.
        distances = []
        if location:
            lat_lon, _ = LocationHelper.get_lat_lon(location, geocode=True)
            if lat_lon:
                lat, lon = lat_lon

                # The same distance expression is used for filtering,
                # sorting and as a returned expression.
                dist_expr = 'distance(location, geopoint({}, {}))'.format(
                    lat, lon)
                # range_limit is scaled by METERS_PER_MILE (presumably it is
                # given in miles); event searches are also filtered by year.
                if search_type == 'teams':
                    query_string = '{} < {}'.format(
                        dist_expr, range_limit * self.METERS_PER_MILE)
                else:
                    query_string = '{} < {} AND year={}'.format(
                        dist_expr, range_limit * self.METERS_PER_MILE, year)

                offset = self.PAGE_SIZE * page

                query = search.Query(
                    query_string=query_string,
                    options=search.QueryOptions(
                        limit=self.PAGE_SIZE,
                        offset=offset,
                        sort_options=search.SortOptions(expressions=[
                            search.SortExpression(
                                expression=dist_expr,
                                direction=search.SortExpression.ASCENDING)
                        ]),
                        returned_expressions=[
                            search.FieldExpression(name='distance',
                                                   expression=dist_expr)
                        ],
                    ))
                if search_type == 'teams':
                    search_index = search.Index(name="teamLocation")
                else:
                    search_index = search.Index(name="eventLocation")

                docs = search_index.search(query)
                num_results = docs.number_found
                distances = {}
                keys = []
                event_team_count_futures = {}
                for result in docs.results:
                    # The only returned expression is 'distance'; divide by
                    # METERS_PER_MILE to undo the scaling used in the query.
                    distances[result.doc_id] = result.expressions[
                        0].value / self.METERS_PER_MILE
                    if search_type == 'teams':
                        # Start an async count (capped at 1) of EventTeam
                        # rows for this team in the requested year.
                        event_team_count_futures[
                            result.doc_id] = EventTeam.query(
                                EventTeam.team == ndb.Key(
                                    'Team', result.doc_id),
                                EventTeam.year == year).count_async(
                                    limit=1, keys_only=True)
                        keys.append(ndb.Key('Team', result.doc_id))
                    else:
                        keys.append(ndb.Key('Event', result.doc_id))

                # Issue all entity fetches before resolving any future.
                result_futures = ndb.get_multi_async(keys)

                if search_type == 'teams':
                    # Keep only teams whose EventTeam count for the year is
                    # non-zero.
                    results = []
                    for result_future, team_key in zip(result_futures, keys):
                        if event_team_count_futures[
                                team_key.id()].get_result() != 0:
                            results.append(result_future.get_result())

                else:
                    results = [
                        result_future.get_result()
                        for result_future in result_futures
                    ]

        self.template_values.update({
            'valid_years': self.VALID_YEARS,
            'valid_ranges': self.VALID_RANGES,
            'page_size': self.PAGE_SIZE,
            'page': page,
            'year': year,
            'location': location,
            'range_limit': range_limit,
            'search_type': search_type,
            'num_results': num_results,
            'results': results,
            'distances': distances,
        })

        return jinja2_engine.render('nearby.html', self.template_values)
예제 #11
0
 def add_expression(self, name, expression):
     """Append a returned expression to this object and return `self`.

     A `FieldExpression` built from `name` and `expression` is added to
     `self._returned_expressions`; returning `self` allows call chaining.
     """
     self._returned_expressions.append(
         search_api.FieldExpression(name=name, expression=expression))
     return self
예제 #12
0
def _top_matches(results, count=4):
    """Return up to ``count`` entries of ``results``, taken from the end, last first."""
    # NOTE(review): the original author was unsure whether walking the results
    # in reverse really yields the best-matching documents -- confirm.
    return list(reversed(results))[:count]


def _average_or_sentinel(scores):
    """Return the mean of ``scores``, or -9999999 when there are none."""
    return sum(scores) / float(len(scores)) if scores else -9999999


def _relevance_score(doc):
    """Return the doc's first int-convertible 'relevance_score' value, or None."""
    for expr in doc.expressions:
        if expr.name != "relevance_score":
            continue
        try:
            return int(expr.value)
        except (TypeError, ValueError, OverflowError):
            # Unusable value; keep looking at the remaining expressions.
            continue
    return None


def search_line(line, index):
    """Find the best candidate query for ``line`` and return its top documents.

    Candidate queries come from ``generate_queries_for_line`` (called with
    ``line`` plus a trailing " I"). Each candidate is run against ``index``
    and scored by the average 'relevance_score' of up to four documents; the
    highest-scoring candidate is run again with snippets of 'doc_text'.

    Args:
        line: Text to generate candidate queries from.
        index: Search index object exposing ``search(query)``.

    Returns:
        ``[avg_score, [(doc_id, snippet), ...], start, end]`` for the best
        candidate, or ``None`` when there are no candidates, a search raises
        ``search.Error``, or the final average score is negative.

    Raises:
        AssertionError: If a candidate search finds more documents than it
            returned.
    """
    line = line + " I"
    queries_with_text = generate_queries_for_line(line)
    if not queries_with_text:
        return None

    # Sort by match score and expose that score as 'relevance_score'.
    sort_opts = search.SortOptions(match_scorer=search.MatchScorer())
    relevance_field_expr = search.FieldExpression(name="relevance_score",
                                                  expression="_score")
    query_opts = search.QueryOptions(
        sort_options=sort_opts,
        returned_fields=["doc_id_text"],
        returned_expressions=[relevance_field_expr])

    # Run every candidate query and record its average relevance.
    query_results = []
    for query_text, start, end in queries_with_text:
        query = search.Query(query_string=query_text.strip(),
                             options=query_opts)
        try:
            search_results = index.search(query)
        except search.Error:
            return None

        # Raised explicitly (rather than via ``assert``) so the check still
        # runs under ``python -O``; the exception type is unchanged.
        if search_results.number_found != len(search_results.results):
            raise AssertionError("Too many documents")

        associated_docs = _top_matches(search_results.results)
        doc_scores = [
            score
            for score in (_relevance_score(doc) for doc in associated_docs)
            if score is not None
        ]
        logging.info(doc_scores)
        query_results.append((
            _average_or_sentinel(doc_scores),
            [doc.doc_id for doc in associated_docs],
            query_text,
            start,
            end,
        ))

    # Debug output of every candidate's score tuple.
    print(query_results)
    best_query = max(query_results, key=lambda x: x[0])

    # Re-run the best candidate, this time asking for 'doc_text' snippets.
    final_query = search.Query(query_string=best_query[2].strip(),
                               options=search.QueryOptions(
                                   sort_options=sort_opts,
                                   returned_fields=["doc_id_text"],
                                   snippeted_fields=["doc_text"]))
    try:
        best_results = index.search(final_query)
    except search.Error:
        return None

    associated_docs = _top_matches(best_results.results)
    avg_score = _average_or_sentinel(
        [doc.sort_scores[0] for doc in associated_docs])

    # Pair each document id with its first 'doc_text' expression value;
    # documents without one are left out.
    ids_and_blurbs = []
    for doc in associated_docs:
        for expr in doc.expressions:
            if expr.name == "doc_text":
                ids_and_blurbs.append((doc.doc_id, expr.value))
                break

    if avg_score < 0:
        return None
    return [avg_score, ids_and_blurbs, best_query[3], best_query[4]]