Example #1
0
def get_sort_options(expressions=None, match_scorer=None, limit=1000):
    """Handle the sort expression API differences between SDK 1.6.4 and 1.6.5+.

    Do NOT set the limit parameter on the SortExpression or MatchScorer
    objects passed in; supply it via the ``limit`` argument instead.

    Example usage:

      expr_list = [
          search.SortExpression(expression='author', default_value='',
                                direction=search.SortExpression.DESCENDING)]
      sortopts = get_sort_options(expressions=expr_list, limit=sort_limit)

    The returned value is then used when constructing the query options:

      qoptions = search.QueryOptions(limit=doc_limit, sort_options=sortopts)

    Sorting on an expression based on a MatchScorer score works the same way:

      expr_list = [
          search.SortExpression(expression='_score + 0.001 * rating',
                                default_value='',
                                direction=search.SortExpression.DESCENDING)]
      sortopts = get_sort_options(expressions=expr_list,
                                  match_scorer=search.MatchScorer(),
                                  limit=sort_limit)

    Args:
      expressions: a list of search.SortExpression.  Do not set the limit
        parameter on the SortExpression objects.
      match_scorer: a search.MatchScorer or search.RescoringMatchScorer.
        Do not set the limit parameter on either scorer.
      limit: the scoring limit.

    Returns:
      The sort options value: a list of SortExpression (1.6.4) or a
      SortOptions object (1.6.5+), suitable for the sort_options field
      of a QueryOptions object.
    """
    try:
        # 1.6.5 or greater: SortOptions exists, so everything (expressions,
        # scorer, limit) is wrapped in a single SortOptions object.
        if search.SortOptions:
            logging.debug("search.SortOptions is defined.")
            return search.SortOptions(expressions=expressions,
                                      match_scorer=match_scorer,
                                      limit=limit)

    # SortOptions is not available, so we are on 1.6.4: rebuild each sort
    # expression with the limit attached and return a plain list instead.
    except AttributeError:
        logging.debug("search.SortOptions is not defined.")
        rebuilt = []
        if expressions:
            for expr in expressions:
                rebuilt.append(
                    search.SortExpression(expression=expr.expression,
                                          direction=expr.direction,
                                          default_value=expr.default_value,
                                          limit=limit))
        # A match scorer, if given, is recreated with the limit applied and
        # appended to the expression list.
        if isinstance(match_scorer, search.MatchScorer):
            rebuilt.append(match_scorer.__class__(limit=limit))
        logging.info("sort expressions: %s", rebuilt)
        return rebuilt
Example #2
0
    def get(self):
        """Serve a JSON payload of Person records matching a search query.

        Reads the 'q', 'name', 'token', 'page_num' and 'page_size' request
        parameters, sanitizes them, runs a paged full-text search sorted by
        first name (descending), and writes the matches as JSON.
        """
        self.response.headers['Content-Type'] = 'text/plain'
        # Unescape and normalize user input, capping lengths defensively.
        search_query = str(
            urllib.unquote(cgi.escape(self.request.get('q')).lower()[:100]))
        name = str(
            urllib.unquote(cgi.escape(self.request.get('name')).lower()[:50]))
        token = str(urllib.unquote(cgi.escape(self.request.get('token'))))
        page_num = parse_int(
            urllib.unquote(cgi.escape(self.request.get('page_num'))), 1)
        page_size = parse_int(
            urllib.unquote(cgi.escape(self.request.get('page_size'))), 20)

        # Keep only letters, digits and spaces so that search-syntax
        # operators cannot be injected through 'q'.
        s = ''
        for c in search_query:
            if c in string.letters or c in string.digits or c == ' ':
                s += c
        search_query = s

        # Nothing to search for: echo back an empty result set.
        if search_query + name == "":
            d = {}
            d['data'] = []
            d['token'] = token
            d['q'] = ""
            s = json.dumps(d)
            self.response.out.write(s)
            return

        # Fall back to searching by name when 'q' was empty or fully
        # filtered out.
        if search_query == "":
            search_query = name

        # Clamp paging values.  A page_num below 1 would otherwise produce
        # a negative offset, which search.QueryOptions rejects.
        if page_num < 1:
            page_num = 1
        if page_size > _PAGE_SIZE or page_size < 1:
            page_size = _PAGE_SIZE

        # AND together the individual (sorted) query terms.
        queries = map(str, search_query.split())
        queries = sorted(queries)
        query_string = ' AND '.join(queries)

        d = {}
        d["data"] = []
        d["token"] = token
        d["q"] = search_query

        data = []
        # Sort results by first name descending
        expr_list = [
            search.SortExpression(expression='first_name',
                                  default_value='',
                                  direction=search.SortExpression.DESCENDING)
        ]
        # construct the sort options
        sort_opts = search.SortOptions(expressions=expr_list)
        offset_num = (page_num - 1) * page_size
        query_options = search.QueryOptions(limit=page_size,
                                            offset=offset_num,
                                            ids_only=True,
                                            sort_options=sort_opts)
        results = search.Index(name=_INDEX_NAME).search(query=search.Query(
            query_string=query_string, options=query_options))

        # Resolve each matching doc id to its Person entity; skip docs
        # whose entity no longer exists.
        for result in results:
            rcsid = result.doc_id
            r = Person.get_by_id(rcsid)
            if r:
                per = Person.buildMap(r)
                per['name'] = per['name'].title()
                data.append(per)
        d["data"] = data
        s = json.dumps(d)
        self.response.out.write(s)
Example #3
0
File: docs.py  Project: miketruty/public
class Video(BaseDocumentManager):
    """Provides helper methods to manage Video documents.  All Video documents
  built using these methods will include a core set of fields (see the
  _buildCoreVideoFields method).  We use the given video id (the Video
  entity key) as the doc_id.  This is not required for the entity/document
  design-- each explicitly points to the other, allowing their ids to be
  decoupled-- but using the video id as the doc id allows a document to be
  reindexed given its video info, without having to fetch the
  existing document."""

    _INDEX_NAME = config.VIDEO_INDEX_NAME

    # 'core' video document field names
    UNIQUEID = config.FIELD_UNIQUE_ID
    TITLE = config.FIELD_TITLE
    CATEGORY = config.FIELD_EVENT
    SUBTITLE = config.FIELD_SUBTITLE
    DURATION_MIN = config.FIELD_DURATION
    SPEAKERS = config.FIELD_SPEAKERS
    DESCRIPTION = config.FIELD_DESC
    PUBLISHED_DATE = config.FIELD_PUB_DATE
    SLIDES_LINK = config.FIELD_SLIDES_LINK
    VIEWS = config.FIELD_VIEW_COUNT
    TAGS = config.FIELD_TAGS
    IMAGE = config.FIELD_IMAGE
    VID = config.FIELD_VIDEO_ID
    SESSIONID = config.FIELD_SESSION_ID

    AVG_RATING = 'ar'  # average rating
    UPDATED = 'modified'

    # [sort keyword, human-readable label, SortExpression] triples from which
    # the sort menu (getSortMenu) and sort dict (getSortDict) are built.
    # The rating- and updated-based sorts are currently commented out.
    _SORT_OPTIONS = [
        [
            #     AVG_RATING, 'average rating', search.SortExpression(
            #         expression=AVG_RATING,
            #         direction=search.SortExpression.DESCENDING,
            #         default_value=0)
            # ], [
            VIEWS,
            Readable(VIEWS),
            search.SortExpression(expression=VIEWS,
                                  direction=search.SortExpression.DESCENDING,
                                  default_value=0)
        ],
        [
            DURATION_MIN,
            Readable(DURATION_MIN),
            search.SortExpression(expression=DURATION_MIN,
                                  direction=search.SortExpression.ASCENDING,
                                  default_value=9999)
        ],
        [
            PUBLISHED_DATE,
            Readable(PUBLISHED_DATE),
            search.SortExpression(expression=PUBLISHED_DATE,
                                  direction=search.SortExpression.DESCENDING,
                                  default_value=1)
        ],
        [
            #     UPDATED, Readable(UPDATED), search.SortExpression(
            #         expression=UPDATED,
            #         direction=search.SortExpression.DESCENDING,
            #         default_value=1)
            # ], [
            CATEGORY,
            Readable(CATEGORY),
            search.SortExpression(expression=CATEGORY,
                                  direction=search.SortExpression.ASCENDING,
                                  default_value='')
        ],
        [
            TITLE,
            Readable(TITLE),
            search.SortExpression(expression=TITLE,
                                  direction=search.SortExpression.ASCENDING,
                                  default_value='zzz')
        ]
    ]

    # Lazily-built caches; see getSortMenu() / getSortDict().
    _SORT_MENU = None
    _SORT_DICT = None

    @classmethod
    def deleteAllInVideoIndex(cls):
        """Remove all documents from the Video index."""
        cls.deleteAllInIndex()

    @classmethod
    def getSortMenu(cls):
        """Return the (lazily built) list of (keyword, label) sort options."""
        if not cls._SORT_MENU:
            cls._buildSortMenu()
        return cls._SORT_MENU

    @classmethod
    def getSortDict(cls):
        """Return the (lazily built) keyword -> SortExpression mapping."""
        if not cls._SORT_DICT:
            cls._buildSortDict()
        return cls._SORT_DICT

    @classmethod
    def _buildSortMenu(cls):
        """Build the default set of sort options used for Video search.
    Of these options, all but 'relevance' reference core fields that
    all Videos will have."""
        res = [(elt[0], elt[1]) for elt in cls._SORT_OPTIONS]
        cls._SORT_MENU = [('relevance', 'relevance')] + res

    @classmethod
    def _buildSortDict(cls):
        """Build a dict that maps sort option keywords to their corresponding
    SortExpressions."""
        cls._SORT_DICT = {}
        for elt in cls._SORT_OPTIONS:
            cls._SORT_DICT[elt[0]] = elt[2]

    @classmethod
    def getDocFromUid(cls, uid):
        """Given a uid, get its doc. We're using the uid as the doc id, so we can
    do this via a direct fetch."""
        return cls.getDoc(uid)

    @classmethod
    def removeVideoDocByUid(cls, uid):
        """Given a doc's vid, remove the doc matching it from the video
    index."""
        cls.removeDocById(uid)

    @classmethod
    def updateRatingInDoc(cls, doc_id, avg_rating):
        """Fetch the doc for doc_id, update its average-rating field, and
    return the (not-yet-reindexed) doc.  Raises OperationFailedError if no
    doc with that id exists."""
        # get the associated doc from the doc id in the video entity
        doc = cls.getDoc(doc_id)
        if doc:
            pdoc = cls(doc)
            # NOTE(review): presumably setFirstField mutates the wrapped
            # doc's field list in place, so returning `doc` returns the
            # updated fields — verify in BaseDocumentManager.
            pdoc.setAvgRating(avg_rating)
            # The use of the same id will cause the existing doc to be reindexed.
            return doc
        else:
            raise errors.OperationFailedError(
                'Could not retrieve doc associated with id %s' % (doc_id, ))

    @classmethod
    def updateRatingsInfo(cls, doc_id, avg_rating):
        """Given a models.Video entity, update and reindex the associated
    document with the video entity's current average rating. """

        ndoc = cls.updateRatingInDoc(doc_id, avg_rating)
        # reindex the returned updated doc
        return cls.add(ndoc)


# 'accessor' convenience methods

    def getUniqueID(self):
        """Get the value of the 'uniqueid' field of a Video doc."""
        return self.getFieldVal(self.UNIQUEID)

    def getTitle(self):
        """Get the value of the 'title' field of a Video doc."""
        return self.getFieldVal(self.TITLE)

    def getCategory(self):
        """Get the value of the 'cat' field of a Video doc."""
        return self.getFieldVal(self.CATEGORY)

    def setCategory(self, cat):
        """Set the value of the 'cat' (category) field of a Video doc."""
        # NOTE(review): _buildCoreVideoFields indexes CATEGORY as an
        # AtomField, but this setter writes a NumberField — confirm which
        # field type is actually intended.
        return self.setFirstField(
            search.NumberField(name=self.CATEGORY, value=cat))

    def getSlidesLink(self):
        """Get the value of the 'slides_link' field of a Video doc."""
        return self.getFieldVal(self.SLIDES_LINK)

    def getSubtitle(self):
        """Get the value of the 'subtitle' field of a Video doc."""
        return self.getFieldVal(self.SUBTITLE)

    def getDurationMin(self):
        """Get the value of the 'duration_min' field of a Video doc."""
        return self.getFieldVal(self.DURATION_MIN)

    def getSpeakers(self):
        """Get the value of the 'speakers' field of a Video doc."""
        return self.getFieldVal(self.SPEAKERS)

    def getDescription(self):
        """Get the value of the 'description' field of a Video doc."""
        return self.getFieldVal(self.DESCRIPTION)

    def getPublishedDate(self):
        """Get the value of the 'published_date' field of a Video doc."""
        return self.getFieldVal(self.PUBLISHED_DATE)

    def getViews(self):
        """Get the value of the 'views' field of a Video doc."""
        return self.getFieldVal(self.VIEWS)

    def getTags(self):
        """Get the value of the 'tags' field of a Video doc."""
        return self.getFieldVal(self.TAGS)

    def getImage(self):
        """Get the value of the 'image' field of a Video doc."""
        return self.getFieldVal(self.IMAGE)

    def getVID(self):
        """Get the value of the 'vid' field of a Video doc."""
        return self.getFieldVal(self.VID)

    def getSessionID(self):
        """Get the value of the 'sessionid' field of a Video doc."""
        return self.getFieldVal(self.SESSIONID)

    def getAvgRating(self):
        """Get the value of the 'ar' (average rating) field of a Video doc."""
        return self.getFieldVal(self.AVG_RATING)

    def setAvgRating(self, ar):
        """Set the value of the 'ar' field of a Video doc."""
        return self.setFirstField(
            search.NumberField(name=self.AVG_RATING, value=ar))

    @classmethod
    def generateRatingsBuckets(cls, query_string):
        """Builds a dict of ratings 'buckets' and their counts, based on the
    value of the 'avg_rating" field for the documents retrieved by the given
    query.  See the 'generateRatingsLinks' method.  This information will
    be used to generate sidebar links that allow the user to drill down in query
    results based on rating.

    For demonstration purposes only; this will be expensive for large data
    sets.

    Returns None if the search fails.
    """

        # do the query on the *full* search results
        # to generate the facet information, imitating what may in future be
        # provided by the FTS API.
        try:
            sq = search.Query(query_string=query_string.strip())
            search_results = cls.getIndex().search(sq)
        except search.Error:
            logging.exception('An error occurred on search.')
            return None

        ratings_buckets = collections.defaultdict(int)
        # populate the buckets; docs without a rating count toward bucket 0
        for res in search_results:
            ratings_buckets[int((cls(res)).getAvgRating() or 0)] += 1
        return ratings_buckets

    @classmethod
    def generateRatingsLinks(cls, query, vhash):
        """Given a dict of ratings 'buckets' and their counts,
    builds a list of html snippets, to be displayed in the sidebar when
    showing results of a query. Each is a link that runs the query, additionally
    filtered by the indicated ratings interval."""

        ratings_buckets = cls.generateRatingsBuckets(query)
        if not ratings_buckets:
            return None
        rlist = []
        for k in range(config.RATING_MIN, config.RATING_MAX + 1):
            try:
                # defaultdict: missing keys yield 0 rather than KeyError
                v = ratings_buckets[k]
            except KeyError:
                return
            # build html
            if k < 5:
                htext = '%s-%s (%s)' % (k, k + 1, v)
            else:
                htext = '%s (%s)' % (k, v)
            vhash['rating'] = k
            hlink = '/vsearch?' + urllib.urlencode(vhash)
            rlist.append((hlink, htext))
        return rlist

    @classmethod
    def _buildCoreVideoFields(cls, unique_id, title, category, subtitle,
                              duration_min, speakers, description,
                              published_date, views, slides_link, tags, image,
                              video_id, session_id):
        """Construct a 'core' document field list for the fields common to all
    Videos. The various categories (as defined in the file 'categories.py'),
    may add additional specialized fields; these will be appended to this
    core list. (see _buildVideoFields)."""
        fields = [
            search.TextField(name=cls.UNIQUEID, value=unique_id),
            # The 'updated' field is always set to the current date.
            search.DateField(name=cls.UPDATED,
                             value=datetime.datetime.now().date()),
            search.TextField(name=cls.TITLE, value=title),
            search.AtomField(name=cls.CATEGORY, value=category),
            search.TextField(name=cls.SUBTITLE, value=subtitle),
            search.NumberField(name=cls.DURATION_MIN, value=int(duration_min)),
            search.TextField(name=cls.SPEAKERS, value=ScrubSpeakers(speakers)),
            # strip the markup from the description value, which can
            # potentially come from user input.  We do this so that
            # we don't need to sanitize the description in the
            # templates, showing off the Search API's ability to mark up query
            # terms in generated snippets.  This is done only for
            # demonstration purposes; in an actual app,
            # it would be preferrable to use a library like Beautiful Soup
            # instead.
            # We'll let the templating library escape all other rendered
            # values for us, so this is the only field we do this for.
            search.TextField(name=cls.DESCRIPTION,
                             value=ScrubDescription(description)),
            search.NumberField(name=cls.VIEWS, value=int(views)),
            search.TextField(name=cls.SLIDES_LINK, value=slides_link),
            search.TextField(name=cls.TAGS, value=tags),
            search.TextField(name=cls.IMAGE, value=image),
            search.TextField(name=cls.VID, value=video_id),
            search.TextField(name=cls.SESSIONID, value=session_id),
            search.NumberField(name=cls.AVG_RATING, value=0.0),
        ]
        # Some fields can sometimes be empty.
        scrubbed_publish_date = utils.dateFromDateString(published_date)
        if scrubbed_publish_date:
            fields.append(
                search.DateField(name=cls.PUBLISHED_DATE,
                                 value=scrubbed_publish_date))
        return fields

    @classmethod
    def _buildVideoFields(cls, unique_id, category, title, category_name,
                          subtitle, duration_min, speakers, description,
                          published_date, views, slides_link, tags, image,
                          video_id, session_id, **params):
        """Build all the additional non-core fields for a document of the given
    video type (category), using the given params dict, and the
    already-constructed list of 'core' fields.  All such additional
    category-specific fields are treated as required.

    Raises:
      errors.OperationFailedError: on a malformed numeric value or a
        missing required category-specific field.
    """
        fields = cls._buildCoreVideoFields(unique_id, title, category,
                                           subtitle, duration_min, speakers,
                                           description, published_date, views,
                                           slides_link, tags, image, video_id,
                                           session_id)
        # get the specification of additional (non-'core') fields for this category
        vdict = categories.event_dict.get(category_name)
        if vdict:
            # for all fields
            for k, field_type in vdict.iteritems():
                # see if there is a value in the given params for that field.
                # if there is, get the field type, create the field, and append to the
                # document field list.
                if k in params:
                    v = params[k]
                    if field_type == search.NumberField:
                        try:
                            val = float(v)
                            fields.append(search.NumberField(name=k,
                                                             value=val))
                        except ValueError:
                            error_message = (
                                'bad value %s for field %s of type %s' %
                                (k, v, field_type))
                            logging.error(error_message)
                            raise errors.OperationFailedError(error_message)
                    elif field_type == search.TextField:
                        fields.append(search.TextField(name=k, value=str(v)))
                    else:
                        # you may want to add handling of other field types for generality.
                        # Not needed for our current sample data.
                        logging.warn('not processed: %s, %s, of type %s', k, v,
                                     field_type)
                else:
                    error_message = (
                        'value not given for field "%s" of field type "%s"' %
                        (k, field_type))
                    logging.warn(error_message)
                    raise errors.OperationFailedError(error_message)
        #else:
        #  # else, did not have an entry in the params dict for the given field.
        #  logging.warn(
        #      'video field information not found for category name %s',
        #      params['category_name'])
        return fields

    @classmethod
    def _createDocument(cls,
                        unique_id=None,
                        event=None,
                        title=None,
                        category_name=None,
                        subtitle=None,
                        duration_min=None,
                        speakers=None,
                        description=None,
                        published_date=None,
                        views=None,
                        slides_link=None,
                        tags=None,
                        image=None,
                        video_id=None,
                        session_id=None,
                        **params):
        """Create a Document object from given params."""
        # check for the fields that are always required.
        if unique_id and event and title:
            # First, check that the given unique_id has only visible ascii characters,
            # and does not contain whitespace.  The unique_id will be used as the
            # doc_id, which has these requirements.
            if not cls.isValidDocId(unique_id):
                raise errors.OperationFailedError('Illegal unique_id %s' %
                                                  unique_id)
            # construct the document fields from the params
            resfields = cls._buildVideoFields(unique_id=unique_id,
                                              category=event,
                                              title=title,
                                              category_name=category_name,
                                              subtitle=subtitle,
                                              duration_min=duration_min,
                                              speakers=speakers,
                                              description=description,
                                              published_date=published_date,
                                              views=views,
                                              slides_link=slides_link,
                                              tags=tags,
                                              image=image,
                                              video_id=video_id,
                                              session_id=session_id,
                                              **params)
            # build and index the document.  Use the video_id as the doc id.
            # (If we did not do this, and left the doc_id unspecified, an id would be
            # auto-generated.)
            d = search.Document(doc_id=unique_id, fields=resfields)
            return d
        else:
            raise errors.OperationFailedError('Missing parameter.')

    @classmethod
    def _normalizeParams(cls, params):
        """Normalize the submitted params for building a video."""
        # deep-copy so the caller's dict is never mutated
        params = copy.deepcopy(params)
        try:
            params[cls.UNIQUEID] = params[cls.UNIQUEID].strip()
            params[cls.TITLE] = params[cls.TITLE].strip()
            params['category_name'] = params[cls.CATEGORY]
            params[cls.CATEGORY] = params[cls.CATEGORY]
            if params[cls.SUBTITLE]:
                params[cls.SUBTITLE] = params[cls.SUBTITLE].strip()
            if params[cls.SPEAKERS]:
                params[cls.SPEAKERS] = params[cls.SPEAKERS].strip()
            if params[cls.SLIDES_LINK]:
                params[cls.SLIDES_LINK] = params[cls.SLIDES_LINK].strip()
            try:
                params[cls.DURATION_MIN] = int(params[cls.DURATION_MIN])
            except (ValueError, TypeError):
                error_message = 'bad duration_min value: %s' % params[
                    cls.DURATION_MIN]
                logging.error(error_message)
                raise errors.OperationFailedError(error_message)
            try:
                params[cls.VIEWS] = int(params[cls.VIEWS])
            except ValueError:
                error_message = 'bad views value: %s' % params[cls.VIEWS]
                logging.error(error_message)
                raise errors.OperationFailedError(error_message)
            if params[cls.TAGS]:
                params[cls.TAGS] = params[cls.TAGS].strip()
            if params[cls.IMAGE]:
                params[cls.IMAGE] = params[cls.IMAGE].strip()
            if params[cls.VID]:
                params[cls.VID] = params[cls.VID].strip()
            if params[cls.SESSIONID]:
                params[cls.SESSIONID] = params[cls.SESSIONID].strip()
            return params
        except KeyError as e1:
            logging.exception('key error')
            raise errors.OperationFailedError(e1)
        except errors.Error as e2:
            logging.debug('Problem with params: %s: %s', params,
                          e2.error_message)
            raise errors.OperationFailedError(e2.error_message)

    @classmethod
    def buildVideoBatch(cls, rows):
        """Build video documents and their related datastore entities, in batch,
    given a list of params dicts.  Should be used for new videos, as does not
    handle updates of existing video entities. This method does not require
    that the doc ids be tied to the video ids, and obtains the doc ids from
    the results of the document add."""
        docs = []
        dbps = []
        for row in rows:
            try:
                params = cls._normalizeParams(row)
                doc = cls._createDocument(**params)
                docs.append(doc)
                # create video entity, sans doc_id
                dbp = models.Video(id=params[cls.UNIQUEID],
                                   duration_min=int(params[cls.DURATION_MIN]),
                                   category=params[cls.CATEGORY])
                dbps.append(dbp)
            except errors.OperationFailedError:
                # skip the bad row; docs and dbps stay aligned because both
                # appends happen inside the same try
                logging.error('error creating document from data: %s.', row)
        logging.debug('buildVideoBatch: docs=%s.', len(docs))
        logging.debug('buildVideoBatch: dbps=%s.', len(dbps))
        try:
            add_results = cls.add(docs)
        except search.Error:
            logging.exception('Add failed')
            return
        if len(add_results) != len(dbps):
            # this case should not be reached; if there was an issue,
            # search.Error should have been thrown, above.
            raise errors.OperationFailedError(
                'Error: wrong number of results returned from indexing operation'
            )
        # now set the entities with the doc ids, the list of which are returned in
        # the same order as the list of docs given to the indexers
        for i, dbp in enumerate(dbps):
            dbp.doc_id = add_results[i].id
        # persist the entities
        ndb.put_multi(dbps)

    @classmethod
    def buildVideo(cls, params):
        """Create/update a video document and its related datastore entity.  The
    video id and the field values are taken from the params dict.
    """
        params = cls._normalizeParams(params)
        # check to see if doc already exists.  We do this because we need to retain
        # some information from the existing doc.  We could skip the fetch if this
        # were not the case.
        curr_doc = cls.getDocFromUid(params[cls.UNIQUEID])
        d = cls._createDocument(**params)
        if curr_doc:  #  retain ratings info from existing doc
            avg_rating = cls(curr_doc).getAvgRating()
            cls(d).setAvgRating(avg_rating)

        # This will reindex if a doc with that doc id already exists
        doc_ids = cls.add(d)
        try:
            doc_id = doc_ids[0].id
        except IndexError:
            doc_id = None
            raise errors.OperationFailedError('could not index document')
        logging.debug('got new doc id %s for video: %s', doc_id,
                      params[cls.UNIQUEID])

        # now update the entity, transactionally
        def _tx():
            # Check whether the video entity exists. If so, we want to update
            # from the params, but preserve its ratings-related info.
            v = models.Video.get_by_id(params[cls.UNIQUEID])
            if v:  #update
                v.update_core(params, doc_id)
            else:  # create new entity
                v = models.Video.create(params, doc_id)
            v.put()
            return v

        v = ndb.transaction(_tx)
        logging.debug('video: %s', v)
        return v
Example #4
0
    def _render(self):
        """Render the nearby (location-based) search page.

        Geocodes the requested location, runs a distance-bounded document
        search over the team or event location index, resolves the matching
        datastore entities, and renders the 'nearby.html' template.
        """
        year, location, range_limit, search_type, page = self._get_params()

        num_results = 0
        results = []
        # Maps doc_id -> distance in miles.  Initialized as a dict so the
        # template always receives one consistent type (this was previously
        # an empty list when no location was supplied).
        distances = {}
        if location:
            lat_lon, _ = LocationHelper.get_lat_lon(location, geocode=True)
            if lat_lon:
                lat, lon = lat_lon

                # Distance (in meters) between the query point and each
                # doc's 'location' geopoint field.
                dist_expr = 'distance(location, geopoint({}, {}))'.format(
                    lat, lon)
                if search_type == 'teams':
                    query_string = '{} < {}'.format(
                        dist_expr, range_limit * self.METERS_PER_MILE)
                else:
                    # Events are additionally filtered by season year.
                    query_string = '{} < {} AND year={}'.format(
                        dist_expr, range_limit * self.METERS_PER_MILE, year)

                offset = self.PAGE_SIZE * page

                query = search.Query(
                    query_string=query_string,
                    options=search.QueryOptions(
                        limit=self.PAGE_SIZE,
                        offset=offset,
                        # nearest first
                        sort_options=search.SortOptions(expressions=[
                            search.SortExpression(
                                expression=dist_expr,
                                direction=search.SortExpression.ASCENDING)
                        ]),
                        # have the API return the computed distance with
                        # each result so it doesn't need recomputing here
                        returned_expressions=[
                            search.FieldExpression(name='distance',
                                                   expression=dist_expr)
                        ],
                    ))
                if search_type == 'teams':
                    search_index = search.Index(name="teamLocation")
                else:
                    search_index = search.Index(name="eventLocation")

                docs = search_index.search(query)
                num_results = docs.number_found
                keys = []
                event_team_count_futures = {}
                for result in docs.results:
                    # Convert the returned distance expression to miles.
                    distances[result.doc_id] = result.expressions[
                        0].value / self.METERS_PER_MILE
                    if search_type == 'teams':
                        # Kick off an async existence check: did this team
                        # attend any event in the requested year?
                        event_team_count_futures[
                            result.doc_id] = EventTeam.query(
                                EventTeam.team == ndb.Key(
                                    'Team', result.doc_id),
                                EventTeam.year == year).count_async(
                                    limit=1, keys_only=True)
                        keys.append(ndb.Key('Team', result.doc_id))
                    else:
                        keys.append(ndb.Key('Event', result.doc_id))

                result_futures = ndb.get_multi_async(keys)

                if search_type == 'teams':
                    # Keep only teams with at least one event in the year.
                    results = []
                    for result_future, team_key in zip(result_futures, keys):
                        if event_team_count_futures[
                                team_key.id()].get_result() != 0:
                            results.append(result_future.get_result())

                else:
                    results = [
                        result_future.get_result()
                        for result_future in result_futures
                    ]

        self.template_values.update({
            'valid_years': self.VALID_YEARS,
            'valid_ranges': self.VALID_RANGES,
            'page_size': self.PAGE_SIZE,
            'page': page,
            'year': year,
            'location': location,
            'range_limit': range_limit,
            'search_type': search_type,
            'num_results': num_results,
            'results': results,
            'distances': distances,
        })

        return jinja2_engine.render('nearby.html', self.template_values)
예제 #5
0
        def _internal_search(cls, query_string, explicit_query_string_overrides=None, cursor_support=False,
                             existing_cursor=None, limit=20, number_found_accuracy=None, offset=None, sort_options=None,
                             returned_fields=None, ids_only=False, snippeted_fields=None, returned_expressions=None,
                             sort_limit=1000, *args, **kwargs):
            """
            Query search records in the search index. Essentially the params are the same as for GAE Search API.
            The exceptions are cursor, returned_expressions and sort_options.

            'explicit_query_string_overrides' is an iterable of tuples of the form ('property', 'value') which can be
            used to explicitly overwrite values from the supplied query string. This is useful if you have some custom
            filters that must only have certain values. It can also be used to prevent searches occurring with
            restricted values; useful as part of permission systems.

            Cursor is replaced by two args - cursor_support and existing_cursor. Existing cursor is the websafe version
            of a cursor returned by a previous query. Obviously if cursor_support is False then we don't process the
            cursor.

            Both returned_expressions and sort_options are lists of tuples instead of passing in search.FieldExpressions
            or search.SortOptions (as this would leak implementation to the client).

            returned_expression = ('name_of_expression', 'expression')
            sort_option = ('sort_expression', 'direction', 'default_value')

            See https://cloud.google.com/appengine/docs/python/search/options for more detailed explanations.

            Sort limit should be overridden if possible matches exceeds 1000. It should be set to a value higher, or
            equal to, the maximum number of results that could be found for a given search.

            :param query_string: raw query string passed to the search index
            :param explicit_query_string_overrides: iterable of (property, value) tuples ANDed onto the query string
            :param cursor_support: if True, attach a search.Cursor to the query options
            :param existing_cursor: websafe cursor string from a previous query (only used when cursor_support is True)
            :param limit: maximum number of results to return
            :param number_found_accuracy: accuracy threshold for the number_found result attribute
            :param offset: offset into the result set (alternative to cursors)
            :param sort_options: list of (expression, direction, default_value) tuples
            :param returned_fields: iterable of document field names to return
            :param ids_only: if True, return only document ids
            :param snippeted_fields: iterable of field names to generate snippets for
            :param returned_expressions: list of (name, expression) tuples
            :param sort_limit: scoring limit applied to the compiled SortOptions
            :param args: unused; accepted for interface compatibility
            :param kwargs: unused; accepted for interface compatibility
            :returns: async search future from ``cls.index.search_async``
            :raises search.Error:
            :raises TypeError:
            :raises ValueError:
            """

            cursor = None
            compiled_sort_options = None
            compiled_field_expressions = None

            if explicit_query_string_overrides:
                # TODO: use regex to split up the query string and swap out/append the explicit params. At the moment
                # multiple values could be passed for the same category, leading to possible data leaks
                query_fragments = []

                for explicit_param in explicit_query_string_overrides:
                    # Escape ',' and '+' so the value cannot break out of the quoted
                    # query term. NOTE: '\\,' / '\\+' replace the original '\,' / '\+',
                    # which relied on Python passing unknown escapes through verbatim
                    # (a deprecation warning in Python 3); the produced bytes are
                    # identical.
                    sanitised_value = explicit_param[1].replace(',', '\\,').replace('+', '\\+').strip()
                    query_fragments.append(u'{}="{}"'.format(explicit_param[0], sanitised_value))

                explicit_string = u' AND '.join(query_fragments)
                if explicit_string:
                    query_string = u'{} {}'.format(query_string, explicit_string)

            if cursor_support:
                # Resume from the client-supplied cursor when present; otherwise ask
                # the API to return a fresh cursor alongside the results.
                if existing_cursor:
                    cursor = search.Cursor(web_safe_string=existing_cursor)
                else:
                    cursor = search.Cursor()

            if sort_options:
                # Inflate the (expression, direction, default_value) tuples into real
                # SortExpressions; the limit lives on the SortOptions wrapper.
                parsed_options = [search.SortExpression(expression=sort_option[0],
                                                        direction=sort_option[1],
                                                        default_value=sort_option[2]) for sort_option in sort_options]
                compiled_sort_options = search.SortOptions(expressions=parsed_options, limit=sort_limit)

            if returned_expressions:
                compiled_field_expressions = [search.FieldExpression(name=field_exp[0], expression=field_exp[1]) for
                                              field_exp in returned_expressions]

            options = search.QueryOptions(
                ids_only=ids_only,
                limit=limit,
                snippeted_fields=snippeted_fields,
                number_found_accuracy=number_found_accuracy,
                returned_fields=returned_fields,
                returned_expressions=compiled_field_expressions,
                sort_options=compiled_sort_options,
                offset=offset,
                cursor=cursor,
            )

            query = search.Query(query_string=query_string, options=options)
            try:
                return cls.index.search_async(query=query)
            except (search.Error, TypeError, ValueError):
                # Log with the index name for debuggability, then propagate to the
                # caller (the docstring advertises these exception types).
                logging.exception(u"Query {0} in {1} failed.".format(query_string, cls.index.name))
                raise
예제 #6
0
class Product(BaseDocumentManager):
    """Provides helper methods to manage Product documents.  All Product documents
  built using these methods will include a core set of fields (see the
  _buildCoreProductFields method).  We use the given product id (the Product
  entity key) as the doc_id.  This is not required for the entity/document
  design-- each explicitly point to each other, allowing their ids to be
  decoupled-- but using the product id as the doc id allows a document to be
  reindexed given its product info, without having to fetch the
  existing document."""

    _INDEX_NAME = config.PRODUCT_INDEX_NAME

    # 'core' product document field names
    PID = 'pid'
    DESCRIPTION = 'description'
    CATEGORY = 'category'
    PRODUCT_NAME = 'name'
    PRICE = 'price'
    AVG_RATING = 'ar'  # average rating
    UPDATED = 'modified'

    # Each entry is [sort keyword, human-readable menu label, SortExpression].
    _SORT_OPTIONS = [
        [
            AVG_RATING, 'average rating',
            search.SortExpression(expression=AVG_RATING,
                                  direction=search.SortExpression.DESCENDING,
                                  default_value=0)
        ],
        [
            PRICE,
            'price',
            search.SortExpression(
                # other examples:
                # expression='max(price, 14.99)'
                # If you access _score in your sort expressions,
                # your SortOptions should include a scorer.
                # e.g. search.SortOptions(match_scorer=search.MatchScorer(),...)
                # Then, you can access the score to build expressions like:
                # expression='price * _score'
                expression=PRICE,
                direction=search.SortExpression.ASCENDING,
                default_value=9999)
        ],
        [
            UPDATED, 'modified',
            search.SortExpression(expression=UPDATED,
                                  direction=search.SortExpression.DESCENDING,
                                  default_value=1)
        ],
        [
            CATEGORY, 'category',
            search.SortExpression(expression=CATEGORY,
                                  direction=search.SortExpression.ASCENDING,
                                  default_value='')
        ],
        [
            PRODUCT_NAME, 'product name',
            search.SortExpression(expression=PRODUCT_NAME,
                                  direction=search.SortExpression.ASCENDING,
                                  default_value='zzz')
        ]
    ]

    # Lazily-built caches derived from _SORT_OPTIONS; see getSortMenu/getSortDict.
    _SORT_MENU = None
    _SORT_DICT = None

    @classmethod
    def deleteAllInProductIndex(cls):
        """Remove every document from the product index."""
        cls.deleteAllInIndex()

    @classmethod
    def getSortMenu(cls):
        """Return the (keyword, label) sort menu, building it on first use."""
        if not cls._SORT_MENU:
            cls._buildSortMenu()
        return cls._SORT_MENU

    @classmethod
    def getSortDict(cls):
        """Return the keyword -> SortExpression mapping, building it on first use."""
        if not cls._SORT_DICT:
            cls._buildSortDict()
        return cls._SORT_DICT

    @classmethod
    def _buildSortMenu(cls):
        """Build the default set of sort options used for Product search.
    Of these options, all but 'relevance' reference core fields that
    all Products will have."""
        res = [(elt[0], elt[1]) for elt in cls._SORT_OPTIONS]
        cls._SORT_MENU = [('relevance', 'relevance')] + res

    @classmethod
    def _buildSortDict(cls):
        """Build a dict that maps sort option keywords to their corresponding
    SortExpressions."""
        cls._SORT_DICT = dict((elt[0], elt[2]) for elt in cls._SORT_OPTIONS)

    @classmethod
    def getDocFromPid(cls, pid):
        """Given a pid, get its doc. We're using the pid as the doc id, so we can
    do this via a direct fetch."""
        return cls.getDoc(pid)

    @classmethod
    def removeProductDocByPid(cls, pid):
        """Given a doc's pid, remove the doc matching it from the product
    index."""
        cls.removeDocById(pid)

    @classmethod
    def updateRatingInDoc(cls, doc_id, avg_rating):
        """Fetch the doc for doc_id and set its average-rating field in place.

    Returns the updated (not yet reindexed) document.
    Raises errors.OperationFailedError if no doc exists for doc_id."""
        # get the associated doc from the doc id in the product entity
        doc = cls.getDoc(doc_id)
        if doc:
            pdoc = cls(doc)
            pdoc.setAvgRating(avg_rating)
            # The use of the same id will cause the existing doc to be reindexed.
            return doc
        else:
            raise errors.OperationFailedError(
                'Could not retrieve doc associated with id %s' % (doc_id, ))

    @classmethod
    def updateRatingsInfo(cls, doc_id, avg_rating):
        """Given a doc id, update and reindex the associated document with the
    product's current average rating."""
        ndoc = cls.updateRatingInDoc(doc_id, avg_rating)
        # reindex the returned updated doc
        return cls.add(ndoc)

    # 'accessor' convenience methods

    def getPID(self):
        """Get the value of the 'pid' field of a Product doc."""
        return self.getFieldVal(self.PID)

    def getName(self):
        """Get the value of the 'name' field of a Product doc."""
        return self.getFieldVal(self.PRODUCT_NAME)

    def getDescription(self):
        """Get the value of the 'description' field of a Product doc."""
        return self.getFieldVal(self.DESCRIPTION)

    def getCategory(self):
        """Get the value of the 'category' field of a Product doc."""
        return self.getFieldVal(self.CATEGORY)

    def setCategory(self, cat):
        """Set the value of the 'category' field of a Product doc.

    BUG FIX: the category is indexed as an AtomField with a string value
    (see _buildCoreProductFields); this previously re-set it as a
    NumberField, which is inconsistent and fails for string categories."""
        return self.setFirstField(
            search.AtomField(name=self.CATEGORY, value=cat))

    def getAvgRating(self):
        """Get the value of the 'ar' (average rating) field of a Product doc."""
        return self.getFieldVal(self.AVG_RATING)

    def setAvgRating(self, ar):
        """Set the value of the 'ar' field of a Product doc."""
        return self.setFirstField(
            search.NumberField(name=self.AVG_RATING, value=ar))

    def getPrice(self):
        """Get the value of the 'price' field of a Product doc."""
        return self.getFieldVal(self.PRICE)

    @classmethod
    def generateRatingsBuckets(cls, query_string):
        """Builds a dict of ratings 'buckets' and their counts, based on the
    value of the 'avg_rating" field for the documents retrieved by the given
    query.  See the 'generateRatingsLinks' method.  This information will
    be used to generate sidebar links that allow the user to drill down in query
    results based on rating.

    For demonstration purposes only; this will be expensive for large data
    sets.  Returns None if the search itself fails.
    """

        # do the query on the *full* search results
        # to generate the facet information, imitating what may in future be
        # provided by the FTS API.
        try:
            sq = search.Query(query_string=query_string.strip())
            search_results = cls.getIndex().search(sq)
        except search.Error:
            logging.exception('An error occurred on search.')
            return None

        ratings_buckets = collections.defaultdict(int)
        # populate the buckets
        for res in search_results:
            ratings_buckets[int((cls(res)).getAvgRating() or 0)] += 1
        return ratings_buckets

    @classmethod
    def generateRatingsLinks(cls, query, phash):
        """Given a dict of ratings 'buckets' and their counts,
    builds a list of html snippets, to be displayed in the sidebar when
    showing results of a query. Each is a link that runs the query, additionally
    filtered by the indicated ratings interval."""

        ratings_buckets = cls.generateRatingsBuckets(query)
        if not ratings_buckets:
            return None
        rlist = []
        for k in range(config.RATING_MIN, config.RATING_MAX + 1):
            # ratings_buckets is a defaultdict(int): missing ratings read as 0,
            # so the former try/except KeyError around this lookup (whose bare
            # 'return' would have silently aborted the list) was unreachable.
            v = ratings_buckets[k]
            # build html
            if k < 5:
                htext = '%s-%s (%s)' % (k, k + 1, v)
            else:
                htext = '%s (%s)' % (k, v)
            phash['rating'] = k
            hlink = '/psearch?' + urllib.urlencode(phash)
            rlist.append((hlink, htext))
        return rlist

    @classmethod
    def _buildCoreProductFields(cls, pid, name, description, category,
                                category_name, price):
        """Construct a 'core' document field list for the fields common to all
    Products. The various categories (as defined in the file 'categories.py'),
    may add additional specialized fields; these will be appended to this
    core list. (see _buildProductFields)."""
        fields = [
            search.TextField(name=cls.PID, value=pid),
            # The 'updated' field is always set to the current date.
            search.DateField(name=cls.UPDATED,
                             value=datetime.datetime.now().date()),
            search.TextField(name=cls.PRODUCT_NAME, value=name),
            # strip the markup from the description value, which can
            # potentially come from user input.  We do this so that
            # we don't need to sanitize the description in the
            # templates, showing off the Search API's ability to mark up query
            # terms in generated snippets.  This is done only for
            # demonstration purposes; in an actual app,
            # it would be preferrable to use a library like Beautiful Soup
            # instead.
            # We'll let the templating library escape all other rendered
            # values for us, so this is the only field we do this for.
            search.TextField(name=cls.DESCRIPTION,
                             value=re.sub(r'<[^>]*?>', '', description)),
            search.AtomField(name=cls.CATEGORY, value=category),
            search.NumberField(name=cls.AVG_RATING, value=0.0),
            search.NumberField(name=cls.PRICE, value=price)
        ]
        return fields

    @classmethod
    def _buildProductFields(cls,
                            pid=None,
                            category=None,
                            name=None,
                            description=None,
                            category_name=None,
                            price=None,
                            **params):
        """Build all the additional non-core fields for a document of the given
    product type (category), using the given params dict, and the
    already-constructed list of 'core' fields.  All such additional
    category-specific fields are treated as required.
    """

        fields = cls._buildCoreProductFields(pid, name, description, category,
                                             category_name, price)
        # get the specification of additional (non-'core') fields for this category
        pdict = categories.product_dict.get(category_name)
        if pdict:
            # for all fields
            for k, field_type in pdict.iteritems():
                # see if there is a value in the given params for that field.
                # if there is, get the field type, create the field, and append to the
                # document field list.
                if k in params:
                    v = params[k]
                    if field_type == search.NumberField:
                        try:
                            val = float(v)
                            fields.append(search.NumberField(name=k,
                                                             value=val))
                        except ValueError:
                            # BUG FIX: the message previously interpolated
                            # (field, value) in (value, field) order.
                            error_message = (
                                'bad value %s for field %s of type %s' %
                                (v, k, field_type))
                            logging.error(error_message)
                            raise errors.OperationFailedError(error_message)
                    elif field_type == search.TextField:
                        fields.append(search.TextField(name=k, value=str(v)))
                    else:
                        # you may want to add handling of other field types for generality.
                        # Not needed for our current sample data.
                        logging.warning('not processed: %s, %s, of type %s', k,
                                        v, field_type)
                else:
                    error_message = (
                        'value not given for field "%s" of field type "%s"' %
                        (k, field_type))
                    logging.warning(error_message)
                    raise errors.OperationFailedError(error_message)
        else:
            # no field specification found for this category name.
            # BUG FIX: category_name is a named parameter here and thus never
            # present in **params; params['category_name'] raised KeyError.
            logging.warning(
                'product field information not found for category name %s',
                category_name)
        return fields

    @classmethod
    def _createDocument(cls,
                        pid=None,
                        category=None,
                        name=None,
                        description=None,
                        category_name=None,
                        price=None,
                        **params):
        """Create a Document object from given params."""
        # check for the fields that are always required.
        if pid and category and name:
            # First, check that the given pid has only visible ascii characters,
            # and does not contain whitespace.  The pid will be used as the doc_id,
            # which has these requirements.
            if not cls.isValidDocId(pid):
                raise errors.OperationFailedError("Illegal pid %s" % pid)
            # construct the document fields from the params
            resfields = cls._buildProductFields(pid=pid,
                                                category=category,
                                                name=name,
                                                description=description,
                                                category_name=category_name,
                                                price=price,
                                                **params)
            # build and index the document.  Use the pid (product id) as the doc id.
            # (If we did not do this, and left the doc_id unspecified, an id would be
            # auto-generated.)
            d = search.Document(doc_id=pid, fields=resfields)
            return d
        else:
            raise errors.OperationFailedError('Missing parameter.')

    @classmethod
    def _normalizeParams(cls, params):
        """Normalize the submitted params for building a product.

    Returns a deep copy so the caller's dict is never mutated.
    Raises errors.OperationFailedError on missing keys or a bad price."""

        params = copy.deepcopy(params)
        try:
            params['pid'] = params['pid'].strip()
            params['name'] = params['name'].strip()
            # The submitted 'category' doubles as the category display name.
            params['category_name'] = params['category']
            try:
                params['price'] = float(params['price'])
            except ValueError:
                error_message = 'bad price value: %s' % params['price']
                logging.error(error_message)
                raise errors.OperationFailedError(error_message)
            return params
        except KeyError as e1:
            logging.exception("key error")
            raise errors.OperationFailedError(e1)
        except errors.Error as e2:
            logging.debug('Problem with params: %s: %s' %
                          (params, e2.error_message))
            raise errors.OperationFailedError(e2.error_message)

    @classmethod
    def buildProductBatch(cls, rows):
        """Build product documents and their related datastore entities, in batch,
    given a list of params dicts.  Should be used for new products, as does not
    handle updates of existing product entities. This method does not require
    that the doc ids be tied to the product ids, and obtains the doc ids from
    the results of the document add."""

        docs = []
        dbps = []
        for row in rows:
            try:
                params = cls._normalizeParams(row)
                doc = cls._createDocument(**params)
                docs.append(doc)
                # create product entity, sans doc_id
                dbp = models.Product(id=params['pid'],
                                     price=params['price'],
                                     category=params['category'])
                dbps.append(dbp)
            except errors.OperationFailedError:
                # Skip the bad row but keep processing the rest of the batch.
                logging.error('error creating document from data: %s', row)
        try:
            add_results = cls.add(docs)
        except search.Error:
            logging.exception('Add failed')
            return
        if len(add_results) != len(dbps):
            # this case should not be reached; if there was an issue,
            # search.Error should have been thrown, above.
            raise errors.OperationFailedError(
                'Error: wrong number of results returned from indexing operation'
            )
        # now set the entities with the doc ids, the list of which are returned in
        # the same order as the list of docs given to the indexers
        for i, dbp in enumerate(dbps):
            dbp.doc_id = add_results[i].id
        # persist the entities
        ndb.put_multi(dbps)

    @classmethod
    def buildProduct(cls, params):
        """Create/update a product document and its related datastore entity.  The
    product id and the field values are taken from the params dict.
    """
        params = cls._normalizeParams(params)
        # check to see if doc already exists.  We do this because we need to retain
        # some information from the existing doc.  We could skip the fetch if this
        # were not the case.
        curr_doc = cls.getDocFromPid(params['pid'])
        d = cls._createDocument(**params)
        if curr_doc:  #  retain ratings info from existing doc
            avg_rating = cls(curr_doc).getAvgRating()
            cls(d).setAvgRating(avg_rating)

        # This will reindex if a doc with that doc id already exists
        doc_ids = cls.add(d)
        try:
            doc_id = doc_ids[0].id
        except IndexError:
            raise errors.OperationFailedError('could not index document')
        logging.debug('got new doc id %s for product: %s', doc_id,
                      params['pid'])

        # now update the entity
        def _tx():
            # Check whether the product entity exists. If so, we want to update
            # from the params, but preserve its ratings-related info.
            prod = models.Product.get_by_id(params['pid'])
            if prod:  #update
                prod.update_core(params, doc_id)
            else:  # create new entity
                prod = models.Product.create(params, doc_id)
            prod.put()
            return prod

        prod = ndb.transaction(_tx)
        logging.debug('prod: %s', prod)
        return prod
예제 #7
0
    def post(self):
        """Handle a search form submission: run the query against the 'tests'
        index with the requested ordering and render the results page.

        BUG FIX: previously an unrecognised 'search-order' value (e.g. empty or
        'relevance') left sort_exp unbound and raised NameError when building
        the QueryOptions; unknown orders now fall back to relevance ranking.
        Also removes unreachable statements that followed a 'continue'."""
        template_values = get_template_values(self)
        user = users.get_current_user()

        # Resume a previous result page if a websafe cursor was passed along.
        if self.request.get('next'):
            cursor = search.Cursor(web_safe_string=self.request.get('next'))
        else:
            cursor = search.Cursor()

        query = self.request.get("search-text").replace(',', "")
        order = self.request.get("search-order")
        completed = self.request.get("search-completed") == "on"

        # Echo the submitted values back so the form can be re-populated.
        template_values["query_values"] = {
            'query': query,
            'order': order,
            'completed': completed,
        }

        # Map each sort keyword to its (field expression, direction) pair.
        orderings = {
            "rating": ('rating', search.SortExpression.DESCENDING),
            "times_taken": ('times_taken', search.SortExpression.DESCENDING),
            "date_inc": ('date', search.SortExpression.DESCENDING),
            "date_dec": ('date', search.SortExpression.ASCENDING),
            "level_dec": ('level', search.SortExpression.DESCENDING),
            "level_inc": ('level', search.SortExpression.ASCENDING),
        }
        ordering = orderings.get(order)
        if ordering:
            sort_options = search.SortOptions(expressions=[
                search.SortExpression(expression=ordering[0],
                                      direction=ordering[1],
                                      default_value=0),
            ])
        else:
            # Unknown/absent order: let the index rank by relevance.
            sort_options = None

        query_options = search.QueryOptions(
            limit=self.page_depth,
            cursor=cursor,
            sort_options=sort_options,
        )

        query_obj = search.Query(query_string=query, options=query_options)
        results = search.Index(name="tests").search(query=query_obj)
        template_values["query_results"] = []

        for document in results:
            test = Test.query(Test.id == document.doc_id).get()
            if completed and user:
                # "Hide completed" is checked: skip any test for which this
                # user already has a Mark (i.e. tests already taken).
                if Mark.query(Mark.taker_entity.id == user.user_id(),
                              Mark.test.id == test.id).get() is not None:
                    continue  # Don't add it to the list.
            template_values["query_results"].append(test)

        path = os.path.join(os.path.dirname(__file__),
                            os.path.join(template_dir, 'main.html'))
        self.response.out.write(template.render(path, template_values))
        return
예제 #8
0
class SearchTest(loanertest.EndpointsTestCase, parameterized.TestCase):
    """Tests for search_utils: query building, document conversion, cursors."""

    @parameterized.parameters((
        shelf_messages.Shelf(location='NY', capacity=50),
        'location:NY capacity:50 enabled:True',
    ), (
        shelf_messages.Shelf(location='NY', capacity=50, enabled=False),
        'location:NY capacity:50 enabled:False',
    ))
    def test_to_query(self, message, expected_query):
        """Tests the creation of a valid search query from ndb properties."""
        query = search_utils.to_query(message, shelf_model.Shelf)
        #  The query is split because ndb properties are unordered when called by
        #  model_class._properties. This test would be flaky otherwise.
        self.assertCountEqual(query.split(' '), expected_query.split(' '))

    @mock.patch.object(search_utils, 'logging', autospec=True)
    def test_document_to_message(self, mock_logging):
        """Tests the creation of a protorpc message from a search document."""
        test_search_document = search.ScoredDocument(
            doc_id='test_doc_id',
            fields=[
                search.NumberField(name='capacity', value=20.0),
                search.TextField(name='location', value='US MTV'),
                search.AtomField(name='location', value='US-MTV'),
                search.AtomField(name='enabled', value='True'),
                search.GeoField(name='lat_long',
                                value=search.GeoPoint(52.37, 4.88)),
                search.TextField(name='not_present', value='MTV')
            ])
        expected_message = shelf_messages.Shelf(enabled=True,
                                                location='US-MTV',
                                                capacity=20,
                                                latitude=52.37,
                                                longitude=4.88)

        response_message = search_utils.document_to_message(
            test_search_document, shelf_messages.Shelf())
        self.assertEqual(response_message, expected_message)
        self.assertTrue(response_message.enabled)
        # The 'not_present' field has no counterpart on the message, so exactly
        # one error is expected to be logged.  (Changed from a bare `assert`
        # to the unittest assertion for consistency with the rest of the
        # suite; bare asserts are stripped under `python -O`.)
        self.assertEqual(mock_logging.error.call_count, 1)

    def test_get_search_cursor(self):
        """Tests the creation of a search cursor with a web_safe_string."""
        expected_cursor_web_safe_string = 'False:ODUxODBhNTgyYTQ2ZmI0MDU'
        returned_cursor = (
            search_utils.get_search_cursor(expected_cursor_web_safe_string))
        self.assertEqual(expected_cursor_web_safe_string,
                         returned_cursor.web_safe_string)

    @mock.patch.object(search, 'Cursor', autospec=True)
    def test_get_search_cursor_error(self, mock_cursor):
        """Tests the creation of a search cursor when an error occurs."""
        mock_cursor.side_effect = ValueError
        with self.assertRaisesWithLiteralMatch(endpoints.BadRequestException,
                                               search_utils._CORRUPT_KEY_MSG):
            search_utils.get_search_cursor(None)

    @parameterized.named_parameters(
        {
            'testcase_name': 'QueryStringOnly',
            'request':
            shared_messages.SearchRequest(query_string='enrolled:True'),
            'expected_values': ('enrolled:True', None, [])
        },
        {
            'testcase_name':
            'QueryStringWithReturnedFields',
            'request':
            shared_messages.SearchRequest(query_string='location:US-NYC',
                                          returned_fields=['location']),
            'expected_values': ('location:US-NYC', None, ['location'])
        },
    )
    def test_set_search_query_options(self, request, expected_values):
        """Tests setting the query options without sort options from message."""
        returned_query, returned_sort_options, returned_returned_fields = (
            search_utils.set_search_query_options(request))
        # Fixed typo: expcted_returned_fields -> expected_returned_fields.
        expected_query, expected_sort_options, expected_returned_fields = (
            expected_values)
        self.assertEqual(expected_sort_options, returned_sort_options)
        self.assertEqual(expected_query, returned_query)
        self.assertEqual(expected_returned_fields, returned_returned_fields)

    @parameterized.named_parameters(
        {
            'testcase_name':
            'ExpressionWithDirection',
            'request':
            shared_messages.SearchRequest(
                query_string='enrolled:True',
                expressions=[
                    shared_messages.SearchExpression(
                        expression='enrolled',
                        direction=shared_messages.SortDirection.ASCENDING)
                ]),
            'expected_sort_options_expressions': [
                search.SortExpression(
                    expression='enrolled',
                    direction=search.SortExpression.ASCENDING)
            ]
        },
        {
            'testcase_name':
            'MultipleExpressionsWithDirection',
            'request':
            shared_messages.SearchRequest(
                query_string='enrolled:True',
                expressions=[
                    shared_messages.SearchExpression(
                        expression='enrolled',
                        direction=shared_messages.SortDirection.ASCENDING),
                    shared_messages.SearchExpression(
                        expression='serial_number',
                        direction=shared_messages.SortDirection.DESCENDING)
                ]),
            'expected_sort_options_expressions': [
                search.SortExpression(
                    expression='enrolled',
                    direction=search.SortExpression.ASCENDING),
                search.SortExpression(
                    expression='serial_number',
                    direction=search.SortExpression.DESCENDING)
            ]
        },
        {
            'testcase_name':
            'ExpressionWithoutDirection',
            'request':
            shared_messages.SearchRequest(
                query_string='enrolled:True',
                expressions=[
                    shared_messages.SearchExpression(expression='enrolled')
                ]),
            'expected_sort_options_expressions':
            [search.SortExpression(expression='enrolled')]
        },
        {
            # Expressions without an explicit direction default to DESCENDING
            # on the search.SortExpression side.
            'testcase_name':
            'MultipleExpressionsWithoutDirection',
            'request':
            shared_messages.SearchRequest(
                query_string='enrolled:True',
                expressions=[
                    shared_messages.SearchExpression(expression='enrolled'),
                    shared_messages.SearchExpression(
                        expression='serial_number')
                ]),
            'expected_sort_options_expressions': [
                search.SortExpression(
                    expression='enrolled',
                    direction=search.SortExpression.DESCENDING),
                search.SortExpression(
                    expression='serial_number',
                    direction=search.SortExpression.DESCENDING)
            ]
        },
    )
    def test_set_search_query_options_with_sort_options(
            self, request, expected_sort_options_expressions):
        """Tests setting query options with sort options from message."""
        returned_query, returned_sort_options, returned_returned_fields = (
            search_utils.set_search_query_options(request))
        del returned_query  # Unused.
        del returned_returned_fields  # Unused.
        for i in range(len(returned_sort_options.expressions)):
            self.assertEqual(returned_sort_options.expressions[i].expression,
                             expected_sort_options_expressions[i].expression)
            self.assertEqual(returned_sort_options.expressions[i].direction,
                             expected_sort_options_expressions[i].direction)
예제 #9
0
    def get(self):
        """Handle a search request against the 'sample' index.

        Reads the query word, a mobile-coupon filter and an optional lat/long
        from the query string, runs a full-text search (sorted by distance
        when a location is given), and renders index.html with the results.
        """
        # Query-string parameters.
        q = self.request.get('q', default_value='')
        mobile_coupon = self.request.get('mobile_coupon', default_value='')
        latlong = self.request.get('latlong', default_value='')

        results = []
        number_found = 0
        try:
            index = search.Index(name='sample')
            # Sort by distance from the given location, nearest first.
            expressions = []
            if latlong:
                expressions.append(
                    search.SortExpression(
                        expression='distance(location, geopoint(%s))' %
                        latlong,
                        direction=search.SortExpression.ASCENDING,
                        default_value=None))
            # Configure the sort keys.
            sort_opts = search.SortOptions(match_scorer=search.MatchScorer(),
                                           expressions=expressions)

            # Configure the result fields.
            options = search.QueryOptions(limit=100,
                                          returned_fields=[
                                              'name', 'content', 'image',
                                              'address', 'tel', 'location'
                                          ],
                                          snippeted_fields=['content'],
                                          sort_options=sort_opts,
                                          number_found_accuracy=10000,
                                          cursor=None)

            # Build the search query string.
            query_string = u''
            if q:
                query_string = u'(content:(%s) OR name:(%s))' % (q, q)
            if mobile_coupon:
                query_string += u' mobile_coupon:(%s)' % (mobile_coupon)

            # Execute the search.
            query = search.Query(query_string=query_string, options=options)
            documents = index.search(query)

            # Collect the search results.
            number_found = documents.number_found
            for document in documents:
                # Assemble the snippet text.
                expressions = []
                for expression in document.expressions:
                    expressions.append(expression.value)
                results.append({
                    'doc_id': document.doc_id,
                    'name': document.field('name').value,
                    'content': document.field('content').value,
                    'image': document.field('image').value,
                    'snippet': ''.join(expressions),
                    'address': document.field('address').value,
                    'tel': document.field('tel').value
                })
            # logging.info("#-- SearchHandler : results:%s" % (results))

        except Exception as e:
            # Fixed: the original interpolated the *builtin* `id` function
            # into the log message ("id:%s" % (id, e)), which printed
            # "<built-in function id>".  Log only the exception; the
            # traceback is included automatically by logging.exception, and
            # lazy %-args avoid formatting when logging is disabled.
            logging.exception("#-- SearchHandler Error: exception:%s", e)

        template = JINJA_ENVIRONMENT.get_template('index.html')
        self.response.write(
            template.render({
                'q': q,
                'mobile_coupon': mobile_coupon,
                'latlong': latlong,
                'results': results,
                'number_found': number_found
            }))
예제 #10
0
파일: search.py 프로젝트: aarsakian/blog
from google.appengine.api import search, memcache
from google.appengine.ext import db
import logging

from flask import request, jsonify
from datetime import datetime, timedelta, date

# Sort results by title, descending.
# (Comment fixed: it previously said "author", but the expression below
# sorts on the 'title' field.)
expr_list = [
    search.SortExpression(expression='title',
                          default_value='',
                          direction=search.SortExpression.DESCENDING)
]
# Construct the sort options from the expression list.
sort_opts = search.SortOptions(expressions=expr_list)

# Query options shared by searches against the post index: at most 10
# results, sorted as above.
query_options = search.QueryOptions(limit=10, sort_options=sort_opts)

# Name of the full-text search index holding blog posts.
_INDEX_NAME = "posts"


def delete_document(document_id):
    """Remove the document with the given id from the posts search index."""
    search.Index(name=_INDEX_NAME).delete(str(document_id))


def add_document_in_search_index(doc_id, title, body, summary, category,
                                 timestamp, tags):
    """Build a search document from the post fields.

    NOTE(review): as shown, the created document is never indexed or
    returned — the body appears truncated in this snippet; confirm against
    the full source that an index.put(document) call follows.
    """
    document = create_document(doc_id, title, body, summary, category,
                               timestamp, tags)
예제 #11
0
    def get_grouped_documents(self, application_id, keyword, index,
                              document_model):
        """
        Get document groups by application id, search keyword and index

        :param application_id: application id
        :param keyword: search keywords ('-'-prefixed terms are excluded)
        :param index: pager index (0-based page number)
        :param document_model: document type
        :returns: [document_group], total / [], 0
        """
        if index is None: index = 0
        try:
            application_id = long(application_id)
            index = int(index)
        except (TypeError, ValueError):
            # Narrowed from a bare `except:`: only conversion failures should
            # yield the empty result; other exceptions now propagate.
            return [], 0

        # The caller must own the application.
        aps = ApplicationService()
        if not aps.is_my_application(application_id):
            return [], 0

        result = []
        query_string = ''
        if keyword and len(keyword.strip()) > 0:
            # Split into include terms and '-'-prefixed exclude terms.
            source = [item for item in keyword.split(' ') if len(item) > 0]
            plus = [item for item in source if item.find('-') != 0]
            minus = [
                item[1:] for item in source
                if item.find('-') == 0 and len(item) > 1
            ]

            if len(plus) > 0:
                keyword = ' '.join(plus)
                query_string = '(name:{1}) OR (email:{1}) OR (description:{1}) OR (ip:{1}) OR (title:{1}) OR (status:{1})'.replace(
                    '{1}', keyword)
            if len(minus) > 0:
                # NOTE(review): when both include and exclude terms are
                # present, this overwrites the include query instead of
                # combining the two — confirm whether that is intended.
                keyword = ' '.join(minus)
                query_string = 'NOT ((name:{1}) OR (email:{1}) OR (description:{1}) OR (ip:{1}) OR (title:{1}) OR (status:{1}))'.replace(
                    '{1}', keyword)
        # Serve from memcache when this keyword/page combination was cached.
        cache_key = MemcacheKey.document_search(application_id, document_model)
        cache_value = memcache.get(key=cache_key)
        if cache_value and keyword + str(index) in cache_value:
            # return from cache
            return cache_value[keyword + str(index)]['result'], cache_value[
                keyword + str(index)]['count']

        # Newest documents first; sort at most 1000 documents.
        create_time_desc = search.SortExpression(
            expression='create_time',
            direction=search.SortExpression.DESCENDING,
            default_value=0)
        options = search.QueryOptions(
            offset=config.page_size * index,
            limit=config.page_size,
            sort_options=search.SortOptions(expressions=[create_time_desc],
                                            limit=1000),
            returned_fields=[
                'title', 'name', 'times', 'description', 'email', 'create_time'
            ])
        query = search.Query(query_string=query_string, options=options)
        try:
            if document_model == DocumentModel.exception:
                # search data from ExceptionModel
                documents = search.Index(
                    namespace='ExceptionModel',
                    name=str(application_id)).search(query)
            elif document_model == DocumentModel.log:
                # search data from LogModel
                documents = search.Index(
                    namespace='LogModel',
                    name=str(application_id)).search(query)
            else:
                # search data from CrashModel
                documents = search.Index(
                    namespace='CrashModel',
                    name=str(application_id)).search(query)
        except Exception:
            # Best-effort: a missing schema (or any search failure) returns an
            # empty result instead of an error.  Narrowed from a bare
            # `except:` so KeyboardInterrupt/SystemExit still propagate.
            return [], 0

        for document in documents:
            result.append({
                'group_tag':
                document.doc_id,
                'title':
                document.field('title').value,
                'name':
                document.field('name').value,
                'times':
                int(document.field('times').value),
                'description':
                document.field('description').value,
                'email':
                document.field('email').value,
                'create_time':
                document.field('create_time').value.strftime(
                    '%Y-%m-%dT%H:%M:%S.%fZ')
            })

        # if number of documents over maximum then return the maximum
        if documents.number_found > 1000 + config.page_size:
            count = 1000 + config.page_size
        else:
            count = documents.number_found

        # set memory cache for 12 hours
        if cache_value is None:
            cache_value = {
                keyword + str(index): {
                    'result': result,
                    'count': count
                }
            }
            memcache.set(key=cache_key, value=cache_value, time=43200)
        else:
            cache_value[keyword + str(index)] = {
                'result': result,
                'count': count
            }
            memcache.set(key=cache_key, value=cache_value, time=43200)

        return result, count
예제 #12
0
    def get(self):
        """Render a databook page, optionally filtered by a full-text search
        word.

        A search word starting with '=' carries display modifiers before the
        actual word: '*' shows hidden articles too, digits set the result
        offset.  Comments below translated from Japanese; code unchanged.
        """
        # Get the databook name.
        databook_name = get_databook_name(self.request.get('db'))
        # Get the databook display title.
        databook_title = get_databook_title(self.request.get('db'))
        # Get the name of the full-text search index for this databook.
        databook_indexname = get_databook_indexname(self.request.get('db'))
        # Initialize the message shown to the user.
        message_data = ''

        # Check for administrator login.
        admin_login = False
        if users.is_current_user_admin():
            admin_login = True

        # Message shown while logged in as administrator.
        admin_message = ''
        if users.get_current_user():
            if admin_login:
                admin_message = '(管理者としてログインしています)'
            else:
                admin_message = '(管理者ではありません)'

        # Configure the login/logout URL.
        if users.get_current_user():
            login_url = users.create_logout_url(self.request.uri)
            login_text = '[ログアウト]'
        else:
            login_url = users.create_login_url(self.request.uri)
            # login_text = '[ログイン]'
            login_text = '[管理]'

        # Determine whether datastore writes are currently disabled.
        write_disabled_message = ''
        if not capabilities.CapabilitySet('datastore_v3',
                                          ['write']).is_enabled():
            write_disabled_message = '【現在書き込みは禁止しています】'

        # Get the full-text search word.
        search_flag = False
        search_count = 0
        search_word = self.request.get('word').strip()
        # A search word starting with '=' is treated specially: the prefix is
        # scanned for display modifiers before the real search word.
        show_all_flag = False
        show_offset = 0
        if search_word.startswith('='):
            i = 1
            while i < len(search_word):
                ch = search_word[i]
                # '*' shows everything, ignoring the per-article show flag.
                if ch == '*':
                    i += 1
                    show_all_flag = True
                    continue
                # A digit run gives the display offset (capped at 5 digits).
                if ch.isdigit():
                    i += 1
                    j = i - 1
                    while i < len(search_word):
                        ch = search_word[i]
                        if ch.isdigit():
                            i += 1
                            continue
                        break
                    k = i
                    if (k - j) > 5: k = j + 5
                    show_offset = int(search_word[j:k])
                    continue
                # Anything else ends the modifier scan.
                break
            search_word2 = search_word[i:]
        else:
            search_word2 = search_word
        # Check the (modifier-stripped) search word.
        if search_word2:
            # Perform the full-text search.
            articles = []
            # Sort specification: results by date, descending.
            expr_list = [
                search.SortExpression(
                    expression='date',
                    default_value=datetime.datetime.min,
                    direction=search.SortExpression.DESCENDING)
            ]
            # Set it on the sort options.
            sort_opts = search.SortOptions(expressions=expr_list)
            # Set the query options
            # (result limit, offset, sort options; only 'title' is returned).
            query_opts = search.QueryOptions(limit=mainpage_show_num,
                                             offset=show_offset,
                                             sort_options=sort_opts,
                                             returned_fields=['title'])
            try:
                # Run the full-text search with the word and query options.
                query_obj = search.Query(query_string=search_word2,
                                         options=query_opts)
                search_results = search.Index(name=databook_indexname).search(
                    query=query_obj)
                # Collect the article titles from the search results.
                req_titles = []
                for scored_doc in search_results:
                    req_titles.append(scored_doc.field('title').value)
                if len(req_titles) >= 1:
                    # Fetch the articles by title (up to the display limit),
                    # optionally filtering out hidden ones.
                    if show_all_flag:
                        articles_query = Article.query(
                            Article.title.IN(req_titles),
                            ancestor=databook_key(databook_name)).order(
                                -Article.date)
                    else:
                        articles_query = Article.query(
                            Article.title.IN(req_titles),
                            Article.show_flag == 1,
                            ancestor=databook_key(databook_name)).order(
                                -Article.date)
                    articles = articles_query.fetch(mainpage_show_num)
            except (search.QueryError, search.InvalidRequest), e:
                # Query error (can occur when the search string contains
                # symbols); append a notice for the user.
                message_data = message_data + '(クエリーエラー:検索文字列に記号が含まれると発生することがあります)'
            search_flag = True
            search_count = len(articles)
예제 #13
0
def query(querystr, cursorstr, limit):
    """Run a full-text search over the article index.

    Args:
        querystr: raw user query; escaped via _escape before searching.
        cursorstr: web-safe cursor string for pagination ('' for the start).
        limit: maximum number of results to return.

    Returns:
        A dict with the echoed query, result count, total hit count, the
        next-page cursor (web-safe string, '' when exhausted) and the list
        of result dicts.
    """
    score_expr = search.SortExpression(expression="_score * 1.0",
                                       direction=search.SortExpression.DESCENDING,
                                       default_value=0.0)

    # Sort up to 1000 matching results by score, descending.
    sort_opts = search.SortOptions(expressions=[score_expr], limit=1000)

    opts = search.QueryOptions(
        limit=limit,  # the number of results to return
        cursor=search.Cursor(web_safe_string=cursorstr),
        sort_options=sort_opts,
        returned_fields=["author", "tags", "title", "published"],
        snippeted_fields=["title", "content"],
    )

    results = search.Index(name="article_index").search(
        search.Query(query_string=_escape(querystr), options=opts))

    searchlist = []
    for doc in results:
        # Pull the snippeted title/content out of the document expressions.
        title = ''
        content = ''
        for snippet in doc.expressions:
            if snippet.name == "content":
                content = snippet.value
            elif snippet.name == "title":
                title = snippet.value
        published = doc["published"][0].value
        searchlist.append({
            'postid': int(doc.doc_id),
            "tags": doc["tags"][0].value.split(' '),
            'content': _useem(content),
            'title': _useem(title),
            'author': doc["author"][0].value,
            # Shift to UTC+8 for display.
            'date': (published + timedelta(hours=8)).strftime('%Y-%m-%d %H:%M'),
        })

    next_cursor = results.cursor
    next_cursor_urlsafe = next_cursor.web_safe_string if next_cursor else ''

    return {
        'query': querystr,
        'size': len(searchlist),
        'total': results.number_found,
        'cursor': next_cursor_urlsafe,
        'list': searchlist,
    }
예제 #14
0
파일: views.py 프로젝트: jsa/whats-new-rc
def search(rq):
    """Render the item search page.

    Parses page / sort / free-text / category parameters from the request,
    runs a full-text search against the items index, and renders search.html
    with results, paging links and active filters.

    NOTE(review): Python 2 semantics are relied on below — `/` is integer
    division (page_limit, max_page) and `filter`/`map`/`zip` return lists;
    confirm before porting to Python 3.
    """
    ###
    # Temporary parameter rename redirect.
    #
    from .util import asciidict, unicodedict
    q = unicodedict(rq.GET)
    _redir = False
    for _from, _to in (('c', PARAM.CATEGORY),
                       ('p', PARAM.PAGE),
                       ('q', PARAM.SEARCH),
                       ('s', PARAM.SORT)):
        val = q.pop(_from, None)
        if val:
            q[_to] = val
            _redir = True
    if _redir:
        logging.info("Redirecting %r -> %r" % (rq.GET, q))
        return redir(rq.path + "?%s" % urllib.urlencode(asciidict(q)))
    #
    ###

    def page_q(page):
        # URL for the given page; page 1 drops the parameter entirely.
        return qset(PARAM.PAGE, page if page >= 2 else None)

    page = rq.GET.pop(PARAM.PAGE, None)
    if page:
        try:
            page = int(page)
        except ValueError:
            return not_found("Invalid page '%s'" % (page,))
        if page < 2:
            return redir(page_q(page))
    else:
        page = 1

    page_size = 72 # divisible by 2, 3, and 4
    # Integer division: last page reachable within the search-offset cap.
    page_limit = g_search.MAXIMUM_SEARCH_OFFSET / page_size + 1
    if page > page_limit:
        return redir(page_q(page_limit))

    # Map the sort parameter to a search SortExpression; unknown values
    # redirect to the unsorted URL.
    sort = rq.GET.get(PARAM.SORT)
    if sort == SORT.CHEAP:
        sort = g_search.SortExpression(
                   'us_cents', g_search.SortExpression.ASCENDING)
    elif sort == SORT.DISCOUNT_AMT:
        sort = g_search.SortExpression(
                   'discount_us_cents', g_search.SortExpression.DESCENDING)
    elif sort == SORT.DISCOUNT_PC:
        sort = g_search.SortExpression(
                   'discount_pc', g_search.SortExpression.DESCENDING)
    elif sort == SORT.EXPENSIVE:
        sort = g_search.SortExpression(
                   'us_cents', g_search.SortExpression.DESCENDING)
    elif sort is not None:
        return redir(qset(PARAM.SORT))

    # Default sort is rank descending, and the rank is the added timestamp.
    # (note: rank would be referenced as "_rank")
    # sort = g_search.SortExpression('added', g_search.SortExpression.DESCENDING)

    if sort:
        sort = g_search.SortOptions(
                   [sort], limit=g_search.MAXIMUM_SORTED_DOCUMENTS)

    index = g_search.Index(ITEMS_INDEX)
    opts = g_search.QueryOptions(
               limit=page_size,
               number_found_accuracy=g_search.MAXIMUM_SORTED_DOCUMENTS
                                     if sort else
                                     g_search.MAXIMUM_SEARCH_OFFSET,
               offset=page_size * (page - 1),
               sort_options=sort)
    expr, filters = [], []

    # Free-text term: sanitized to a safe character set before querying.
    search_q = rq.GET.get(PARAM.SEARCH)
    if search_q:
        search_q = re.sub(r"[^a-z0-9&_~#]", " ", search_q.lower().strip()) \
                     .strip()
    if search_q:
        expr.append(search_q)
        filters.append(('"%s"' % search_q, qset(PARAM.SEARCH)))

    # Category filter: comma-separated numeric ids, validated against the
    # known category table.
    cats = rq.GET.get(PARAM.CATEGORY)
    if cats:
        cats = cats.split(",")
        try:
            cats = map(int, cats)
        except ValueError:
            return not_found("Invalid categories %s" % (cats,))
        cats = nub(cats)
        cat_infos = map(get_categories().get, cats)
        if not all(cat_infos):
            return not_found("Invalid categories %s" % (cats,))
        cats = zip(cats, cat_infos)
        cat_ids = ['"%d"' % c[0] for c in cats]
        expr.append("categories:(%s)" % " OR ".join(cat_ids))
        cat_names = [c[1][1] for c in cats]
        filters.append((" OR ".join(cat_names), qset(PARAM.CATEGORY)))

    with log_latency("Search latency {:,d}ms"):
        rs = index.search(g_search.Query(" ".join(expr), opts), deadline=10)

    # limit to 1000
    num_found = min(rs.number_found, g_search.MAXIMUM_SEARCH_OFFSET)
    max_page = num_found / page_size
    if rs.number_found % page_size:
        max_page += 1
    max_page = max(min(max_page, page_limit), 1)

    if page > max_page:
        return redir(page_q(max_page))

    def paging():
        # Build a sliding window of up to ~11 page links centered on the
        # current page, with first/last shortcuts and ellipses.
        start_page = min(max(page - 5, 1),
                         max(max_page - 10, 1))
        end_page = min(start_page + 10, max_page)
        pages = [(p, page_q(p), p == page)
                 for p in range(start_page, end_page + 1)]

        if not pages:
            # zero results, not even a single page
            return

        if len(pages) > 4:
            if pages[0][0] > 1:
                pages[0] = (1, page_q(1), False)
                if pages[1][0] > 2:
                    pages[1] = (u"…",) + pages[1][1:]
            if pages[-1][0] < max_page:
                pages[-1] = (max_page, page_q(max_page), False)
                if pages[-2][0] < (max_page - 1):
                    pages[-2] = (u"…",) + pages[-2][1:]

        paging = {'range': pages}

        p_prev = filter(lambda p: p[0] == page - 1, pages)
        if p_prev:
            paging['prev'] = p_prev[0]

        p_next = filter(lambda p: p[0] == page + 1, pages)
        if p_next:
            paging['next'] = p_next[0]

        return paging

    with log_latency("get_categories() latency {:,d}ms"):
        cats = get_categories()

    with log_latency("ItemView latency {:,d}ms"):
        items = ItemView.make_views(rs.results, cats)

    ctx = {
        'items': items,
        'paging': paging(),
        'filters': filters,
        'warnings': [],
        'PARAM': PARAM,
        'SORT': SORT,
    }

    if rs.number_found < g_search.MAXIMUM_SEARCH_OFFSET:
        ctx['total_count'] = "{:,d}".format(rs.number_found)
    else:
        ctx['total_count'] = "{:,d}+".format(g_search.MAXIMUM_SEARCH_OFFSET)
        if rs.number_found >= g_search.MAXIMUM_SORTED_DOCUMENTS:
            ctx['warnings'].append(
                "Sorting may be missing items due to large number of hits")

    with log_latency("Render latency {:,d}ms"):
        return render("search.html", ctx)
예제 #15
0
    def search_content(self, params):
        """Full-text search over the content index with paging and sorting.

        Args:
            params: dict of search parameters; an optional 'page' key (popped
                here) selects the result page, and the remaining keys are
                turned into the query string by _stringify_search_params.

        Returns:
            A list of result dicts (via util.search_document_to_dict),
            annotated by _annotate_search_content; chapter results
            additionally carry their parent book's uid/title/icon and the
            chapter number.
        """
        index = search.Index(config.content_index)
        # (Removed unused local `search_text` from the original.)

        # This search_results objects has properties `number_found`, `results`,
        # and `cursor`. See
        # https://cloud.google.com/appengine/docs/python/search/searchresultsclass
        # search.Query docs:
        # https://cloud.google.com/appengine/docs/python/search/queryclass
        # search.QueryOptions docs:
        # https://cloud.google.com/appengine/docs/python/search/queryoptionsclass

        # Pagination using a 'page' argument pulls n arguments offset by n*page
        # pull first set of results with page=0
        page_size = 20
        offset = 0
        if 'page' in params:
            offset = int(params.pop('page')) * page_size

        # Build the SortOptions with 3 sort keys, all descending:
        # display_order first, then votes_for, then created.
        sort_opts = search.SortOptions(expressions=[
            search.SortExpression(expression='display_order',
                                  direction=search.SortExpression.DESCENDING,
                                  default_value=0),
            search.SortExpression(expression='votes_for',
                                  direction=search.SortExpression.DESCENDING,
                                  default_value=0),
            search.SortExpression(expression='created',
                                  direction=search.SortExpression.DESCENDING,
                                  default_value=0),
        ])

        search_results = index.search(
            search.Query(query_string=self._stringify_search_params(params),
                         options=search.QueryOptions(
                             limit=page_size,
                             offset=offset,
                             snippeted_fields=['summary', 'body'],
                             sort_options=sort_opts,
                         )))

        # Enrich chapter documents with details of their parent book.
        for result in search_results.results:
            if 'Chapter_' in result.doc_id:
                book_id = None
                for field in result.fields:
                    if field.name == 'books':
                        book_id = field.value

                if book_id:
                    book = Book.get_by_id(book_id)
                    result.fields.append(search.TextField('bookUID', book.uid))
                    result.fields.append(
                        search.TextField('bookTitle', book.title))
                    result.fields.append(
                        search.TextField(
                            'bookIcon',
                            util.extract_value_from_json(book.icon, 'link')))
                    result.fields.append(
                        search.TextField(
                            'chapterNumber',
                            str(book.chapters.index(result.doc_id) + 1)))

        result_dicts = [
            util.search_document_to_dict(doc) for doc in search_results.results
        ]

        return self._annotate_search_content(result_dicts)
예제 #16
0
    def get(self):
        """Serve a people-directory search over the Search API as JSON.

        Reads 'q', 'name', 'token', 'page_num' and 'page_size' request
        parameters, applies per-IP flood control via memcache, and
        writes a JSON object of the form
        {'data': [...], 'token': ..., 'q': ...}.
        """
        self.response.headers['Content-Type'] = 'text/plain'
        # Sanitize and bound every user-supplied parameter.
        search_query = str(
            urllib.unquote(cgi.escape(self.request.get('q')).lower()[:100]))
        name = str(
            urllib.unquote(cgi.escape(self.request.get('name')).lower()[:50]))
        token = str(urllib.unquote(cgi.escape(self.request.get('token'))))
        page_num = parse_int(
            urllib.unquote(cgi.escape(self.request.get('page_num'))), 1)
        page_size = parse_int(
            urllib.unquote(cgi.escape(self.request.get('page_size'))), 20)

        # No query at all: echo back an empty result set and stop.
        if search_query + name == "":
            d = {}
            d['data'] = []
            d['token'] = token
            d['q'] = ""
            s = json.dumps(d)
            self.response.out.write(s)
            return

        # Fall back to searching by name when 'q' is empty.
        if search_query == "":
            search_query = name

        if page_size > _PAGE_SIZE or page_size < 1:
            page_size = _PAGE_SIZE

        # Flood Prevention: each IP keeps a rolling hit counter in
        # memcache; past 1000 hits the IP is banned with an
        # exponentially growing ban time, capped at one week.
        ip = str(self.request.remote_addr)
        ipCount = memcache.get(ip)
        if ipCount is not None:
            if ipCount > 1000:
                d = {}
                d['data'] = 'Quota Exceeded'
                d['token'] = token
                d['q'] = search_query
                s = json.dumps(d)
                self.response.out.write(s)

                ban_time = 600 + 60 * 2**((ipCount - 1000))
                if ban_time > 7 * 24 * 60 * 60:
                    ban_time = 7 * 24 * 60 * 60
                logging.info('Quota exceeded for ' + ip + ', count at ' +
                             str(ipCount) + ', banned for ' + str(ban_time))
                memcache.replace(ip, ipCount + 1, time=ban_time)

                # Email an admin on every 100th over-quota hit.
                if (ipCount - 1001) % 100 == 0:
                    message = mail.EmailMessage(
                        sender=
                        "IP Banning <*****@*****.**>",
                        subject="RPIDirectory IP " + ip + " Banned")
                    message.to = "*****@*****.**"
                    message.body = "IP: " + ip + "\nban time: " + str(
                        ban_time
                    ) + "\nQuery: " + search_query + "\nHit Count: " + str(
                        ipCount)
                    message.send()
                    logging.info("EMail sent about ip: " + ip)
                return
            memcache.replace(ip, ipCount + 1, time=600)
        else:
            memcache.add(ip, 1, time=600)

        # Normalize the query: AND together the sorted terms so that
        # equivalent queries share a single memcache entry.
        queries = map(str, search_query.split())
        queries = sorted(queries)
        query_string = ' AND '.join(queries)

        d = {}
        d["data"] = []
        d["token"] = token
        d["q"] = search_query

        data = memcache.get(query_string)

        if not data:
            data = []
            #Sort results by first name descending
            expr_list = [
                search.SortExpression(
                    expression='first_name',
                    default_value='',
                    direction=search.SortExpression.DESCENDING)
            ]
            # construct the sort options
            sort_opts = search.SortOptions(expressions=expr_list)
            offset_num = (page_num - 1) * page_size
            query_options = search.QueryOptions(limit=page_size,
                                                offset=offset_num,
                                                ids_only=True,
                                                sort_options=sort_opts)
            results = search.Index(name=_INDEX_NAME).search(query=search.Query(
                query_string=query_string, options=query_options))

            # ids_only search returns just doc ids (presumably RCS ids
            # -- TODO confirm); hydrate each hit from the Person model.
            for result in results:
                rcsid = result.doc_id
                r = Person.get_by_id(rcsid)
                if r:
                    data.append(Person.buildMap(r))
            # Cache the hydrated results for 28 days (2419200 seconds).
            memcache.add(query_string, data, time=2419200)
        d["data"] = data
        s = json.dumps(d)
        self.response.out.write(s)
예제 #17
0
    def _render(self):
        """Run the advanced team-awards search (if any filters are set)
        and render the results template.

        Returns the rendered 'advanced_search.html' page.
        """
        # A "new" (empty) search: no year, or a year with no other
        # filter selected -- show the blank form without querying.
        new_search = not self._year or (not self._award_types and not self._seed and not self._playoff_level and not self._cad_model)
        if new_search:
            result_models = []
            num_results = 0
            result_expressions = None
        else:
            # Construct query string
            sort_options_expressions = []
            returned_expressions = []
            partial_queries = []

            search_index = search.Index(name=SearchHelper.TEAM_AWARDS_INDEX)

            partial_queries.append('year={}'.format(self._year))
            # Any of the selected award types may match (OR-ed together),
            # AND-ed with the other filters below.
            award_filter = ' OR '.join(['award={}'.format(award_type) for award_type in self._award_types])
            if award_filter:
                partial_queries.append(award_filter)

            if self._seed:
                # Teams that have held this seed at least once; also
                # return the per-team count as 'seed_count'.
                seed_field_name = 'seed_{}'.format(self._seed)
                partial_queries.append('{}>0'.format(seed_field_name))
                returned_expressions.append(search.FieldExpression(
                    name='seed_count', expression=seed_field_name))

                if self._sort_field == 'seed':
                    sort_options_expressions.append(
                        search.SortExpression(
                            expression=seed_field_name,
                            direction=search.SortExpression.DESCENDING))

            if self._playoff_level:
                comp_level_name = 'comp_level_{}'.format(self.PLAYOFF_MAP[self._playoff_level])
                partial_queries.append('{}>0'.format(comp_level_name))
                returned_expressions.append(search.FieldExpression(
                    name='comp_level_count', expression=comp_level_name))

                if self._sort_field == 'playoff_level':
                    sort_options_expressions.append(
                        search.SortExpression(
                            expression=comp_level_name,
                            direction=search.SortExpression.DESCENDING))

            if self._cad_model:
                partial_queries.append('has_cad=1')

            query_string = ' AND ' .join(partial_queries)

            # Tiebreak sorting by number
            sort_options_expressions.append(
                search.SortExpression(
                    expression='number',
                    direction=search.SortExpression.ASCENDING))

            # Perform query
            query = search.Query(
                query_string=query_string,
                options=search.QueryOptions(
                    limit=self.PAGE_SIZE,
                    number_found_accuracy=10000,  # Larger than the number of possible results
                    offset=self.PAGE_SIZE * self._page,
                    sort_options=search.SortOptions(
                        expressions=sort_options_expressions
                    ),
                    returned_expressions=returned_expressions
                )
            )

            docs = search_index.search(query)
            num_results = docs.number_found
            model_keys = []
            # doc_id looks like '<team_key>_<suffix>'; group the
            # returned expression values (counts) by team key.
            result_expressions = defaultdict(lambda: defaultdict(float))
            for result in docs.results:
                team_key = result.doc_id.split('_')[0]
                model_keys.append(ndb.Key('Team', team_key))
                for expression in result.expressions:
                    result_expressions[team_key][expression.name] = expression.value

            # Fetch all Team models in parallel, then resolve.
            model_futures = ndb.get_multi_async(model_keys)

            result_models = [model_future.get_result() for model_future in model_futures]

        self.template_values.update({
            'valid_years': self.VALID_YEARS,
            'valid_award_types': self.VALID_AWARD_TYPES,
            'num_special_awards': len(SORT_ORDER),
            'valid_seeds': self.VALID_SEEDS,
            'seed': self._seed,
            'playoff_level': self._playoff_level,
            'page_size': self.PAGE_SIZE,
            'max_results': self.MAX_RESULTS,
            'page': self._page,
            'year': self._year,
            'award_types': self._award_types,
            'cad_model': self._cad_model,
            'new_search': new_search,
            'num_results': num_results,
            'capped_num_results': min(self.MAX_RESULTS, num_results),
            'result_models': result_models,
            'result_expressions': result_expressions,
            'sort_field': self._sort_field,
        })

        return jinja2_engine.render('advanced_search.html', self.template_values)
예제 #18
0
파일: search.py 프로젝트: mplsart/arts-612
def simple_search(querystring=None, start=None, end=None, category=None, venue_slug=None, limit=100, sort=None):
    """Search the event index with optional time/venue/category filters.

    Builds a query string from the given filters, applies an optional
    single-field sort ('-field' means descending), runs the search, and
    returns a dict with 'number_found', 'returned_count' and
    'index_results'.

    TODO: "term", "near", "by type", "now" and any combo
    """
    logging.debug([start, end])

    query = querystring if querystring else ''

    # Collect filter clauses first, then AND them onto the base query.
    # The exact spacing inside each clause is deliberate and preserved.
    clauses = []
    if start:
        # Now = started and hasn't ended yet
        clauses.append('start <= %s' % unix_time(start))
    if end:
        clauses.append('end >= %s' % unix_time(end))
    if venue_slug:
        clauses.append(' venue_slug: %s' % venue_slug)
    if category:
        if isinstance(category, list):
            # Any one of the listed categories may match.
            clauses.append(' (' + ' OR '.join(
                ' category: %s' % c for c in category) + ' ) ')
        else:
            clauses.append('category: %s' % category)

    for clause in clauses:
        if query:
            query += ' AND '
        query += clause

    #DISTANCE_LIMIT = int(3 * 111) # 3 KM - 3 * 10,000km per 90 degrees
    #querystring += ' AND distance(venue_geo, geopoint(%s,%s)) < %s' % (44.958815,-93.238138, DISTANCE_LIMIT)

    # Optional single-key sort; a leading '-' flips to descending.
    expressions = []
    if sort:
        if sort[0] == '-':
            sort_dir = search.SortExpression.DESCENDING
            sort = sort[1:]
        else:
            sort_dir = search.SortExpression.ASCENDING
        expressions.append(search.SortExpression(
            expression=sort, direction=sort_dir, default_value=0))

    q_options = search.QueryOptions(
        limit=limit,
        sort_options=search.SortOptions(expressions=expressions))

    logging.debug('Performing a search with querystring: %s' % query)

    search_results = get_search_index().search(
        search.Query(query_string=query, options=q_options))

    # Summarize: total matches vs. how many came back in this page.
    return {'number_found': search_results.number_found,
            'returned_count': len(search_results.results),
            'index_results': search_results}
예제 #19
0
class SearchTest(loanertest.EndpointsTestCase, parameterized.TestCase):
    """Tests for search_utils: query construction, search-document to
    protorpc-message conversion, and pagination arithmetic."""

    # Fixed assignment date shared by the device-message fixtures below.
    _ASSIGNED_DATE = datetime.datetime(year=2017, month=1, day=1)

    @parameterized.parameters((
        shelf_messages.Shelf(location='NY', capacity=50),
        'location:NY capacity:50 enabled:True',
    ), (
        shelf_messages.Shelf(location='NY', capacity=50, enabled=False),
        'location:NY capacity:50 enabled:False',
    ))
    def test_to_query(self, message, expected_query):
        """Tests the creation of a valid search query from ndb properties."""
        query = search_utils.to_query(message, shelf_model.Shelf)
        #  The query is split because ndb properties are unordered when called by
        #  model_class._properties. This test would be flaky otherwise.
        self.assertCountEqual(query.split(' '), expected_query.split(' '))

    @parameterized.named_parameters(
        ('Shelf Message', shelf_messages.Shelf(),
         search.ScoredDocument(
             doc_id='test_doc_id',
             fields=[
                 search.NumberField(name='capacity', value=20.0),
                 search.TextField(name='location', value='US MTV'),
                 search.AtomField(name='location', value='US-MTV'),
                 search.AtomField(name='enabled', value='True'),
                 search.GeoField(name='lat_long',
                                 value=search.GeoPoint(52.37, 4.88)),
                 # 'not_present' has no matching message field, so one
                 # logging.error call is expected (log_call_count=1).
                 search.TextField(name='not_present', value='MTV')
             ]),
         shelf_messages.Shelf(enabled=True,
                              location='US-MTV',
                              capacity=20,
                              latitude=52.37,
                              longitude=4.88), 1),
        ('Device Message', device_messages.Device(),
         search.ScoredDocument(
             doc_id='test_doc_id',
             fields=[
                 search.DateField(name='assignment_date',
                                  value=_ASSIGNED_DATE),
                 search.TextField(name='serial_number', value='1234'),
                 search.AtomField(name='enrolled', value='True'),
                 search.TextField(name='assigned_user', value='user')
             ]),
         device_messages.Device(
             enrolled=True,
             serial_number='1234',
             assigned_user='******',
             max_extend_date=_ASSIGNED_DATE + datetime.timedelta(days=14),
             assignment_date=_ASSIGNED_DATE), 0))
    def test_document_to_message(self, message, test_search_document,
                                 expected_message, log_call_count):
        """Tests the creation of a protorpc message from a search document."""
        with mock.patch.object(search_utils, 'logging',
                               autospec=True) as mock_logging:
            response_message = search_utils.document_to_message(
                test_search_document, message)
            self.assertEqual(response_message, expected_message)
            self.assertEqual(mock_logging.error.call_count, log_call_count)

    def test_calculate_page_offset(self):
        """Tests the calculation of page offset."""
        page_size = 10
        page_number = 5
        offset = search_utils.calculate_page_offset(page_size, page_number)
        self.assertEqual(40, offset)

    def test_calculate_total_pages(self):
        """Tests the calculation of total pages."""
        page_size = 6
        total_results = 11
        total_pages = search_utils.calculate_total_pages(
            page_size, total_results)
        self.assertEqual(2, total_pages)

    @parameterized.named_parameters(
        {
            'testcase_name': 'QueryStringOnly',
            'request':
            shared_messages.SearchRequest(query_string='enrolled:True'),
            'expected_values': ('enrolled:True', None, [])
        },
        {
            'testcase_name':
            'QueryStringWithReturnedFields',
            'request':
            shared_messages.SearchRequest(query_string='location:US-NYC',
                                          returned_fields=['location']),
            'expected_values': ('location:US-NYC', None, ['location'])
        },
    )
    def test_set_search_query_options(self, request, expected_values):
        """Tests setting the query options without sort options from message."""
        returned_query, returned_sort_options, returned_returned_fields = (
            search_utils.set_search_query_options(request))
        expected_query, expected_sort_options, expcted_returned_fields = (
            expected_values)
        self.assertEqual(expected_sort_options, returned_sort_options)
        self.assertEqual(expected_query, returned_query)
        self.assertEqual(expcted_returned_fields, returned_returned_fields)

    @parameterized.named_parameters(
        {
            'testcase_name':
            'ExpressionWithDirection',
            'request':
            shared_messages.SearchRequest(
                query_string='enrolled:True',
                expressions=[
                    shared_messages.SearchExpression(
                        expression='enrolled',
                        direction=shared_messages.SortDirection.ASCENDING)
                ]),
            'expected_sort_options_expressions': [
                search.SortExpression(
                    expression='enrolled',
                    direction=search.SortExpression.ASCENDING)
            ]
        },
        {
            'testcase_name':
            'MultipleExpressionsWithDirection',
            'request':
            shared_messages.SearchRequest(
                query_string='enrolled:True',
                expressions=[
                    shared_messages.SearchExpression(
                        expression='enrolled',
                        direction=shared_messages.SortDirection.ASCENDING),
                    shared_messages.SearchExpression(
                        expression='serial_number',
                        direction=shared_messages.SortDirection.DESCENDING)
                ]),
            'expected_sort_options_expressions': [
                search.SortExpression(
                    expression='enrolled',
                    direction=search.SortExpression.ASCENDING),
                search.SortExpression(
                    expression='serial_number',
                    direction=search.SortExpression.DESCENDING)
            ]
        },
        # The next two cases pass no direction; the expected expressions
        # show DESCENDING, i.e. the implementation's default direction.
        {
            'testcase_name':
            'ExpressionWithoutDirection',
            'request':
            shared_messages.SearchRequest(
                query_string='enrolled:True',
                expressions=[
                    shared_messages.SearchExpression(expression='enrolled')
                ]),
            'expected_sort_options_expressions':
            [search.SortExpression(expression='enrolled')]
        },
        {
            'testcase_name':
            'MultipleExpressionsWithoutDirection',
            'request':
            shared_messages.SearchRequest(
                query_string='enrolled:True',
                expressions=[
                    shared_messages.SearchExpression(expression='enrolled'),
                    shared_messages.SearchExpression(
                        expression='serial_number')
                ]),
            'expected_sort_options_expressions': [
                search.SortExpression(
                    expression='enrolled',
                    direction=search.SortExpression.DESCENDING),
                search.SortExpression(
                    expression='serial_number',
                    direction=search.SortExpression.DESCENDING)
            ]
        },
    )
    def test_set_search_query_options_with_sort_options(
            self, request, expected_sort_options_expressions):
        """Tests setting query options with sort options from message."""
        returned_query, returned_sort_options, returned_returned_fields = (
            search_utils.set_search_query_options(request))
        del returned_query  # Unused.
        del returned_returned_fields  # Unused.
        # Compare expression/direction pairwise rather than whole
        # SortOptions objects, which lack a useful equality.
        for i in range(len(returned_sort_options.expressions)):
            self.assertEqual(returned_sort_options.expressions[i].expression,
                             expected_sort_options_expressions[i].expression)
            self.assertEqual(returned_sort_options.expressions[i].direction,
                             expected_sort_options_expressions[i].direction)
예제 #20
0
파일: views.py 프로젝트: emz0/old-projects
	def get(self, request, *args, **kwargs):
		"""
		**Description**: Handles GET requests for the user search page.
		Fetches users from the 'user_index' search index, optionally
		filtered by the 'search_bar' query parameter, sorted by the
		number of uploaded datasets (descending), and renders a
		paginated result page.

		Fixes: replaced the removed-in-Python-3 ``<>`` operator with
		``!=``, and corrected the total-page computation so an exact
		multiple of ``limit`` no longer reports a trailing empty page.
		"""
		active_index = "user_index"
		limit = 50
		page = 1
		results = []
		query_raw = ""

		# Build the search query from the (optional) search bar input.
		if request.GET.get('search_bar') is not None:
			query_raw = request.GET.get('search_bar')
			if query_raw != "":
				final_query = "user_name_tokenized: "+SearchDatasetView.validate_search_term(query_raw)
			else:
				final_query = ""
		else:
			query_raw = final_query = ""

		# Translate the 1-based 'page' parameter into a result offset
		# (page 0 and a missing/empty parameter both mean page 1).
		if request.GET.get('page') is not None and request.GET.get('page') != "":
			if int(request.GET.get('page')) == 0:
				page = 1
			else:
				page = int(request.GET.get('page'))
			offset = (limit * page) - limit
		else:
			offset = 0

		try:
			index = search.Index(active_index)

			# Sort by dataset count, most prolific uploaders first.
			sort1 = search.SortExpression(expression='dataset_counter', direction=search.SortExpression.DESCENDING, default_value=0)
			sortops = search.SortOptions(expressions=[sort1])

			options = search.QueryOptions(offset=offset,
				limit=limit,
				returned_fields=['user_name', 'dataset_counter', 'profile_image'],
				sort_options=sortops
				)
			searchQuery = search.Query(query_string=final_query, options=options)

			search_results = index.search(searchQuery)

			number_of_results = search_results.number_found	# number of all available results
			# Ceiling division so an exact multiple of 'limit' does not
			# add an empty page; always report at least one page.
			number_of_pages = max(1, -(-number_of_results // limit))

			for doc in search_results:
				user_name = doc.field('user_name').value
				dataset_count = int(doc.field('dataset_counter').value)
				profile_image = doc.field('profile_image').value
				results.append({'user_name': user_name, 'dataset_count': dataset_count, 'profile_image': profile_image})

		except search.Error:
			return render(request, self.template_name, {'error': 'We are sorry. Search failed. Try again later please.'})

		searchForm = SearchUsersForm(initial={'search_bar': query_raw})
		return render(request, self.template_name, {'form': searchForm, 'number_of_results': number_of_results, 'results': results, 'page': page, 'number_of_pages': number_of_pages, 'search_query': query_raw})