Exemple #1
0
 def entityGenerator():
     """
     Yield the string ids of entities matching any of the token queries.

     Each id is yielded at most once across all queries. Queries for which
     formatSearchQuery() produces nothing are skipped. When the search is
     keyed off a 'stamped' query object, only ids below that key are
     requested and the query entity's nemesis ids are filtered out.

     If the consumer closes the generator early, GeneratorExit is absorbed
     so that the summary debug line below is still logged.
     """
     seen_ids = set()
     try:
         for token_query in token_queries:
             and_clauses = formatSearchQuery(token_query)
             if not and_clauses:
                 continue

             criteria = {
                 'sources.tombstone_id' : {'$exists' : False},
                 'sources.user_generated_id' : {'$exists' : False},
                 '$and' : and_clauses,
             }
             # Caller-supplied constraints take precedence over the defaults.
             criteria.update(kwargs)

             excluded_ids = None
             if query_obj.source == 'stamped' and query_obj.key:
                 criteria['_id'] = {'$lt' : ObjectId(query_obj.key)}
                 excluded_ids = query_obj.entity.sources.nemesis_ids

             for document in self.__id_query(criteria):
                 document_id = str(document['_id'])
                 if excluded_ids and document_id in excluded_ids:
                     continue
                 if document_id in seen_ids:
                     continue
                 seen_ids.add(document_id)
                 yield document_id
     except GeneratorExit:
         # Early close by the consumer: fall through to the summary log.
         pass
     logs.debug('Consumed %d results from query: %s' % (len(seen_ids), seen_ids))
Exemple #2
0
    def searchLite(self, queryCategory, queryText, timeout=None, coords=None, logRawResults=False):
        """
        Search stored entities of one category and return scored results.

        Parameters:
            queryCategory: one of 'film', 'music', 'place', 'app' or 'book'.
            queryText:     raw search text; tokenized via formatSearchQuery().
            timeout:       accepted for interface compatibility; not used here.
            coords:        accepted for interface compatibility; not used here
                           (the proximity-based place scoring is disabled).
            logRawResults: when true, dump every matched entity to the debug log.

        Returns:
            A list of SearchResult objects ordered by sortByRelevance(). An
            empty list is returned when the query text yields no token
            clauses, rather than matching every entity in the category.

        Raises:
            NotImplementedError: if queryCategory is not a supported category.
        """
        tokenQueries = formatSearchQuery(queryText)

        # Alternatives (joined with $or) that identify each category.
        categoryAlternatives = {
            'film' : [
                { 'types' : { '$in' : [ 'tv', 'movie' ] } },
                { 'subcategory' : { '$in' : [ 'tv', 'movie' ] } },
            ],
            'music' : [
                { 'types' : { '$in' : [ 'artist', 'album', 'track' ] } },
                { 'subcategory' : { '$in' : [ 'artist', 'album', 'song' ] } },
            ],
            'place' : [
                { 'kind' : 'place' },
                { 'subcategory' : { '$in' : [ 'bar', 'restaurant' ] } },
            ],
            'app' : [
                { 'types' : 'app' },
                { 'subcategory' : 'app' },
            ],
            'book' : [
                { 'types' : 'book' },
                { 'subcategory' : 'book' },
            ],
        }
        if queryCategory not in categoryAlternatives:
            raise NotImplementedError()

        # Without token clauses the query would match the whole category (and
        # trigger one entity fetch per match below), so treat it as no results.
        if not tokenQueries:
            return []

        query = {
            '$and' : tokenQueries + [
                { '$or' : categoryAlternatives[queryCategory] },
                # Exclude tombstoned and user-generated listings.
                { 'sources.tombstone_id' : { '$exists' : False } },
                { 'sources.user_generated_id' : { '$exists' : False } },
            ],
        }

        entityIds = [match['_id'] for match in self.__id_query(query)]
        # TODO: Should just retrieve all of this from the initial query!
        entityProxies = [ self.entityProxyFromKey(entityId) for entityId in entityIds ]

        if logRawResults:
            logComponents = ['\n\n\nSTAMPED RAW RESULTS\nSTAMPED RAW RESULTS\nSTAMPED RAW RESULTS\n\n\n']
            logComponents.extend(['\n\n%s\n\n' % str(proxy.entity) for proxy in entityProxies])
            logComponents.append('\n\n\nEND STAMPED RAW RESULTS\n\n\n')
            logs.debug(''.join(logComponents))

        entityStats = MongoEntityStatsCollection().getStatsForEntities(entityIds)
        statsByEntityId = { stats.entity_id : stats for stats in entityStats }

        results = []
        for entityProxy in entityProxies:
            stats = statsByEntityId.get(entityProxy.key)
            # Use fairly conservative scoring now for StampedSource on the assumption that it will probably cluster
            # with other stuff.
            num_stamps = 0 if stats is None else stats.num_stamps
            result = SearchResult(entityProxy)
            result.relevance = 0.3 + 0.2 * (num_stamps ** 0.5)
            result.addRelevanceComponentDebugInfo('Initial score based on Entity with %d stamps' % num_stamps,
                                                  result.relevance)

            # Apply the type-specific quality/relevance adjustments; the first
            # matching type wins.
            if isTrack(result.resolverObject):
                applyTrackTitleDataQualityTests(result, queryText)
                adjustTrackRelevanceByQueryMatch(result, queryText)
                augmentTrackDataQualityOnBasicAttributePresence(result)
            elif isAlbum(result.resolverObject):
                applyAlbumTitleDataQualityTests(result, queryText)
                adjustAlbumRelevanceByQueryMatch(result, queryText)
                augmentAlbumDataQualityOnBasicAttributePresence(result)
            elif isArtist(result.resolverObject):
                applyArtistTitleDataQualityTests(result, queryText)
                adjustArtistRelevanceByQueryMatch(result, queryText)
                augmentArtistDataQualityOnBasicAttributePresence(result)
            elif isTvShow(result.resolverObject):
                applyTvTitleDataQualityTests(result, queryText)
                adjustTvRelevanceByQueryMatch(result, queryText)
                augmentTvDataQualityOnBasicAttributePresence(result)
            elif isMovie(result.resolverObject):
                applyMovieTitleDataQualityTests(result, queryText)
                adjustMovieRelevanceByQueryMatch(result, queryText)
                augmentMovieDataQualityOnBasicAttributePresence(result)
            elif isBook(result.resolverObject):
                applyBookDataQualityTests(result, queryText)
                adjustBookRelevanceByQueryMatch(result, queryText)
                augmentBookDataQualityOnBasicAttributePresence(result)
            elif isPlace(result.resolverObject):
                applyPlaceTitleDataQualityTests(result, queryText)
                # NOTE: the title-match/proximity relevance step
                # (augmentPlaceRelevanceScoresForTitleMatchAndProximity) is
                # currently disabled, which is why coords goes unused.
                augmentPlaceDataQualityOnBasicAttributePresence(result)
            elif isApp(result.resolverObject):
                applyAppTitleDataQualityTests(result, queryText)
                augmentAppDataQualityOnBasicAttributePresence(result)

            results.append(result)

        sortByRelevance(results)
        return results