def entityGenerator():
    """Yield the string ids of entities matching any of the token queries.

    For every entry of ``token_queries`` a Mongo filter is built that
    excludes documents carrying ``sources.tombstone_id`` or
    ``sources.user_generated_id``, ANDs in the clauses produced by
    ``formatSearchQuery`` and is then overlaid with ``kwargs``.  When the
    query object comes from the 'stamped' source and has a key, only ids
    below that key are considered and the ids listed in the entity's
    ``sources.nemesis_ids`` are skipped.

    Each id is yielded at most once across all token queries.  If the
    consumer closes the generator early, the GeneratorExit is absorbed so
    that the summary line is still logged.
    """
    seen_ids = set()

    try:
        for raw_query in token_queries:
            token_clauses = formatSearchQuery(raw_query)
            if not token_clauses:
                # Nothing searchable in this query; move on to the next one.
                continue

            criteria = {
                'sources.tombstone_id': {'$exists': False},
                'sources.user_generated_id': {'$exists': False},
                '$and': token_clauses,
            }
            # Caller-supplied filters are applied last-but-one, so they may
            # override the defaults above; the '_id' bound below wins over them.
            criteria.update(kwargs)

            excluded_ids = None
            if query_obj.source == 'stamped' and query_obj.key:
                criteria['_id'] = {'$lt': ObjectId(query_obj.key)}
                excluded_ids = query_obj.entity.sources.nemesis_ids

            for doc in self.__id_query(criteria):
                doc_id = str(doc['_id'])
                if excluded_ids and doc_id in excluded_ids:
                    continue
                if doc_id in seen_ids:
                    continue
                seen_ids.add(doc_id)
                yield doc_id
    except GeneratorExit:
        # Consumer stopped iterating; fall through to the summary log.
        pass

    logs.debug('Consumed %d results from query: %s' % (len(seen_ids), seen_ids))
def searchLite(self, queryCategory, queryText, timeout=None, coords=None, logRawResults=False):
    """Search stored entities of one category and return scored results.

    Args:
        queryCategory: one of 'film', 'music', 'place', 'app' or 'book'.
        queryText: raw query text; tokenized via formatSearchQuery and also
            handed to the per-type scoring helpers.
        timeout: accepted for interface compatibility; not used here.
        coords: accepted for interface compatibility; not used here (the
            proximity-based place scoring call is currently disabled).
        logRawResults: when true, every matched entity is dumped to the
            debug log.

    Returns:
        A list of SearchResult objects after being passed through
        sortByRelevance.  The list is empty when queryText produces no
        search tokens.

    Raises:
        NotImplementedError: if queryCategory is not one of the supported
            categories.
    """
    # Per-category alternatives; an entity qualifies if ANY clause matches.
    categoryFilters = {
        'film': [
            {'types': {'$in': ['tv', 'movie']}},
            {'subcategory': {'$in': ['tv', 'movie']}},
        ],
        'music': [
            {'types': {'$in': ['artist', 'album', 'track']}},
            {'subcategory': {'$in': ['artist', 'album', 'song']}},
        ],
        'place': [
            {'kind': 'place'},
            {'subcategory': {'$in': ['bar', 'restaurant']}},
        ],
        'app': [
            {'types': 'app'},
            {'subcategory': 'app'},
        ],
        'book': [
            {'types': 'book'},
            {'subcategory': 'book'},
        ],
    }

    tokenQueries = formatSearchQuery(queryText)

    try:
        categoryClauses = categoryFilters[queryCategory]
    except (KeyError, TypeError):
        # Unknown (or unhashable) category: same error as before.
        raise NotImplementedError()

    if not tokenQueries:
        # Without token clauses the filter would reduce to the category
        # clause alone and match every entity of that category, each of
        # which then costs an entityProxyFromKey call below.
        return []

    query = {
        '$and': tokenQueries + [
            {'$or': categoryClauses},
            # Exclude tombstoned and user-generated listings.
            {'sources.tombstone_id': {'$exists': False}},
            {'sources.user_generated_id': {'$exists': False}},
        ],
    }

    entityIds = [match['_id'] for match in self.__id_query(query)]
    # TODO: Should just retrieve all of this from the initial query!
    entityProxies = [self.entityProxyFromKey(entityId) for entityId in entityIds]

    if logRawResults:
        logComponents = ['\n\n\nSTAMPED RAW RESULTS\nSTAMPED RAW RESULTS\nSTAMPED RAW RESULTS\n\n\n']
        logComponents.extend(['\n\n%s\n\n' % str(proxy.entity) for proxy in entityProxies])
        logComponents.append('\n\n\nEND STAMPED RAW RESULTS\n\n\n')
        logs.debug(''.join(logComponents))

    entityStats = MongoEntityStatsCollection().getStatsForEntities(entityIds)
    statsByEntityId = dict((stats.entity_id, stats) for stats in entityStats)

    results = []
    for entityProxy in entityProxies:
        stats = statsByEntityId.get(entityProxy.key, None)
        # Use fairly conservative scoring now for StampedSource on the
        # assumption that it will probably cluster with other stuff.
        num_stamps = 0 if stats is None else stats.num_stamps

        result = SearchResult(entityProxy)
        result.relevance = 0.3 + 0.2 * (num_stamps ** 0.5)
        result.addRelevanceComponentDebugInfo(
            'Initial score based on Entity with %d stamps' % num_stamps, result.relevance)

        # Type-specific quality/relevance adjustments.  The first matching
        # predicate wins, so the order of these branches is significant.
        resolverObject = result.resolverObject
        if isTrack(resolverObject):
            applyTrackTitleDataQualityTests(result, queryText)
            adjustTrackRelevanceByQueryMatch(result, queryText)
            augmentTrackDataQualityOnBasicAttributePresence(result)
        elif isAlbum(resolverObject):
            applyAlbumTitleDataQualityTests(result, queryText)
            adjustAlbumRelevanceByQueryMatch(result, queryText)
            augmentAlbumDataQualityOnBasicAttributePresence(result)
        elif isArtist(resolverObject):
            applyArtistTitleDataQualityTests(result, queryText)
            adjustArtistRelevanceByQueryMatch(result, queryText)
            augmentArtistDataQualityOnBasicAttributePresence(result)
        elif isTvShow(resolverObject):
            applyTvTitleDataQualityTests(result, queryText)
            adjustTvRelevanceByQueryMatch(result, queryText)
            augmentTvDataQualityOnBasicAttributePresence(result)
        elif isMovie(resolverObject):
            applyMovieTitleDataQualityTests(result, queryText)
            adjustMovieRelevanceByQueryMatch(result, queryText)
            augmentMovieDataQualityOnBasicAttributePresence(result)
        elif isBook(resolverObject):
            applyBookDataQualityTests(result, queryText)
            adjustBookRelevanceByQueryMatch(result, queryText)
            augmentBookDataQualityOnBasicAttributePresence(result)
        elif isPlace(resolverObject):
            applyPlaceTitleDataQualityTests(result, queryText)
            # NOTE: augmentPlaceRelevanceScoresForTitleMatchAndProximity(result,
            # queryText, coords) is intentionally not called at the moment,
            # which is why coords goes unused.
            augmentPlaceDataQualityOnBasicAttributePresence(result)
        elif isApp(resolverObject):
            applyAppTitleDataQualityTests(result, queryText)
            augmentAppDataQualityOnBasicAttributePresence(result)

        results.append(result)

    sortByRelevance(results)
    return results