def scoreResultsWithBasicDropoffScoring(resolverObjectList, sourceScore=1.0, dropoffFactor=0.7):
    """
    Wrap each resolver object in a SearchResult and give it a first-pass relevance.

    The first item receives ``sourceScore``; every following item receives the
    previous item's score multiplied by ``dropoffFactor``, so relevance depends
    only on the source's base score and the item's position in the list.

    Returns the new SearchResult objects in the same order as the input.
    """
    scoredResults = []
    positionScore = sourceScore
    for position, resolverObject in enumerate(resolverObjectList, 1):
        scored = SearchResult(resolverObject)
        scored.relevance = positionScore
        scored.addRelevanceComponentDebugInfo('Initial score for ranking at %d' % position, positionScore)
        scoredResults.append(scored)
        # Running product (not sourceScore * factor ** n) keeps the float values exact.
        positionScore *= dropoffFactor
    return scoredResults
def searchLite(self, queryCategory, queryText, timeout=None, coords=None, logRawResults=False):
    """
    Look up stored entities matching ``queryText`` within one category and
    return them as SearchResult objects ordered by sortByRelevance.

    Args:
        queryCategory: one of 'film', 'music', 'place', 'app' or 'book'.
        queryText: the user's query; it is tokenized by formatSearchQuery and
            also handed to the per-type title/relevance helpers.
        timeout: accepted for interface compatibility; not used by this method.
        coords: accepted for interface compatibility; not used by this method
            (the only call that referenced it is currently commented out).
        logRawResults: when true, the string form of every matched entity is
            written to the debug log.

    Returns:
        A list of SearchResult objects, one per matched entity.

    Raises:
        NotImplementedError: if ``queryCategory`` is not a supported category.
    """
    tokenQueries = formatSearchQuery(queryText)

    # For each category, the alternative clauses an entity may satisfy; they
    # are OR-ed together below. Keeping them in one table avoids repeating the
    # surrounding '$and' / '$or' scaffolding once per category.
    categoryFilters = {
        'film': [
            {'types': {'$in': ['tv', 'movie']}},
            {'subcategory': {'$in': ['tv', 'movie']}},
        ],
        'music': [
            {'types': {'$in': ['artist', 'album', 'track']}},
            {'subcategory': {'$in': ['artist', 'album', 'song']}},
        ],
        'place': [
            {'kind': 'place'},
            {'subcategory': {'$in': ['bar', 'restaurant']}},
        ],
        'app': [
            {'types': 'app'},
            {'subcategory': 'app'},
        ],
        'book': [
            {'types': 'book'},
            {'subcategory': 'book'},
        ],
    }
    try:
        categoryFilter = categoryFilters[queryCategory]
    except (KeyError, TypeError):
        # TypeError covers unhashable values, which are unsupported as well.
        raise NotImplementedError('Unsupported search category: %r' % (queryCategory,))

    query = {
        '$and': tokenQueries + [
            {'$or': categoryFilter},
            # Exclude tombstoned listings.
            {'sources.tombstone_id': {'$exists': False}},
            # Exclude entities that carry a user_generated_id.
            {'sources.user_generated_id': {'$exists': False}},
        ],
    }

    entityIds = [match['_id'] for match in self.__id_query(query)]
    # TODO: Should just retrieve all of this from the initial query!
    entityProxies = [self.entityProxyFromKey(entityId) for entityId in entityIds]

    if logRawResults:
        logComponents = ['\n\n\nSTAMPED RAW RESULTS\nSTAMPED RAW RESULTS\nSTAMPED RAW RESULTS\n\n\n']
        logComponents.extend(['\n\n%s\n\n' % str(proxy.entity) for proxy in entityProxies])
        logComponents.append('\n\n\nEND STAMPED RAW RESULTS\n\n\n')
        logs.debug(''.join(logComponents))

    entityStats = MongoEntityStatsCollection().getStatsForEntities(entityIds)
    statsByEntityId = dict((stats.entity_id, stats) for stats in entityStats)

    results = []
    for entityProxy in entityProxies:
        stats = statsByEntityId.get(entityProxy.key)
        # Use fairly conservative scoring now for StampedSource on the assumption
        # that it will probably cluster with other stuff.
        # Entities without a stats record are scored as having zero stamps.
        num_stamps = 0 if stats is None else stats.num_stamps

        result = SearchResult(entityProxy)
        result.relevance = 0.3 + 0.2 * (num_stamps ** 0.5)
        result.addRelevanceComponentDebugInfo(
            'Initial score based on Entity with %d stamps' % num_stamps, result.relevance)

        # Type-specific adjustments. The first matching type wins; a result that
        # matches none of these keeps its initial score untouched.
        if isTrack(result.resolverObject):
            applyTrackTitleDataQualityTests(result, queryText)
            adjustTrackRelevanceByQueryMatch(result, queryText)
            augmentTrackDataQualityOnBasicAttributePresence(result)
        elif isAlbum(result.resolverObject):
            applyAlbumTitleDataQualityTests(result, queryText)
            adjustAlbumRelevanceByQueryMatch(result, queryText)
            augmentAlbumDataQualityOnBasicAttributePresence(result)
        elif isArtist(result.resolverObject):
            applyArtistTitleDataQualityTests(result, queryText)
            adjustArtistRelevanceByQueryMatch(result, queryText)
            augmentArtistDataQualityOnBasicAttributePresence(result)
        elif isTvShow(result.resolverObject):
            applyTvTitleDataQualityTests(result, queryText)
            adjustTvRelevanceByQueryMatch(result, queryText)
            augmentTvDataQualityOnBasicAttributePresence(result)
        elif isMovie(result.resolverObject):
            applyMovieTitleDataQualityTests(result, queryText)
            adjustMovieRelevanceByQueryMatch(result, queryText)
            augmentMovieDataQualityOnBasicAttributePresence(result)
        elif isBook(result.resolverObject):
            applyBookDataQualityTests(result, queryText)
            adjustBookRelevanceByQueryMatch(result, queryText)
            augmentBookDataQualityOnBasicAttributePresence(result)
        elif isPlace(result.resolverObject):
            applyPlaceTitleDataQualityTests(result, queryText)
            # Title-match/proximity scoring is disabled, so coords goes unused:
            # augmentPlaceRelevanceScoresForTitleMatchAndProximity(result, queryText, coords)
            augmentPlaceDataQualityOnBasicAttributePresence(result)
        elif isApp(result.resolverObject):
            applyAppTitleDataQualityTests(result, queryText)
            augmentAppDataQualityOnBasicAttributePresence(result)

        results.append(result)

    sortByRelevance(results)
    return results