Exemple #1
0
def scoreResultsWithBasicDropoffScoring(resolverObjectList, sourceScore=1.0, dropoffFactor=0.7):
    """
    First pass of relevance scoring: wraps every resolver object in a SearchResult whose relevance depends only on
    the base score of the source and on the object's position in the list.

    The first object receives sourceScore; every following object receives the previous object's score multiplied
    by dropoffFactor. The SearchResults are returned in the same order as the input.
    """
    scored = []
    score = sourceScore
    # Positions are 1-based because they are only used for the human-readable debug message.
    for position, resolverObject in enumerate(resolverObjectList, 1):
        wrapped = SearchResult(resolverObject)
        wrapped.relevance = score
        wrapped.addRelevanceComponentDebugInfo('Initial score for ranking at %d' % position, score)
        scored.append(wrapped)
        # Accumulate by repeated multiplication so each score is derived from the one before it.
        score *= dropoffFactor
    return scored
Exemple #2
0
    def searchLite(self, queryCategory, queryText, timeout=None, coords=None, logRawResults=False):
        """
        Looks up the entities matching queryText within a single category and returns them as scored SearchResults.

        queryCategory must be one of 'film', 'music', 'place', 'app' or 'book'; any other value raises
        NotImplementedError. The query only requests entities that have neither a sources.tombstone_id nor a
        sources.user_generated_id field. Every match starts with a relevance computed from its stamp count (zero when
        no stats are found for it) and is then run through the data quality / relevance helpers for its entity type.

        When logRawResults is true the raw entities are written to the debug log. timeout and coords are accepted but
        are not used anywhere in this method.

        Returns the list of results after handing it to sortByRelevance.
        """
        tokenQueries = formatSearchQuery(queryText)

        # One (category, '$or' alternatives) pair per supported category. An entity belongs to the category when it
        # satisfies any one of the alternatives.
        categoryFilters = (
            ('film', [
                { 'types' : { '$in' : [ 'tv', 'movie' ] } },
                { 'subcategory' : { '$in' : [ 'tv', 'movie' ] } },
            ]),
            ('music', [
                { 'types' : { '$in' : [ 'artist', 'album', 'track' ] } },
                { 'subcategory' : { '$in' : [ 'artist', 'album', 'song' ] } },
            ]),
            ('place', [
                { 'kind' : 'place' },
                { 'subcategory' : { '$in' : [ 'bar', 'restaurant' ] } },
            ]),
            ('app', [
                { 'types' : 'app' },
                { 'subcategory' : 'app' },
            ]),
            ('book', [
                { 'types' : 'book' },
                { 'subcategory' : 'book' },
            ]),
        )
        for categoryName, alternatives in categoryFilters:
            if queryCategory == categoryName:
                break
        else:
            # No supported category matched.
            raise NotImplementedError()

        # Every token clause must match, the entity must be in the requested category, and tombstoned or
        # user-generated listings are excluded.
        query = {
            '$and' : tokenQueries + [
                { '$or' : alternatives },
                { 'sources.tombstone_id' : { '$exists' : False } },
                { 'sources.user_generated_id' : { '$exists' : False } },
            ],
        }

        entityIds = [match['_id'] for match in self.__id_query(query)]
        # TODO: Should just retrieve all of this from the initial query!
        entityProxies = [self.entityProxyFromKey(entityId) for entityId in entityIds]

        if logRawResults:
            logHeader = '\n\n\nSTAMPED RAW RESULTS\nSTAMPED RAW RESULTS\nSTAMPED RAW RESULTS\n\n\n'
            logBody = ''.join('\n\n%s\n\n' % str(proxy.entity) for proxy in entityProxies)
            logFooter = '\n\n\nEND STAMPED RAW RESULTS\n\n\n'
            logs.debug(logHeader + logBody + logFooter)

        entityStats = MongoEntityStatsCollection().getStatsForEntities(entityIds)
        statsByEntityId = {entityStat.entity_id: entityStat for entityStat in entityStats}

        scoredResults = []
        for entityProxy in entityProxies:
            proxyStats = statsByEntityId.get(entityProxy.key)
            if proxyStats is None:
                num_stamps = 0
            else:
                num_stamps = proxyStats.num_stamps

            # Use fairly conservative scoring now for StampedSource on the assumption that it will probably cluster
            # with other stuff.
            scored = SearchResult(entityProxy)
            scored.relevance = 0.3 + 0.2 * (num_stamps ** 0.5)
            scored.addRelevanceComponentDebugInfo('Initial score based on Entity with %d stamps' % num_stamps,
                                                  scored.relevance)

            # Apply the helpers for the first entity type that matches; results matching no type keep their
            # initial score untouched.
            if isTrack(scored.resolverObject):
                applyTrackTitleDataQualityTests(scored, queryText)
                adjustTrackRelevanceByQueryMatch(scored, queryText)
                augmentTrackDataQualityOnBasicAttributePresence(scored)
            elif isAlbum(scored.resolverObject):
                applyAlbumTitleDataQualityTests(scored, queryText)
                adjustAlbumRelevanceByQueryMatch(scored, queryText)
                augmentAlbumDataQualityOnBasicAttributePresence(scored)
            elif isArtist(scored.resolverObject):
                applyArtistTitleDataQualityTests(scored, queryText)
                adjustArtistRelevanceByQueryMatch(scored, queryText)
                augmentArtistDataQualityOnBasicAttributePresence(scored)
            elif isTvShow(scored.resolverObject):
                applyTvTitleDataQualityTests(scored, queryText)
                adjustTvRelevanceByQueryMatch(scored, queryText)
                augmentTvDataQualityOnBasicAttributePresence(scored)
            elif isMovie(scored.resolverObject):
                applyMovieTitleDataQualityTests(scored, queryText)
                adjustMovieRelevanceByQueryMatch(scored, queryText)
                augmentMovieDataQualityOnBasicAttributePresence(scored)
            elif isBook(scored.resolverObject):
                applyBookDataQualityTests(scored, queryText)
                adjustBookRelevanceByQueryMatch(scored, queryText)
                augmentBookDataQualityOnBasicAttributePresence(scored)
            elif isPlace(scored.resolverObject):
                applyPlaceTitleDataQualityTests(scored, queryText)
                # The title-match / proximity relevance adjustment for places is currently disabled:
                # augmentPlaceRelevanceScoresForTitleMatchAndProximity(scored, queryText, coords)
                augmentPlaceDataQualityOnBasicAttributePresence(scored)
            elif isApp(scored.resolverObject):
                applyAppTitleDataQualityTests(scored, queryText)
                augmentAppDataQualityOnBasicAttributePresence(scored)

            scoredResults.append(scored)

        sortByRelevance(scoredResults)
        return scoredResults