Beispiel #1
0
 def __init__(self, guiUtility):
     """
     Register this object as the one and only instance and reset all
     search state.

     GUIUTILITY is stored as self.guiUtility.

     Raises RuntimeError when an instance has already been registered.
     """
     if TorrentManager.__single:
         raise RuntimeError("TorrentSearchGridManager is singleton")
     TorrentManager.__single = self
     self.guiUtility = guiUtility

     # --- plain search state -------------------------------------
     # all DB matches for the keywords, not filtered by category
     self.hits = []
     # remote results for the current keywords
     self.remoteHits = {}
     # used to ask for a refresh when remote results come in
     self.gridmgr = None
     self.searchkeywords = []
     self.oldsearchkeywords = []
     self.filteredResults = 0
     self.bundle_mode = None

     # --- collaborators (created in the original order) -----------
     self.guiserver = GUITaskQueue.getInstance()
     self.rerankingStrategy = DefaultTorrentReranker()
     self.bundler = Bundler()
     self.category = Category.getInstance()
Beispiel #2
0
class TorrentManager:
    # Code to make this a singleton
    __single = None
   
    def __init__(self, guiUtility):
        """
        Register this object as the one and only instance and reset all
        search state.

        GUIUTILITY is stored as self.guiUtility.

        Raises RuntimeError when an instance has already been registered.
        """
        if TorrentManager.__single:
            raise RuntimeError("TorrentSearchGridManager is singleton")
        TorrentManager.__single = self
        self.guiUtility = guiUtility

        # --- plain search state -------------------------------------
        # all DB matches for the keywords, not filtered by category
        self.hits = []
        # remote results for the current keywords
        self.remoteHits = {}
        # used to ask for a refresh when remote results come in
        self.gridmgr = None
        self.searchkeywords = []
        self.oldsearchkeywords = []
        self.filteredResults = 0
        self.bundle_mode = None

        # --- collaborators (created in the original order) -----------
        self.guiserver = GUITaskQueue.getInstance()
        self.rerankingStrategy = DefaultTorrentReranker()
        self.bundler = Bundler()
        self.category = Category.getInstance()

    def getInstance(*args, **kw):
        if TorrentManager.__single is None:
            TorrentManager(*args, **kw)       
        return TorrentManager.__single
    getInstance = staticmethod(getInstance)
    
    def getCollectedFilename(self, torrent):
        """
        Locate the collected .torrent file that belongs to TORRENT.

        TORRENT is the dictionary describing the torrent; its
        'torrent_file_name' entry is filled in (or recomputed) from the
        infohash as a side effect.

        Returns the full path when the file exists in the session's
        torrent collecting dir, otherwise None.
        """
        collecting_dir = self.guiUtility.utility.session.get_torrent_collecting_dir()

        if not torrent.get('torrent_file_name'):
            torrent['torrent_file_name'] = get_collected_torrent_filename(torrent['infohash'])
        candidate = os.path.join(collecting_dir, torrent['torrent_file_name'])

        if not os.path.isfile(candidate):
            # not there: the stored name may stem from an older
            # torrent_collecting_dir, so derive it again and retry once
            torrent['torrent_file_name'] = get_collected_torrent_filename(torrent['infohash'])
            candidate = os.path.join(collecting_dir, torrent['torrent_file_name'])
            if not os.path.isfile(candidate):
                return None

        return candidate
        
    def getTorrent(self, torrent, callback):
        """
        TORRENT is a dictionary containing torrent information used to
        display the entry on the UI. it is NOT the torrent file!
        
        CALLBACK is called when the torrent is downloaded. When no
        torrent can be downloaded the callback is ignored
        
        Returns a filename, if filename is known or a boolean + request_type
        describing if the torrent is requested
        """
        torrent_filename = self.getCollectedFilename(torrent)
        if torrent_filename:
            return torrent_filename
        
        #.torrent not found, try to download from peers
        if self.downloadTorrentfileFromPeers(torrent, callback):
            return (True, "from peers")
        
        return (False, "could not get torrent")
             
    def downloadTorrentfileFromPeers(self, torrent, callback, duplicate=True, prio = 0):
        """
        TORRENT is a dictionary containing torrent information used to
        display the entry on the UI. it is NOT the torrent file!

        CALLBACK is called when the torrent is downloaded. When no
        torrent can be downloaded the callback is ignored

        DUPLICATE can be True: the file will be downloaded from peers
        regardless of a previous/current download attempt (returns
        True). Or DUPLICATE can be False: the file will only be
        downloaded when it was not yet attempted to download (when
        False is returned no callback will be made)
        
        PRIO is the priority, default is 0 which means we need this torrent now.
        If PRIO != 0, then a rate limiter could be used by the remotetorrentrequester

        Returns True or False
        """

        # return False when duplicate
        if not duplicate and torrent.get('query_torrent_was_requested', False):
            return False
        
        torrent['query_torrent_was_requested'] = True
        if not 'query_permids' in torrent or len(torrent['query_permids']) == 0:
            self.guiUtility.utility.session.download_torrentfile(torrent['infohash'], callback, prio)
            
        else:
            for permid in torrent['query_permids']:
                self.guiUtility.utility.session.download_torrentfile_from_peer(permid, torrent['infohash'], callback, prio)
        
        return True
    
    def downloadTorrent(self, torrent, dest = None, secret = False, vodmode = False, selectedFiles = None):
        """
        Start downloading the content described by TORRENT.

        TORRENT is the dictionary with the information shown in the UI,
        not the torrent file itself. DEST, VODMODE and SELECTEDFILES are
        passed on to the frame's startDownload; a true SECRET is stored
        in the torrent db once the download was started.

        When the .torrent file still has to be fetched from peers, this
        method is called again (same arguments) from the fetch callback
        and the request description is returned. When the .torrent
        cannot be obtained, the user is asked (on the wx thread) whether
        the torrent should be deleted. In all other cases None is
        returned.
        """
        def downloadTorrent_callback(infohash, metadata, filename):
            # the .torrent arrived: run the very same request again
            return self.downloadTorrent(torrent, dest, secret, vodmode, selectedFiles)

        found = self.getTorrent(torrent, downloadTorrent_callback)

        if not isinstance(found, basestring):
            if found[0]:
                # requested from peers, the callback above will re-enter
                return found[1]

            # torrent not found
            def showdialog():
                lang = self.guiUtility.utility.lang
                message = lang.get('delete_torrent') % torrent['name']
                dlg = wx.MessageDialog(self.guiUtility.frame, message, lang.get('delete_dead_torrent'),
                                    wx.YES_NO|wx.NO_DEFAULT|wx.ICON_QUESTION)
                answer = dlg.ShowModal()
                dlg.Destroy()

                if answer == wx.ID_YES:
                    self.torrent_db.deleteTorrent(torrent['infohash'], delete_file=True, commit = True)

            wx.CallAfter(showdialog)
            return None

        # got an actual filename
        name = torrent.get('name') or torrent['infohash']

        clicklog = {'keywords': self.searchkeywords,
                    'reranking_strategy': self.rerankingStrategy.getID()}
        if "click_position" in torrent:
            clicklog["click_position"] = torrent["click_position"]

        # Api download
        started = self.guiUtility.frame.startDownload(found, destdir=dest, clicklog=clicklog, name=name,
                                                     vodmode=vodmode, selectedFiles=selectedFiles) ## remove name=name
        if not started:
            return None

        if secret:
            self.torrent_db.setSecret(torrent['infohash'], secret)

        if DEBUG:
            print >>sys.stderr,'standardDetails: download: download started'

        torrent['myDownloadHistory'] = True
    
    def isTorrentPlayable(self, torrent, default=(False, [], []), callback=None):
        """
        Determine whether TORRENT contains playable (video) files.

        TORRENT is the dictionary with the information shown in the UI,
        not the torrent file itself. When the .torrent file is available
        its 'comment' and 'trackers' entries are filled in.

        DEFAULT is the (playable, files, allfiles) value used when the
        .torrent cannot be obtained. NOTE: the default tuple holds list
        objects that are shared between calls; do not mutate them.

        CALLBACK, when given, is called as callback(torrent, result)
        instead of returning the result. It is also re-used when this
        method is invoked again after the .torrent was fetched from
        peers.

        Returns
         - (torrent, (playable, files, allfiles)) when the .torrent is on
           disk and no CALLBACK was given; files are the entries matching
           videoextdefaults, allfiles comes from
           get_files_as_unicode_with_length();
         - (torrent, DEFAULT) when the .torrent cannot be obtained and no
           CALLBACK was given;
         - the request description string when the .torrent was requested
           from peers;
         - None whenever the result was delivered through CALLBACK.
        """
        torrent_callback = lambda infohash, metadata, filename: self.isTorrentPlayable(torrent, default, callback)
        torrent_callback.__name__ = "isTorrentPlayable_callback"
        torrent_filename = self.getTorrent(torrent, torrent_callback)

        if isinstance(torrent_filename, basestring):
            # got actual filename
            tdef = TorrentDef.load(torrent_filename)

            files = tdef.get_files_as_unicode(exts=videoextdefaults)
            allfiles = tdef.get_files_as_unicode_with_length()
            playable = len(files) > 0

            torrent['comment'] = tdef.get_comment_as_unicode()
            trackers = tdef.get_tracker_hierarchy()
            if not trackers:
                # no hierarchy available: wrap the single tracker in the same shape
                trackers = [[tdef.get_tracker()]]
            torrent['trackers'] = trackers

            result = (playable, files, allfiles)
            if callback is not None:
                callback(torrent, result)
                return None
            return torrent, result

        if not torrent_filename[0]:
            # the .torrent could not be requested at all
            if DEBUG:
                print >>sys.stderr, "standardDetails:torrent_is_playable returning default", default
            # callback is optional: calling None here used to raise TypeError
            if callback is not None:
                callback(torrent, default)
                return None
            return torrent, default

        # requested from peers; torrent_callback re-enters once it arrived
        return torrent_filename[1]
    
    def getSwarmInfo(self, infohash):
        return self.torrent_db.getSwarmInfoByInfohash(infohash)
    
    def set_gridmgr(self,gridmgr):
        self.gridmgr = gridmgr
    
    def connect(self):
        """
        Open the database handlers this manager works with and create
        the local search manager.

        Handlers are obtained from the session of self.guiUtility; the
        library manager is taken from self.guiUtility as well.
        """
        session = self.guiUtility.utility.session

        # attribute name -> notification subject, opened in this order
        handlers = (('torrent_db', NTFY_TORRENTS),
                    ('pref_db', NTFY_PREFERENCES),
                    ('mypref_db', NTFY_MYPREFERENCES),
                    ('search_db', NTFY_SEARCH),
                    ('votecastdb', NTFY_VOTECAST))
        for attribute, subject in handlers:
            setattr(self, attribute, session.open_dbhandler(subject))

        self.searchmgr = SearchManager(self.torrent_db)
        self.library_manager = self.guiUtility.library_manager
    
    def getHitsInCategory(self, categorykey = 'all', sort = 'rameezmetric'):
        """
        Collect, filter, rank and bundle the hits for the current keywords.

        CATEGORYKEY is 'all' or a category name such as 'Video' or
        'Document'; it is compared case-insensitively.

        SORT selects the ordering: self.sort() is only applied when it
        equals 'rameezmetric'. The reranking strategy always runs.

        Side effects: self.hits is replaced/extended, self.filteredResults
        is increased for every local hit rejected on category, and a
        prefetch task is queued on the gui server.

        Returns a list: [number of returned hits, self.filteredResults,
        bundle mode reported by the bundler, returned hits].
        """
        if DEBUG:
            begintime = time()

        # snapshot of the bundle mode in effect when this request started
        requested_bundle_mode = self.bundle_mode

        if DEBUG:
            print >>sys.stderr,"TorrentSearchManager: getHitsInCategory:",categorykey

        # the key can be 'all', 'Video', 'Document', ...
        wanted = categorykey.lower()
        enabled_lower = ["other"] + [catname.lower() for catname, _ in self.category.getCategoryNames()]

        # TODO: do all filtering in DB query
        def torrentFilter(torrent):
            # hits without a category count as "other"
            categories = torrent.get("category", []) or ["other"]

            if wanted == 'all':
                okCategory = False
                for torcat in categories:
                    if torcat.lower() in enabled_lower:
                        okCategory = True
                        break
            else:
                okCategory = wanted in [cat.lower() for cat in categories]

            if not okCategory:
                self.filteredResults += 1

            # dead torrents are never shown in the search results
            okGood = torrent['status'] != 'dead'
            return okCategory and okGood

        # 1. Local search puts hits in self.hits
        if DEBUG:
            beginlocalsearch = time()
        fresh_local_hits = self.searchLocalDatabase()

        # 2. Filter self.hits on category and status (fresh results only)
        if DEBUG:
            print >>sys.stderr,'TorrentSearchGridManager: getHitsInCat: search found: %d items took %s' % (len(self.hits), time() - beginlocalsearch)
            beginfilterhits = time()

        if fresh_local_hits:
            self.hits = filter(torrentFilter, self.hits)

        if DEBUG:
            print >>sys.stderr,'TorrentSearchGridManager: getHitsInCat: torrentFilter after filter found: %d items took %s' % (len(self.hits), time() - beginfilterhits)

        # 3. Add remote hits that may apply. TODO: double filtering, could
        # add remote hits to self.hits before filter(torrentFilter,...)
        self.addStoredRemoteResults()

        if DEBUG:
            print >>sys.stderr,'TorrentSearchGridManager: getHitsInCat: found after remote search: %d items' % len(self.hits)
            beginsort = time()

        if sort == 'rameezmetric':
            self.sort()

        # Nic: the reranking is somewhat orthogonal to the sorting above and
        # should eventually be combined with it. It is very tame for now
        # (exchanging first and second place under certain circumstances).
        self.hits = self.rerankingStrategy.rerank(self.hits, self.searchkeywords, self.torrent_db,
                                                  self.pref_db, self.mypref_db, self.search_db)

        # boudewijn: now that the results are sorted, prefetch the top N torrents.
        self.guiserver.add_task(self.prefetch_hits, t = 1, id = "PREFETCH_RESULTS")
        self.hits = self.library_manager.addDownloadStates(self.hits)

        if DEBUG:
            beginbundle = time()

        # vliegendhart: do grouping here
        # Niels: important, self.hits must stay untouched or prefetching will not work
        bundled = self.bundler.bundle(self.hits, requested_bundle_mode, self.searchkeywords)
        returned_hits, selected_bundle_mode = bundled

        if DEBUG:
            print >> sys.stderr, 'getHitsInCat took: %s of which sort took %s, bundle took %s' % (time() - begintime, beginbundle - beginsort, time() - beginbundle)

        return [len(returned_hits), self.filteredResults, selected_bundle_mode, returned_hits]

    def prefetch_hits(self):
        """
        Prefetching attempts to reduce the time required to get the
        user the data it wants.

        We assume the torrents at the beginning of self.hits are more
        likely to be selected by the user than the ones at the end, so
        only a leading subselection is considered: at most 25 hits are
        looked at and at most 10 .torrent requests are issued.

        A hit is requested from peers only when its .torrent file is not
        collected yet and it was not requested before (duplicate=False).
        Requests use prio = 1.

        This method does no rate limiting itself; it can be called
        repeatedly and already requested hits are skipped.
        """
        if DEBUG: begin_time = time()

        # One callback serves every request; it only reports timing.
        def sesscb_prefetch_done(infohash, metadata, filename):
            if DEBUG:
                # find the original hit
                for hit in self.hits:
                    if hit["infohash"] == infohash:
                        print >> sys.stderr, "Prefetch: in", "%.1fs" % (time() - begin_time), repr(hit["name"])
                        return
                print >> sys.stderr, "Prefetch BUG. We got a hit from something we didn't ask for"

        prefetch_counter = 0

        # prefetch .torrent files that are not collected yet
        for position, hit in enumerate(self.hits):
            if not self.getCollectedFilename(hit):
                if self.downloadTorrentfileFromPeers(hit, sesscb_prefetch_done, duplicate=False, prio = 1):
                    if DEBUG: print >> sys.stderr, "Prefetch: attempting to download", repr(hit["name"])
                    prefetch_counter += 1

            # (1) prefetch a maximum of N hits
            # (2) prefetch only from the first M hits
            # whichever limit is reached first
            if prefetch_counter >= 10 or position + 1 >= 25:
                break
    
    def getSearchKeywords(self ):
        return self.searchkeywords, len(self.hits), self.filteredResults
    
    def setSearchKeywords(self, wantkeywords):
        """
        Make WANTKEYWORDS the current query and reset per-query state.

        The bundle mode is only reset when the keywords actually differ
        from the current ones. The filtered-result counter and the stored
        remote hits are always cleared.
        """
        keywords_changed = wantkeywords != self.searchkeywords
        if keywords_changed:
            self.bundle_mode = None

        self.searchkeywords = wantkeywords
        if DEBUG:
            print >> sys.stderr, "TorrentSearchGridManager: keywords:", self.searchkeywords,";time:%", time()

        self.filteredResults = 0
        self.remoteHits = {}
        # a string can never equal a keyword list, so the next
        # searchLocalDatabase() call will not take its cached path
        self.oldsearchkeywords = ''
            
    def setBundleMode(self, bundle_mode):
        if bundle_mode != self.bundle_mode:
            self.bundle_mode = bundle_mode
            self.refreshGrid()

    def searchLocalDatabase(self):
        """
        Called by getHitsInCategory() to search the local DB. The previous
        result is kept when the keywords did not change.

        Returns True when self.hits was replaced by a fresh search result,
        False when the old hit list was kept (same keywords with hits
        present, or an empty query).
        """
        keywords = self.searchkeywords

        unchanged = keywords == self.oldsearchkeywords
        if unchanged and len(self.hits) > 0:
            if DEBUG:
                print >>sys.stderr,"TorrentSearchGridManager: searchLocalDB: returning old hit list",len(self.hits)
            return False

        self.oldsearchkeywords = keywords
        if DEBUG:
            print >>sys.stderr,"TorrentSearchGridManager: searchLocalDB: Want",self.searchkeywords

        # nothing to search for: no keywords at all, or a single empty one
        if len(keywords) == 0 or (len(keywords) == 1 and keywords[0] == ''):
            return False

        self.hits = self.searchmgr.search(keywords)
        return True

    def addStoredRemoteResults(self):
        """
        Called by getHitsInCategory() to add remote results to self.hits.

        Every entry of self.remoteHits whose 'infohash' is not yet present
        in self.hits is appended to self.hits. Entries that are already
        known are left untouched. Nothing happens when there are no
        stored remote hits.

        No category filtering is applied to the remote hits here.
        """
        if len(self.remoteHits) == 0:
            return

        remote_items = self.remoteHits.values()
        if DEBUG:
            print >> sys.stderr,"TorrentSearchGridManager: remote: Adding %d remote results (%d in category)" % (len(self.remoteHits), len(remote_items))

        # One pass over the current hits instead of rescanning them for
        # every remote item. Infohashes are used as dictionary keys in
        # self.remoteHits, so they are assumed to be hashable here too.
        known_infohashes = set(item['infohash'] for item in self.hits)

        for remoteItem in remote_items:
            infohash = remoteItem['infohash']
            if infohash not in known_infohashes:
                known_infohashes.add(infohash)
                self.hits.append(remoteItem)
        
    def gotRemoteHits(self, permid, kws, answers):
        """
        Called by GUIUtil when hits come in.

        29/06/11 boudewijn: from now on called on the GUITaskQueue instead on the wx MainThread to
        avoid blocking the GUI because of the database queries.
        """
        self.guiserver.add_task(lambda: self._gotRemoteHits(permid, kws, answers))
        
    def _gotRemoteHits(self, permid, kws, answers):
        """
        Store the remote search ANSWERS received from peer PERMID for the
        query KWS in self.remoteHits.

        ANSWERS is iterated with iteritems(); each key is used as the
        infohash and each value is read as a dictionary with at least
        'content_name', 'length', 'category', 'seeder' and 'leecher'
        ('torrent_size', 'channel_permid' and 'channel_name' are
        optional).

        Answers are ignored unless KWS equals the current
        self.searchkeywords. A hit is skipped when the torrent db already
        has it, when (for answers without 'channel_permid') not every
        keyword occurs in the split content name, when its channel scores
        below VOTE_LIMIT, or when one of its categories has rank -1.
        Only the last case increases self.filteredResults.

        The grid is refreshed when at least one new infohash was stored.

        Returns True when KWS matched the current keywords, False when it
        did not or when any exception occurred (the traceback is printed).
        """
        # NOTE(review): the bare except below also swallows SystemExit and
        # KeyboardInterrupt - confirm that this is intended.
        try:
            if DEBUG:
                print >>sys.stderr,"TorrentSearchGridManager: gotRemoteHist: got",len(answers),"unfiltered results for",kws, bin2str(permid), time()

            # Always store the results, only display when in filesMode
            # We got some replies. First check if they are for the current query
            if self.searchkeywords == kws:
                # counts newly stored infohashes only, merged hits are not counted
                numResults = 0
                catobj = Category.getInstance()
                for key,value in answers.iteritems():

                    if self.torrent_db.hasTorrent(key):
                        if DEBUG:
                            print >>sys.stderr,"TorrentSearchGridManager: gotRemoteHist: Ignoring hit for",`value['content_name']`,"already got it"
                        continue # do not show results we have ourselves

                    # First, check if it matches the word boundaries, that belongs to previous version

                    # Convert answer fields as per
                    # Session.query_connected_peers() spec. to NEWDB format
                    newval = {}
                    newval['name'] = value['content_name']
                    newval['infohash'] = key
                    # empty name: no .torrent file is associated with this hit yet
                    newval['torrent_file_name'] = ''
                    newval['length'] = value['length']
                    newval['creation_date'] = time()  # None  gives '?' in GUI
                    newval['relevance'] = 0
                    newval['source'] = 'RQ'
                    # NOTE(review): only the first category is kept, so 'category'
                    # is a single value here - confirm consumers do not expect a list
                    newval['category'] = value['category'][0]
                    # We trust the peer
                    newval['status'] = 'good'
                    # a false value (e.g. None) is stored as 0
                    newval['num_seeders'] = value['seeder'] or 0
                    newval['num_leechers'] = value['leecher'] or 0

                    # OLPROTO_VER_NINETH includes a torrent_size. Set to
                    # -1 when not available.
                    if 'torrent_size' in value:
                        newval['torrent_size'] = value['torrent_size']
                    else:
                        newval['torrent_size'] = -1

                    # OLPROTO_VER_ELEVENTH includes channel_permid, channel_name fields.
                    if 'channel_permid' not in value:
                        # just to check if it is not OLPROTO_VER_ELEVENTH version
                        # if so, check word boundaries in the swarm name
                        ls = split_into_keywords(value['content_name'])

                        if DEBUG:
                            print >>sys.stderr,"TorrentSearchGridManager: ls is",`ls`
                            print >>sys.stderr,"TorrentSearchGridManager: kws is",`kws`

                        # flag becomes True as soon as one keyword is missing from the name
                        flag = False
                        for kw in kws:
                            if kw not in ls:
                                flag=True
                                break
                        if flag:
                            continue

                    if 'channel_permid' in value:
                        newval['channel_permid']=value['channel_permid']
                    else:
                        newval['channel_permid']=""

                    if 'channel_name' in value:
                        newval['channel_name'] = value['channel_name']
                    else:
                        newval['channel_name']=""

                    if 'channel_permid' in value:
                        # score the channel with our own votecast data
                        newval['neg_votes'] = self.votecastdb.getNegVotes(value['channel_permid'])
                        newval['subscriptions'] = self.votecastdb.getNumSubscriptions(value['channel_permid'])
                        if newval['subscriptions']-newval['neg_votes']<VOTE_LIMIT:
                            # now, this is SPAM
                            continue
                    else:
                        newval['subscriptions']=0
                        newval['neg_votes'] = 0


                    # Extra field: Set from which peer this info originates
                    newval['query_permids'] = [permid]

                    # Filter out results from unwanted categories
                    # (a single category with rank -1 is enough to discard the hit)
                    flag = False
                    for cat in value['category']:
                        rank = catobj.getCategoryRank(cat)
                        if rank == -1:
                            if DEBUG:
                                print >>sys.stderr,"TorrentSearchGridManager: gotRemoteHits: Got",`newval['name']`,"from banned category",cat,", discarded it."
                            flag = True
                            self.filteredResults += 1
                            break
                    if flag:
                        continue

                    if newval['infohash'] in self.remoteHits:
                        if DEBUG:
                            print >>sys.stderr,"TorrentSearchGridManager: gotRemoteHist: merging hit",`newval['name']`

                        # merge this result with previous results:
                        # remember every peer that reported this infohash
                        oldval = self.remoteHits[newval['infohash']]
                        for query_permid in newval['query_permids']:
                            if not query_permid in oldval['query_permids']:
                                oldval['query_permids'].append(query_permid)

                        # if a hit belongs to a more popular channel, then replace the previous
                        if newval['channel_permid'] !="" and newval['channel_name'] != "" and newval['subscriptions']-newval['neg_votes'] > oldval['subscriptions']-oldval['neg_votes']:
                            oldval['subscriptions'] = newval['subscriptions']
                            oldval['neg_votes'] = newval['neg_votes']
                            oldval['channel_permid'] = newval['channel_permid']
                            oldval['channel_name'] = newval['channel_name']
                    else:
                        if DEBUG:
                            print >>sys.stderr,"TorrentSearchGridManager: gotRemoteHist: appending hit",`newval['name']`

                        self.remoteHits[newval['infohash']] = newval
                        numResults +=1
                        # if numResults % 5 == 0:
                        # self.refreshGrid()

                # refresh once per batch, and only when something new was stored
                if numResults > 0:
                    self.refreshGrid()
                    if DEBUG:
                        print >>sys.stderr,'TorrentSearchGridManager: gotRemoteHits: Refresh grid after new remote torrent hits came in'
                return True
            elif DEBUG:
                print >>sys.stderr,"TorrentSearchGridManager: gotRemoteHits: got hits for",kws,"but current search is for",self.searchkeywords
            return False
        except:
            print_exc()
            return False
        
    def refreshGrid(self):
        if self.gridmgr is not None:
            self.gridmgr.refresh()

    #Rameez: The following code will call normalization functions and then 
    #sort and merge the torrent results
    def sort(self):
        self.doStatNormalization(self.hits, 'num_seeders', 'norm_num_seeders')
        self.doStatNormalization(self.hits, 'neg_votes', 'norm_neg_votes')
        self.doStatNormalization(self.hits, 'subscriptions', 'norm_subscriptions')

        def cmp(a,b):
            # normScores can be small, so multiply
            return int( 1000000.0 * ( 0.8*b.get('norm_num_seeders',0) + 0.1*b.get('norm_neg_votes',0) + 0.1*b.get('norm_subscriptions',0) -
                        0.8*a.get('norm_num_seeders',0) - 0.1*a.get('norm_neg_votes',0) - 0.1*a.get('norm_subscriptions',0) ))
           
        self.hits.sort(cmp)

    def doStatNormalization(self, hits, normKey, newKey):
        '''Standardize the field normKey of all hits: subtract the mean and
        divide by the sample standard deviation (n - 1 in the denominator).
        This is sometimes called scaling.

        The result is stored in every hit under newKey. A missing or false
        normKey value (e.g. None) counts as 0. When the standard deviation
        is 0 (fewer than two hits, or all values equal) newKey is set to 0
        for every hit.'''

        # Work on floats: with integer statistics the "/" operator would
        # otherwise truncate the mean and the variance under Python 2.
        values = [float(hit.get(normKey, 0) or 0) for hit in hits]
        count = len(values)

        if count > 0:
            mean = sum(values) / count
        else:
            mean = 0.0

        if count > 1:
            squared_deviations = sum([(value - mean) * (value - mean) for value in values])
            variance = squared_deviations / (count - 1)
        else:
            variance = 0.0

        stdDev = sqrt(variance)

        for hit, value in zip(hits, values):
            if stdDev > 0:
                hit[newKey] = (value - mean) / stdDev
            else:
                hit[newKey] = 0