def register(self, overlay_bridge, launchmany, config, bc_fac, log=''):
    """Wire this handler to the overlay and the databases it queries.

    Stores the given collaborators on the instance, builds a
    SearchManager on top of launchmany.torrent_db, optionally attaches
    an OverlayLogger when LOG is non-empty, and finally marks the
    handler as registered.
    """
    if DEBUG:
        print >> sys.stderr, "rquery: register"

    self.overlay_bridge = overlay_bridge
    self.launchmany = launchmany

    # Both database-backed helpers come from the launchmany object.
    torrent_db = launchmany.torrent_db
    self.search_manager = SearchManager(torrent_db)
    self.peer_db = launchmany.peer_db

    self.config = config
    self.bc_fac = bc_fac  # May be None

    if log:
        # Overlay logging is only enabled when a log target was given.
        self.overlay_log = OverlayLogger.getInstance(log)

    self.registered = True
def connect(self):
    """Open the database handlers this manager needs from the session.

    Handlers are opened in a fixed order (torrents, preferences, own
    preferences, search); afterwards a SearchManager is created on the
    torrent handler and the GUI's torrent search manager is cached.
    """
    session = self.guiUtility.utility.session

    # (attribute name, notification subject) pairs, opened in this order.
    handlers = (
        ('torrent_db', NTFY_TORRENTS),
        ('pref_db', NTFY_PREFERENCES),
        ('mypref_db', NTFY_MYPREFERENCES),
        ('search_db', NTFY_SEARCH),
    )
    for attribute, subject in handlers:
        setattr(self, attribute, session.open_dbhandler(subject))

    self.searchmgr = SearchManager(self.torrent_db)
    self.torrentsearch_manager = self.guiUtility.torrentsearch_manager
def connect(self):
    """Open the database handlers this manager needs from the session.

    Handlers are opened in a fixed order; afterwards a SearchManager is
    created on the channelcast handler and the RemoteTorrentHandler
    singleton is cached.
    """
    session = self.guiUtility.utility.session

    # (attribute name, notification subject) pairs, opened in this order.
    handlers = (
        ('torrent_db', NTFY_TORRENTS),
        ('pref_db', NTFY_PREFERENCES),
        ('mypref_db', NTFY_MYPREFERENCES),
        ('search_db', NTFY_SEARCH),
        ('channelcast_db', NTFY_CHANNELCAST),
        ('votecastdb', NTFY_VOTECAST),
    )
    for attribute, subject in handlers:
        setattr(self, attribute, session.open_dbhandler(subject))

    self.searchmgr = SearchManager(self.channelcast_db)
    self.rtorrent_handler = RemoteTorrentHandler.getInstance()
class ChannelSearchGridManager:
    """Singleton that collects channel search results for the GUI grid.

    Local results come from the channelcast database via SearchManager;
    remote results are merged in by _gotRemoteHits. Results are kept in
    self.hits as {publisher_id: [publisher_name, vote, {infohash:
    (torrentname, timestamp)}]}.
    """

    # Code to make this a singleton
    __single = None

    def __init__(self, guiUtility):
        if ChannelSearchGridManager.__single:
            raise RuntimeError("ChannelSearchGridManager is singleton")
        ChannelSearchGridManager.__single = self
        self.guiUtility = guiUtility
        self.guiserver = GUITaskQueue.getInstance()

        # Contains all matches for keywords in DB, not filtered by category
        self.hits = {}

        # Database handlers are filled in by connect()
        self.searchmgr = None
        self.channelcast_db = None
        self.pref_db = None  # Nic: for rerankers
        self.mypref_db = None
        self.search_db = None

        # For asking for a refresh when remote results came in
        self.gridmgr = None

        self.searchkeywords = []
        self.oldsearchkeywords = []

        self.category = Category.getInstance()

    @staticmethod
    def getInstance(*args, **kw):
        """Return the singleton, creating it from the arguments if needed."""
        if ChannelSearchGridManager.__single is None:
            ChannelSearchGridManager(*args, **kw)
        return ChannelSearchGridManager.__single

    def connect(self):
        """Open the session database handlers and build the SearchManager."""
        session = self.guiUtility.utility.session
        self.torrent_db = session.open_dbhandler(NTFY_TORRENTS)
        self.pref_db = session.open_dbhandler(NTFY_PREFERENCES)
        self.mypref_db = session.open_dbhandler(NTFY_MYPREFERENCES)
        self.search_db = session.open_dbhandler(NTFY_SEARCH)
        self.channelcast_db = session.open_dbhandler(NTFY_CHANNELCAST)
        self.votecastdb = session.open_dbhandler(NTFY_VOTECAST)
        self.searchmgr = SearchManager(self.channelcast_db)
        self.rtorrent_handler = RemoteTorrentHandler.getInstance()

    def set_gridmgr(self, gridmgr):
        self.gridmgr = gridmgr

    def getChannelHits(self):
        """Run the local search and return [count, hits] ([0, None] if empty)."""
        self.searchLocalDatabase()

        if DEBUG:
            print >>sys.stderr,'ChannelSearchGridManager: getChannelHits: search found: %d items' % len(self.hits)

        if len(self.hits) == 0:
            return [0, None]
        return [len(self.hits), self.hits]

    def getNewChannels(self):
        """Return [count, channels] for channels updated since the cut-off."""
        # all channels with no votes + updated since
        two_months = time() - 5259487  # cut-off timestamp; the constant is in seconds
        newchannels = self.channelcast_db.getNewChannels(two_months)
        return [len(newchannels), newchannels]

    def getAllChannels(self):
        allchannels = self.channelcast_db.getAllChannels()
        return [len(allchannels), allchannels]

    def getSubscriptions(self):
        subscriptions = self.channelcast_db.getMySubscribedChannels()
        return [len(subscriptions), subscriptions]

    def getPopularChannels(self):
        pchannels = self.channelcast_db.getMostPopularChannels()
        return [len(pchannels), pchannels]

    def getUpdatedChannels(self):
        lchannels = self.channelcast_db.getLatestUpdated()
        return [len(lchannels), lchannels]

    def getMyVote(self, publisher_id):
        return self.votecastdb.getVote(publisher_id, bin2str(self.votecastdb.my_permid))

    def getTorrentsFromMyChannel(self):
        return self.getTorrentsFromPublisherId(bin2str(self.votecastdb.my_permid))

    def getTorrentFromPublisherId(self, publisher_id, infohash):
        return self.channelcast_db.getTorrentFromPublisherId(publisher_id, infohash)

    def getTorrentsFromPublisherId(self, publisher_id, keys = None):
        """Return [count, nrFiltered, torrents] for one publisher.

        Torrents are dropped when none of their categories is enabled
        (counted in self.nrFiltered) or when their status is 'dead'
        (not counted). Torrents without a category count as "other".
        """
        hits = self.channelcast_db.getTorrentsFromPublisherId(publisher_id, keys)

        self.nrFiltered = 0
        enabledcatslow = ["other"]
        for catname, displayname in self.category.getCategoryNames():
            enabledcatslow.append(catname.lower())

        def torrentFilter(torrent):
            okCategory = False
            for torcat in torrent.get("category", ["other"]):
                if torcat.lower() in enabledcatslow:
                    okCategory = True
                    break

            if not okCategory:
                self.nrFiltered += 1

            okGood = torrent['status'] != 'dead'
            return okGood and okCategory

        # A list comprehension (rather than filter()) guarantees a list,
        # so len() below is always valid.
        hits = [torrent for torrent in hits if torrentFilter(torrent)]
        return [len(hits), self.nrFiltered, hits]

    def getChannel(self, publisher_id):
        return self.channelcast_db.getChannel(publisher_id)

    def getChannels(self, publisher_ids):
        return self.channelcast_db.getChannels(publisher_ids)

    def spam(self, publisher_id):
        """Mark the publisher as spam and delete its torrents."""
        self.votecastdb.spam(publisher_id)
        self.channelcast_db.deleteTorrentsFromPublisherId(publisher_id)

    def favorite(self, publisher_id):
        self.votecastdb.subscribe(publisher_id)

    def remove_vote(self, publisher_id):
        self.votecastdb.unsubscribe(publisher_id)

    def getChannelForTorrent(self, infohash):
        return self.channelcast_db.getMostPopularChannelFromTorrent(infohash)

    def getNrTorrentsDownloaded(self, publisher_id):
        return self.channelcast_db.getNrTorrentsDownloaded(publisher_id)

    def setSearchKeywords(self, wantkeywords):
        self.searchkeywords = wantkeywords

    def searchLocalDatabase(self):
        """ Called by GetChannelHits() to search local DB. Caches previous query result.

        Returns True when self.hits was rebuilt from a fresh query and
        False when the cached result was kept or the keywords are empty.
        """
        if self.searchkeywords == self.oldsearchkeywords and len(self.hits) > 0:
            if DEBUG:
                print >>sys.stderr,"ChannelSearchGridManager: searchLocalDB: returning old hit list", len(self.hits)
            return False

        self.oldsearchkeywords = self.searchkeywords
        if DEBUG:
            print >>sys.stderr,"ChannelSearchGridManager: searchLocalDB: Want",self.searchkeywords

        if len(self.searchkeywords) == 0 or len(self.searchkeywords) == 1 and self.searchkeywords[0] == '':
            return False

        # "k " prefix followed by every keyword, each with a trailing space
        query = "k " + "".join([keyword + ' ' for keyword in self.searchkeywords])
        hits = self.searchmgr.searchChannels(query)

        # Nitin on Feb 5, 2010: temp fix: converting into string format coz most things in GUI use string forms.
        # Fields like permid, infohash, torrenthash are in binary format in each record in 'hits' list.
        self.hits = {}
        for hit in hits:
            publisher_id = bin2str(hit[0])
            infohash = bin2str(hit[2])

            channel = self.hits.get(publisher_id)
            if channel is None:
                # First torrent seen for this publisher: look up its vote once.
                vote = self.votecastdb.getEffectiveVote(publisher_id)
                # {infohash:(torrentname, timestamp)}
                self.hits[publisher_id] = [hit[1], vote, {infohash: (hit[4], hit[5])}]
            elif infohash not in channel[2]:
                channel[2][infohash] = (hit[4], hit[5])
        return True

    def gotRemoteHits(self, permid, kws, answers):
        """ Called by GUIUtil when hits come in. """
        self.guiserver.add_task(lambda:self._gotRemoteHits(permid, kws, answers))

    def _gotRemoteHits(self, permid, kws, answers):
        """Merge remote channel hits into self.hits.

        @param permid: the peer who returned the answer to the query
        @param kws: the keywords of the query that originated the answer
        @param answers: the filtered answer returned by the peer; the
            values are indexed as (publisher_id, publisher_name, infohash,
            torrenthash, torrentname, timestamp, key
        @return: True when new torrents were added and the grid was asked
            to refresh, False otherwise (including on any exception).
        """
        # NOTE(review): local hits are keyed by bin2str(publisher_id) while
        # remote hits are keyed by hit[0] as received - confirm that remote
        # answers already arrive in the string form.
        t1 = time()
        try:
            if DEBUG:
                print >>sys.stderr,"ChannelSearchGridManager: gotRemoteHist: got",len(answers),"for",kws

            # Always store the results, only display when in channelsMode
            # We got some replies. First check if they are for the current query
            if self.searchkeywords == kws:
                numResults = 0

                for hit in answers.itervalues():
                    # Add to self.hits
                    if hit[0] not in self.hits:
                        self.hits[hit[0]] = [hit[1], self.votecastdb.getEffectiveVote(bin2str(hit[0])), {}]

                    # Extend torrent dict for this channel
                    torrents = self.hits[hit[0]][2]
                    if hit[2] not in torrents:
                        torrents[hit[2]] = (hit[4], hit[5])
                        numResults += 1

                if numResults > 0:
                    self.refreshGrid()
                    if DEBUG:
                        print >>sys.stderr,'ChannelSearchGridManager: gotRemoteHits: Refresh grid after new remote channel hits came in', "Took", time() - t1
                    return True

            elif DEBUG:
                print >>sys.stderr,"ChannelSearchGridManager: gotRemoteHits: got hits for",kws,"but current search is for",self.searchkeywords
            return False
        except Exception:
            # Runs on the GUI task queue: log and report failure instead of
            # propagating, but let KeyboardInterrupt/SystemExit through.
            print_exc()
            return False

    def refreshGrid(self):
        if self.gridmgr is not None:
            self.gridmgr.refresh_channel()
class LibraryManager:
    """Singleton that tracks the user's library and its download states.

    It receives the list of download states via
    download_state_gui_callback, forwards it to registered GUI callbacks,
    persists progress to the MyPreference database and offers helpers to
    play and delete library torrents.
    """

    # Code to make this a singleton
    __single = None

    def __init__(self, guiUtility):
        if LibraryManager.__single:
            raise RuntimeError("LibraryManager is singleton")
        LibraryManager.__single = self
        self.guiUtility = guiUtility

        # Contains all matches for keywords in DB, not filtered by category
        self.hits = []

        # Most recent download states; replaced by download_state_gui_callback.
        # Initialised here so helpers that read it work before the first
        # callback arrives.
        self.dslist = []

        # current progress of download states
        self.cache_progress = {}

        self.rerankingStrategy = DefaultTorrentReranker()

        # For asking for a refresh when remote results came in
        self.gridmgr = None
        self.guiserver = GUITaskQueue.getInstance()

        # Gui callbacks
        self.gui_callback = []

        self.user_download_choice = UserDownloadChoice.get_singleton()

    @staticmethod
    def getInstance(*args, **kw):
        """Return the singleton, creating it from the arguments if needed."""
        if LibraryManager.__single is None:
            LibraryManager(*args, **kw)
        return LibraryManager.__single

    def _get_videoplayer(self, exclude=None):
        """
        Returns the VideoPlayer instance and ensures that it knows if
        there are other downloads running.
        """
        other_downloads = False
        for ds in self.dslist:
            if ds is not exclude and ds.get_status() not in (DLSTATUS_STOPPED, DLSTATUS_STOPPED_ON_ERROR):
                other_downloads = True
                break

        videoplayer = VideoPlayer.getInstance()
        videoplayer.set_other_downloads(other_downloads)

        self.guiUtility.ShowPlayer(True)
        return videoplayer

    def download_state_gui_callback(self, dslist):
        """
        Called by GUIThread
        """
        self.dslist = dslist

        # Iterate over a snapshot: a failing callback is unregistered below,
        # and removing from the list being iterated would skip its successor.
        for callback in list(self.gui_callback):
            try:
                callback(dslist)
            except Exception:
                print_exc()
                self.remove_download_state_callback(callback)

        # TODO: This seems like the wrong place to do this?
        self.guiserver.add_task(lambda:self.updateProgressInDB(dslist),0)

    def updateProgressInDB(self, dslist):
        """Persist progress for downloads that advanced by more than 5%."""
        updates = False

        for ds in dslist:
            infohash = ds.get_download().get_def().get_infohash()

            progress = (ds.get_progress() or 0.0) * 100.0
            # update progress if difference is larger than 5%
            if progress - self.cache_progress.get(infohash, 0) > 5:
                self.cache_progress[infohash] = progress
                try:
                    self.mypref_db.updateProgress(infohash, progress, commit = False)
                    updates = True
                except Exception:
                    print_exc()

        if updates:
            # Single commit for the whole batch
            self.mypref_db.commit()

    def add_download_state_callback(self, callback):
        if callback not in self.gui_callback:
            self.gui_callback.append(callback)

    def remove_download_state_callback(self, callback):
        if callback in self.gui_callback:
            self.gui_callback.remove(callback)

    def addDownloadState(self, torrent):
        """Attach the matching download state (if any) as torrent['ds']."""
        for ds in self.dslist:
            try:
                infohash = ds.get_download().get_def().get_infohash()
                if torrent['infohash'] == infohash:
                    torrent['ds'] = ds
                    break
            except Exception:
                # Best effort: a download state that cannot be inspected is skipped
                pass
        return torrent

    def addDownloadStates(self, liblist):
        """Attach download states to the torrent dicts in LIBLIST.

        For every known download state, the first torrent with the same
        infohash gets it stored under 'ds'. Returns LIBLIST.
        """
        for ds in self.dslist:
            try:
                infohash = ds.get_download().get_def().get_infohash()
                for torrent in liblist:
                    if torrent['infohash'] == infohash:
                        torrent['ds'] = ds
                        break
            except Exception:
                # Best effort: a download state that cannot be inspected is skipped
                pass
        return liblist

    def playTorrent(self, torrent, selectedinfilename = None):
        """Start playback of TORRENT, fetching its .torrent first if needed.

        When the torrent has no download state and its .torrent file is
        not yet available, getTorrent is given a callback that calls
        playTorrent again with the same file selection.
        """
        ds = torrent.get('ds')

        videoplayer = self._get_videoplayer(ds)
        videoplayer.stop_playback()
        videoplayer.show_loading()

        if ds is None:
            # Making sure we actually have this .torrent
            # The retry keeps the caller's file selection.
            callback = lambda infohash, metadata, filename: self.playTorrent(torrent, selectedinfilename)
            filename = self.torrentsearch_manager.getTorrent(torrent, callback)

            if isinstance(filename, basestring):
                # got actual filename, load torrentdef and create downloadconfig
                tdef = TorrentDef.load(filename)
                defaultDLConfig = DefaultDownloadStartupConfig.getInstance()
                dscfg = defaultDLConfig.copy()
                videoplayer.start_and_play(tdef, dscfg, selectedinfilename)
        else:
            videoplayer.play(ds, selectedinfilename)

    def deleteTorrent(self, torrent, removecontent = False):
        self.deleteTorrentDS(torrent.get('ds'), torrent['infohash'], removecontent)

    def deleteTorrentDS(self, ds, infohash, removecontent = False):
        """Stop playback if DS is being played, then remove its download.

        Does nothing when DS is None.
        """
        if ds is not None:
            videoplayer = VideoPlayer.getInstance()
            playd = videoplayer.get_vod_download()

            if playd == ds.download:
                self._get_videoplayer(ds).stop_playback()

            self.deleteTorrentDownload(ds.get_download(), infohash, removecontent)

    def deleteTorrentDownload(self, download, infohash, removecontent = False, removestate = True):
        self.guiUtility.utility.session.remove_download(download, removecontent = removecontent, removestate = removestate)

        if infohash:
            # Johan, 2009-03-05: we need long download histories for good
            # semantic clustering.
            # Arno, 2009-03-10: Not removing it from MyPref means it keeps showing
            # up in the Library, even after removal :-( H4x0r this.
            self.mypref_db.updateDestDir(infohash,"")
            self.user_download_choice.remove_download_state(infohash)

    def set_gridmgr(self, gridmgr):
        self.gridmgr = gridmgr

    def connect(self):
        """Open the session database handlers and build the SearchManager."""
        session = self.guiUtility.utility.session
        self.torrent_db = session.open_dbhandler(NTFY_TORRENTS)
        self.pref_db = session.open_dbhandler(NTFY_PREFERENCES)
        self.mypref_db = session.open_dbhandler(NTFY_MYPREFERENCES)
        self.search_db = session.open_dbhandler(NTFY_SEARCH)
        self.searchmgr = SearchManager(self.torrent_db)
        self.torrentsearch_manager = self.guiUtility.torrentsearch_manager

    def getHitsInCategory(self, sort = 'rameezmetric'):
        """Return [count, 0, hits] for the torrents in the user's library.

        A torrent is kept when it has 'myDownloadHistory' set and a
        non-empty 'destdir'. Hits are reranked and get their download
        state attached.
        """
        if DEBUG:
            begintime = time()

        def torrentFilter(torrent):
            return torrent.get('myDownloadHistory', False) and torrent.get('destdir',"") != ""

        self.hits = [torrent for torrent in self.searchmgr.searchLibrary() if torrentFilter(torrent)]

        if DEBUG:
            beginsort = time()

        if sort == 'rameezmetric':
            # NOTE(review): no sort() method is defined on this class in the
            # visible source - confirm where it is expected to come from.
            self.sort()

        # Nic: Ok this is somewhat diagonal to the previous sorting algorithms
        # eventually, these should probably be combined
        # since for now, however, my reranking is very tame (exchanging first and second place under certain circumstances)
        # this should be fine...
        self.hits = self.rerankingStrategy.rerank(self.hits, '', self.torrent_db, self.pref_db, self.mypref_db, self.search_db)

        if DEBUG:
            print >> sys.stderr, 'getHitsInCat took: %s of which sort took %s' % ((time() - begintime), (time() - beginsort))

        self.hits = self.addDownloadStates(self.hits)
        return [len(self.hits), 0 , self.hits]

    def refreshGrid(self):
        if self.gridmgr is not None:
            self.gridmgr.refresh()
class TorrentManager:
    """Singleton that collects, filters, ranks and prefetches torrent hits.

    Local hits come from the torrent database via SearchManager, remote
    hits are merged in by _gotRemoteHits and kept in self.remoteHits
    (keyed by infohash) until getHitsInCategory appends them to self.hits.
    """

    # Code to make this a singleton
    __single = None

    def __init__(self, guiUtility):
        if TorrentManager.__single:
            raise RuntimeError("TorrentSearchGridManager is singleton")
        TorrentManager.__single = self
        self.guiUtility = guiUtility

        # Contains all matches for keywords in DB, not filtered by category
        self.hits = []

        # Remote results for current keywords
        self.remoteHits = {}

        # For asking for a refresh when remote results came in
        self.gridmgr = None
        self.guiserver = GUITaskQueue.getInstance()

        self.searchkeywords = []
        self.rerankingStrategy = DefaultTorrentReranker()
        self.oldsearchkeywords = []

        self.filteredResults = 0

        self.bundler = Bundler()
        self.bundle_mode = None
        self.category = Category.getInstance()

    @staticmethod
    def getInstance(*args, **kw):
        """Return the singleton, creating it from the arguments if needed."""
        if TorrentManager.__single is None:
            TorrentManager(*args, **kw)
        return TorrentManager.__single

    def getCollectedFilename(self, torrent):
        """Return the path of the collected .torrent for TORRENT, or None.

        torrent['torrent_file_name'] is filled in (or refreshed) with the
        collected filename derived from the infohash as a side effect.
        """
        torrent_dir = self.guiUtility.utility.session.get_torrent_collecting_dir()

        if 'torrent_file_name' not in torrent or not torrent['torrent_file_name']:
            torrent['torrent_file_name'] = get_collected_torrent_filename(torrent['infohash'])
        torrent_filename = os.path.join(torrent_dir, torrent['torrent_file_name'])

        # .torrent found, return complete filename
        if os.path.isfile(torrent_filename):
            return torrent_filename

        # .torrent not found, possibly a new torrent_collecting_dir
        torrent['torrent_file_name'] = get_collected_torrent_filename(torrent['infohash'])
        torrent_filename = os.path.join(torrent_dir, torrent['torrent_file_name'])
        if os.path.isfile(torrent_filename):
            return torrent_filename
        return None

    def getTorrent(self, torrent, callback):
        """
        TORRENT is a dictionary containing torrent information used to
        display the entry on the UI. it is NOT the torrent file!

        CALLBACK is called when the torrent is downloaded. When no
        torrent can be downloaded the callback is ignored

        Returns a filename, if filename is known or a boolean + request_type
        describing if the torrent is requested
        """
        torrent_filename = self.getCollectedFilename(torrent)
        if torrent_filename:
            return torrent_filename

        # .torrent not found, try to download from peers
        if self.downloadTorrentfileFromPeers(torrent, callback):
            return (True, "from peers")

        return (False, "could not get torrent")

    def downloadTorrentfileFromPeers(self, torrent, callback, duplicate=True, prio = 0):
        """
        TORRENT is a dictionary containing torrent information used to
        display the entry on the UI. it is NOT the torrent file!

        CALLBACK is called when the torrent is downloaded. When no
        torrent can be downloaded the callback is ignored

        DUPLICATE can be True: the file will be downloaded from peers
        regardless of a previous/current download attempt (returns
        True). Or DUPLICATE can be False: the file will only be
        downloaded when it was not yet attempted to download (when
        False is returned no callback will be made)

        PRIO is the priority, default is 0 which means we need this torrent now.
        If PRIO != 0, then a rate limiter could be used by the remotetorrentrequester

        Returns True or False
        """
        # return False when duplicate
        if not duplicate and torrent.get('query_torrent_was_requested', False):
            return False

        torrent['query_torrent_was_requested'] = True
        session = self.guiUtility.utility.session
        if 'query_permids' not in torrent or len(torrent['query_permids']) == 0:
            # No known source peers: let the session pick where to ask
            session.download_torrentfile(torrent['infohash'], callback, prio)
        else:
            for permid in torrent['query_permids']:
                session.download_torrentfile_from_peer(permid, torrent['infohash'], callback, prio)

        return True

    def downloadTorrent(self, torrent, dest = None, secret = False, vodmode = False, selectedFiles = None):
        """Start downloading TORRENT, fetching its .torrent first if needed.

        Returns the request description string when the .torrent is still
        being requested from peers (this method is then called again via
        the callback); returns None otherwise. When the .torrent cannot be
        obtained, the user is asked whether to delete the torrent.
        """
        callback = lambda infohash, metadata, filename: self.downloadTorrent(torrent, dest, secret, vodmode, selectedFiles)
        callback.__name__ = "downloadTorrent_callback"
        torrent_filename = self.getTorrent(torrent, callback)

        if isinstance(torrent_filename, basestring):
            # got actual filename
            if torrent.get('name'):
                name = torrent['name']
            else:
                name = torrent['infohash']

            clicklog = {'keywords': self.searchkeywords,
                        'reranking_strategy': self.rerankingStrategy.getID()}

            if "click_position" in torrent:
                clicklog["click_position"] = torrent["click_position"]

            # Api download
            d = self.guiUtility.frame.startDownload(torrent_filename,destdir=dest,clicklog=clicklog,name=name,vodmode=vodmode, selectedFiles = selectedFiles) ## remove name=name
            if d:
                if secret:
                    self.torrent_db.setSecret(torrent['infohash'], secret)

                if DEBUG:
                    print >>sys.stderr,'standardDetails: download: download started'

                torrent['myDownloadHistory'] = True

        elif torrent_filename[0]:
            # torrent is being requested from peers, using callback this function will be called again
            return torrent_filename[1]

        else:
            # torrent not found
            def showdialog():
                message = self.guiUtility.utility.lang.get('delete_torrent') % torrent['name']
                dlg = wx.MessageDialog(self.guiUtility.frame, message, self.guiUtility.utility.lang.get('delete_dead_torrent'),
                                       wx.YES_NO|wx.NO_DEFAULT|wx.ICON_QUESTION)
                result = dlg.ShowModal()
                dlg.Destroy()

                if result == wx.ID_YES:
                    infohash = torrent['infohash']
                    self.torrent_db.deleteTorrent(infohash, delete_file=True, commit = True)

            wx.CallAfter(showdialog)

    def isTorrentPlayable(self, torrent, default=(False, [], []), callback=None):
        """
        TORRENT is a dictionary containing torrent information used to
        display the entry on the UI. it is NOT the torrent file!

        DEFAULT indicates the default value when we don't know if the
        torrent is playable.

        CALLBACK can be given to result the actual 'playable' value
        for the torrent after some downloading/processing. The DEFAULT
        value is returned in this case. Will only be called if
        self.item == torrent

        When the .torrent is available the result is
        (playable, playable_files, all_files): it is handed to CALLBACK
        as callback(torrent, result), or returned as (torrent, result)
        when no CALLBACK was given. When the .torrent cannot be obtained
        DEFAULT is delivered the same way. While the .torrent is being
        requested from peers the request description string is returned.
        """
        torrent_callback = lambda infohash, metadata, filename: self.isTorrentPlayable(torrent, default, callback)
        torrent_callback.__name__ = "isTorrentPlayable_callback"
        torrent_filename = self.getTorrent(torrent, torrent_callback)

        if isinstance(torrent_filename, basestring):
            # got actual filename
            tdef = TorrentDef.load(torrent_filename)

            files = tdef.get_files_as_unicode(exts=videoextdefaults)
            allfiles = tdef.get_files_as_unicode_with_length()
            playable = len(files) > 0

            torrent['comment'] = tdef.get_comment_as_unicode()
            if tdef.get_tracker_hierarchy():
                torrent['trackers'] = tdef.get_tracker_hierarchy()
            else:
                torrent['trackers'] = [[tdef.get_tracker()]]

            if callback is not None:
                callback(torrent, (playable, files, allfiles))
            else:
                return torrent, (playable, files, allfiles)

        elif not torrent_filename[0]:
            if DEBUG:
                print >>sys.stderr, "standardDetails:torrent_is_playable returning default", default
            # Mirror the success branch: without a callback the value is
            # returned instead of calling None.
            if callback is not None:
                callback(torrent, default)
            else:
                return torrent, default
        else:
            return torrent_filename[1]

    def getSwarmInfo(self, infohash):
        return self.torrent_db.getSwarmInfoByInfohash(infohash)

    def set_gridmgr(self, gridmgr):
        self.gridmgr = gridmgr

    def connect(self):
        """Open the session database handlers and build the SearchManager."""
        session = self.guiUtility.utility.session
        self.torrent_db = session.open_dbhandler(NTFY_TORRENTS)
        self.pref_db = session.open_dbhandler(NTFY_PREFERENCES)
        self.mypref_db = session.open_dbhandler(NTFY_MYPREFERENCES)
        self.search_db = session.open_dbhandler(NTFY_SEARCH)
        self.votecastdb = session.open_dbhandler(NTFY_VOTECAST)
        self.searchmgr = SearchManager(self.torrent_db)
        self.library_manager = self.guiUtility.library_manager

    def getHitsInCategory(self, categorykey = 'all', sort = 'rameezmetric'):
        """Search, filter, rank and bundle the hits for the current keywords.

        Returns [count, filteredResults, selected_bundle_mode, hits] where
        hits is the bundler's output; self.hits keeps the unbundled list.
        """
        if DEBUG:
            begintime = time()
        # categorykey can be 'all', 'Video', 'Document', ...
        bundle_mode = self.bundle_mode

        if DEBUG:
            print >>sys.stderr,"TorrentSearchManager: getHitsInCategory:",categorykey

        categorykey = categorykey.lower()
        enabledcatslow = ["other"]
        for catname, _ in self.category.getCategoryNames():
            enabledcatslow.append(catname.lower())

        # TODO: do all filtering in DB query
        def torrentFilter(torrent):
            # show dead torrents in library
            okCategory = False
            categories = torrent.get("category", [])
            if not categories:
                categories = ["other"]
            if categorykey == 'all':
                for torcat in categories:
                    if torcat.lower() in enabledcatslow:
                        okCategory = True
                        break
            elif categorykey in [cat.lower() for cat in categories]:
                okCategory = True

            if not okCategory:
                self.filteredResults += 1

            okGood = torrent['status'] != 'dead'
            return okCategory and okGood

        # 1. Local search puts hits in self.hits
        if DEBUG:
            beginlocalsearch = time()
        new_local_hits = self.searchLocalDatabase()

        if DEBUG:
            print >>sys.stderr,'TorrentSearchGridManager: getHitsInCat: search found: %d items took %s' % (len(self.hits), time() - beginlocalsearch)

        # 2. Filter self.hits on category and status
        if DEBUG:
            beginfilterhits = time()

        if new_local_hits:
            self.hits = [torrent for torrent in self.hits if torrentFilter(torrent)]

        if DEBUG:
            print >>sys.stderr,'TorrentSearchGridManager: getHitsInCat: torrentFilter after filter found: %d items took %s' % (len(self.hits), time() - beginfilterhits)

        # 3. Add remote hits that may apply. TODO: double filtering, could
        # add remote hits to self.hits before filter(torrentFilter,...)
        self.addStoredRemoteResults()

        if DEBUG:
            print >>sys.stderr,'TorrentSearchGridManager: getHitsInCat: found after remote search: %d items' % len(self.hits)

        if DEBUG:
            beginsort = time()

        if sort == 'rameezmetric':
            self.sort()

        # Nic: Ok this is somewhat diagonal to the previous sorting algorithms
        # eventually, these should probably be combined
        # since for now, however, my reranking is very tame (exchanging first and second place under certain circumstances)
        # this should be fine...
        self.hits = self.rerankingStrategy.rerank(self.hits, self.searchkeywords, self.torrent_db,
                                                  self.pref_db, self.mypref_db, self.search_db)

        # boudewijn: now that we have sorted the search results we
        # want to prefetch the top N torrents.
        self.guiserver.add_task(self.prefetch_hits, t = 1, id = "PREFETCH_RESULTS")

        self.hits = self.library_manager.addDownloadStates(self.hits)

        if DEBUG:
            beginbundle = time()

        # vliegendhart: do grouping here
        # Niels: important, we should not change self.hits otherwise prefetching will not work
        returned_hits, selected_bundle_mode = self.bundler.bundle(self.hits, bundle_mode, self.searchkeywords)

        if DEBUG:
            print >> sys.stderr, 'getHitsInCat took: %s of which sort took %s, bundle took %s' % (time() - begintime, beginbundle - beginsort, time() - beginbundle)

        return [len(returned_hits), self.filteredResults , selected_bundle_mode, returned_hits]

    def prefetch_hits(self):
        """
        Prefetching attempts to reduce the time required to get the
        user the data it wants.

        We assume the torrent at the beginning of self.hits are more
        likely to be selected by the user than the ones at the
        end. This allows us to perform prefetching operations on a
        subselection of these items.

        The prefetch_hits function can be called multiple times. It
        will only attempt to prefetch every PREFETCH_DELAY
        seconds. This gives search results from multiple sources the
        chance to be received and sorted before prefetching a subset.
        """
        if DEBUG:
            begin_time = time()

        def sesscb_prefetch_done(infohash, metadata, filename):
            # Only reports timing; begin_time exists whenever DEBUG is set.
            if DEBUG:
                # find the original hit
                for hit in self.hits:
                    if hit["infohash"] == infohash:
                        print >> sys.stderr, "Prefetch: in", "%.1fs" % (time() - begin_time), repr(hit["name"])
                        return
                print >> sys.stderr, "Prefetch BUG. We got a hit from something we didn't ask for"

        hit_counter = 0
        prefetch_counter = 0

        # prefetch .torrent files if they are from buddycast sources
        for hit in self.hits:
            torrent_filename = self.getCollectedFilename(hit)
            if not torrent_filename:
                if self.downloadTorrentfileFromPeers(hit, sesscb_prefetch_done, duplicate=False, prio = 1):
                    if DEBUG:
                        print >> sys.stderr, "Prefetch: attempting to download", repr(hit["name"])
                    prefetch_counter += 1

            hit_counter += 1
            if prefetch_counter >= 10 or hit_counter >= 25:
                # (1) prefetch a maximum of N hits
                # (2) prefetch only from the first M hits
                # (.) whichever is lowest or (1) or (2)
                break

    def getSearchKeywords(self ):
        return self.searchkeywords, len(self.hits), self.filteredResults

    def setSearchKeywords(self, wantkeywords):
        """Set new keywords and reset the per-query state."""
        if wantkeywords != self.searchkeywords:
            self.bundle_mode = None

        self.searchkeywords = wantkeywords
        if DEBUG:
            print >> sys.stderr, "TorrentSearchGridManager: keywords:", self.searchkeywords,";time:%", time()

        self.filteredResults = 0
        self.remoteHits = {}
        # Differs from any keyword list, so the next local search is not
        # answered from the cache.
        self.oldsearchkeywords = ''

    def setBundleMode(self, bundle_mode):
        if bundle_mode != self.bundle_mode:
            self.bundle_mode = bundle_mode
            self.refreshGrid()

    def searchLocalDatabase(self):
        """ Called by GetHitsInCategory() to search local DB. Caches previous query result.

        Returns True when self.hits was replaced by a fresh query result
        and False when the cached result was kept or the keywords are empty.
        """
        if self.searchkeywords == self.oldsearchkeywords and len(self.hits) > 0:
            if DEBUG:
                print >>sys.stderr,"TorrentSearchGridManager: searchLocalDB: returning old hit list",len(self.hits)
            return False

        self.oldsearchkeywords = self.searchkeywords
        if DEBUG:
            print >>sys.stderr,"TorrentSearchGridManager: searchLocalDB: Want",self.searchkeywords

        if len(self.searchkeywords) == 0 or len(self.searchkeywords) == 1 and self.searchkeywords[0] == '':
            return False

        self.hits = self.searchmgr.search(self.searchkeywords)
        return True

    def addStoredRemoteResults(self):
        """ Called by GetHitsInCategory() to add remote results to self.hits

        A remote result is appended only when no hit with the same
        infohash is already present.
        """
        if len(self.remoteHits) > 0:
            catResults = self.remoteHits.values()
            if DEBUG:
                print >> sys.stderr,"TorrentSearchGridManager: remote: Adding %d remote results (%d in category)" % (len(self.remoteHits), len(catResults))

            # Index the infohashes once instead of rescanning self.hits for
            # every remote result.
            known_infohashes = set([item['infohash'] for item in self.hits])
            for remoteItem in catResults:
                infohash = remoteItem['infohash']
                if infohash not in known_infohashes:
                    self.hits.append(remoteItem)
                    known_infohashes.add(infohash)
                # Known hits are left untouched; replacing their channel
                # info with that of a more popular channel was disabled in
                # the original code.

    def gotRemoteHits(self, permid, kws, answers):
        """
        Called by GUIUtil when hits come in.

        29/06/11 boudewijn: from now on called on the GUITaskQueue instead on the wx MainThread to
        avoid blocking the GUI because of the database queries.
        """
        self.guiserver.add_task(lambda: self._gotRemoteHits(permid, kws, answers))

    def _gotRemoteHits(self, permid, kws, answers):
        """Convert and store remote answers for the current query.

        ANSWERS maps infohash to a dict with at least 'content_name',
        'length', 'category', 'seeder' and 'leecher'. Returns True when
        new hits were stored and the grid was asked to refresh, False
        otherwise (including on any exception).
        """
        try:
            if DEBUG:
                print >>sys.stderr,"TorrentSearchGridManager: gotRemoteHist: got",len(answers),"unfiltered results for",kws, bin2str(permid), time()

            # Always store the results, only display when in filesMode
            # We got some replies. First check if they are for the current query
            if self.searchkeywords == kws:
                numResults = 0
                catobj = Category.getInstance()
                for key,value in answers.iteritems():

                    if self.torrent_db.hasTorrent(key):
                        if DEBUG:
                            print >>sys.stderr,"TorrentSearchGridManager: gotRemoteHist: Ignoring hit for",repr(value['content_name']),"already got it"
                        continue # do not show results we have ourselves

                    # First, check if it matches the word boundaries, that belongs to previous version

                    # Convert answer fields as per
                    # Session.query_connected_peers() spec. to NEWDB format
                    newval = {}
                    newval['name'] = value['content_name']
                    newval['infohash'] = key
                    newval['torrent_file_name'] = ''
                    newval['length'] = value['length']
                    newval['creation_date'] = time()  # None gives '?' in GUI
                    newval['relevance'] = 0
                    newval['source'] = 'RQ'
                    newval['category'] = value['category'][0]
                    # We trust the peer
                    newval['status'] = 'good'
                    newval['num_seeders'] = value['seeder'] or 0
                    newval['num_leechers'] = value['leecher'] or 0

                    # OLPROTO_VER_NINETH includes a torrent_size. Set to
                    # -1 when not available.
                    if 'torrent_size' in value:
                        newval['torrent_size'] = value['torrent_size']
                    else:
                        newval['torrent_size'] = -1

                    # OLPROTO_VER_ELEVENTH includes channel_permid, channel_name fields.
                    if 'channel_permid' not in value:
                        # just to check if it is not OLPROTO_VER_ELEVENTH version
                        # if so, check word boundaries in the swarm name
                        ls = split_into_keywords(value['content_name'])

                        if DEBUG:
                            print >>sys.stderr,"TorrentSearchGridManager: ls is",repr(ls)
                            print >>sys.stderr,"TorrentSearchGridManager: kws is",repr(kws)

                        flag = False
                        for kw in kws:
                            if kw not in ls:
                                flag = True
                                break
                        if flag:
                            continue

                    if 'channel_permid' in value:
                        newval['channel_permid'] = value['channel_permid']
                    else:
                        newval['channel_permid'] = ""

                    if 'channel_name' in value:
                        newval['channel_name'] = value['channel_name']
                    else:
                        newval['channel_name'] = ""

                    if 'channel_permid' in value:
                        newval['neg_votes'] = self.votecastdb.getNegVotes(value['channel_permid'])
                        newval['subscriptions'] = self.votecastdb.getNumSubscriptions(value['channel_permid'])
                        if newval['subscriptions']-newval['neg_votes']<VOTE_LIMIT:
                            # now, this is SPAM
                            continue
                    else:
                        newval['subscriptions'] = 0
                        newval['neg_votes'] = 0

                    # Extra field: Set from which peer this info originates
                    newval['query_permids'] = [permid]

                    # Filter out results from unwanted categories
                    flag = False
                    for cat in value['category']:
                        rank = catobj.getCategoryRank(cat)
                        if rank == -1:
                            if DEBUG:
                                print >>sys.stderr,"TorrentSearchGridManager: gotRemoteHits: Got",repr(newval['name']),"from banned category",cat,", discarded it."
                            flag = True
                            self.filteredResults += 1
                            break
                    if flag:
                        continue

                    if newval['infohash'] in self.remoteHits:
                        if DEBUG:
                            print >>sys.stderr,"TorrentSearchGridManager: gotRemoteHist: merging hit",repr(newval['name'])

                        # merge this result with previous results
                        oldval = self.remoteHits[newval['infohash']]
                        for query_permid in newval['query_permids']:
                            if query_permid not in oldval['query_permids']:
                                oldval['query_permids'].append(query_permid)

                        # if a hit belongs to a more popular channel, then replace the previous
                        if newval['channel_permid'] !="" and newval['channel_name'] != "" and newval['subscriptions']-newval['neg_votes'] > oldval['subscriptions']-oldval['neg_votes']:
                            oldval['subscriptions'] = newval['subscriptions']
                            oldval['neg_votes'] = newval['neg_votes']
                            oldval['channel_permid'] = newval['channel_permid']
                            oldval['channel_name'] = newval['channel_name']
                    else:
                        if DEBUG:
                            print >>sys.stderr,"TorrentSearchGridManager: gotRemoteHist: appending hit",repr(newval['name'])

                        self.remoteHits[newval['infohash']] = newval
                        numResults += 1
                        # if numResults % 5 == 0:
                        # self.refreshGrid()

                if numResults > 0:
                    self.refreshGrid()
                    if DEBUG:
                        print >>sys.stderr,'TorrentSearchGridManager: gotRemoteHits: Refresh grid after new remote torrent hits came in'
                    return True

            elif DEBUG:
                print >>sys.stderr,"TorrentSearchGridManager: gotRemoteHits: got hits for",kws,"but current search is for",self.searchkeywords
            return False
        except Exception:
            # Runs on the GUI task queue: log and report failure instead of
            # propagating, but let KeyboardInterrupt/SystemExit through.
            print_exc()
            return False

    def refreshGrid(self):
        if self.gridmgr is not None:
            self.gridmgr.refresh()

    # Rameez: The following code will call normalization functions and then
    # sort and merge the torrent results
    def sort(self):
        """Sort self.hits in place, highest combined normalized score first."""
        self.doStatNormalization(self.hits, 'num_seeders', 'norm_num_seeders')
        self.doStatNormalization(self.hits, 'neg_votes', 'norm_neg_votes')
        self.doStatNormalization(self.hits, 'subscriptions', 'norm_subscriptions')

        # NOTE(review): norm_neg_votes is added with a positive weight, so
        # more negative votes rank a hit higher - confirm this is intended.
        def compare_scores(a, b):
            # normScores can be small, so multiply
            return int( 1000000.0 * ( 0.8*b.get('norm_num_seeders',0) + 0.1*b.get('norm_neg_votes',0) + 0.1*b.get('norm_subscriptions',0) -
                                      0.8*a.get('norm_num_seeders',0) - 0.1*a.get('norm_neg_votes',0) - 0.1*a.get('norm_subscriptions',0) ))

        self.hits.sort(compare_scores)

    def doStatNormalization(self, hits, normKey, newKey):
        '''Center the variance on zero (this means mean == 0) and divide
        all values by the standard deviation. This is sometimes called scaling.
        This is done on the field normKey of hits and the output is added to a new
        field called newKey.

        Missing or None values count as 0. The sample standard deviation
        (n - 1 denominator) is used; when it is 0, or there are fewer than
        two hits, newKey is set to 0.'''
        count = len(hits)
        values = [(hit.get(normKey, 0) or 0) for hit in hits]

        # float() keeps the division exact for integer statistics; plain
        # "/" on ints floors under Python 2.
        if count > 0:
            mean = float(sum(values)) / count
        else:
            mean = 0

        if count > 1:
            squared = sum([(value - mean) * (value - mean) for value in values])
            dev = float(squared) / (count - 1)
        else:
            dev = 0

        stdDev = sqrt(dev)

        for hit, value in zip(hits, values):
            if stdDev > 0:
                hit[newKey] = (value - mean) / stdDev
            else:
                hit[newKey] = 0
def register(self, torrent_db, gridmgr):
    """Attach the torrent database and the grid manager to this object.

    torrent_db is stored as-is and also wrapped in a SearchManager, which
    is kept in self.searchmgr; gridmgr is stored in self.gridmgr.
    """
    self.torrent_db = torrent_db
    keyword_searcher = SearchManager(torrent_db)
    self.searchmgr = keyword_searcher
    self.gridmgr = gridmgr
def register(self, peer_db, friend_db, gridmgr):
    """Build one SearchManager per database and remember the grid manager.

    The SearchManager over peer_db goes to self.psearchmgr, the one over
    friend_db to self.fsearchmgr; gridmgr is stored in self.gridmgr.
    """
    peer_searcher = SearchManager(peer_db)
    self.psearchmgr = peer_searcher
    friend_searcher = SearchManager(friend_db)
    self.fsearchmgr = friend_searcher
    self.gridmgr = gridmgr
class PeerSearchGridManager:
    """Singleton that keyword-searches the peer and friend databases.

    Two modes are used as dictionary keys throughout: 'personsMode'
    (searched through self.psearchmgr) and 'friendsMode' (searched through
    self.fsearchmgr). The most recent result list is kept in self.hits and
    is shared by both modes.
    """

    # Holds the one and only instance once it has been constructed.
    __single = None

    def __init__(self, guiUtility):
        """Create the singleton; a second construction raises RuntimeError."""
        if PeerSearchGridManager.__single:
            raise RuntimeError("PeerSearchGridManager is singleton")
        PeerSearchGridManager.__single = self

        self.guiUtility = guiUtility

        # Result list of the last search, not filtered by category.
        self.hits = []

        # Search managers and grid manager are filled in by register().
        self.psearchmgr = None
        self.fsearchmgr = None
        self.gridmgr = None

        # Looked up lazily from guiUtility on the first getHits() call.
        self.standardOverview = None

        # Current and previous query, per mode.
        self.searchkeywords = {'personsMode': [], 'friendsMode': []}
        self.oldsearchkeywords = {'personsMode': [], 'friendsMode': []}

    @staticmethod
    def getInstance(*args, **kw):
        """Return the singleton, constructing it with the given arguments if needed."""
        if PeerSearchGridManager.__single is None:
            PeerSearchGridManager(*args, **kw)
        return PeerSearchGridManager.__single

    def register(self, peer_db, friend_db, gridmgr):
        """Wrap both databases in SearchManagers and store the grid manager."""
        peer_searcher = SearchManager(peer_db)
        self.psearchmgr = peer_searcher
        friend_searcher = SearchManager(friend_db)
        self.fsearchmgr = friend_searcher
        self.gridmgr = gridmgr

    def getHits(self, mode, range):
        """Search for the current keywords of `mode` and return one page.

        mode is 'personsMode' or 'friendsMode'; range is indexed as
        (begin, end). Returns [total_hits, hits[begin:end]] with `end`
        clipped to the number of hits, or [0, None] when `begin` lies
        beyond the last hit. The search feedback of the standard overview
        is updated as a side effect.
        """
        if DEBUG:
            print >> sys.stderr, "PeerSearchGridManager: getHitsIn:", mode, range

        if not self.standardOverview:
            self.standardOverview = self.guiUtility.standardOverview

        # The local search leaves its result in self.hits.
        self.searchLocalDatabase(mode)
        total = len(self.hits)

        if DEBUG:
            print >> sys.stderr, 'PeerSearchGridManager: getHitsInCat: search found: %d items' % total
        if DEBUG:
            print >> sys.stderr, 'PeerSearchGridManager: getHitsInCat: torrentFilter after search found: %d items' % total

        if mode == 'personsMode':
            feedback_kind = 'peers'
        elif mode == 'friendsMode':
            feedback_kind = 'friends'
        self.standardOverview.setSearchFeedback(
            feedback_kind, True, total, self.searchkeywords[mode])

        if range[0] > total:
            return [0, None]
        stop = min(range[1], total)
        return [total, self.hits[range[0]:stop]]

    def setSearchKeywords(self, wantkeywords, mode):
        """Store the keywords to search for in `mode`.

        An empty keyword list additionally dumps the current call stack
        (via print_stack) before being stored.
        """
        if len(wantkeywords) == 0:
            print_stack()
        self.searchkeywords[mode] = wantkeywords

    def inSearchMode(self, mode):
        """Return True when `mode` has a non-empty keyword list."""
        keywords = self.searchkeywords[mode]
        if DEBUG:
            print >> sys.stderr, "PeerSearchGridManager: inSearchMode?", keywords
        return bool(keywords)

    def stopSearch(self):
        """Do nothing; present so the manager offers a stopSearch method."""
        pass

    def searchLocalDatabase(self, mode):
        """ Called by getHits() to search local DB. Caches previous query result. """
        wanted = self.searchkeywords[mode]

        # Same query as the recorded previous one and something to show:
        # reuse the stored hits.
        if wanted == self.oldsearchkeywords[mode] and len(self.hits) > 0:
            if DEBUG:
                print >> sys.stderr, "PeerSearchGridManager: searchLocalDB: returning old hit list", len(self.hits)
            return self.hits

        if DEBUG:
            print >> sys.stderr, "PeerSearchGridManager: searchLocalDB: Want", wanted

        # No keywords, or a single empty keyword: nothing to search for.
        if len(wanted) == 0 or (len(wanted) == 1 and wanted[0] == ''):
            return self.hits

        searcher = self.psearchmgr if mode == 'personsMode' else self.fsearchmgr
        self.hits = searcher.search(wanted)
        return self.hits
class TorrentSearchGridManager:
    """Singleton that collects torrent search results for the GUI grid.

    Results come from three places: the local torrent database (through
    self.searchmgr), remote peers (stored in self.remoteHits by
    gotRemoteHits) and the web2 "data on demand" object self.dod. They are
    merged into self.hits, scored and sorted by getHitsInCategory().

    Modes used as dictionary keys are 'filesMode' and 'libraryMode'.
    """

    # Holds the one and only instance once it has been constructed.
    __single = None

    def __init__(self, guiUtility):
        """Create the singleton; a second construction raises RuntimeError."""
        if TorrentSearchGridManager.__single:
            raise RuntimeError("TorrentSearchGridManager is singleton")
        TorrentSearchGridManager.__single = self
        self.guiUtility = guiUtility

        # Contains all matches for keywords in DB, not filtered by category
        self.hits = []
        # Remote results for current keywords, keyed by infohash
        self.remoteHits = {}
        # Web2 data-on-demand object, created by searchWeb2()
        self.dod = None
        # Jelle's word filter
        self.searchmgr = None
        self.torrent_db = None
        # For asking for a refresh when remote results came in
        self.gridmgr = None
        self.standardOverview = None
        self.searchkeywords = {'filesMode': [], 'libraryMode': []}
        self.oldsearchkeywords = {'filesMode': [], 'libraryMode': []}  # previous query
        self.category = Category.getInstance()

    @staticmethod
    def getInstance(*args, **kw):
        """Return the singleton, constructing it with the given arguments if needed."""
        if TorrentSearchGridManager.__single is None:
            TorrentSearchGridManager(*args, **kw)
        return TorrentSearchGridManager.__single

    def register(self, torrent_db, gridmgr):
        """Store the torrent DB, wrap it in a SearchManager and store the grid manager."""
        self.torrent_db = torrent_db
        self.searchmgr = SearchManager(torrent_db)
        self.gridmgr = gridmgr

    def getHitsInCategory(self, mode, categorykey, range):
        """Search, filter, merge and sort hits; return one page of them.

        mode is 'filesMode' or 'libraryMode'. categorykey is 'all' or a
        category name such as 'Video' (compared case-insensitively).
        range is indexed as (begin, end).

        Returns [total_hits, hits[begin:end]] with `end` clipped to the
        number of hits, or [0, None] when `begin` lies beyond the last hit.
        Search feedback on the standard overview is updated along the way.
        """
        if DEBUG:
            print >> sys.stderr, "TorrentSearchGridManager: getHitsInCategory:", mode, categorykey, range

        categorykey = categorykey.lower()
        enabledcatslow = ["other"]
        for catname, displayname in self.category.getCategoryNames():
            enabledcatslow.append(catname.lower())

        if not self.standardOverview:
            self.standardOverview = self.guiUtility.standardOverview

        # TODO: do all filtering in DB query
        def torrentFilter(torrent):
            library = (mode == 'libraryMode')
            okLibrary = not library or torrent.get('myDownloadHistory', False)

            categories = torrent.get("category", [])
            if not categories:
                categories = ["other"]
            lowered = [cat.lower() for cat in categories]
            if categorykey == 'all':
                okCategory = False
                for torcat in lowered:
                    if torcat in enabledcatslow:
                        okCategory = True
                        break
            else:
                okCategory = categorykey in lowered

            okGood = torrent['status'] == 'good' or torrent.get('myDownloadHistory', False)
            # This runs once per hit, so keep the trace behind DEBUG like
            # the other diagnostics in this class.
            if DEBUG:
                print >> sys.stderr, "FILTER: lib", okLibrary, "cat", okCategory, "good", okGood
            return okLibrary and okCategory and okGood

        # 1. Local search puts hits in self.hits
        self.searchLocalDatabase(mode)
        if DEBUG:
            print >> sys.stderr, 'TorrentSearchGridManager: getHitsInCat: search found: %d items' % len(self.hits)

        # 2. Filter self.hits on category and status. A list is built
        # explicitly because remote and web2 hits are appended to it below.
        self.hits = [torrent for torrent in self.hits if torrentFilter(torrent)]
        if DEBUG:
            print >> sys.stderr, 'TorrentSearchGridManager: getHitsInCat: torrentFilter after search found: %d items' % len(self.hits)

        keywords = self.searchkeywords[mode]
        self.standardOverview.setSearchFeedback('web2', False, -1, keywords)
        self.standardOverview.setSearchFeedback('remote', False, -1, keywords)
        if mode == 'filesMode':
            self.standardOverview.setSearchFeedback('torrent', False, len(self.hits), keywords)
        elif mode == 'libraryMode':
            # set finished true and use other string
            self.standardOverview.setSearchFeedback('library', True, len(self.hits), keywords)

        # 3. Add remote hits that may apply. TODO: double filtering, could
        # add remote hits to self.hits before the torrentFilter pass.
        self.addStoredRemoteResults(mode, categorykey)
        if DEBUG:
            print >> sys.stderr, 'TorrentSearchGridManager: getHitsInCat: found after search: %d items' % len(self.hits)

        self.addStoredWeb2Results(mode, categorykey, range)

        total = len(self.hits)
        if range[0] > total:
            return [0, None]
        end = min(range[1], total)
        begin = range[0]

        self.sort()
        return [total, self.hits[begin:end]]

    def setSearchKeywords(self, wantkeywords, mode):
        """Store the keywords for `mode`; a new 'filesMode' query drops remote and web2 results."""
        self.searchkeywords[mode] = wantkeywords
        if mode == 'filesMode':
            self.remoteHits = {}
            if self.dod:
                self.dod.clear()

    def inSearchMode(self, mode):
        """Return True when `mode` has a non-empty keyword list."""
        return bool(self.searchkeywords.get(mode))

    def stopSearch(self):
        """Stop the web2 data-on-demand object, if there is one."""
        # TODO
        if self.dod:
            self.dod.stop()

    def getCurrentHitsLen(self):
        """Return the number of hits currently held."""
        return len(self.hits)

    def searchLocalDatabase(self, mode):
        """ Called by GetHitsInCategory() to search local DB. Caches previous query result. """
        wanted = self.searchkeywords[mode]

        if wanted == self.oldsearchkeywords[mode] and len(self.hits) > 0:
            if DEBUG:
                print >> sys.stderr, "TorrentSearchGridManager: searchLocalDB: returning old hit list", len(self.hits)
            return self.hits

        if DEBUG:
            print >> sys.stderr, "TorrentSearchGridManager: searchLocalDB: Want", wanted

        # No keywords, or a single empty keyword: nothing to search for.
        if len(wanted) == 0 or (len(wanted) == 1 and wanted[0] == ''):
            return self.hits

        self.hits = self.searchmgr.search(wanted)
        return self.hits

    def addStoredRemoteResults(self, mode, cat):
        """ Called by GetHitsInCategory() to add remote results to self.hits

        Only remote hits whose (first) category equals `cat` are added,
        or all of them when `cat` is 'all'. Hits whose infohash is already
        present in self.hits are skipped. Does nothing when no remote hits
        are stored.
        """
        if len(self.remoteHits) == 0:
            return

        def catFilter(item):
            icat = item.get('category')
            if isinstance(icat, list):
                if not icat:
                    # An empty category list cannot match anything.
                    return False
                icat = icat[0].lower()
            elif isinstance(icat, str):
                icat = icat.lower()
            else:
                return False
            return icat == cat or cat == 'all'

        catResults = [item for item in self.remoteHits.values() if catFilter(item)]
        if DEBUG:
            print >> sys.stderr, "TorrentSearchGridManager: remote: Adding %d remote results (%d in category)" % (
                len(self.remoteHits), len(catResults))

        # One pass over self.hits instead of rescanning it per remote hit.
        known = set(item.get('infohash') for item in self.hits)
        numResults = 0
        for remoteItem in catResults:
            infohash = remoteItem['infohash']
            if infohash not in known:
                known.add(infohash)
                self.hits.append(remoteItem)
                numResults += 1

        self.standardOverview.setSearchFeedback('remote', False, numResults, self.searchkeywords[mode])

    def gotRemoteHits(self, permid, kws, answers, mode):
        """ Called by GUIUtil when hits come in.

        permid identifies the answering peer, kws is the query the answers
        belong to and answers maps infohash -> reply record. Answers are
        stored in self.remoteHits only when kws equals the current
        'filesMode' keywords.

        Returns True when the answers were for the current query, False
        otherwise or when processing raised an exception (which is printed
        with print_exc).
        """
        if DEBUG:
            print >> sys.stderr, "rmote each time, so we can call sort here, hehe"
        try:
            if DEBUG:
                print >> sys.stderr, "TorrentSearchGridManager: gotRemoteHist: got", len(answers)

            # Always store the results, only display when in filesMode.
            # We got some replies. First check if they are for the current query
            if self.searchkeywords['filesMode'] == kws:
                catobj = Category.getInstance()
                for key, value in answers.items():
                    if self.torrent_db.hasTorrent(key):
                        continue  # do not show results we have ourselves

                    # Convert answer fields as per
                    # Session.query_connected_peers() spec. to NEWDB format
                    newval = {}
                    newval['name'] = value['content_name']
                    newval['infohash'] = key
                    newval['torrent_file_name'] = ''
                    newval['length'] = value['length']
                    newval['creation_date'] = time()  # None gives '?' in GUI
                    newval['relevance'] = 0
                    newval['source'] = 'RQ'
                    newval['category'] = value['category'][0]
                    # We trust the peer
                    newval['status'] = 'good'
                    newval['num_seeders'] = value['seeder']
                    newval['num_leechers'] = value['leecher']
                    # Extra field: set from which peer this info originates
                    newval['query_permid'] = permid
                    if DEBUG:
                        print >> sys.stderr, "TorrentSearchGridManager: gotRemoteHist: appending hit", repr(newval['name'])

                    # Filter out results from unwanted categories
                    banned = False
                    for cat in value['category']:
                        if catobj.getCategoryRank(cat) == -1:
                            if DEBUG:
                                print >> sys.stderr, "TorrentSearchGridManager: gotRemoteHits: Got", repr(newval['name']), "from banned category", cat, ", discarded it."
                            banned = True
                            break
                    if banned:
                        continue

                    # TODO: select best result?
                    if newval['infohash'] not in self.remoteHits:
                        self.remoteHits[newval['infohash']] = newval

                if mode == 'filesMode' and self.standardOverview.getSearchBusy():
                    self.refreshGrid()
                return True
            elif DEBUG:
                print >> sys.stderr, "TorrentSearchGridManager: gotRemoteHist: got hits for", kws, "but current search is for", self.searchkeywords[mode]
            return False
        except Exception:
            # Malformed answers must not take down the caller; report and
            # signal failure instead.
            print_exc()
            return False

    def refreshGrid(self):
        """Ask the grid manager, if registered, to refresh."""
        if self.gridmgr is not None:
            self.gridmgr.refresh()

    def notifyView(self, value, cmd):
        """Print `cmd` and the repr of `value` to stderr."""
        print >> sys.stderr, "TorrentSearchGridManager: notfyView ###########################", cmd, repr(value)

    #
    # Move to Web2SearchGridManager
    #
    def searchWeb2(self, initialnum):
        """Start a new web2 search for the 'filesMode' keywords.

        Any previous data-on-demand object is stopped first. initialnum is
        passed to the new object's request() call.
        """
        if self.dod:
            self.dod.stop()
        self.dod = web2.DataOnDemandWeb2(
            " ".join(self.searchkeywords['filesMode']), guiutil=self.guiUtility)
        self.dod.request(initialnum)
        self.dod.register(self.tthread_gotWeb2Hit)

    def tthread_gotWeb2Hit(self, item):
        """ Called by Web2DBSearchThread*s* """
        if DEBUG:
            print >> sys.stderr, "TorrentSearchGridManager: tthread_gotWeb2Hit", repr(item['content_name'])
            print >> sys.stderr, "webondemand each time, so we can call sort here, hehe"
        # Hand the refresh over to wx via CallAfter instead of calling it here.
        wx.CallAfter(self.refreshGrid)

    def web2tonewdb(self, value):
        """Translate a web2 result record into the field names used for hits.

        'views' is copied both to 'views' and to 'num_seeders'. 'preview'
        is copied only when present. Any other missing field raises
        KeyError.
        """
        newval = {
            'infohash': value['infohash'],
            'name': value['content_name'],
            'status': value['status'],
            'description': value['description'],
            'tags': value['tags'],
            'url': value['url'],
            'num_leechers': value['leecher'],
            'num_seeders': value['views'],
            'views': value['views'],
            'web2': value['web2'],
            'length': value['length'],
        }
        if 'preview' in value:  # Apparently not always present
            newval['preview'] = value['preview']
        return newval

    def addStoredWeb2Results(self, mode, categorykey, range):
        """Append the web2 results collected so far to self.hits.

        Only active in 'filesMode', with web2 search enabled in the config,
        a non-empty query and categorykey 'video' or 'all'. When at most
        one page of requested web2 results is left beyond range[1], another
        page is requested.
        """
        web2on = self.guiUtility.utility.config.Read('enableweb2search', "boolean")

        if not (mode == 'filesMode' and web2on and self.inSearchMode(mode)
                and categorykey in ['video', 'all']):
            return
        if not self.dod:
            return

        # Arno: ask for more when needed (=only one page left to display)
        if DEBUG:
            print >> sys.stderr, "TorrentSearchManager: web2: requestMore?", range[1], self.dod.getNumRequested()
        pagesize = range[1] - range[0]
        diff = self.dod.getNumRequested() - range[1]
        if diff <= pagesize:
            if DEBUG:
                print >> sys.stderr, "TorrentSearchManager: web2: requestMore diff", diff
            self.dod.requestMore(pagesize)

        data = self.dod.getDataSafe()
        if DEBUG:
            print >> sys.stderr, "TorrentSearchManager: getHitsInCat: web2: Got total", len(data)

        numResults = 0
        for value in data:
            # Translate to NEWDB/FileItemPanel format, doing this in
            # web2/video/genericsearch.py breaks something
            self.hits.append(self.web2tonewdb(value))
            numResults += 1
        self.standardOverview.setSearchFeedback(
            'web2', False, numResults, self.searchkeywords[mode])

    # Rameez: The following code will call normalization functions and then
    # sort and merge the combined torrent and youtube results
    def sort(self):
        """Score all hits and sort self.hits in place, best score first.

        Hits without a 'normScore' (web2 hits whose view count is
        "unknown") are placed after all scored hits. Hits with equal
        scores keep their relative order.
        """
        self.normalizeResults()
        self.statisticalNormalization()

        def sortKey(hit):
            score = hit.get('normScore')
            if score is None:
                # Unscored hits go last; do not rely on None < number.
                return float('-inf')
            return score

        self.hits.sort(key=sortKey, reverse=True)

    def normalizeResults(self):
        """Set 'normScore' of each hit to its share of its group's total.

        Torrent hits (no 'views' field) are weighted by 'num_seeders',
        a missing or None value counting as 0. Web2 hits with a known
        view count are weighted by int(views). When a group's total is 0,
        every hit in it gets 0.0 instead of dividing by zero.
        """
        torrents = [hit for hit in self.hits if self._isTorrentHit(hit)]
        self._setShares(torrents, [hit.get('num_seeders') or 0 for hit in torrents])

        videos = [hit for hit in self.hits if self._isRatedWeb2Hit(hit)]
        # views may arrive as a string; convert the same way for the total
        # and for the per-hit share.
        self._setShares(videos, [int(hit['views']) for hit in videos])

    def statisticalNormalization(self):
        """Replace 'normScore' by its z-score, separately per result group.

        Torrent hits and web2 hits with a known view count are each
        centred on their own mean and divided by their own sample standard
        deviation, which makes the two groups comparable in sort().
        """
        self._standardize([hit for hit in self.hits
                           if self._isTorrentHit(hit) and 'normScore' in hit])
        self._standardize([hit for hit in self.hits if self._isRatedWeb2Hit(hit)])

    def _isTorrentHit(self, hit):
        """Return True for hits without a 'views' field (torrent results)."""
        return 'views' not in hit

    def _isRatedWeb2Hit(self, hit):
        """Return True for hits that carry a 'views' value other than "unknown"."""
        return 'views' in hit and hit['views'] != "unknown"

    def _setShares(self, hits, weights):
        """Store weight / sum(weights) as 'normScore' on each hit (0.0 if the sum is 0)."""
        total = float(sum(weights))
        for hit, weight in zip(hits, weights):
            if total:
                hit['normScore'] = weight / total
            else:
                hit['normScore'] = 0.0

    def _standardize(self, hits):
        """Turn the 'normScore' of `hits` into z-scores (0.0 when there is no spread)."""
        count = len(hits)
        if count == 0:
            return
        scores = [hit['normScore'] for hit in hits]
        mean = sum(scores) / float(count)
        if count > 1:
            variance = sum((score - mean) ** 2 for score in scores) / (count - 1)
        else:
            variance = 0
        stdDev = sqrt(variance)
        for hit, score in zip(hits, scores):
            if stdDev > 0:
                hit['normScore'] = (score - mean) / stdDev
            else:
                # All scores in the group are equal, so each sits at the mean.
                hit['normScore'] = 0.0
class RemoteQueryMsgHandler: __single = None def __init__(self): if RemoteQueryMsgHandler.__single: raise RuntimeError, "RemoteQueryMsgHandler is singleton" RemoteQueryMsgHandler.__single = self self.connections = Set() # only connected remote_search_peers self.query_ids2rec = {} # ARNOCOMMENT: TODO: purge old entries... self.overlay_log = None self.registered = False def getInstance(*args, **kw): if RemoteQueryMsgHandler.__single is None: RemoteQueryMsgHandler(*args, **kw) return RemoteQueryMsgHandler.__single getInstance = staticmethod(getInstance) def register(self, overlay_bridge, launchmany, config, bc_fac, log=''): if DEBUG: print >> sys.stderr, "rquery: register" self.overlay_bridge = overlay_bridge self.launchmany = launchmany self.search_manager = SearchManager(launchmany.torrent_db) self.peer_db = launchmany.peer_db self.config = config self.bc_fac = bc_fac # May be None if log: self.overlay_log = OverlayLogger.getInstance(log) self.registered = True # # Incoming messages # def handleMessage(self, permid, selversion, message): if not self.registered: return True t = message[0] if t == QUERY: if DEBUG: print >> sys.stderr, "rquery: Got QUERY", len(message) return self.recv_query(permid, message, selversion) if t == QUERY_REPLY: if DEBUG: print >> sys.stderr, "rquery: Got QUERY_REPLY", len(message) return self.recv_query_reply(permid, message, selversion) else: if DEBUG: print >> sys.stderr, "rquery: UNKNOWN OVERLAY MESSAGE", ord(t) return False # # Incoming connections # def handleConnection(self, exc, permid, selversion, locally_initiated): if not self.registered: return True if DEBUG: print >> sys.stderr, "rquery: handleConnection", exc, "v", selversion, "local", locally_initiated if exc is not None: return if selversion < OLPROTO_VER_SIXTH: return True if exc is None: self.connections.add(permid) else: self.connections.remove(permid) return True # # Send query # def send_query(self, query, usercallback, max_nqueries=MAX_NQUERIES): """ Called by GUI 
Thread """ if max_nqueries is None: max_nqueries = MAX_NQUERIES if DEBUG: print >> sys.stderr, "rquery: send_query", query if max_nqueries > 0: send_query_func = lambda: self.network_send_query_callback( query, usercallback, max_nqueries) self.overlay_bridge.add_task(send_query_func, 0) def network_send_query_callback(self, query, usercallback, max_nqueries): """ Called by overlay thread """ p = self.create_query(query, usercallback) m = QUERY + p query_conn_callback_lambda = lambda exc, dns, permid, selversion: self.conn_callback( exc, dns, permid, selversion, m) if DEBUG: print >> sys.stderr, "rquery: send_query: Connected", len( self.connections), "peers" #print "******** send query net cb:", query, len(self.connections), self.connections nqueries = 0 for permid in self.connections: self.overlay_bridge.connect(permid, query_conn_callback_lambda) nqueries += 1 if nqueries < max_nqueries and self.bc_fac and self.bc_fac.buddycast_core: query_cand = self.bc_fac.buddycast_core.getRemoteSearchPeers( MAX_NQUERIES - nqueries) for permid in query_cand: if permid not in self.connections: # don't call twice self.overlay_bridge.connect(permid, query_conn_callback_lambda) nqueries += 1 if DEBUG: print >> sys.stderr, "rquery: send_query: Sent to", nqueries, "peers" def create_query(self, query, usercallback): d = {} d['q'] = 'SIMPLE ' + query d['id'] = self.create_and_register_query_id(query, usercallback) return bencode(d) def create_and_register_query_id(self, query, usercallback): id = Rand.rand_bytes(QUERY_ID_SIZE) queryrec = {'query': query, 'usercallback': usercallback} self.query_ids2rec[id] = queryrec return id def is_registered_query_id(self, id): if id in self.query_ids2rec: return self.query_ids2rec[id] else: return None def conn_callback(self, exc, dns, permid, selversion, message): if exc is None and selversion >= OLPROTO_VER_SIXTH: self.overlay_bridge.send(permid, message, self.send_callback) def send_callback(self, exc, permid): #print "******* queury was sent 
to", show_permid_short(permid), exc pass # # Receive query # def recv_query(self, permid, message, selversion): if selversion < OLPROTO_VER_SIXTH: return False # Unpack try: d = bdecode(message[1:]) except: if DEBUG: print >> sys.stderr, "rquery: Cannot bdecode QUERY message" #print_exc() return False if not isValidQuery(d, selversion): return False # Process self.process_query(permid, d, selversion) return True # # Send query reply # def process_query(self, permid, d, selversion): q = d['q'][len('SIMPLE '):] q = dunno2unicode(q) # Format: 'SIMPLE '+string of space separated keywords # In the future we could support full SQL queries: # SELECT infohash,torrent_name FROM torrent_db WHERE status = ALIVE kws = q.split() hits = self.search_manager.search(kws, maxhits=MAX_RESULTS) p = self.create_query_reply(d['id'], hits) m = QUERY_REPLY + p if self.overlay_log: nqueries = self.get_peer_nqueries(permid) # RECV_MSG PERMID OVERSION NUM_QUERIES MSG self.overlay_log('RECV_QRY', show_permid(permid), selversion, nqueries, repr(d)) # RPLY_QRY PERMID NUM_HITS MSG self.overlay_log('RPLY_QRY', show_permid(permid), len(hits), repr(p)) self.overlay_bridge.send(permid, m, self.send_callback) self.inc_peer_nqueries(permid) def create_query_reply(self, id, hits): d = {} d['id'] = id d2 = {} for torrent in hits: r = {} # NEWDBSTANDARD. Do not rename r's fields: they are part of the # rquery protocol spec. 
r['content_name'] = torrent['name'] r['length'] = torrent['length'] r['leecher'] = torrent['num_leechers'] r['seeder'] = torrent['num_seeders'] # Arno: TODO: sending category doesn't make sense as that's user-defined # leaving it now because of time constraints r['category'] = torrent['category'] d2[torrent['infohash']] = r d['a'] = d2 return bencode(d) # # Receive query reply # def recv_query_reply(self, permid, message, selversion): #print "****** recv query reply", len(message) if selversion < OLPROTO_VER_SIXTH: return False if len(message) > MAX_QUERY_REPLY_LEN: return True # don't close # Unpack try: d = bdecode(message[1:]) except: if DEBUG: print >> sys.stderr, "rquery: Cannot bdecode QUERY_REPLY message" return False if not isValidQueryReply(d, selversion): if DEBUG: print >> sys.stderr, "rquery: not valid QUERY_REPLY message" return False # Check auth queryrec = self.is_registered_query_id(d['id']) if not queryrec: if DEBUG: print >> sys.stderr, "rquery: QUERY_REPLY has unknown query ID" return False # Process self.process_query_reply(permid, queryrec['query'], queryrec['usercallback'], d) return True def process_query_reply(self, permid, query, usercallback, d): print >> sys.stderr, "rquery: process_query_reply:", show_permid_short( permid), query, d if len(d['a']) > 0: remote_query_usercallback_lambda = lambda: usercallback( permid, query, d['a']) self.launchmany.session.uch.perform_usercallback( remote_query_usercallback_lambda) elif DEBUG: print >> sys.stderr, "rquery: QUERY_REPLY: no results found" def test_send_query(self, query): """ Called by GUI Thread """ add_remote_hits_func = lambda: self.add_remote_query_hits(query) self.overlay_bridge.add_task(add_remote_hits_func, 3) def add_remote_query_hits(self, query): torrent = {} torrent['content_name'] = 'Hallo 1' torrent['length'] = 100000000 torrent['leecher'] = 200 torrent['seeder'] = 400 torrent['category'] = 'Video' torrent2 = {} torrent2['content_name'] = 'Hallo 2' torrent2['length'] = 7777777 
torrent2['leecher'] = 678 torrent2['seeder'] = 123 torrent2['category'] = 'Audio' d = {} ih = 'a' * 20 ih2 = 'b' * 20 d[ih] = torrent d[ih2] = torrent2 kws = query.split() permid = None self.notify_of_remote_hits(permid, kws, d) def inc_peer_nqueries(self, permid): peer = self.peer_db.getPeer(permid) try: if peer is not None: print >> sys.stderr, "rqmh: inc_peer_nqueries: getPeer", peer nqueries = peer['num_queries'] if nqueries is None: nqueries = 0 self.peer_db.updatePeer(permid, num_queries=nqueries + 1) except: print_exc()