Example #1
    def load_from_url(url):
        """
        If the URL starts with 'http:', load a BT .torrent or Tribler .tstream
        file from the URL and convert it into a TorrentDef. If the URL starts
        with our URL scheme, convert the URL into a URL-compatible TorrentDef.

        @param url URL
        @return TorrentDef.
        """
        # Class method, no locking required
        if url.startswith(P2PURL_SCHEME):
            (metainfo, swarmid) = makeurl.p2purl2metainfo(url)

            # Metainfo created from URL, so create URL compatible TorrentDef.
            metainfo['info']['url-compat'] = 1

            # For testing EXISTING LIVE: ENABLE, for old EXISTING MERKLE: DISABLE
            #metainfo['info']['name.utf-8'] = metainfo['info']['name']

            t = TorrentDef._create(metainfo)

            return t
        else:
            f = urlOpenTimeout(url)
            return TorrentDef._read(f)
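A minimal usage sketch (an illustration, not part of the original example; it assumes the Tribler.Core.TorrentDef import path used by Tribler at the time and a reachable .torrent URL):

    import sys
    from Tribler.Core.TorrentDef import TorrentDef

    # A plain http: URL is fetched and parsed as a .torrent file.
    tdef = TorrentDef.load_from_url("http://example.com/sample.torrent")
    print >> sys.stderr, "loaded:", tdef.get_name()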
Example #2
    def load_from_url(url):
        """
        If the URL starts with 'http:', load a BT .torrent or Tribler .tstream
        file from the URL and convert it into a TorrentDef. If the URL starts
        with our URL scheme, convert the URL into a URL-compatible TorrentDef.
        
        @param url URL
        @return TorrentDef.
        """
        # Class method, no locking required
        if url.startswith(P2PURL_SCHEME):
            (metainfo, swarmid) = makeurl.p2purl2metainfo(url)

            # Metainfo created from URL, so create URL compatible TorrentDef.
            metainfo["info"]["url-compat"] = 1

            # For testing EXISTING LIVE: ENABLE, for old EXISTING MERKLE: DISABLE
            # metainfo['info']['name.utf-8'] = metainfo['info']['name']

            t = TorrentDef._create(metainfo)

            return t
        else:
            f = urlOpenTimeout(url)
            return TorrentDef._read(f)
Example #3
def vod_event_callback(d, event, params):
    if event == VODEVENT_START:
        stream = params["stream"]

        # SWIFTPROC
        if stream is None:
            # Access swift HTTP interface directly
            stream = urlOpenTimeout(params["url"], timeout=30)
            # ARNOSMPTODO: available()

        grandtotal = 0L
        st = time.time()
        while True:
            global RATE
            total = 0
            while total < int(RATE):
                data = stream.read(int(RATE))
                total += len(data)

            grandtotal += total
            et = time.time()
            diff = max(et - st, 0.00001)
            grandrate = float(grandtotal) / diff
            print >> sys.stderr, "bitbucket: grandrate", grandrate, "~", RATE  #,"avail",stream.available()
            time.sleep(1.0)
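Note that the inner read loop above spins forever if the stream hits end-of-file: read() then returns an empty string, so total never reaches RATE. A defensive variant (a sketch, not the original code) requests only the missing bytes and breaks on an empty read:

    total = 0
    while total < int(RATE):
        data = stream.read(int(RATE) - total)
        if not data:
            break  # EOF: stop instead of looping on empty reads
        total += len(data)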
Example #4
def getStatus(announce, url, info_hash, info_hashes):
    returndict = {info_hash: (0, 0)}
    try:
        resp = timeouturlopen.urlOpenTimeout(url, timeout=HTTP_TIMEOUT)
        response = resp.read()

        response_dict = bdecode(response)
        for cur_infohash, status in response_dict["files"].iteritems():
            seeder = max(0, status["complete"])
            leecher = max(0, status["incomplete"])

            returndict[cur_infohash] = (seeder, leecher)

        registerSuccess(announce)
        return returndict

    except IOError:
        registerIOError(announce)
        return {info_hash: (-1, -1)}

    except KeyError:
        try:
            if "flags" in response_dict:  # may be interval problem
                if "min_request_interval" in response_dict["flags"]:
                    return {info_hash: (-3, -3)}
        except:
            pass
    except:
        pass
    return None
Example #5
def getStatus(announce, url, info_hash, info_hashes):
    returndict = {}
    try:
        resp = timeouturlopen.urlOpenTimeout(url, timeout=HTTP_TIMEOUT)
        response = resp.read()

        response_dict = bdecode(response)
        for cur_infohash, status in response_dict["files"].iteritems():
            seeder = max(0, status["complete"])
            leecher = max(0, status["incomplete"])

            returndict[cur_infohash] = (seeder, leecher)

        registerSuccess(announce)
        return returndict

    except IOError:
        registerIOError(announce)
        return {info_hash: (-1, -1)}

    except KeyError:
        try:
            if response_dict.has_key("flags"):  # may be interval problem
                if response_dict["flags"].has_key("min_request_interval"):
                    return {info_hash: (-3, -3)}
        except:
            pass
    except:
        pass
    return None
Example #6
def vod_event_callback(d, event, params):
    if event == VODEVENT_START:
        stream = params["stream"]

        # SWIFTPROC
        if stream is None:
            # Access swift HTTP interface directly
            stream = urlOpenTimeout(params["url"], timeout=30)
            # ARNOSMPTODO: available()

        grandtotal = 0
        st = time.time()
        while True:
            global RATE
            total = 0
            while total < int(RATE):
                data = stream.read(int(RATE))
                total += len(data)

            grandtotal += total
            et = time.time()
            diff = max(et - st, 0.00001)
            grandrate = float(grandtotal) / diff
            print >>sys.stderr, "bitbucket: grandrate", grandrate, "~", RATE  # ,"avail",stream.available()
            time.sleep(1.0)
Example #7
 def reopen(self):
     while True:
         try:
             self.stream = urlOpenTimeout(self.url)  # 30 sec timeout
             break
         except:
             print_exc()
             time.sleep(5.0)  # No exp. backoff, get back ASAP
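The comment records the design choice: a fixed 5-second retry gets the stream back as soon as possible. Where being gentle to the server matters more than a fast reconnect, a capped exponential backoff is the usual alternative (a hypothetical variant, not part of the original class):

    def reopen_with_backoff(self, max_delay=60.0):
        delay = 1.0
        while True:
            try:
                self.stream = urlOpenTimeout(self.url)
                break
            except:
                print_exc()
                time.sleep(delay)
                delay = min(delay * 2, max_delay)  # double the wait, capped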
Example #9
 def _refresh(self):
     channel_url = None
     try:
         self.key_url_lock.acquire()
         channel_url = deepcopy(self.key_url)
     finally:
         self.key_url_lock.release()
     
     if channel_url:
         for key, urls in channel_url.iteritems():
             if key in self.key_callbacks:
                 for url in urls:
                     if DEBUG:
                         print >> sys.stderr, "RssParser: getting rss", url, len(urls)
                     
                     historyfile = self.gethistfilename(url, key)
                     urls_already_seen = URLHistory(historyfile)
                     urls_already_seen.read()
                     
                     newItems = self.readUrl(url, urls_already_seen)
                     for title, new_urls, description, thumbnail in newItems:
                         for new_url in new_urls:
                             urls_already_seen.add(new_url)
                             urls_already_seen.write()
                             
                             try:
                                 if DEBUG:
                                     print >> sys.stderr, "RssParser: trying", new_url
                                 
                                 referer = urlparse(new_url)
                                 referer = referer.scheme+"://"+referer.netloc+"/"
                                 stream = urlOpenTimeout(new_url, referer=referer)
                                 bdata = stream.read()
                                 stream.close()
                                 
                                 bddata = bdecode(bdata, 1)
                                 torrent = TorrentDef._create(bddata)
                                 
                                 def processCallbacks(key):  
                                     for callback in self.key_callbacks[key]:
                                         try:
                                             callback(key, torrent, extraInfo = {'title':title, 'description': description, 'thumbnail': thumbnail})
                                         except:
                                             print_exc()
                                 
                                 if self.remote_th.is_registered():
                                     callback = lambda key=key: processCallbacks(key)
                                     self.remote_th.save_torrent(torrent, callback)
                                 else:
                                     processCallbacks(key)
                                     
                             except:
                                 if DEBUG:
                                     print >> sys.stderr, "RssParser: could not download", new_url
                                 pass
                             
                             time.sleep(RSS_CHECK_FREQUENCY)
Example #10
    def _refresh(self):
        channel_url = None
        try:
            self.key_url_lock.acquire()
            channel_url = deepcopy(self.key_url)
        finally:
            self.key_url_lock.release()

        if channel_url:
            for key, urls in channel_url.iteritems():
                if key in self.key_callbacks:
                    for url in urls:
                        if DEBUG:
                            print >> sys.stderr, "RssParser: getting rss", url, len(urls)

                        historyfile = self.gethistfilename(url, key)
                        urls_already_seen = URLHistory(historyfile)
                        urls_already_seen.read()

                        newItems = self.readUrl(url, urls_already_seen)
                        for title, new_urls, description, thumbnail in newItems:
                            for new_url in new_urls:
                                urls_already_seen.add(new_url)
                                urls_already_seen.write()

                                try:
                                    if DEBUG:
                                        print >> sys.stderr, "RssParser: trying", new_url

                                    referer = urlparse(new_url)
                                    referer = referer.scheme + "://" + referer.netloc + "/"
                                    stream = urlOpenTimeout(new_url, referer=referer)
                                    bdata = stream.read()
                                    stream.close()

                                    bddata = bdecode(bdata, 1)
                                    torrent = TorrentDef._create(bddata)

                                    def processCallbacks(key):
                                        for callback in self.key_callbacks[key]:
                                            try:
                                                callback(key, torrent, extraInfo={'title': title, 'description': description, 'thumbnail': thumbnail})
                                            except:
                                                print_exc()

                                    if self.remote_th.is_registered():
                                        callback = lambda key = key: processCallbacks(key)
                                        self.remote_th.save_torrent(torrent, callback)
                                    else:
                                        processCallbacks(key)

                                except:
                                    if DEBUG:
                                        print >> sys.stderr, "RssParser: could not download", new_url
                                    pass

                                time.sleep(RSS_CHECK_FREQUENCY)
Example #11
 def parse(self):
     self.title2entrymap = {}
     print >>sys.stderr,"feedp: Parsing",self.feedurl
     stream = urlOpenTimeout(self.feedurl,10)
     self.tree = etree.parse(stream)
     entries = self.tree.findall('{http://www.w3.org/2005/Atom}entry')
     for entry in entries:
         titleelement = entry.find('{http://www.w3.org/2005/Atom}title')
         #print >> sys.stderr,"feedp: Got title",titleelement.text
         self.title2entrymap[titleelement.text] = entry
Example #12
 def parse(self):
     self.title2entrymap = {}
     print >> sys.stderr, "feedp: Parsing", self.feedurl
     stream = urlOpenTimeout(self.feedurl, 10)
     self.tree = etree.parse(stream)
     entries = self.tree.findall('{http://www.w3.org/2005/Atom}entry')
     for entry in entries:
         titleelement = entry.find('{http://www.w3.org/2005/Atom}title')
         #print >> sys.stderr,"feedp: Got title",titleelement.text
         self.title2entrymap[titleelement.text] = entry
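parse() indexes the feed's Atom entries by title. A lookup helper built on that map might look like this (hypothetical, not part of the original class):

    def find_entry_href(title2entrymap, title):
        # Return the href of an entry's <link> element, or None if absent.
        entry = title2entrymap.get(title)
        if entry is None:
            return None
        link = entry.find('{http://www.w3.org/2005/Atom}link')
        if link is None:
            return None
        return link.attrib.get('href')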
Example #13
 def load_from_url(url):
     """
     Load a BT .torrent or Tribler .tribe file from the URL and convert
     it into a TorrentDef.
     
     @param url URL
     @return TorrentDef.
     """
     # Class method, no locking required
     f = urlOpenTimeout(url)
     return TorrentDef._read(f)
Example #15
 def parse(self):
     self.feedurls = []
     stream = urlOpenTimeout(self.metafeedurl,10)
     self.tree = etree.parse(stream)
     entries = self.tree.findall('{http://www.w3.org/2005/Atom}entry')
     for entry in entries:
         titleelement = entry.find('{http://www.w3.org/2005/Atom}title')
         linkelement = entry.find('{http://www.w3.org/2005/Atom}link')
         if linkelement is not None:
             if linkelement.attrib['type'] == 'application/atom+xml':
                 # Got feed
                 feedurl = linkelement.attrib['href']
                 self.feedurls.append(feedurl)
Example #16
 def parse(self):
     self.feedurls = []
     stream = urlOpenTimeout(self.metafeedurl, 10)
     self.tree = etree.parse(stream)
     entries = self.tree.findall('{http://www.w3.org/2005/Atom}entry')
     for entry in entries:
         titleelement = entry.find('{http://www.w3.org/2005/Atom}title')
         linkelement = entry.find('{http://www.w3.org/2005/Atom}link')
         if linkelement is not None:
             if linkelement.attrib['type'] == 'application/atom+xml':
                 # Got feed
                 feedurl = linkelement.attrib['href']
                 self.feedurls.append(feedurl)
Example #17
 def getIPInfoByURL(url, proxy=None):
     """ Get IP location by visit some ip search engine page """
     #TODO: getIPInfoByURL with Proxy support
     #Known urls: http://www.hostip.info/api/get.html?ip=xxx&position=true 
     #  http://www.melissadata.com/Lookups/iplocation.asp?ipaddress=xxx&submit=submit (using IP2Location database without coordinate)
     
     try:
         file = timeouturlopen.urlOpenTimeout(url,timeout=2)
         ip_info = file.read()
     except:
         if DEBUG:
             print >> sys.stderr,"ipinfo: getIPInfoByURL failed: cannot access", url
         raise Exception
                     
     return ip_info
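A usage sketch with the hostip.info endpoint named in the comment above (a hypothetical call; that service's availability and response format are not guaranteed):

    try:
        ip_info = getIPInfoByURL(
            "http://www.hostip.info/api/get.html?ip=1.2.3.4&position=true")
    except Exception:
        ip_info = None  # lookup failed or hit the 2-second timeout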
Example #18
 def hasNewVersion(self):
     my_version = self.utility.getVersion()
     try:
         # Arno: TODO: don't let this be done by MainThread 
         curr_status = urlOpenTimeout('http://tribler.org/version/').readlines()
         line1 = curr_status[0]
         if len(curr_status) > 1:
             self.update_url = curr_status[1].strip()
         else:
             self.update_url = 'http://tribler.org/'
         _curr_status = line1.split()
         self.curr_version = _curr_status[0]
         return self.newversion(self.curr_version, my_version)
     except:
         print_exc()
         return False
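self.newversion() is not shown in this example. A plausible dotted-version comparison (an assumption about its behaviour, not Tribler's actual implementation) compares the parts numerically rather than lexicographically:

    def newversion(curr_version, my_version):
        # "5.10.0" is newer than "5.2.1" numerically, although it sorts
        # lower as a plain string.
        curr = [int(part) for part in curr_version.split('.')]
        mine = [int(part) for part in my_version.split('.')]
        return curr > mine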
Example #19
def getStatus(url, info_hash):
    try:
        resp = timeouturlopen.urlOpenTimeout(url,timeout=HTTP_TIMEOUT)
        response = resp.read()
        
    except IOError:
#        print "IOError"
        return (-1, -1)                    # unknown
    except AttributeError:
#        print "AttributeError"
        return (-2, -2)                    # dead
    
    try:
        response_dict = bdecode(response)

    except:
#        print "DeCode Error "  + response
        return (-2, -2)                    # dead
    
    try:
        status = response_dict["files"][info_hash]
        seeder = status["complete"]
        if seeder < 0:
            seeder = 0
        leecher = status["incomplete"]
        if leecher < 0:
            leecher = 0
        
    except KeyError:
#        print "KeyError "  + info_hash + str(response_dict)
        try:
            if response_dict.has_key("flags"): # may be interval problem        
                if response_dict["flags"].has_key("min_request_interval"):
#                    print "interval problem"
                    return (-3 ,-3)
        except:
            pass
#        print "KeyError "  + info_hash + str(response_dict)
        return (-2, -2)                    # dead
    
    return (seeder, leecher)
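The sentinel tuples returned above encode tracker states rather than real counts. A hypothetical helper naming them, matching the inline comments:

    def describe_status(seeder, leecher):
        if (seeder, leecher) == (-1, -1):
            return "unknown (I/O error while contacting the tracker)"
        if (seeder, leecher) == (-2, -2):
            return "dead (no usable response for this infohash)"
        if (seeder, leecher) == (-3, -3):
            return "throttled (tracker enforces min_request_interval)"
        return "%d seeders / %d leechers" % (seeder, leecher)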
Example #20
 def loadMetadata(self,data):
     """ Called by non-GUI thread """
     
     if DEBUG:
         print >>sys.stderr,"subip: ThumbnailViewer: loadMetadata: url",data['url']
     mimetype = None
     bmpdata = None
     if not ('persistent' in data):
         try:
             t = urlparse.urlparse(data['url'])
             #print >>sys.stderr,"subip: ThumbnailViewer: loadMetadata: parsed url",t
             newurl = t[0]+'://'+t[1]+'/'+'favicon.ico'
             if DEBUG:
                 print >>sys.stderr,"subip: ThumbnailViewer: loadMetadata: newurl",newurl
             stream = urlOpenTimeout(newurl,timeout=5)
             mimetype = 'image/x-ico' # 'image/vnd.microsoft.icon' # 'image/ico'
             bmpdata = stream.read()
             stream.close()
         except:
             print_exc()
     
     wx.CallAfter(self.metadata_thread_gui_callback,data,mimetype,bmpdata)
Example #21
    def loadMetadata(self, data):
        """ Called by non-GUI thread """

        if DEBUG:
            print >> sys.stderr, "subip: ThumbnailViewer: loadMetadata: url", data[
                'url']
        mimetype = None
        bmpdata = None
        if not ('persistent' in data):
            try:
                t = urlparse.urlparse(data['url'])
                #print >>sys.stderr,"subip: ThumbnailViewer: loadMetadata: parsed url",t
                newurl = t[0] + '://' + t[1] + '/' + 'favicon.ico'
                if DEBUG:
                    print >> sys.stderr, "subip: ThumbnailViewer: loadMetadata: newurl", newurl
                stream = urlOpenTimeout(newurl, timeout=5)
                mimetype = 'image/x-ico'  # 'image/vnd.microsoft.icon' # 'image/ico'
                bmpdata = stream.read()
                stream.close()
            except:
                print_exc()

        wx.CallAfter(self.metadata_thread_gui_callback, data, mimetype,
                     bmpdata)
Example #22
    def _rerequest_single(self, t, s, l, callback):
        
        if prctlimported:
            prctl.set_name("Tribler"+currentThread().getName())
        
        try:        
            closer = [None]
            def timedout(self = self, l = l, closer = closer):
                if self.lock.trip(l):
                    self.errorcodes['troublecode'] = 'Problem connecting to tracker - timeout exceeded'
                    self.lock.unwait(l)
                try:
                    closer[0]()
                except:
                    pass
                    
            self.externalsched(timedout, self.timeout)

            err = None
            try:
                if DEBUG:
                    print >>sys.stderr,"Rerequest tracker:"
                    print >>sys.stderr,merge_announce(t, s)
                h = urlOpenTimeout(merge_announce(t, s))
                closer[0] = h.close
                data = h.read()
            except (IOError, error), e:
                err = 'Problem connecting to tracker - ' + str(e)
                if DEBUG:
                    print_exc()
            except:
                err = 'Problem connecting to tracker'
                if DEBUG:
                    print_exc()
                    
                    
            #if DEBUG:
            #    print >>sys.stderr,"rerequest: Got data",data
                    
            try:
                h.close()
            except:
                pass
            if err:        
                if self.lock.trip(l):
                    self.errorcodes['troublecode'] = err
                    self.lock.unwait(l)
                return

            if not data:
                if self.lock.trip(l):
                    self.errorcodes['troublecode'] = 'no data from tracker'
                    self.lock.unwait(l)
                return
            
            try:
                r = bdecode(data, sloppy=1)
                if DEBUG:
                    print >>sys.stderr,"Rerequester: Tracker returns:", r
                check_peers(r)
                
                #print >>sys.stderr,"Rerequester: Tracker returns, post check done"
                
            except ValueError, e:
                if DEBUG:
                    print_exc()
                if self.lock.trip(l):
                    self.errorcodes['bad_data'] = 'bad data from tracker - ' + str(e)
                    self.lock.unwait(l)
                return
Example #23
    def refresh(self):
        """Returns a generator for a list of (title,urllib2openedurl_to_torrent)
        pairs for this feed. TorrentFeedReader instances keep a list of
        torrent urls in memory and will yield a torrent only once.
        If the feed points to a torrent url with webserver problems,
        that url will not be retried.
        urllib2openedurl_to_torrent may be None if there is a webserver problem.
        """
        
        # Load history from disk
        if not self.urls_already_seen.readed:
            self.urls_already_seen.read()
            self.urls_already_seen.readed = True
        
        feed_socket = urlOpenTimeout(self.feed_url,timeout=5)
        feed_xml = feed_socket.read()
        feed_socket.close()
        #if DEBUG:
        #    print "<mluc> feed.refresh read xml:",feed_xml
        feed_dom = parseString(feed_xml)

        entries = [(title,link) for title,link in
                   [(item.getElementsByTagName("title")[0].childNodes[0].data,
                     item.getElementsByTagName("link")[0].childNodes[0].data) for
                    item in feed_dom.getElementsByTagName("item")]
                   if link.endswith(".torrent") and not self.urls_already_seen.contains(link)]


        # vuze feeds contain <entry> tags instead of <item> tags; each <entry>
        # includes a <content> tag that carries the link to the torrent file
        # as an attribute. Support them specially.
        for item in feed_dom.getElementsByTagName("entry"):
            title = item.getElementsByTagName("title")[0].childNodes[0].data
            #print "ENCLOSURE",item.getElementsByTagName("content")
            k = item.getElementsByTagName("content").length
            #print "ENCLOSURE LEN",k
            for i in range(k):
                child = item.getElementsByTagName("content").item(i)
                #print "ENCLOSURE CHILD",`child`
                if child.hasAttribute("src"):
                    link = child.getAttribute("src")
                    #print "ENCLOSURE CHILD getattrib",link
                    if not self.urls_already_seen.contains(link):
                        entries.append((title,link))
                #else:
                #    print "ENCLOSURE CHILD NO src"


        if DEBUG:
            print >>sys.stderr,"subscrip: Parse of RSS returned",len(entries),"previously unseen torrents"

#        for title,link in entries:
#            print "Link",link,"is in cache?",self.urls_already_seen.contains(link)
#
#        return

        
        for title,link in entries:
            # print title,link
            try:
                self.urls_already_seen.add(link)
                if DEBUG:
                    print >>sys.stderr,"subscrip: Opening",link
                html_or_tor = urlOpenTimeout(link,timeout=5)
                found_torrent = False
                tor_type = html_or_tor.headers.gettype()
                if self.isTorrentType(tor_type):
                    torrent = html_or_tor
                    found_torrent = True
                    if DEBUG:
                        print >>sys.stderr,"subscrip: Yielding",link
                    yield title,torrent
                elif False: # 'html' in tor_type:
                    html = html_or_tor.read()
                    hrefs = [match.group(1) for match in self.href_re.finditer(html)]
                          
                    urls = []
                    for url in hrefs:
                        if not self.urls_already_seen.contains(url):
                            self.urls_already_seen.add(url)
                            urls.append(urlparse.urljoin(link,url))
                    for url in urls:
                        #print url
                        try:
                            if DEBUG:
                                print >>sys.stderr,"subscrip: Opening",url
                            torrent = urlOpenTimeout(url)
                            url_type = torrent.headers.gettype()
                            #print url_type
                            if self.isTorrentType(url_type):
                                #print "torrent found:",url
                                found_torrent = True
                                if DEBUG:
                                    print >>sys.stderr,"subscrip: Yielding",url
                                yield title,torrent
                                break
                            else:
                                # it's not a torrent after all, but just some html link
                                pass
                        except:
                            #url didn't open
                            pass
                if not found_torrent:
                    yield title,None
            except:
                traceback.print_exc()
                yield title,None
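Consuming the generator (a sketch; how the surrounding TorrentFeedReader instance is constructed varies between versions):

    for title, torrent in reader.refresh():
        if torrent is None:
            continue  # webserver problem for this item; the url is not retried
        bdata = torrent.read()  # urllib2-style opened stream of .torrent data
        torrent.close()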
Example #24
    def _rerequest_single(self, t, s, l, callback):

        if prctlimported:
            prctl.set_name("Tribler" + currentThread().getName())

        try:
            closer = [None]

            def timedout(self=self, l=l, closer=closer):
                if self.lock.trip(l):
                    self.errorcodes[
                        'troublecode'] = 'Problem connecting to tracker - timeout exceeded'
                    self.lock.unwait(l)
                try:
                    closer[0]()
                except:
                    pass

            self.externalsched(timedout, self.timeout)

            err = None
            try:
                if DEBUG:
                    print >> sys.stderr, "Rerequest tracker:"
                    print >> sys.stderr, merge_announce(t, s)
                h = urlOpenTimeout(merge_announce(t, s))
                closer[0] = h.close
                data = h.read()
            except (IOError, error), e:
                err = 'Problem connecting to tracker - ' + str(e)
                if DEBUG:
                    print_exc()
            except:
                err = 'Problem connecting to tracker'
                if DEBUG:
                    print_exc()

            #if DEBUG:
            #    print >>sys.stderr,"rerequest: Got data",data

            try:
                h.close()
            except:
                pass
            if err:
                if self.lock.trip(l):
                    self.errorcodes['troublecode'] = err
                    self.lock.unwait(l)
                return

            if not data:
                if self.lock.trip(l):
                    self.errorcodes['troublecode'] = 'no data from tracker'
                    self.lock.unwait(l)
                return

            try:
                r = bdecode(data, sloppy=1)
                if DEBUG:
                    print >> sys.stderr, "Rerequester: Tracker returns:", r
                check_peers(r)

                #print >>sys.stderr,"Rerequester: Tracker returns, post check done"

            except ValueError, e:
                if DEBUG:
                    print_exc()
                if self.lock.trip(l):
                    self.errorcodes[
                        'bad_data'] = 'bad data from tracker - ' + str(e)
                    self.lock.unwait(l)
                return
Example #25
    def refresh(self):
        """Returns a generator for a list of (title,urllib2openedurl_to_torrent)
        pairs for this feed. TorrentFeedReader instances keep a list of
        torrent urls in memory and will yield a torrent only once.
        If the feed points to a torrent url with webserver problems,
        that url will not be retried.
        urllib2openedurl_to_torrent may be None if there is a webserver problem.
        """

        # Load history from disk
        if not self.urls_already_seen.readed:
            self.urls_already_seen.read()
            self.urls_already_seen.readed = True

        feed_socket = urlOpenTimeout(self.feed_url, timeout=5)
        feed_xml = feed_socket.read()
        feed_socket.close()
        #if DEBUG:
        #    print "<mluc> feed.refresh read xml:",feed_xml
        feed_dom = parseString(feed_xml)

        entries = [(title, link) for title, link in
                   [(item.getElementsByTagName("title")[0].childNodes[0].data,
                     item.getElementsByTagName("link")[0].childNodes[0].data)
                    for item in feed_dom.getElementsByTagName("item")]
                   if link.endswith(".torrent")
                   and not self.urls_already_seen.contains(link)]

        # vuze feeds contain <entry> tags instead of <item> tags; each <entry>
        # includes a <content> tag that carries the link to the torrent file
        # as an attribute. Support them specially.
        for item in feed_dom.getElementsByTagName("entry"):
            title = item.getElementsByTagName("title")[0].childNodes[0].data
            #print "ENCLOSURE",item.getElementsByTagName("content")
            k = item.getElementsByTagName("content").length
            #print "ENCLOSURE LEN",k
            for i in range(k):
                child = item.getElementsByTagName("content").item(i)
                #print "ENCLOSURE CHILD",`child`
                if child.hasAttribute("src"):
                    link = child.getAttribute("src")
                    #print "ENCLOSURE CHILD getattrib",link
                    if not self.urls_already_seen.contains(link):
                        entries.append((title, link))
                #else:
                #    print "ENCLOSURE CHILD NO src"

        if DEBUG:
            print >> sys.stderr, "subscrip: Parse of RSS returned", len(
                entries), "previously unseen torrents"


#        for title,link in entries:
#            print "Link",link,"is in cache?",self.urls_already_seen.contains(link)
#
#        return

        for title, link in entries:
            # print title,link
            try:
                self.urls_already_seen.add(link)
                if DEBUG:
                    print >> sys.stderr, "subscrip: Opening", link
                html_or_tor = urlOpenTimeout(link, timeout=5)
                found_torrent = False
                tor_type = html_or_tor.headers.gettype()
                if self.isTorrentType(tor_type):
                    torrent = html_or_tor
                    found_torrent = True
                    if DEBUG:
                        print >> sys.stderr, "subscrip: Yielding", link
                    yield title, torrent
                elif False:  # 'html' in tor_type:
                    html = html_or_tor.read()
                    hrefs = [
                        match.group(1) for match in self.href_re.finditer(html)
                    ]

                    urls = []
                    for url in hrefs:
                        if not self.urls_already_seen.contains(url):
                            self.urls_already_seen.add(url)
                            urls.append(urlparse.urljoin(link, url))
                    for url in urls:
                        #print url
                        try:
                            if DEBUG:
                                print >> sys.stderr, "subscrip: Opening", url
                            torrent = urlOpenTimeout(url)
                            url_type = torrent.headers.gettype()
                            #print url_type
                            if self.isTorrentType(url_type):
                                #print "torrent found:",url
                                found_torrent = True
                                if DEBUG:
                                    print >> sys.stderr, "subscrip: Yielding", url
                                yield title, torrent
                                break
                            else:
                                # it's not a torrent after all, but just some html link
                                pass
                        except:
                            #url didn't open
                            pass
                if not found_torrent:
                    yield title, None
            except:
                traceback.print_exc()
                yield title, None
Example #26
    def refresh(self):
        """Returns a generator for a list of (title,urllib2openedurl_to_torrent)
        pairs for this feed. TorrentFeedReader instances keep a list of
        torrent urls in memory and will yield a torrent only once.
        If the feed points to a torrent url with webserver problems,
        that url will not be retried.
        urllib2openedurl_to_torrent may be None if there is a webserver problem.
        """

        # Load history from disk
        if not self.urls_already_seen.readed:
            self.urls_already_seen.read()
            self.urls_already_seen.readed = True

        while True:
            try:
                feed_socket = urlOpenTimeout(self.feed_url, timeout=20)
                feed_xml = feed_socket.read()
                feed_socket.close()
                break
            except:
                yield None, None
                return

        # 14/07/08 boudewijn: some special characters and html code
        # raise a parser exception. We filter out these character
        # sequences using a regular expression in the _filter_xml
        # function
        dom = parseString(self._filter_xml(feed_xml))
        entries = []

        # The following XML will result in three links with the same title.
        #
        # <item>
        # <title>The title</title>
        # <link>http://frayja.com/torrent/1</link>
        # <foobar src="frayja.com/torrent/2">Unused title</foobar>
        # <moomilk url="frayja.com/torrent/3">Unused title</moomilk>
        # </item>
        for item in dom.getElementsByTagName(
                "item"):  #+ dom.getElementsByTagName("entry"):
            title = None
            links = []
            child = item.firstChild
            while child:
                if child.nodeType == 1:  # ELEMENT_NODE (according to the DOM standard)
                    if child.nodeName == "title" and child.firstChild:
                        title = child.firstChild.data

                    if child.nodeName == "link" and child.firstChild:
                        links.append(child.firstChild.data)

                    if child.hasAttribute("src"):
                        links.append(child.getAttribute("src"))

                    if child.hasAttribute("url"):
                        links.append(child.getAttribute("url"))

                child = child.nextSibling

            if title and links:
                entries.extend([(title, link) for link in links
                                if not self.urls_already_seen.contains(link)])

        if DEBUG:
            print >> sys.stderr, "subscrip: Parse of RSS returned", len(
                entries), "previously unseen torrents"

        for title, link in entries:
            # print title,link
            try:
                self.urls_already_seen.add(link)
                if DEBUG:
                    print >> sys.stderr, "subscrip: Opening", title, link
                html_or_tor = urlOpenTimeout(link, timeout=20)
                found_torrent = False
                tor_type = html_or_tor.headers.gettype()

                if self.isTorrentType(tor_type):
                    torrent = html_or_tor
                    found_torrent = True
                    if DEBUG:
                        print >> sys.stderr, "subscrip: torrent1: Yielding", link
                    yield title, torrent

                elif False:  # 'html' in tor_type:
                    html = html_or_tor.read()
                    hrefs = [
                        match.group(1) for match in self.href_re.finditer(html)
                    ]

                    urls = []
                    for url in hrefs:
                        if not self.urls_already_seen.contains(url):
                            self.urls_already_seen.add(url)
                            urls.append(urlparse.urljoin(link, url))
                    for url in urls:
                        #print url
                        try:
                            if DEBUG:
                                print >> sys.stderr, "subscrip: torrent2: Opening", url
                            torrent = urlOpenTimeout(url)
                            url_type = torrent.headers.gettype()
                            #print url_type
                            if self.isTorrentType(url_type):
                                #print "torrent found:",url
                                found_torrent = True
                                if DEBUG:
                                    print >> sys.stderr, "subscrip: torrent2: Yielding", url
                                yield title, torrent
                                break
                            else:
                                # it's not a torrent after all, but just some html link
                                if DEBUG:
                                    print >> sys.stderr, "subscrip:%s not a torrent" % url
                        except:
                            #url didn't open
                            if DEBUG:
                                print >> sys.stderr, "subscrip:%s did not open" % url
                if not found_torrent:
                    yield title, None
            except GeneratorExit:
                if DEBUG:
                    print >> sys.stderr, "subscrip:GENERATOREXIT"
                # the generator is destroyed. we accept this by returning
                return
            except Exception, e:
                print >> sys.stderr, "rss_client:", e
                yield title, None
Example #27
    def refresh(self):
        """Returns a generator for a list of (title,urllib2openedurl_to_torrent)
        pairs for this feed. TorrentFeedReader instances keep a list of
        torrent urls in memory and will yield a torrent only once.
        If the feed points to a torrent url with webserver problems,
        that url will not be retried.
        urllib2openedurl_to_torrent may be None if there is a webserver problem.
        """
        
        # Load history from disk
        if not self.urls_already_seen.readed:
            self.urls_already_seen.read()
            self.urls_already_seen.readed = True

        while True:
            try:
                feed_socket = urlOpenTimeout(self.feed_url,timeout=20)
                feed_xml = feed_socket.read()
                feed_socket.close()
                break
            except:
                yield None, None
                return

        # 14/07/08 boudewijn: some special characters and html code
        # raise a parser exception. We filter out these character
        # sequences using a regular expression in the _filter_xml
        # function
        dom = parseString(self._filter_xml(feed_xml))
        entries = []

        # The following XML will result in three links with the same title.
        #
        # <item>
        # <title>The title</title>
        # <link>http://frayja.com/torrent/1</link>
        # <foobar src="frayja.com/torrent/2">Unused title</foobar>
        # <moomilk url="frayja.com/torrent/3">Unused title</moomilk>
        # </item>
        for item in dom.getElementsByTagName("item"): #+ dom.getElementsByTagName("entry"):
            title = None
            links = []
            child = item.firstChild
            while child:
                if child.nodeType == 1: # ELEMENT_NODE (according to the DOM standard)
                    if child.nodeName == "title" and child.firstChild:
                        title = child.firstChild.data

                    if child.nodeName == "link" and child.firstChild:
                        links.append(child.firstChild.data)

                    if child.hasAttribute("src"):
                        links.append(child.getAttribute("src"))

                    if child.hasAttribute("url"):
                        links.append(child.getAttribute("url"))

                child = child.nextSibling

            if title and links:
                entries.extend([(title, link) for link in links if not self.urls_already_seen.contains(link)])

        if DEBUG:
            print >>sys.stderr,"subscrip: Parse of RSS returned",len(entries),"previously unseen torrents"

        for title,link in entries:
            # print title,link
            try:
                self.urls_already_seen.add(link)
                if DEBUG:
                    print >>sys.stderr,"subscrip: Opening",title,link
                html_or_tor = urlOpenTimeout(link,timeout=20)
                found_torrent = False
                tor_type = html_or_tor.headers.gettype()
                
                if self.isTorrentType(tor_type):
                    torrent = html_or_tor
                    found_torrent = True
                    if DEBUG:
                        print >>sys.stderr,"subscrip: torrent1: Yielding",link
                    yield title,torrent
                    
                elif False: # 'html' in tor_type:
                    html = html_or_tor.read()
                    hrefs = [match.group(1) for match in self.href_re.finditer(html)]
                          
                    urls = []
                    for url in hrefs:
                        if not self.urls_already_seen.contains(url):
                            self.urls_already_seen.add(url)
                            urls.append(urlparse.urljoin(link,url))
                    for url in urls:
                        #print url
                        try:
                            if DEBUG:
                                print >>sys.stderr,"subscrip: torrent2: Opening",url
                            torrent = urlOpenTimeout(url)
                            url_type = torrent.headers.gettype()
                            #print url_type
                            if self.isTorrentType(url_type):
                                #print "torrent found:",url
                                found_torrent = True
                                if DEBUG:
                                    print >>sys.stderr,"subscrip: torrent2: Yielding",url
                                yield title,torrent
                                break
                            else:
                                # it's not a torrent after all, but just some html link
                                if DEBUG:
                                    print >>sys.stderr,"subscrip:%s not a torrent" % url
                        except:
                            #url didn't open
                            if DEBUG:
                                print >>sys.stderr,"subscrip:%s did not open" % url
                if not found_torrent:
                    yield title,None
            except GeneratorExit:
                if DEBUG:
                    print >>sys.stderr,"subscrip:GENERATOREXIT"
                # the generator is destroyed. we accept this by returning
                return
            except Exception, e:
                print >> sys.stderr, "rss_client:", e
                yield title,None