def load_from_url(url):
    """
    If the URL starts with 'http:' load a BT .torrent or Tribler .tstream
    file from the URL and convert it into a TorrentDef. If the URL starts
    with our URL scheme, we convert the URL to a URL-compatible TorrentDef.

    @param url URL
    @return TorrentDef.
    """
    # Class method, no locking required
    if url.startswith(P2PURL_SCHEME):
        (metainfo, swarmid) = makeurl.p2purl2metainfo(url)

        # Metainfo created from URL, so create URL-compatible TorrentDef.
        metainfo['info']['url-compat'] = 1

        # For testing EXISTING LIVE: ENABLE, for old EXISTING MERKLE: DISABLE
        #metainfo['info']['name.utf-8'] = metainfo['info']['name']

        t = TorrentDef._create(metainfo)

        return t
    else:
        f = urlOpenTimeout(url)
        return TorrentDef._read(f)
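A minimal usage sketch of the two branches (the URLs are hypothetical; TorrentDef and P2PURL_SCHEME come from the surrounding Tribler module):

# Hedged sketch, not from the source: exercising both branches.
# HTTP branch: the .torrent is fetched via urlOpenTimeout and parsed.
tdef = TorrentDef.load_from_url('http://example.com/file.torrent')

# P2PURL branch: the metainfo is decoded from the URL itself,
# so no network round-trip is needed.
tdef = TorrentDef.load_from_url(P2PURL_SCHEME + '://...')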
Example 2
def reopen(self):
    while True:
        try:
            self.stream = urlOpenTimeout(self.url)  # 30 sec timeout
            break
        except:
            print_exc()
            time.sleep(5.0)  # No exp. backoff, get back ASAP
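The fixed 5-second sleep is deliberate: for a live stream the reader wants to reattach as soon as the server is back, so exponential backoff would only add latency. The same pattern as a standalone helper, a minimal sketch assuming only urlOpenTimeout (the max_tries bound is an addition for illustration):

import time
from traceback import print_exc

def open_with_retry(url, max_tries=10, delay=5.0):
    # Retry with a fixed delay rather than backoff, so a live
    # stream is rejoined as soon as the server recovers.
    for _ in xrange(max_tries):
        try:
            return urlOpenTimeout(url)
        except Exception:
            print_exc()
            time.sleep(delay)
    raise IOError("could not open %s" % url)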
Example 3
def parse(self):
    self.title2entrymap = {}
    print >> sys.stderr, "feedp: Parsing", self.feedurl
    stream = urlOpenTimeout(self.feedurl, 10)
    self.tree = etree.parse(stream)
    entries = self.tree.findall('{http://www.w3.org/2005/Atom}entry')
    for entry in entries:
        titleelement = entry.find('{http://www.w3.org/2005/Atom}title')
        #print >> sys.stderr, "feedp: Got title", titleelement.text
        self.title2entrymap[titleelement.text] = entry
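The '{http://www.w3.org/2005/Atom}' prefix is ElementTree's Clark notation for namespace-qualified tags; without it, findall would not match elements in the Atom namespace. A self-contained sketch of the same indexing against an in-memory feed (hypothetical entry titles):

from StringIO import StringIO
from xml.etree import ElementTree as etree

ATOM = '{http://www.w3.org/2005/Atom}'
FEED = """<feed xmlns="http://www.w3.org/2005/Atom">
  <entry><title>Episode 1</title></entry>
  <entry><title>Episode 2</title></entry>
</feed>"""

tree = etree.parse(StringIO(FEED))
title2entry = {}
for entry in tree.findall(ATOM + 'entry'):
    title2entry[entry.find(ATOM + 'title').text] = entry
print sorted(title2entry.keys())  # ['Episode 1', 'Episode 2']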
Example 4
def parse(self):
    self.feedurls = []
    stream = urlOpenTimeout(self.metafeedurl, 10)
    self.tree = etree.parse(stream)
    entries = self.tree.findall('{http://www.w3.org/2005/Atom}entry')
    for entry in entries:
        titleelement = entry.find('{http://www.w3.org/2005/Atom}title')
        linkelement = entry.find('{http://www.w3.org/2005/Atom}link')
        if linkelement is not None:
            # Use .get(): a <link> without a type attribute should be
            # skipped, not raise a KeyError.
            if linkelement.attrib.get('type') == 'application/atom+xml':
                # Got feed
                feedurl = linkelement.attrib['href']
                self.feedurls.append(feedurl)
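A metafeed here is an Atom feed whose entries link to further Atom feeds (type 'application/atom+xml'). How the collected URLs might be consumed, as a hedged wiring sketch (the parser objects and the FeedParser constructor are hypothetical, modelled on the two parse() snippets above):

metaparser.parse()                    # fills metaparser.feedurls
for feedurl in metaparser.feedurls:
    feedparser = FeedParser(feedurl)  # hypothetical constructor
    feedparser.parse()                # fills feedparser.title2entrymap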
Example 5
def getStatus(url, info_hash):
    try:
        resp = timeouturlopen.urlOpenTimeout(url, timeout=HTTP_TIMEOUT)
        response = resp.read()

    except IOError:
        #        print "IOError"
        return (-1, -1)  # unknown
    except AttributeError:
        #        print "AttributeError"
        return (-2, -2)  # dead

    try:
        response_dict = bdecode(response)

    except:
        #        print "DeCode Error "  + response
        return (-2, -2)  # dead

    try:
        status = response_dict["files"][info_hash]
        seeder = status["complete"]
        if seeder < 0:
            seeder = 0
        leecher = status["incomplete"]
        if leecher < 0:
            leecher = 0

    except KeyError:
        #        print "KeyError "  + info_hash + str(response_dict)
        try:
            if response_dict.has_key("flags"):  # may be interval problem
                if response_dict["flags"].has_key("min_request_interval"):
                    #                    print "interval problem"
                    return (-3, -3)
        except:
            pass
#        print "KeyError "  + info_hash + str(response_dict)
        return (-2, -2)  # dead

    return (seeder, leecher)
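The sentinel return values encode the scrape outcome: (-1, -1) unknown (I/O error), (-2, -2) dead tracker or undecodable reply, (-3, -3) the tracker demanded a longer min_request_interval. A minimal caller sketch (tracker URL and info_hash are hypothetical placeholders):

seeders, leechers = getStatus('http://tracker.example.com/scrape', info_hash)
if (seeders, leechers) == (-3, -3):
    pass  # tracker enforces min_request_interval: back off before retrying
elif seeders < 0:
    pass  # (-1, -1) unknown or (-2, -2) dead: no usable swarm size
else:
    print "%d seeders, %d leechers" % (seeders, leechers)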
Example 6
    def refresh(self):
        """Returns a generator for a list of (title,urllib2openedurl_to_torrent)
        pairs for this feed. TorrentFeedReader instances keep a list of
        torrent urls in memory and will yield a torrent only once.
        If the feed points to a torrent url with webserver problems,
        that url will not be retried.
        urllib2openedurl_to_torrent may be None if there is a webserver problem.
        """
        
        # Load history from disk
        if not self.urls_already_seen.readed:
            self.urls_already_seen.read()
            self.urls_already_seen.readed = True

        while True:
            try:
                feed_socket = urlOpenTimeout(self.feed_url,timeout=20)
                feed_xml = feed_socket.read()
                feed_socket.close()
                break
            except:
                yield None, None

        # 14/07/08 boudewijn: some special characters and HTML codes
        # raise a parser exception. We filter out these character
        # sequences using a regular expression in the _filter_xml
        # function
        dom = parseString(self._filter_xml(feed_xml))
        entries = []

        # The following XML will result in three links with the same title.
        #
        # <item>
        # <title>The title</title>
        # <link>http://frayja.com/torrent/1</link>
        # <foobar src="frayja.com/torrent/2">Unused title</foobar>
        # <moomilk url="frayja.com/torrent/3">Unused title</moomilk>
        # </item>
        for item in dom.getElementsByTagName("item"): #+ dom.getElementsByTagName("entry"):
            title = None
            links = []
            child = item.firstChild
            while child:
                if child.nodeType == 1: # ELEMENT_NODE (according to the DOM standard)
                    if child.nodeName == "title" and child.firstChild:
                        title = child.firstChild.data

                    if child.nodeName == "link" and child.firstChild:
                        links.append(child.firstChild.data)

                    if child.hasAttribute("src"):
                        links.append(child.getAttribute("src"))

                    if child.hasAttribute("url"):
                        links.append(child.getAttribute("url"))

                child = child.nextSibling

            if title and links:
                entries.extend([(title, link) for link in links])

        if DEBUG:
            print >>sys.stderr,time.asctime(),'-', "subscrip: Parse of RSS returned",len(entries),"previously unseen torrents"

        for title,link in entries:
            # print title,link
            try:
                self.urls_already_seen.add(link)
                if DEBUG:
                    print >>sys.stderr,time.asctime(),'-', "subscrip: Opening",title,link
                html_or_tor = urlOpenTimeout(link,timeout=20)
                found_torrent = False
                tor_type = html_or_tor.headers.gettype()
                if self.isTorrentType(tor_type):
                    torrent = html_or_tor
                    found_torrent = True
                    if DEBUG:
                        print >>sys.stderr,time.asctime(),'-', "subscrip: torrent1: Yielding",link
                    yield title,torrent
                elif False: # 'html' in tor_type:
                    html = html_or_tor.read()
                    hrefs = [match.group(1) for match in self.href_re.finditer(html)]
                          
                    urls = []
                    for url in hrefs:
                        if not self.urls_already_seen.contains(url):
                            self.urls_already_seen.add(url)
                            urls.append(urlparse.urljoin(link,url))
                    for url in urls:
                        #print url
                        try:
                            if DEBUG:
                                print >>sys.stderr,time.asctime(),'-', "subscrip: torrent2: Opening",url
                            torrent = urlOpenTimeout(url)
                            url_type = torrent.headers.gettype()
                            #print url_type
                            if self.isTorrentType(url_type):
                                #print "torrent found:",url
                                found_torrent = True
                                if DEBUG:
                                    print >>sys.stderr,time.asctime(),'-', "subscrip: torrent2: Yielding",url
                                yield title,torrent
                                break
                            else:
                                # it's not a torrent after all, but just some HTML link
                                if DEBUG:
                                    print >>sys.stderr,time.asctime(),'-', "subscrip:%s not a torrent" % url
                        except:
                            #url didn't open
                            if DEBUG:
                                print >>sys.stderr,time.asctime(),'-', "subscrip:%s did not open" % url
                if not found_torrent:
                    yield title,None
            except GeneratorExit:
                if DEBUG:
                    print >>sys.stderr,time.asctime(),'-', "subscrip:GENERATOREXIT"
                # the generator is destroyed. we accept this by returning
                return
            except Exception, e:
                print >> sys.stderr, time.asctime(),'-', "rss_client:", e
                yield title,None
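A minimal consumer sketch (the TorrentFeedReader constructor call is hypothetical; per the docstring, the generator yields (title, None) on webserver problems, so callers must expect None):

reader = TorrentFeedReader(feed_url)  # hypothetical constructor
for title, stream in reader.refresh():
    if stream is None:
        continue  # feed or torrent URL had a webserver problem
    data = stream.read()  # urllib2-opened .torrent content
    # hand `data` to the torrent-adding machinery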
Example 7
    def refresh(self):
        """Returns a generator for a list of (title,urllib2openedurl_to_torrent)
        pairs for this feed. TorrentFeedReader instances keep a list of
        torrent urls in memory and will yield a torrent only once.
        If the feed points to a torrent url with webserver problems,
        that url will not be retried.
        urllib2openedurl_to_torrent may be None if there is a webserver problem.
        """

        # Load history from disk
        if not self.urls_already_seen.readed:
            self.urls_already_seen.read()
            self.urls_already_seen.readed = True

        feed_socket = urlOpenTimeout(self.feed_url, timeout=20)
        feed_xml = feed_socket.read()
        feed_socket.close()

        # 14/07/08 boudewijn: some special characters and HTML codes
        # raise a parser exception. We filter out these character
        # sequences using a regular expression in the _filter_xml
        # function
        dom = parseString(self._filter_xml(feed_xml))
        entries = []

        # The following XML will result in three links with the same title.
        #
        # <item>
        # <title>The title</title>
        # <link>http://frayja.com/torrent/1</link>
        # <foobar src="frayja.com/torrent/2">Unused title</foobar>
        # <moomilk url="frayja.com/torrent/3">Unused title</moomilk>
        # </item>
        for item in dom.getElementsByTagName("item"):  #+ dom.getElementsByTagName("entry"):
            title = None
            links = []
            child = item.firstChild
            while child:
                if child.nodeType == 1:  # ELEMENT_NODE (according to the DOM standard)
                    if child.nodeName == "title" and child.firstChild:
                        title = child.firstChild.data

                    if child.nodeName == "link" and child.firstChild:
                        links.append(child.firstChild.data)

                    if child.hasAttribute("src"):
                        links.append(child.getAttribute("src"))

                    if child.hasAttribute("url"):
                        links.append(child.getAttribute("url"))

                child = child.nextSibling

            if title and links:
                entries.extend([(title, link) for link in links])

        if DEBUG:
            print >> sys.stderr, "subscrip: Parse of RSS returned", len(
                entries), "previously unseen torrents"

        for title, link in entries:
            # print title,link
            try:
                self.urls_already_seen.add(link)
                if DEBUG:
                    print >> sys.stderr, "subscrip: Opening", title, link
                html_or_tor = urlOpenTimeout(link, timeout=20)
                found_torrent = False
                tor_type = html_or_tor.headers.gettype()
                if self.isTorrentType(tor_type):
                    torrent = html_or_tor
                    found_torrent = True
                    if DEBUG:
                        print >> sys.stderr, "subscrip: torrent1: Yielding", link
                    yield title, torrent
                elif False:  # 'html' in tor_type:
                    html = html_or_tor.read()
                    hrefs = [match.group(1) for match in self.href_re.finditer(html)]

                    urls = []
                    for url in hrefs:
                        if not self.urls_already_seen.contains(url):
                            self.urls_already_seen.add(url)
                            urls.append(urlparse.urljoin(link, url))
                    for url in urls:
                        #print url
                        try:
                            if DEBUG:
                                print >> sys.stderr, "subscrip: torrent2: Opening", url
                            torrent = urlOpenTimeout(url)
                            url_type = torrent.headers.gettype()
                            #print url_type
                            if self.isTorrentType(url_type):
                                #print "torrent found:",url
                                found_torrent = True
                                if DEBUG:
                                    print >> sys.stderr, "subscrip: torrent2: Yielding", url
                                yield title, torrent
                                break
                            else:
                                # it's not a torrent after all, but just some HTML link
                                if DEBUG:
                                    print >> sys.stderr, "%s not a torrent" % url
                        except:
                            #url didn't open
                            if DEBUG:
                                print >> sys.stderr, "%s did not open" % url
                if not found_torrent:
                    yield title, None
            except GeneratorExit:
                if DEBUG:
                    print >> sys.stderr, "GENERATOREXIT"
                # the generator is destroyed. we accept this by returning
                return
            except:
                traceback.print_exc()
                yield title, None