예제 #1
0
 def url_from_page(self, url):
     """Parse the torrent URL out of a newtorrents download page.

     :param url: address of the download page to fetch
     :returns: the torrent URL embedded in the page's ``copy('...')``
         javascript call
     :raises UrlRewritingError: when the page cannot be retrieved or the
         expected link is missing from it
     """
     try:
         page = urlopener(url, log)
         data = page.read()
     except urllib2.URLError:
         raise UrlRewritingError('URLerror when retrieving page')
     # Raw string so the backslashes reach the regex engine untouched;
     # the quotes inside need no escaping in a double-quoted literal.
     p = re.compile(r"copy\('(.*)'\)", re.IGNORECASE)
     f = p.search(data)
     if not f:
         # the link in which plugin relies is missing!
         raise UrlRewritingError(
             'Failed to get url from download page. Plugin may need a update.'
         )
     return f.group(1)
예제 #2
0
class UrlRewriteBakaBT(object):
    """BakaBT urlrewriter.

    Rewrites BakaBT torrent-page URLs into direct download links by
    scraping the page for the ``download_link`` anchor.
    """

    # urlrewriter API
    def url_rewritable(self, task, entry):
        """Return True if the entry's URL is a BakaBT page needing rewrite."""
        url = entry['url']
        # Already a direct download link -- nothing to rewrite.
        if url.startswith('http://www.bakabt.com/download/'):
            return False
        # str.startswith accepts a tuple of alternative prefixes.
        return url.startswith(('http://www.bakabt.com/', 'http://bakabt.com/'))

    # urlrewriter API
    def url_rewrite(self, task, entry):
        """Replace the entry's URL with the direct download URL."""
        entry['url'] = self.parse_download_page(entry['url'])

    @internet(log)
    def parse_download_page(self, url):
        """Fetch *url* and extract the direct download link from the page.

        :raises UrlRewritingError: if the page cannot be parsed or no
            download link is found on it.
        """
        txheaders = {
            'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
        }
        req = urllib2.Request(url, None, txheaders)
        page = urlopener(req, log)
        try:
            soup = get_soup(page)
        except Exception as e:  # 'as' form works on py2.6+ and py3
            raise UrlRewritingError(e)
        tag_a = soup.find('a', attrs={'class': 'download_link'})
        if not tag_a:
            raise UrlRewritingError(
                'Unable to locate download link from url %s' % url)
        # href on the page is site-relative; prepend the site root.
        return 'http://www.bakabt.com' + tag_a.get('href')
예제 #3
0
 def parse_download_page(self, url):
     """Fetch *url* and return the torrent link found on the page.

     :raises UrlRewritingError: when the page cannot be parsed or no
         download link is present.
     """
     page = requests.get(url).content
     try:
         soup = get_soup(page)
         tag_div = soup.find('div', attrs={'class': 'download'})
         if not tag_div:
             raise UrlRewritingError(
                 'Unable to locate download link from url %s' % url)
         tag_a = tag_div.find('a')
         torrent_url = tag_a.get('href')
         # URL is sometimes missing the schema
         if torrent_url.startswith('//'):
             torrent_url = 'http:' + torrent_url
         return torrent_url
     except UrlRewritingError:
         # Re-raise as-is; previously the broad handler below caught and
         # double-wrapped the UrlRewritingError raised above.
         raise
     except Exception as e:
         raise UrlRewritingError(e)
예제 #4
0
 def url_rewrite(self, task, entry):
     """Resolve a newtorrents search URL via the site search.

     :raises UrlRewritingError: if the search fails with a PluginWarning.
     """
     url = entry['url']
     # str.startswith accepts a tuple of alternative prefixes.
     if url.startswith(('http://www.newtorrents.info/?q=',
                        'http://www.newtorrents.info/search')):
         try:
             url = self.entries_from_search(entry['title'],
                                            url=url)[0]['url']
         except PluginWarning as e:
             raise UrlRewritingError(e.value)
     # NOTE(review): the resolved url is never written back to
     # entry['url'] here; the fuller variant of this method elsewhere in
     # this file does so -- confirm whether this snippet is truncated.
예제 #5
0
 def parse_download_page(self, url):
     """Fetch *url* and parse it into a soup document.

     NOTE(review): the parsed ``soup`` is never used and the function
     implicitly returns None -- this looks like a truncated example;
     confirm against the full implementation.

     :raises UrlRewritingError: if the fetched page cannot be parsed.
     """
     txheaders = {
         'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
     }
     req = urllib2.Request(url, None, txheaders)
     page = urlopener(req, log)
     try:
         soup = get_soup(page)
     except Exception as e:  # 'as' form works on py2.6+ and py3
         raise UrlRewritingError(e)
예제 #6
0
 def url_rewrite(self, task, entry):
     """Rewrite a piratebay search URL to the first search result's URL.

     :raises UrlRewritingError: if the search raises a PluginWarning.
     """
     if 'url' not in entry:
         log.error("Didn't actually get a URL...")
         # Bail out: without a url there is nothing to rewrite, and the
         # lookups below would raise KeyError.
         return
     log.debug("Got the URL: %s", entry['url'])
     if entry['url'].startswith(('http://thepiratebay.se/search/',
                                 'http://thepiratebay.org/search/')):
         # use search
         try:
             entry['url'] = self.search(entry['title'])[0]['url']
         except PluginWarning as e:
             raise UrlRewritingError(e)
예제 #7
0
    def url_rewrite(self, task, entry):
        """Apply the first configured regexp rewrite matching the entry URL.

        Fails the entry and raises if the rewritten URL still matches its
        own regexp, since rewriting would then never terminate.

        :raises UrlRewritingError: when a rewrite result still matches.
        """
        for name, config in self.resolves.iteritems():
            regexp = config['regexp_compiled']
            # renamed from 'format': shadowed the builtin of that name
            fmt = config['format']
            if regexp.search(entry['url']):
                log.debug('Regexp resolving %s with %s', entry['url'], name)

                # run the regexp
                entry['url'] = regexp.sub(fmt, entry['url'])

                if regexp.match(entry['url']):
                    task.fail(entry, 'urlrewriting')
                    task.purge()
                    # local import -- presumably avoids a circular import
                    # at module load time; confirm before hoisting.
                    from plugin_urlrewriting import UrlRewritingError
                    raise UrlRewritingError('Regexp %s result should NOT continue to match!' % name)
                return
예제 #8
0
        url = entry['url']
        if (url.startswith('http://www.newtorrents.info/?q=')
                or url.startswith('http://www.newtorrents.info/search')):
            try:
                url = self.entries_from_search(entry['title'],
                                               url=url)[0]['url']
            except PluginWarning, e:
                raise UrlRewritingError(e.value)
        else:
            url = self.url_from_page(url)

        if url:
            entry['url'] = url
            self.resolved.append(url)
        else:
            raise UrlRewritingError('Bug in newtorrents urlrewriter')

    # Search plugin API
    def search(self, query, comparator, config=None):
        """Search entry point; thin delegate to :meth:`entries_from_search`.

        The *config* argument is accepted for API compatibility but is
        not forwarded.
        """
        results = self.entries_from_search(query, comparator=comparator)
        return results

    @internet(log)
    def url_from_page(self, url):
        """Parses torrent url from newtorrents download page"""
        try:
            page = urlopener(url, log)
            data = page.read()
        except urllib2.URLError:
            raise UrlRewritingError('URLerror when retrieving page')
        p = re.compile("copy\(\'(.*)\'\)", re.IGNORECASE)
        f = p.search(data)