def url_from_page(self, url):
    """Parses torrent url from newtorrents download page"""
    try:
        page = urlopener(url, log)
        data = page.read()
    except urllib2.URLError:
        raise UrlRewritingError('URLError when retrieving page')
    p = re.compile(r"copy\('(.*)'\)", re.IGNORECASE)
    f = p.search(data)
    if not f:
        # the link on which the plugin relies is missing!
        raise UrlRewritingError('Failed to get url from download page. Plugin may need an update.')
    else:
        return f.group(1)
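# Hedged illustration, not part of the plugin: the regexp above assumes the
# NewTorrents download page embeds the torrent link in a JavaScript copy('...')
# call. The HTML fragment and id below are invented placeholders, used only to
# show what the capture group returns.
if __name__ == '__main__':
    import re
    sample_data = '<a href="javascript:copy(\'http://www.newtorrents.info/down.php?id=12345\')">copy</a>'
    match = re.search(r"copy\('(.*)'\)", sample_data, re.IGNORECASE)
    if match:
        print match.group(1)  # http://www.newtorrents.info/down.php?id=12345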
class UrlRewriteBakaBT(object):
    """BakaBT urlrewriter."""

    # urlrewriter API
    def url_rewritable(self, task, entry):
        url = entry['url']
        # direct download links are already final, nothing to rewrite
        if url.startswith('http://www.bakabt.com/download/'):
            return False
        if url.startswith('http://www.bakabt.com/') or url.startswith('http://bakabt.com/'):
            return True
        return False

    # urlrewriter API
    def url_rewrite(self, task, entry):
        entry['url'] = self.parse_download_page(entry['url'])

    @internet(log)
    def parse_download_page(self, url):
        # fetch the torrent details page with a browser-like User-Agent header
        txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
        req = urllib2.Request(url, None, txheaders)
        page = urlopener(req, log)
        try:
            soup = get_soup(page)
        except Exception, e:
            raise UrlRewritingError(e)
        tag_a = soup.find('a', attrs={'class': 'download_link'})
        if not tag_a:
            raise UrlRewritingError('Unable to locate download link from url %s' % url)
        torrent_url = 'http://www.bakabt.com' + tag_a.get('href')
        return torrent_url
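# Hedged usage sketch, not from the original module: url_rewritable() only looks
# at the entry's 'url' key and ignores the task argument, so a plain dict and
# None are enough to show which links the BakaBT rewriter claims. The sample
# URLs are invented placeholders.
if __name__ == '__main__':
    rewriter = UrlRewriteBakaBT()
    samples = [
        'http://www.bakabt.com/download/12345',       # already a download link -> False
        'http://www.bakabt.com/155-example-torrent',  # details page -> True
        'http://example.com/other',                   # unrelated site -> False
    ]
    for sample in samples:
        print sample, rewriter.url_rewritable(None, {'url': sample})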
def parse_download_page(self, url):
    page = requests.get(url).content
    try:
        soup = get_soup(page)
        tag_div = soup.find('div', attrs={'class': 'download'})
        if not tag_div:
            raise UrlRewritingError('Unable to locate download link from url %s' % url)
        tag_a = tag_div.find('a')
        torrent_url = tag_a.get('href')
        # the href is sometimes protocol-relative, i.e. missing the scheme
        if torrent_url.startswith('//'):
            torrent_url = 'http:' + torrent_url
        return torrent_url
    except Exception, e:
        raise UrlRewritingError(e)
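# Hedged illustration, not part of the plugin: the '//' check above turns a
# protocol-relative href into an absolute http URL before it is returned. The
# hrefs below are invented placeholders.
if __name__ == '__main__':
    for href in ('//tracker.example.com/file.torrent',
                 'http://tracker.example.com/file.torrent'):
        if href.startswith('//'):
            href = 'http:' + href
        print href  # both print http://tracker.example.com/file.torrent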
def url_rewrite(self, task, entry):
    url = entry['url']
    if (url.startswith('http://www.newtorrents.info/?q=') or
            url.startswith('http://www.newtorrents.info/search')):
        try:
            url = self.entries_from_search(entry['title'], url=url)[0]['url']
        except PluginWarning, e:
            raise UrlRewritingError(e.value)
def parse_download_page(self, url):
    txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
    req = urllib2.Request(url, None, txheaders)
    page = urlopener(req, log)
    try:
        soup = get_soup(page)
    except Exception, e:
        raise UrlRewritingError(e)
def url_rewrite(self, task, entry):
    if 'url' not in entry:
        log.error("Didn't actually get a URL...")
    else:
        log.debug("Got the URL: %s" % entry['url'])
    if entry['url'].startswith(('http://thepiratebay.se/search/',
                                'http://thepiratebay.org/search/')):
        # the entry still points at a search listing, resolve it via search
        try:
            entry['url'] = self.search(entry['title'])[0]['url']
        except PluginWarning, e:
            raise UrlRewritingError(e)
def url_rewrite(self, task, entry):
    for name, config in self.resolves.iteritems():
        regexp = config['regexp_compiled']
        format = config['format']
        if regexp.search(entry['url']):
            log.debug('Regexp resolving %s with %s' % (entry['url'], name))
            # run the regexp substitution
            entry['url'] = regexp.sub(format, entry['url'])
            # if the rewritten url still matches, the rule is broken; fail the entry
            if regexp.match(entry['url']):
                task.fail(entry, 'urlrewriting')
                task.purge()
                from plugin_urlrewriting import UrlRewritingError
                raise UrlRewritingError('Regexp %s result should NOT continue to match!' % name)
            return
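# Hedged illustration, not taken from any real configuration: the loop above
# expects self.resolves to map a rule name to a dict holding a compiled regexp
# ('regexp_compiled') and a substitution template ('format'). The pattern and
# template below are invented placeholders that show how regexp.sub() performs
# the rewrite and how a result that still matches would be rejected.
if __name__ == '__main__':
    import re
    resolves = {
        'example_site': {
            'regexp_compiled': re.compile(r'http://example\.com/details/(\d+)'),
            'format': r'http://example.com/download/\1.torrent',
        }
    }
    config = resolves['example_site']
    rewritten = config['regexp_compiled'].sub(config['format'], 'http://example.com/details/4242')
    print rewritten  # http://example.com/download/4242.torrent
    print bool(config['regexp_compiled'].match(rewritten))  # False, so the rewrite is accepted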
    url = entry['url']
    if (url.startswith('http://www.newtorrents.info/?q=') or
            url.startswith('http://www.newtorrents.info/search')):
        try:
            url = self.entries_from_search(entry['title'], url=url)[0]['url']
        except PluginWarning, e:
            raise UrlRewritingError(e.value)
    else:
        url = self.url_from_page(url)
    if url:
        entry['url'] = url
        self.resolved.append(url)
    else:
        raise UrlRewritingError('Bug in newtorrents urlrewriter')

# Search plugin API
def search(self, query, comparator, config=None):
    return self.entries_from_search(query, comparator=comparator)

@internet(log)
def url_from_page(self, url):
    """Parses torrent url from newtorrents download page"""
    try:
        page = urlopener(url, log)
        data = page.read()
    except urllib2.URLError:
        raise UrlRewritingError('URLError when retrieving page')
    p = re.compile(r"copy\('(.*)'\)", re.IGNORECASE)
    f = p.search(data)