def url_rewrite(self, task, entry):
    if 'url' not in entry:
        log.error("Didn't actually get a URL...")
    else:
        url = entry['url']
        log.debug("Got the URL: %s" % entry['url'])
        try:
            request = urllib2.Request(url)
            response = urllib2.urlopen(request)
        except Exception as e:
            raise UrlRewritingError("Connection Error for %s : %s" % (url, e))
        rawdata = response.read()
        match = re.search(r"<a href=\"/torrents/download/\?id=(\d*?)\">.*\.torrent</a>", rawdata)
        if match:
            torrent_id = match.group(1)
            log.debug("Got the Torrent ID: %s" % torrent_id)
            entry['url'] = 'http://www.t411.me/torrents/download/?id=' + torrent_id
            if 'download_auth' in entry:
                auth_handler = t411Auth(entry['download_auth'][0],
                                        entry['download_auth'][1])
                entry['download_auth'] = auth_handler
        else:
            raise UrlRewritingError("Cannot find torrent ID")
def url_rewrite(self, task, entry):
    if 'url' not in entry:
        log.error("Didn't actually get a URL...")
    else:
        url = entry['url']
        log.debug("Got the URL: %s" % entry['url'])
        try:
            opener = urllib2.build_opener()
            opener.addheaders = [('User-agent', 'Mozilla/5.0')]
            response = opener.open(url)
        except Exception as e:
            raise UrlRewritingError("Connection Error for %s : %s" % (url, e))
        rawdata = response.read()
        match = re.search(r"<a href=\"/torrents/download/\?id=(\d*?)\">.*\.torrent</a>", rawdata)
        if match:
            torrent_id = match.group(1)
            log.debug("Got the Torrent ID: %s" % torrent_id)
            entry['url'] = 'http://www.t411.in/torrents/download/?id=' + torrent_id
            if 'download_auth' in entry:
                auth_handler = t411Auth(*entry['download_auth'])
                entry['download_auth'] = auth_handler
        else:
            raise UrlRewritingError("Cannot find torrent ID")
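# The two t411 rewriters above hand a t411Auth object to entry['download_auth'].
# What follows is only a sketch of such a handler, assuming it replays the
# uid/pass/authKey session cookies on the download request; the real t411Auth
# class is not shown here and may work differently.
import urllib2


class T411AuthSketch(object):
    """Hypothetical download-auth helper: opens a URL with stored cookies."""

    def __init__(self, uid, password, auth_key):
        self.cookie_header = 'uid=%s; pass=%s; authKey=%s' % (uid, password, auth_key)

    def open(self, url):
        request = urllib2.Request(url)
        request.add_header('Cookie', self.cookie_header)
        return urllib2.urlopen(request)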
def parse_download_page(self, url):
    if 'newpct1.com' in url:
        log.verbose('Newpct1 URL: %s', url)
        url = url.replace('newpct1.com/', 'newpct1.com/descarga-torrent/')
    else:
        log.verbose('Newpct URL: %s', url)
    try:
        page = requests.get(url)
    except requests.exceptions.RequestException as e:
        raise UrlRewritingError(e)
    try:
        soup = get_soup(page.text)
    except Exception as e:
        raise UrlRewritingError(e)
    if 'newpct1.com' in url:
        torrent_id_prog = re.compile(r'descargar-torrent/(.+)/')
        torrent_ids = soup.findAll(href=torrent_id_prog)
    else:
        torrent_id_prog = re.compile(r"'(?:torrentID|id)'\s*:\s*'(\d+)'")
        torrent_ids = soup.findAll(text=torrent_id_prog)
    if not torrent_ids:
        raise UrlRewritingError('Unable to locate torrent ID from url %s' % url)
    if 'newpct1.com' in url:
        torrent_id = torrent_id_prog.search(torrent_ids[0]['href']).group(1)
        return 'http://www.newpct1.com/download/%s.torrent' % torrent_id
    else:
        torrent_id = torrent_id_prog.search(torrent_ids[0]).group(1)
        return 'http://www.newpct.com/torrents/{:0>6}.torrent'.format(torrent_id)
def url_rewrite(self, task, entry):
    url = entry['url']
    page = None
    for scheme, netloc in EZTV_MIRRORS:
        try:
            _, _, path, params, query, fragment = urlparse(url)
            url = urlunparse((scheme, netloc, path, params, query, fragment))
            page = task.requests.get(url).content
        except RequestException:
            log.debug('Eztv mirror `%s` seems to be down', url)
            continue
        break
    if not page:
        raise UrlRewritingError('No mirrors found for url %s' % entry['url'])
    log.debug('Eztv mirror `%s` chosen', url)
    try:
        soup = get_soup(page)
        mirrors = soup.find_all('a', attrs={'class': re.compile(r'download_\d')})
    except Exception as e:
        raise UrlRewritingError(e)
    log.debug('%d torrent mirrors found', len(mirrors))
    if not mirrors:
        raise UrlRewritingError('Unable to locate download link from url %s' % url)
    entry['urls'] = [m.get('href') for m in mirrors]
    entry['url'] = mirrors[0].get('href')
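# EZTV_MIRRORS is not defined in this excerpt. Judging from the tuple
# unpacking above it is a list of (scheme, netloc) pairs; a plausible sketch
# (the hostnames here are illustrative, not an authoritative mirror list):
EZTV_MIRRORS = [('https', 'eztv.ag'),
                ('https', 'eztv.wf'),
                ('http', 'eztv.yt')]

# urlunparse() then grafts each mirror onto the original path and query:
from urlparse import urlparse, urlunparse  # Python 2, as in the snippets

_, _, path, params, query, fragment = urlparse('https://eztv.ag/ep/12345/show-s01e01/')
print(urlunparse(('http', 'eztv.yt', path, params, query, fragment)))
# -> http://eztv.yt/ep/12345/show-s01e01/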
def parse_downloads(self, series_url, search_title):
    page = requests.get(series_url).content
    try:
        soup = get_soup(page)
    except Exception as e:
        raise UrlRewritingError(e)
    urls = []
    # find all titles
    episode_titles = self.find_all_titles(search_title)
    if not episode_titles:
        raise UrlRewritingError('Unable to find episode')
    for ep_title in episode_titles:
        # find matching download
        episode_title = soup.find('strong', text=re.compile(ep_title, re.I))
        if not episode_title:
            continue
        # find download container
        episode = episode_title.parent
        if not episode:
            continue
        # find episode language
        episode_lang = episode.find_previous('strong', text=re.compile('Sprache')).next_sibling
        if not episode_lang:
            log.warning('No language found for: %s' % series_url)
            continue
        # filter language
        if not self.check_language(episode_lang):
            log.warning('languages not matching: %s <> %s' % (self.config['language'], episode_lang))
            continue
        # find download links
        links = episode.find_all('a')
        if not links:
            log.warning('No links found for: %s' % series_url)
            continue
        for link in links:
            if not link.has_attr('href'):
                continue
            url = link['href']
            pattern = r'http:\/\/download\.serienjunkies\.org.*%s_.*\.html' % self.config['hoster']
            if re.match(pattern, url) or self.config['hoster'] == 'all':
                urls.append(url)
    return urls
def parse_download_page(self, url, requests):
    txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
    page = requests.get(url, headers=txheaders)
    try:
        soup = get_soup(page.text)
    except Exception as e:
        raise UrlRewritingError(e)
    down_link = soup.find('a', attrs={'href': re.compile('.+mp4')})
    if not down_link:
        raise UrlRewritingError('Unable to locate download link from url %s' % url)
    return down_link.get('href')
def parse_download(self, series_url, search_title, config, entry):
    page = requests.get(series_url).content
    try:
        soup = get_soup(page)
    except Exception as e:
        raise UrlRewritingError(e)
    config = config or {}
    config.setdefault('hoster', 'ul')
    config.setdefault('language', 'en')
    # find matching download
    episode_title = soup.find('strong', text=search_title)
    if not episode_title:
        raise UrlRewritingError('Unable to find episode')
    # find download container
    episode = episode_title.parent
    if not episode:
        raise UrlRewritingError('Unable to find episode container')
    # find episode language
    episode_lang = episode.find_previous('strong', text=re.compile('Sprache')).next_sibling
    if not episode_lang:
        raise UrlRewritingError('Unable to find episode language')
    # filter language
    if config['language'] in ['de', 'both']:
        if not re.search('german|deutsch', episode_lang, flags=re.IGNORECASE):
            entry.reject('Language does not match')
    if config['language'] in ['en', 'both']:
        if not re.search('englisc?h', episode_lang, flags=re.IGNORECASE):
            entry.reject('Language does not match')
    # find download links
    links = episode.find_all('a')
    if not links:
        raise UrlRewritingError('Unable to find download links')
    for link in links:
        if not link.has_attr('href'):
            continue
        url = link['href']
        pattern = r'http:\/\/download\.serienjunkies\.org.*%s_.*\.html' % config['hoster']
        if re.match(pattern, url):
            return url
        else:
            log.debug('Hoster does not match')
            continue
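# Quick offline check of the hoster filter used by both serienjunkies parsers
# above; the sample download URLs are invented:
import re

pattern = r'http:\/\/download\.serienjunkies\.org.*%s_.*\.html' % 'ul'
assert re.match(pattern, 'http://download.serienjunkies.org/f-abc123/ul_episode.html')
assert not re.match(pattern, 'http://download.serienjunkies.org/f-abc123/so_episode.html')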
def parse_download_page(self, url, requests):
    txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
    page = requests.get(url, headers=txheaders)
    try:
        soup = get_soup(page.text)
    except Exception as e:
        raise UrlRewritingError(e)
    tag_a = soup.find('a', attrs={'class': 'download_link'})
    if not tag_a:
        raise UrlRewritingError('Unable to locate download link from url %s' % url)
    torrent_url = 'http://www.bakabt.com' + tag_a.get('href')
    return torrent_url
def parse_download_page(self, url):
    page = requests.get(url)
    try:
        soup = get_soup(page.text)
    except Exception as e:
        raise UrlRewritingError(e)
    torrent_id_prog = re.compile(r"'(?:torrentID|id)'\s*:\s*'(\d+)'")
    torrent_ids = soup.findAll(text=torrent_id_prog)
    if not torrent_ids:
        raise UrlRewritingError('Unable to locate torrent ID from url %s' % url)
    torrent_id = torrent_id_prog.search(torrent_ids[0]).group(1)
    return 'http://www.newpct.com/descargar/torrent/%s/dummy.html' % torrent_id
def url_from_page(self, url):
    """Parses torrent url from newtorrents download page"""
    try:
        page = urlopener(url, log)
        data = page.read()
    except urllib2.URLError:
        raise UrlRewritingError('URLError when retrieving page')
    p = re.compile(r"copy\('(.*)'\)", re.IGNORECASE)
    f = p.search(data)
    if not f:
        # the link on which this plugin relies is missing!
        raise UrlRewritingError('Failed to get url from download page. Plugin may need an update.')
    else:
        return f.group(1)
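# Self-contained check of the copy('...') extraction above, against a made-up
# page fragment (the real NewTorrents markup may differ):
import re

sample = "<a href=\"javascript:copy('http://www.newtorrents.info/down.php?id=12345')\">"
p = re.compile(r"copy\('(.*)'\)", re.IGNORECASE)
f = p.search(sample)
assert f.group(1) == 'http://www.newtorrents.info/down.php?id=12345'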
def parse_download_page(self, page_url):
    page = urlopener(page_url, log)
    try:
        soup = get_soup(page)
    except Exception as e:
        raise UrlRewritingError(e)
    tag_a = soup.find("a", {"class": "dl_link"})
    if not tag_a:
        raise UrlRewritingError('FTDB: unable to locate download link from url %s' % page_url)
    torrent_url = "http://www3.frenchtorrentdb.com" + tag_a.get('href') + "&js=1"
    log.debug('TORRENT URL is : %s' % torrent_url)
    return torrent_url
def parse_download_page(self, url):
    page = requests.get(url, verify=False).content
    try:
        soup = get_soup(page)
        tag_div = soup.find('div', attrs={'class': 'download'})
        if not tag_div:
            raise UrlRewritingError('Unable to locate download link from url %s' % url)
        tag_a = tag_div.find('a')
        torrent_url = tag_a.get('href')
        # URL is sometimes missing the schema
        if torrent_url.startswith('//'):
            torrent_url = 'http:' + torrent_url
        return torrent_url
    except Exception as e:
        raise UrlRewritingError(e)
def parse_download_page(self, url):
    page = urlopener(url, log)
    log.debug('%s opened', url)
    try:
        soup = get_soup(page)
        torrent_url = 'http://www.t411.me' + soup.find(text='Télécharger').findParent().get('href')
    except Exception as e:
        raise UrlRewritingError(e)
    if not torrent_url:
        raise UrlRewritingError('Unable to locate download link from url %s' % url)
    return torrent_url
class UrlRewriteNewPCT(object):
    """NewPCT urlrewriter."""

    # urlrewriter API
    def url_rewritable(self, task, entry):
        url = entry['url']
        if url.startswith('http://www.newpct.com/download/'):
            return False
        if url.startswith('http://www.newpct.com/') or url.startswith('http://newpct.com/'):
            return True
        return False

    # urlrewriter API
    def url_rewrite(self, task, entry):
        entry['url'] = self.parse_download_page(entry['url'])

    @internet(log)
    def parse_download_page(self, url):
        txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
        req = urllib2.Request(url, None, txheaders)
        page = urlopener(req, log)
        try:
            soup = get_soup(page)
        except Exception as e:
            raise UrlRewritingError(e)
        down_link = soup.find('a', attrs={'href': re.compile('descargar/torrent/')})
        if not down_link:
            raise UrlRewritingError('Unable to locate download link from url %s' % url)
        return down_link.get('href')
def parse_download_page(self, url):
    txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
    req = urllib2.Request(url, None, txheaders)
    page = urlopener(req, log)
    try:
        soup = get_soup(page)
    except Exception as e:
        raise UrlRewritingError(e)
    down_link = soup.find('a', attrs={'href': re.compile(r'download/\d+/.*\.torrent')})
    if not down_link:
        raise UrlRewritingError('Unable to locate download link from url %s' % url)
    return 'http://www.deadfrog.us/' + down_link.get('href')
def url_rewrite(self, task, entry):
    url = entry['url']
    if (url.startswith('http://www.newtorrents.info/?q=') or
            url.startswith('http://www.newtorrents.info/search')):
        try:
            url = self.entries_from_search(entry['title'], url=url)[0]['url']
        except PluginWarning as e:
            raise UrlRewritingError(e.value)
    else:
        url = self.url_from_page(url)
    if url:
        entry['url'] = url
        self.resolved.append(url)
    else:
        raise UrlRewritingError('Bug in newtorrents urlrewriter')
def url_rewrite(self, task, entry):
    log.debug('Requesting %s' % entry['url'])
    page = requests.get(entry['url'])
    soup = get_soup(page.text)
    for link in soup.findAll('a', attrs={'href': re.compile(r'^/url')}):
        # extract the real url from the google internal link
        href = 'http://google.com' + link['href']
        args = parse_qs(urlparse(href).query)
        href = args['q'][0]
        # test if an entry with this url would be recognized by some urlrewriter
        log.trace('Checking if %s is known by some rewriter' % href)
        fake_entry = {'title': entry['title'], 'url': href}
        urlrewriting = plugin.get_plugin_by_name('urlrewriting')
        if urlrewriting['instance'].url_rewritable(task, fake_entry):
            log.debug('--> rewriting %s (known url pattern)' % href)
            entry['url'] = href
            return
        else:
            log.debug('<-- ignoring %s (unknown url pattern)' % href)
    raise UrlRewritingError('Unable to resolve')
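# Minimal demonstration of the /url?q=... unwrapping performed above:
from urlparse import urlparse, parse_qs  # Python 2, matching the snippet

href = 'http://google.com/url?q=http://example.com/torrent/123&sa=U&ei=abc'
assert parse_qs(urlparse(href).query)['q'][0] == 'http://example.com/torrent/123'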
def url_rewrite(self, task, entry):
    url = entry['url']
    if (url.startswith('http://www.newtorrents.info/?q=') or
            url.startswith('http://www.newtorrents.info/search')):
        results = self.entries_from_search(entry['title'], url=url)
        if not results:
            raise UrlRewritingError("No matches for %s" % entry['title'])
        url = results[0]['url']
    else:
        url = self.url_from_page(url)
    if url:
        entry['url'] = url
        self.resolved.append(url)
    else:
        raise UrlRewritingError('Bug in newtorrents urlrewriter')
def parse_download_page(self, url):
    txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
    req = urllib2.Request(url, None, txheaders)
    page = urlopener(req, log)
    try:
        soup = get_soup(page)
    except Exception as e:
        raise UrlRewritingError(e)
def get_login_cookies(self, username, password):
    url_auth = 'http://www.t411.me/users/login'
    db_session = Session()
    account = db_session.query(torrent411Account).filter(
        torrent411Account.username == username).first()
    if account:
        if account.expiry_time < datetime.now():
            db_session.delete(account)
            db_session.commit()
        log.debug("Cookies found in db!")
        return account.auth
    else:
        log.debug("Getting login cookies from : %s " % url_auth)
        params = urllib.urlencode({'login': username,
                                   'password': password,
                                   'remember': '1'})
        cj = cookielib.CookieJar()
        # we need a cookie hook here to avoid redirect cookies
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        # must use the same User-Agent as the download link
        opener.addheaders = [('User-agent', self.USER_AGENT)]
        try:
            opener.open(url_auth, params)
        except Exception as e:
            raise UrlRewritingError("Connection Error for %s : %s" % (url_auth, e))
        authKey = None
        uid = None
        password = None
        for cookie in cj:
            if cookie.name == "authKey":
                authKey = cookie.value
            if cookie.name == "uid":
                uid = cookie.value
            if cookie.name == "pass":
                password = cookie.value
        if authKey is not None and uid is not None and password is not None:
            authCookie = {'uid': uid,
                          'password': password,
                          'authKey': authKey}
            db_session.add(torrent411Account(username=username,
                                             auth=authCookie,
                                             expiry_time=datetime.now() + timedelta(days=1)))
            db_session.commit()
            return authCookie
    return {"uid": "", "password": "", "authKey": ""}
def parse_download_page(self, page_url, requests):
    page = requests.get(page_url)
    try:
        soup = get_soup(page.text)
    except Exception as e:
        raise UrlRewritingError(e)
    tag_a = soup.find("a", {"class": "dl_link"})
    if not tag_a:
        if soup.findAll(text="Connexion ?"):
            raise UrlRewritingError('You are not logged in, check if your cookie '
                                    'for authentication is up to date')
        else:
            raise UrlRewritingError('You have reached your download limit per '
                                    '24 hours, so I cannot get the torrent')
    torrent_url = "http://www.frenchtorrentdb.com" + tag_a.get('href') + "&js=1"
    log.debug('TORRENT URL is : %s' % torrent_url)
    return torrent_url
def parse_download_page(self, url):
    try:
        page = requests.get(url).content
        soup = get_soup(page, 'html.parser')
        download_link = soup.findAll(href=re.compile('redirect|redirectlink'))
        download_href = download_link[0]['href']
        return download_href
    except Exception:
        raise UrlRewritingError('Unable to locate torrent from url %s' % url)
def url_rewrite(self, task, entry):
    try:
        # need to fake the user agent
        txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
        page = task.requests.get(entry['url'], headers=txheaders)
        soup = get_soup(page.text)
        results = soup.find_all('a', attrs={'class': 'l'})
        if not results:
            raise UrlRewritingError('No results')
        for res in results:
            url = res.get('href')
            url = url.replace('/interstitial?url=', '')
            # generate a match regexp from the google search result title
            regexp = '.*'.join([x.contents[0] for x in res.find_all('em')])
            if re.match(regexp, entry['title']):
                log.debug('resolved, found with %s' % regexp)
                entry['url'] = url
                return
        raise UrlRewritingError('Unable to resolve')
    except Exception as e:
        raise UrlRewritingError(e)
def url_rewrite(self, task, entry):
    if 'url' not in entry:
        log.error("Didn't actually get a URL...")
    else:
        log.debug("Got the URL: %s" % entry['url'])
    if entry['url'].startswith(BASE_URL + '/t?'):
        # use search
        results = self.search(task, entry)
        if not results:
            raise UrlRewritingError("No search results found")
        # TODO: Search doesn't enforce close match to title, be more picky
        entry['url'] = results[0]['url']
def url_rewrite(self, task, entry):
    if 'url' not in entry:
        log.error("Didn't actually get a URL...")
    else:
        log.debug("Got the URL: %s" % entry['url'])
    if entry['url'].startswith('http://torrentleech.org/torrents/browse/index/query/'):
        # use search
        results = self.search(task, entry)
        if not results:
            raise UrlRewritingError("No search results found")
        # TODO: Search doesn't enforce close match to title, be more picky
        entry['url'] = results[0]['url']
def url_rewrite(self, task, entry):
    if 'url' not in entry:
        log.error("Didn't actually get a URL...")
    else:
        log.debug("Got the URL: %s" % entry['url'])
    if entry['url'].startswith('http://torrentleech.org/torrents/browse/index/query/'):
        # use search
        try:
            entry['url'] = self.search(entry)[0]['url']
        except PluginWarning as e:
            raise UrlRewritingError(e)
def url_rewrite(self, task, entry):
    if 'url' not in entry:
        log.error("Didn't actually get a URL...")
    else:
        log.debug("Got the URL: %s" % entry['url'])
    if URL_SEARCH.match(entry['url']):
        # use search
        results = self.search(task, entry)
        if not results:
            raise UrlRewritingError("No search results found")
        # TODO: Close matching was taken out of search methods, this may need to be fixed to be more picky
        entry['url'] = results[0]['url']
    else:
        # parse download page
        entry['url'] = self.parse_download_page(entry['url'])
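# URL_SEARCH is defined elsewhere in the plugin; based on the branch above it
# is a compiled pattern that tells search URLs apart from detail pages. A
# hypothetical stand-in, for illustration only:
import re

URL_SEARCH = re.compile(r'http://[^/]+/(?:search|browse)/', re.IGNORECASE)
assert URL_SEARCH.match('http://tracker.example/search/foo')
assert not URL_SEARCH.match('http://tracker.example/torrent/123')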
def url_rewrite(self, task, entry):
    if 'url' not in entry:
        log.error("Didn't actually get a URL...")
    else:
        log.debug("Got the URL: %s" % entry['url'])
    if entry['url'].startswith(('http://thepiratebay.se/search/',
                                'http://thepiratebay.org/search/')):
        # use search
        try:
            entry['url'] = self.search(entry['title'])[0]['url']
        except PluginWarning as e:
            raise UrlRewritingError(e)
    else:
        # parse download page
        entry['url'] = self.parse_download_page(entry['url'])
def url_rewrite(self, task, entry):
    for name, config in self.resolves.get(task.name, {}).items():
        regexp = config['regexp_compiled']
        format = config['format']
        if regexp.search(entry['url']):
            log.debug('Regexp resolving %s with %s' % (entry['url'], name))
            # run the regexp
            entry['url'] = regexp.sub(format, entry['url'])
            if regexp.match(entry['url']):
                entry.fail('urlrewriting')
                raise UrlRewritingError('Regexp %s result should NOT continue to match!' % name)
            return
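# Standalone illustration of the search/sub/self-match guard above, with a
# hypothetical rule (the URLs are made up):
import re

regexp = re.compile(r'^http://example\.org/details/(\d+)$')
fmt = r'http://example.org/download/\1.torrent'

url = 'http://example.org/details/4242'
if regexp.search(url):
    url = regexp.sub(fmt, url)
    # the rewritten URL must no longer match, or the rule would loop forever
    assert not regexp.match(url)
print(url)  # http://example.org/download/4242.torrent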
def get_login_cookies(self, username, password):
    url_auth = 'http://www.t411.in/users/login'
    db_session = Session()
    account = db_session.query(torrent411Account).filter(
        torrent411Account.username == username).first()
    if account:
        if account.expiry_time < datetime.now():
            db_session.delete(account)
            db_session.commit()
        log.debug("Cookies found in db!")
        return account.auth
    else:
        log.debug("Getting login cookies from : %s " % url_auth)
        params = {'login': username, 'password': password, 'remember': '1'}
        cj = cookielib.CookieJar()
        # we need a cookie hook here to avoid redirect cookies
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        # must use the same User-Agent as the download link
        opener.addheaders = [('User-agent', self.USER_AGENT)]
        login_output = None
        try:
            login_output = opener.open(url_auth, urllib.urlencode(params)).read()
        except Exception as e:
            raise UrlRewritingError("Connection Error for %s : %s" % (url_auth, e))
        if b'confirmer le captcha' in login_output:
            log.warn("Captcha requested for login.")
            login_output = self._solveCaptcha(login_output, url_auth, params, opener)
        if b'logout' in login_output:
            authKey = None
            uid = None
            password = None
            for cookie in cj:
                if cookie.name == "authKey":
                    authKey = cookie.value
                if cookie.name == "uid":
                    uid = cookie.value
                if cookie.name == "pass":
                    password = cookie.value
            if authKey is not None and uid is not None and password is not None:
                authCookie = {'uid': uid,
                              'password': password,
                              'authKey': authKey}
                db_session.add(torrent411Account(username=username,
                                                 auth=authCookie,
                                                 expiry_time=datetime.now() + timedelta(days=1)))
                db_session.commit()
                return authCookie
        else:
            log.error("Login failed (Torrent411). Check your login and password.")
        return {}
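# A sketch of how the cookie dict returned by get_login_cookies() might be
# replayed on the actual .torrent download. The cookie names (uid/pass/authKey)
# come from the function above; the helper itself is an assumption, not the
# plugin's actual download path.
import urllib2


def fetch_with_t411_auth(url, auth_cookie, user_agent='Mozilla/5.0'):
    request = urllib2.Request(url)
    request.add_header('User-agent', user_agent)
    request.add_header('Cookie', 'uid=%s; pass=%s; authKey=%s' % (
        auth_cookie['uid'], auth_cookie['password'], auth_cookie['authKey']))
    return urllib2.urlopen(request).read()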