def downloadFile(self, pyfile):
    """Resolve redirects for pyfile.url, derive a file name and start the download.

    :param pyfile: PyFile whose ``url`` is downloaded; ``pyfile.name`` is set here.
    :raises BadHeader: when the server answers 404.
    """
    url = pyfile.url

    # Follow up to 5 redirects, consistent with the sibling downloadFile
    # implementations; the original followed at most one.
    for _ in range(5):
        header = self.load(url, just_header=True)

        # self.load does not raise a BadHeader on 404 responses, do it here
        # (dict.has_key is deprecated; use the `in` operator).
        if 'code' in header and header['code'] == 404:
            raise BadHeader(404)

        if 'location' in header:
            self.logDebug("Location: " + header['location'])
            url = unquote(header['location'])
        else:
            break

    name = html_unescape(unquote(urlparse(url).path.split("/")[-1]))

    if 'content-disposition' in header:
        self.logDebug("Content-Disposition: " + header['content-disposition'])

        # RFC 6266: plain `filename=` or encoded `filename*=<enc>''<name>`.
        m = search("filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)",
                   header['content-disposition'])
        if m:
            disp = m.groupdict()
            self.logDebug(disp)
            if not disp['enc']:
                disp['enc'] = 'utf-8'
            name = remove_chars(disp['name'], "\"';").strip()
            name = unicode(unquote(name), disp['enc'])

    if not name:
        name = url
    pyfile.name = name
    self.logDebug("Filename: %s" % pyfile.name)
    self.download(url, disposition=True)
def decrypt(self, pyfile):
    """Run the decrypter pipeline: try a direct redirect first, else scrape links."""
    self.prepare()

    self.logDebug("Looking for link redirect...")
    self.handleDirect(pyfile)

    if self.link:
        self.urls = [self.link]
    else:
        self.preload()
        self.checkInfo()

        self.links = self.getLinks() or list()

        # Optional multi-page support supplied by subclasses.
        if hasattr(self, 'PAGES_PATTERN') and hasattr(self, 'loadPage'):
            self.handlePages(pyfile)

        self.logDebug("Package has %d links" % len(self.links))

    if self.links:
        #@TODO: Move to Crypter in 0.4.10
        self.links = [html_unescape(raw.decode('unicode-escape').strip())
                      for raw in self.links]
        self.packages = [(self.info['name'], self.links, self.info['folder'])]

    elif not self.urls and not self.packages:  #@TODO: Remove in 0.4.10
        self.fail(_("No link grabbed"))
def handleCaptcha(self, inputs):
    """Solve the positional-digit captcha and store the code in inputs['code'].

    Returns 3 (captcha-type identifier).
    """
    captcha_div = re.search(r'<b>Enter code.*?<div.*?>(.*?)</div>', self.html, re.S).group(1)
    self.logDebug(captcha_div)

    # Each digit sits in a <span> offset by padding-left; sorting by that
    # offset recovers the visual reading order.
    numerals = re.findall('<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>',
                          html_unescape(captcha_div))
    ordered = sorted(numerals, key=lambda pair: int(pair[0]))
    inputs['code'] = "".join(pair[1] for pair in ordered)

    self.logDebug("CAPTCHA", inputs['code'], numerals)
    return 3
def handlePremium(self):
    """Extract the premium download URL and start the transfer."""
    found = re.search(self.PREMIUM_URL_PATTERN, self.html)
    if found is None:
        self.parseError("Premium URL")

    url = html_unescape(found.group(1))
    self.logDebug("Premium URL: " + url)

    # A non-http URL suggests the premium session is broken; drop the account.
    if not url.startswith("http://"):
        self.resetAccount()

    self.download(url)
def getFileInfo(self):
    """Parse name/size/status from self.html and update self.pyfile accordingly."""
    self.logDebug("URL: %s" % self.pyfile.url)

    if hasattr(self, "TEMP_OFFLINE_PATTERN") and re.search(self.TEMP_OFFLINE_PATTERN, self.html):
        self.tempOffline()

    name, size, status = parseFileInfo(self)[:3]

    if status == 1:
        self.offline()
    elif status != 2:
        self.logDebug(self.file_info)
        self.parseError('File info')

    if name:
        self.pyfile.name = name
    else:
        # No parsed name: fall back to the last path segment of the URL.
        self.pyfile.name = html_unescape(urlparse(self.pyfile.url).path.split("/")[-1])

    if size:
        self.pyfile.size = size
    else:
        self.logError("File size not parsed")

    self.logDebug("FILE NAME: %s FILE SIZE: %s" % (self.pyfile.name, self.pyfile.size))
    return self.file_info
def process(self, pyfile):
    """Dispatch the download: overriden hoster, direct link, premium or free."""
    self.prepare()

    if not re.match(self.__pattern__, self.pyfile.url):
        if self.premium:
            self.handleOverriden()
        else:
            self.fail("Only premium users can download from other hosters with %s" % self.HOSTER_NAME)
    else:
        try:
            # Due to a 0.4.9 core bug self.load would use cookies even if
            # cookies=False. Workaround using getURL to avoid cookies.
            # Can be reverted in 0.5 as the cookies bug has been fixed.
            self.html = getURL(pyfile.url, decode=True)
            self.file_info = self.getFileInfo()
        except PluginParseError:
            self.file_info = None

        self.location = self.getDirectDownloadLink()

        if not self.file_info:
            source = self.location if self.location else pyfile.url
            pyfile.name = html_unescape(unquote(urlparse(source).path.split("/")[-1]))

        if self.location:
            self.startDownload(self.location)
        elif self.premium:
            self.handlePremium()
        else:
            self.handleFree()
def handleCaptcha(self, inputs):
    """Decode the padding-left positional captcha into inputs['code']; returns 3."""
    captcha_div = re.search(r'<b>Enter code.*?<div.*?>(.*?)</div>', self.html, re.S).group(1)
    self.logDebug(captcha_div)

    # Pairs of (pixel offset, digit); the offset encodes display order.
    numerals = re.findall('<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>',
                          html_unescape(captcha_div))

    digits = []
    for _offset, digit in sorted(numerals, key=lambda num: int(num[0])):
        digits.append(digit)
    inputs['code'] = "".join(digits)

    self.logDebug("CAPTCHA", inputs['code'], numerals)
    return 3
def process(self, pyfile):
    """Entry point: direct-link, premium or free download of *pyfile*."""
    self.prepare()

    if not re.match(self.__pattern__, self.pyfile.url):
        if self.premium:
            self.handleOverriden()
        else:
            self.fail("Only premium users can download from other hosters with %s" % self.HOSTER_NAME)
        return

    try:
        # Due to a 0.4.9 core bug self.load would use cookies even if
        # cookies=False. Workaround using getURL to avoid cookies.
        # Can be reverted in 0.5 as the cookies bug has been fixed.
        self.html = getURL(pyfile.url, decode=True)
        self.file_info = self.getFileInfo()
    except PluginParseError:
        self.file_info = None

    self.location = self.getDirectDownloadLink()

    if not self.file_info:
        pyfile.name = html_unescape(unquote(urlparse(
            self.location if self.location else pyfile.url).path.split("/")[-1]))

    if self.location:
        self.startDownload(self.location)
    elif self.premium:
        self.handlePremium()
    else:
        self.handleFree()
def handleCaptcha(self, inputs):
    """Detect which captcha type the page uses, solve it, and fill *inputs*.

    Returns an int identifying the captcha type (0 = none found).
    """
    found = re.search(self.RECAPTCHA_URL_PATTERN, self.html)
    if found:
        recaptcha_key = unquote(found.group(1))
        self.logDebug("RECAPTCHA KEY: %s" % recaptcha_key)
        recaptcha = ReCaptcha(self)
        inputs["recaptcha_challenge_field"], inputs["recaptcha_response_field"] = \
            recaptcha.challenge(recaptcha_key)
        return 1

    found = re.search(self.CAPTCHA_URL_PATTERN, self.html)
    if found:
        inputs["code"] = self.decryptCaptcha(found.group(1))
        return 2

    found = re.search(self.CAPTCHA_DIV_PATTERN, self.html, re.S)
    if found:
        captcha_div = found.group(1)
        self.logDebug(captcha_div)
        # Digits are positioned by padding-left; sort by offset for reading order.
        numerals = re.findall(r"<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>",
                              html_unescape(captcha_div))
        inputs["code"] = "".join(pair[1] for pair in
                                 sorted(numerals, key=lambda p: int(p[0])))
        self.logDebug("CAPTCHA", inputs["code"], numerals)
        return 3

    found = re.search(self.SOLVEMEDIA_PATTERN, self.html)
    if found:
        captcha = SolveMedia(self)
        inputs["adcopy_challenge"], inputs["adcopy_response"] = \
            captcha.challenge(found.group(1))
        return 4

    return 0
def process(self, pyfile):
    """Entry point: load the page, then dispatch direct/premium/free handling."""
    self.prepare()

    if not re.match(self.__pattern__, self.pyfile.url):
        if self.premium:
            self.handleOverriden()
        else:
            self.fail("Only premium users can download from other hosters with %s" % self.HOSTER_NAME)
        return

    try:
        self.html = self.load(pyfile.url, cookies=False, decode=True)
        self.file_info = self.getFileInfo()
    except PluginParseError:
        self.file_info = None

    self.location = self.getDirectDownloadLink()

    if not self.file_info:
        source = self.location if self.location else pyfile.url
        pyfile.name = html_unescape(unquote(urlparse(source).path.split("/")[-1]))

    if self.location:
        self.startDownload(self.location)
    elif self.premium:
        self.handlePremium()
    else:
        self.handleFree()
def handleCaptcha(self, inputs):
    """Identify and solve the page captcha; returns its type id (0 = none)."""
    m = re.search(self.RECAPTCHA_URL_PATTERN, self.html)
    if m:
        recaptcha_key = unquote(m.group(1))
        self.logDebug("RECAPTCHA KEY: %s" % recaptcha_key)
        recaptcha = ReCaptcha(self)
        challenge, response = recaptcha.challenge(recaptcha_key)
        inputs['recaptcha_challenge_field'] = challenge
        inputs['recaptcha_response_field'] = response
        return 1

    m = re.search(self.CAPTCHA_URL_PATTERN, self.html)
    if m:
        inputs['code'] = self.decryptCaptcha(m.group(1))
        return 2

    m = re.search(self.CAPTCHA_DIV_PATTERN, self.html, re.S)
    if m:
        captcha_div = m.group(1)
        self.logDebug(captcha_div)
        # (offset, digit) pairs; offset gives the visual order.
        numerals = re.findall('<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>',
                              html_unescape(captcha_div))
        inputs['code'] = "".join(pair[1] for pair in
                                 sorted(numerals, key=lambda p: int(p[0])))
        self.logDebug("CAPTCHA", inputs['code'], numerals)
        return 3

    return 0
def downloadFile(self, pyfile):
    """Follow redirects (max 5), derive the file name, and start the download."""
    url = pyfile.url

    for _redirect in range(5):
        header = self.load(url, just_header=True)

        # self.load does not raise a BadHeader on 404 responses, do it here
        if header.get('code') == 404:
            raise BadHeader(404)

        if 'location' in header:
            self.logDebug("Location: " + header['location'])
            url = unquote(header['location'])
        else:
            break

    name = html_unescape(unquote(urlparse(url).path.split("/")[-1]))

    if 'content-disposition' in header:
        self.logDebug("Content-Disposition: " + header['content-disposition'])

        # Plain `filename=` or RFC-6266-style `filename*=<enc>''<name>`.
        m = search("filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)",
                   header['content-disposition'])
        if m:
            disp = m.groupdict()
            self.logDebug(disp)
            if not disp['enc']:
                disp['enc'] = 'utf-8'
            name = remove_chars(disp['name'], "\"';").strip()
            name = unicode(unquote(name), disp['enc'])

    if not name:
        name = url

    pyfile.name = name
    self.logDebug("Filename: %s" % pyfile.name)
    self.download(url, disposition=True)
def handleWebLinks(self):
    """Decrypt all linkcrypt.ws container links found in self.html.

    :return: list of decrypted target URLs.
    """
    self.logDebug("Search for Web links ")

    package_links = []
    pattern = r'<form action="http://linkcrypt.ws/out.html"[^>]*?>.*?<input[^>]*?value="([^"]*?)"[^>]*?name="file"'
    ids = re.findall(pattern, self.html, re.I | re.S)

    self.logDebug("Decrypting %d Web links" % len(ids))
    for idx, weblink_id in enumerate(ids):
        try:
            self.logDebug("Decrypting Web link %d, %s" % (idx + 1, weblink_id))
            res = self.load("http://linkcrypt.ws/out.html", post={'file': weblink_id})

            # The target URL is assigned to window.location; the +19 offset
            # skips the matched text plus the ' "' before the URL.
            indexs = res.find("window.location =") + 19
            indexe = res.find('"', indexs)

            link2 = res[indexs:indexe]
            self.logDebug(link2)
            link2 = html_unescape(link2)
            package_links.append(link2)
        except Exception as detail:
            self.logDebug("Error decrypting Web link %s, %s" % (weblink_id, detail))

    # Bugfix: the collected links were previously built but never returned.
    return package_links
def __init__(self, show, epPageLink, linkText):
    """Build an episode record from its page link and display text."""
    self.url['epPage'] = epPageLink
    self.seasonNum, self.episodeNum = extractSeasonAndEpisodeNum(linkText)

    # The episode name is everything after the first whitespace-separated token.
    m = re.search(r'\S*\s(.*)', linkText)
    if m:
        self.episodeName = html_unescape(m.group(1))

    self.show = show
def handleWebLinks(self):
    """Decrypt linkcrypt.ws container links found in self.html.

    :return: list of decrypted target URLs.
    """
    self.logDebug("Search for Web links ")

    package_links = []
    pattern = r'<form action="http://linkcrypt.ws/out.html"[^>]*?>.*?<input[^>]*?value="(.+?)"[^>]*?name="file"'
    ids = re.findall(pattern, self.html, re.I | re.S)

    self.logDebug("Decrypting %d Web links" % len(ids))
    for idx, weblink_id in enumerate(ids):
        try:
            res = self.load("http://linkcrypt.ws/out.html", post={'file': weblink_id})

            # Target is assigned to window.location; offset 19 skips the
            # assignment text plus the opening quote.
            indexs = res.find("window.location =") + 19
            indexe = res.find('"', indexs)

            link2 = res[indexs:indexe]
            link2 = html_unescape(link2)
            package_links.append(link2)
        except Exception as detail:
            self.logDebug("Error decrypting Web link %s, %s" % (weblink_id, detail))

    # Bugfix: the collected links were previously built but never returned.
    return package_links
def proceed(self, url, location):
    """Collect all 1kh.de iframe links from the parent page into self.urls."""
    # NOTE(review): the *url* parameter is overwritten immediately; the
    # parent's URL is what is actually loaded.
    url = self.parent.url
    self.html = self.load(url)

    link_ids = re.findall(r"<a id=\"DownloadLink_(\d*)\" href=\"http://1kh.de/", self.html)
    for link_id in link_ids:
        page = self.load("http://1kh.de/l/" + link_id)
        new_link = html_unescape(
            re.search("width=\"100%\" src=\"(.*)\"></iframe>", page).group(1))
        self.urls.append(new_link)
def get_file_name(self):
    """Return the unescaped file name, preferring the API over HTML scraping."""
    try:
        name = self.api["name"]
    except KeyError:
        # API record missing: fall back to the last path segment of the
        # download link scraped from the page.
        name = re.search('id="downloadlink"><a href="(.*)" onclick="',
                         self.html[1]).group(1).split("/")[-1]
    return html_unescape(name)
def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False,
         decode=True, multipart=False, req=None):
    """
    Load content at url and returns it

    :param url:
    :param get:
    :param post:
    :param ref:
    :param cookies:
    :param just_header: If True only the header will be retrieved and returned as dict
    :param decode: Wether to decode the output according to http header, should be True in most cases
    :param multipart: use multipart encoding for the POST body
    :param req: request object to use; defaults to the plugin's own request
    :return: Loaded content
    """
    # NOTE: `get`/`post` use mutable defaults; kept for interface
    # compatibility — presumably req.load never mutates them (TODO confirm).
    if hasattr(self, 'pyfile') and self.pyfile.abort:
        self.abort()

    url = fixurl(url)

    if not url or not isinstance(url, basestring):
        self.fail(_("No url given"))

    if self.pyload.debug:
        self.log_debug("LOAD URL " + url,
                       *["%s=%s" % (key, val) for key, val in locals().items()
                         if key not in ("self", "url")])

    if req is None:
        req = self.req or self.pyload.requestFactory.getRequest(self.__name__)

    #@TODO: Move to network in 0.4.10
    if hasattr(self, 'COOKIES') and isinstance(self.COOKIES, list):
        set_cookies(req.cj, self.COOKIES)

    res = req.load(url, get, post, ref, bool(cookies), just_header,
                   multipart, decode is True)  #@TODO: Fix network multipart in 0.4.10

    #@TODO: Move to network in 0.4.10
    if decode:
        res = html_unescape(res)

    #@TODO: Move to network in 0.4.10
    # NOTE(review): `decode` is a string here, so `decode(res, decode)` would
    # raise TypeError; looks like a codec decode was intended — confirm
    # against callers before changing.
    if isinstance(decode, basestring):
        res = decode(res, decode)

    if self.pyload.debug:
        frame = inspect.currentframe()
        framefile = fs_join("tmp", self.__name__,
                            "%s_line%s.dump.html" % (frame.f_back.f_code.co_name,
                                                     frame.f_back.f_lineno))
        try:
            if not exists(os.path.join("tmp", self.__name__)):
                os.makedirs(os.path.join("tmp", self.__name__))

            with open(framefile, "wb") as f:
                del frame  #: Delete the frame or it wont be cleaned
                f.write(encode(res))

        except IOError as e:
            self.log_error(e)

    # Bugfix: the loaded content was never returned despite the docstring.
    return res
def getPackageNameAndFolder(self):
    """Return (name, folder), parsed from TITLE_PATTERN when available."""
    if hasattr(self, 'TITLE_PATTERN'):
        m = re.search(self.TITLE_PATTERN, self.html)
        if m:
            # Name and folder are intentionally the same string.
            name = folder = html_unescape(m.group('title').strip())
            self.logDebug("Found name [%s] and folder [%s] in package info" % (name, folder))
            return name, folder

    # No title on the page: fall back to the enclosing package's settings.
    name = self.pyfile.package().name
    folder = self.pyfile.package().folder
    self.logDebug("Package info not found, defaulting to pyfile name [%s] and folder [%s]" % (name, folder))
    return name, folder
def handle_free(self, pyfile):
    """Request the free-download license and extract the direct link."""
    fileid = re.search(self.FILEID_PATTERN, self.html).group(1)
    self.log_debug("FileID: " + fileid)

    token = re.search(self.TOKEN_PATTERN, self.html).group(1)
    self.log_debug("Token: " + token)

    post_data = {'fileId': fileid,
                 '__RequestVerificationToken': token}
    self.html = self.load("http://lolabits.es/action/License/Download",
                          post=post_data,
                          decode="unicode-escape")

    self.link = html_unescape(re.search(self.LINK_PATTERN, self.html).group(1))
def _translateAPIFileInfo(apiFileId, apiFileDataMap, apiHosterMap):
    """Translate a raw API record into a fileInfo dict.

    Missing or malformed fields leave fileInfo partially filled (best-effort,
    matching the original behaviour).
    """
    fileInfo = {}
    try:
        fileInfo['status'] = MegauploadCom.API_STATUS_MAPPING[apiFileDataMap[apiFileId]]
        fileInfo['name'] = html_unescape(apiFileDataMap['n'])
        fileInfo['size'] = int(apiFileDataMap['s'])
        fileInfo['hoster'] = apiHosterMap[apiFileDataMap['d']]
    except (KeyError, ValueError, TypeError):
        # Narrowed from a bare `except:` so that programming errors are no
        # longer silently swallowed; only lookup/conversion failures are.
        pass
    return fileInfo
def handleCaptcha(self, inputs):
    """Try each supported captcha type in turn and fill *inputs* accordingly."""
    m = re.search(self.CAPTCHA_PATTERN, self.html)
    if m:
        inputs['code'] = self.decryptCaptcha(m.group(1))
        return

    m = re.search(self.CAPTCHA_BLOCK_PATTERN, self.html, re.S)
    if m:
        captcha_div = m.group(1)
        # Digits positioned by padding-left; sort by offset for reading order.
        numerals = re.findall(r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>',
                              html_unescape(captcha_div))
        self.logDebug(captcha_div)
        ordered = sorted(numerals, key=lambda pair: int(pair[0]))
        inputs['code'] = "".join(pair[1] for pair in ordered)
        self.logDebug("Captcha code: %s" % inputs['code'], numerals)
        return

    recaptcha = ReCaptcha(self)
    try:
        captcha_key = re.search(self.RECAPTCHA_PATTERN, self.html).group(1)
    except Exception:
        captcha_key = recaptcha.detect_key()
    else:
        self.logDebug("ReCaptcha key: %s" % captcha_key)

    if captcha_key:
        inputs['recaptcha_response_field'], inputs['recaptcha_challenge_field'] = \
            recaptcha.challenge(captcha_key)
        return

    solvemedia = SolveMedia(self)
    try:
        captcha_key = re.search(self.SOLVEMEDIA_PATTERN, self.html).group(1)
    except Exception:
        captcha_key = solvemedia.detect_key()
    else:
        self.logDebug("SolveMedia key: %s" % captcha_key)

    if captcha_key:
        inputs['adcopy_response'], inputs['adcopy_challenge'] = \
            solvemedia.challenge(captcha_key)
def getInfo(urls):
    """Yield lists of (name, size, status, url) tuples for batches of 80 urls."""
    for chunk in chunks(urls, 80):
        result = []
        api = getAPIData(chunk)
        for data in api.itervalues():
            state = data[0]
            if state == "online":
                # status 2 = online
                result.append((html_unescape(data[2]), data[1], 2, data[4]))
            elif state == "offline":
                # status 1 = offline
                result.append((data[4], 0, 1, data[4]))
        yield result
def download(self, url, *args, **kwargs):
    """Normalize *url* (unescape, make absolute) and delegate to the base class."""
    if not url or not isinstance(url, basestring):
        return

    self.correctCaptcha()

    url = html_unescape(url.decode('unicode-escape').strip())  #@TODO: Move to Hoster in 0.4.10

    # Relative link: resolve it against the pyfile's own URL.
    if not urlparse.urlparse(url).scheme:
        url_p = urlparse.urlparse(self.pyfile.url)
        baseurl = "%s://%s" % (url_p.scheme, url_p.netloc)
        url = urlparse.urljoin(baseurl, url)

    return super(SimpleHoster, self).download(url, *args, **kwargs)
def parseFileInfo(self, url='', html=''):
    """Return (name, size, status, fileid) parsed from *html* (or self.html).

    Status codes: 1 = offline, 2 = online, 3 = unknown.
    """
    if not html and hasattr(self, "html"):
        html = self.html

    name, size, status, found, fileid = url, 0, 3, None, None

    if re.search(self.FILE_OFFLINE_PATTERN, html):
        status = 1  # File offline
    else:
        found = re.search(self.FILE_INFO_PATTERN, html)
        if found:
            name, fileid = html_unescape(found.group('N')), found.group('ID')
            size = parseFileSize(found.group('S'))
            status = 2

    return name, size, status, fileid
def parseFileInfo(self, url='', html=''):
    """Extract file metadata; returns (name, size, status, fileid).

    Status codes: 1 = offline, 2 = online, 3 = unknown.
    """
    if not html and hasattr(self, "html"):
        html = self.html

    name = url
    size = 0
    status = 3
    fileid = None

    if re.search(self.FILE_OFFLINE_PATTERN, html):
        # File offline
        status = 1
    else:
        m = re.search(self.FILE_INFO_PATTERN, html)
        if m:
            name = html_unescape(m.group('N'))
            fileid = m.group('ID')
            size = parseFileSize(m.group('S'))
            status = 2

    return name, size, status, fileid
def process(self, pyfile):
    """Main entry: resolve hoster name/patterns, then dispatch download mode."""
    if not hasattr(self, "HOSTER_NAME"):
        self.HOSTER_NAME = re.search(self.__pattern__, self.pyfile.url).group(1)
    if not hasattr(self, "DIRECT_LINK_PATTERN"):
        self.DIRECT_LINK_PATTERN = (r'(http://(\w+\.%s|\d+\.\d+\.\d+\.\d+)'
                                    r'(:\d+/d/|/files/\d+/\w+/)[^"\'<]+)' % self.HOSTER_NAME)

    self.captcha = self.errmsg = None
    self.passwords = self.getPassword().splitlines()

    if not re.match(self.__pattern__, self.pyfile.url):
        if self.premium:
            self.handleOverriden()
        else:
            self.fail("Only premium users can download from other hosters with %s" % self.HOSTER_NAME)
    else:
        try:
            self.html = self.load(pyfile.url, cookies=False, decode=True)
            self.file_info = self.getFileInfo()
        except PluginParseError:
            self.file_info = None

        # Probe for a direct-download redirect without following it.
        self.req.http.lastURL = self.pyfile.url
        self.req.http.c.setopt(FOLLOWLOCATION, 0)
        self.html = self.load(self.pyfile.url, cookies=True, decode=True)
        self.header = self.req.http.header
        self.req.http.c.setopt(FOLLOWLOCATION, 1)

        self.location = None
        found = re.search(r"Location\s*:\s*(.*)", self.header, re.I)
        if found and re.match(self.DIRECT_LINK_PATTERN, found.group(1)):
            self.location = found.group(1).strip()

        if not self.file_info:
            source = self.location if self.location else pyfile.url
            pyfile.name = html_unescape(unquote(urlparse(source).path.split("/")[-1]))

        if self.location:
            self.startDownload(self.location)
        elif self.premium:
            self.handlePremium()
        else:
            self.handleFree()
def getPackageNameAndFolder(self):
    """Return (name, folder) from TITLE_PATTERN, else from the package defaults."""
    if hasattr(self, 'TITLE_PATTERN'):
        m = re.search(self.TITLE_PATTERN, self.html)
        if m:
            title = html_unescape(m.group('title').strip())
            self.logDebug("Found name [%s] and folder [%s] in package info" % (title, title))
            # Name and folder are deliberately identical.
            return title, title

    name = self.pyfile.package().name
    folder = self.pyfile.package().folder
    self.logDebug("Package info not found, defaulting to pyfile name [%s] and folder [%s]" % (name, folder))
    return name, folder
def process(self, pyfile):
    """Resolve hoster-specific patterns, then pick direct/premium/free handling."""
    if not hasattr(self, "HOSTER_NAME"):
        self.HOSTER_NAME = re.search(self.__pattern__, self.pyfile.url).group(1)
    if not hasattr(self, "DIRECT_LINK_PATTERN"):
        self.DIRECT_LINK_PATTERN = (
            r'(http://(\w+\.%s|\d+\.\d+\.\d+\.\d+)(:\d+/d/|/files/\d+/\w+/)[^"\'<]+)'
            % self.HOSTER_NAME)

    self.captcha = self.errmsg = None
    self.passwords = self.getPassword().splitlines()

    if not re.match(self.__pattern__, self.pyfile.url):
        if self.premium:
            self.handleOverriden()
        else:
            self.fail("Only premium users can download from other hosters with %s" % self.HOSTER_NAME)
        return

    try:
        self.html = self.load(pyfile.url, cookies=False, decode=True)
        self.file_info = self.getFileInfo()
    except PluginParseError:
        self.file_info = None

    # Fetch once more without following redirects so the Location header
    # can be inspected for a direct link.
    self.req.http.lastURL = self.pyfile.url
    self.req.http.c.setopt(FOLLOWLOCATION, 0)
    self.html = self.load(self.pyfile.url, cookies=True, decode=True)
    self.header = self.req.http.header
    self.req.http.c.setopt(FOLLOWLOCATION, 1)

    self.location = None
    found = re.search("Location\s*:\s*(.*)", self.header, re.I)
    if found and re.match(self.DIRECT_LINK_PATTERN, found.group(1)):
        self.location = found.group(1).strip()

    if not self.file_info:
        pyfile.name = html_unescape(
            unquote(urlparse(self.location if self.location else pyfile.url).path.split("/")[-1]))

    if self.location:
        self.startDownload(self.location)
    elif self.premium:
        self.handlePremium()
    else:
        self.handleFree()
def handleFree(self, pyfile):
    """Resolve the free download link, following at most one indirection."""
    for _attempt in xrange(2):
        m = re.search(self.LINK_FREE_PATTERN, self.html)
        if m is None:
            self.error(_("Free download link not found"))
        else:
            link = html_unescape(m.group(1).decode('unicode-escape'))
            # Make scheme-less links absolute against the Google Docs host.
            if not urlparse.urlparse(link).scheme:
                link = urlparse.urljoin("https://docs.google.com/", link)

            direct_link = self.directLink(link, False)
            if direct_link:
                self.link = direct_link
                break
            else:
                # Not yet direct: load the intermediate page and retry.
                self.html = self.load(link, decode=True)
def downloadFile(self, pyfile):
    """Follow up to 5 redirects (absolute or relative), name the file, download."""
    url = pyfile.url

    for _ in range(5):
        header = self.load(url, just_header=True)

        # self.load does not raise a BadHeader on 404 responses, do it here
        if 'code' in header and header['code'] == 404:
            raise BadHeader(404)

        if 'location' not in header:
            break

        self.logDebug("Location: " + header['location'])
        base = search(r'https?://[^/]+', url).group(0)
        location = header['location']

        if location.startswith("http"):
            # Absolute redirect.
            url = unquote(location)
        elif location.startswith("/"):
            # Host-relative redirect.
            url = base + unquote(location)
        else:
            # Path-relative redirect.
            url = '%s/%s' % (base, unquote(location))

    name = html_unescape(unquote(urlparse(url).path.split("/")[-1]))

    if 'content-disposition' in header:
        self.logDebug("Content-Disposition: " + header['content-disposition'])

        m = search("filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)",
                   header['content-disposition'])
        if m:
            disp = m.groupdict()
            self.logDebug(disp)
            if not disp['enc']:
                disp['enc'] = 'utf-8'
            name = remove_chars(disp['name'], "\"';").strip()
            name = unicode(unquote(name), disp['enc'])

    if not name:
        name = url

    pyfile.name = name
    self.logDebug("Filename: %s" % pyfile.name)
    self.download(url, disposition=True)
def process(self, pyfile):
    """Check file status via the uploaded.net API (HTML fallback) and download."""
    # doesn't work anymore
    self.req.cj.setCookie("uploaded.net", "lang", "en")
    self.load("http://uploaded.net/language/en")

    api = getAPIData([pyfile.url])

    # TODO: fallback to parse from site, because api sometimes delivers wrong status codes
    if not api:
        self.logWarning("No response for API call")

        self.html = unicode(self.load(pyfile.url, decode=False), 'iso-8859-1')
        name, size, status, self.fileID = parseFileInfo(self)
        self.logDebug(name, size, status, self.fileID)

        if status == 1:
            self.offline()
        elif status == 2:
            pyfile.name, pyfile.size = name, size
        else:
            self.fail('Parse error - file info')
    elif api == 'Access denied':
        self.fail(_("API key invalid"))
    else:
        if self.fileID not in api:
            self.offline()

        self.data = api[self.fileID]
        if self.data[0] != "online":
            self.offline()

        pyfile.name = html_unescape(self.data[2])

    # self.pyfile.name = self.get_file_name()

    if self.premium:
        self.handlePremium()
    else:
        self.handleFree()
def handleShow(self, url):
    """Collect episode-page links for a show and queue them as one package."""
    src = self.getSJSrc(url)
    soup = BeautifulSoup(src)
    packageName = self.pyfile.package().name

    rename = self.config.get("changeNameSJ") == "Show"

    if rename:
        found = html_unescape(soup.find("h2").find("a").string.split(" –")[0])
        if found:
            packageName = found

    nav = soup.find("div", attrs={"id": "scb"})

    package_links = []
    for a in nav.findAll("a"):
        if rename:
            package_links.append(a["href"])
        else:
            # Tag the link so downstream handlers keep the existing name.
            package_links.append(a["href"] + "#hasName")

    if rename:
        self.packages.append((packageName, package_links, packageName))
    else:
        self.packages.append((self.pyfile.package().name, package_links,
                              self.pyfile.package().name))
def process(self, pyfile):
    """Resolve uploaded.net file status (API first, HTML fallback), then download."""
    # doesn't work anymore
    self.req.cj.setCookie("uploaded.net", "lang", "en")
    self.load("http://uploaded.net/language/en")

    api = getAPIData([pyfile.url])

    # TODO: fallback to parse from site, because api sometimes delivers wrong status codes
    if not api:
        self.logWarning("No response for API call")

        self.html = unicode(self.load(pyfile.url, decode=False), 'iso-8859-1')
        name, size, status, self.fileID = parseFileInfo(self)
        self.logDebug(name, size, status, self.fileID)

        if status == 1:
            self.offline()
        elif status == 2:
            pyfile.name, pyfile.size = name, size
        else:
            self.fail('Parse error - file info')

    elif api == 'Access denied':
        self.fail(_("API key invalid"))

    else:
        if self.fileID not in api:
            self.offline()

        self.data = api[self.fileID]
        if self.data[0] != "online":
            self.offline()

        pyfile.name = html_unescape(self.data[2])

    # self.pyfile.name = self.get_file_name()

    if self.premium:
        self.handlePremium()
    else:
        self.handleFree()
def process(self, pyfile):
    """Pick the best-matching YouTube stream per user config and download it."""
    html = self.load(pyfile.url, decode=True)

    if "watch-player-unavailable" in html:
        self.offline()

    if "We have been receiving a large volume of requests from your network." in html:
        self.tempOffline()

    #get config
    use3d = self.getConf("3d")
    if use3d:
        quality = {"sd": 82, "hd": 84, "fullhd": 85, "240p": 83, "360p": 82,
                   "480p": 82, "720p": 84, "1080p": 85, "3072p": 85}
    else:
        quality = {"sd": 18, "hd": 22, "fullhd": 37, "240p": 5, "360p": 18,
                   "480p": 35, "720p": 22, "1080p": 37, "3072p": 38}

    desired_fmt = self.getConf("fmt")
    if desired_fmt and desired_fmt not in self.formats:
        self.logWarning("FMT %d unknown - using default." % desired_fmt)
        desired_fmt = 0
    if not desired_fmt:
        desired_fmt = quality.get(self.getConf("quality"), 18)

    #parse available streams
    streams = re.search(r'"url_encoded_fmt_stream_map": "(.*?)",', html).group(1)
    streams = [x.split('\u0026') for x in streams.split(',')]
    streams = [dict((y.split('=', 1)) for y in x) for x in streams]
    streams = [(int(x['itag']), "%s&signature=%s" % (unquote(x['url']), x['sig']))
               for x in streams]
    #self.logDebug("Found links: %s" % streams)
    self.logDebug("AVAILABLE STREAMS: %s" % [x[0] for x in streams])

    #build dictionary of supported itags (3D/2D)
    allowed = lambda x: self.getConfig(self.formats[x][0])
    streams = [x for x in streams if x[0] in self.formats and allowed(x[0])]
    if not streams:
        self.fail("No available stream meets your preferences")
    fmt_dict = dict([x for x in streams if self.formats[x[0]][4] == use3d] or streams)

    self.logDebug("DESIRED STREAM: ITAG:%d (%s) %sfound, %sallowed" %
                  (desired_fmt, "%s %dx%d Q:%d 3D:%s" % self.formats[desired_fmt],
                   "" if desired_fmt in fmt_dict else "NOT ",
                   "" if allowed(desired_fmt) else "NOT "))

    #return fmt nearest to quality index
    if desired_fmt in fmt_dict and allowed(desired_fmt):
        fmt = desired_fmt
    else:
        sel = lambda x: self.formats[x][3]  #select quality index
        comp = lambda x, y: abs(sel(x) - sel(y))

        self.logDebug("Choosing nearest fmt: %s" %
                      [(x, allowed(x), comp(x, desired_fmt)) for x in fmt_dict.keys()])
        fmt = reduce(lambda x, y: x if comp(x, desired_fmt) <= comp(y, desired_fmt)
                     and sel(x) > sel(y) else y, fmt_dict.keys())

    self.logDebug("Chosen fmt: %s" % fmt)
    url = fmt_dict[fmt]
    self.logDebug("URL: %s" % url)

    #set file name
    file_suffix = self.formats[fmt][0] if fmt in self.formats else ".flv"
    file_name_pattern = '<meta name="title" content="(.+?)">'
    name = re.search(file_name_pattern, html).group(1).replace("/", "") + file_suffix
    pyfile.name = html_unescape(name)

    self.download(url)
def handleSeason(self, url):
    """Parse a season page into per-episode hoster links and queue packages."""
    src = self.getSJSrc(url)
    soup = BeautifulSoup(src)
    post = soup.find("div", attrs={"class": "post-content"})
    ps = post.findAll("p")

    seasonName = html_unescape(
        soup.find("a", attrs={"rel": "bookmark"}).string).replace("–", "-")

    # Group paragraphs: a "Sprache/Format" paragraph starts a new group,
    # each following "Download:" paragraph adds episodes to it.
    groups = {}
    gid = -1
    for p in ps:
        if re.search("<strong>Sprache|<strong>Format", str(p)):
            var = p.findAll("strong")
            opts = {"Sprache": "", "Format": ""}
            for v in var:
                n = html_unescape(v.string).strip()
                n = re.sub(r"^([:]?)(.*?)([:]?)$", r"\2", n)
                if n.strip() not in opts:
                    continue
                val = v.nextSibling
                if not val:
                    continue
                val = val.replace("|", "").strip()
                val = re.sub(r"^([:]?)(.*?)([:]?)$", r"\2", val)
                opts[n.strip()] = val.strip()
            gid += 1
            groups[gid] = {}
            groups[gid]["ep"] = {}
            groups[gid]["opts"] = opts
        elif re.search("<strong>Download:", str(p)):
            parts = str(p).split("<br />")
            if re.search("<strong>", parts[0]):
                ename = (re.search("<strong>(.*?)</strong>", parts[0])
                         .group(1).strip().decode("utf-8").replace("–", "-"))
                groups[gid]["ep"][ename] = {}
                parts.remove(parts[0])
                for part in parts:
                    hostername = re.search(r" \| ([-a-zA-Z0-9]+\.\w+)", part)
                    if hostername:
                        hostername = hostername.group(1)
                        groups[gid]["ep"][ename][hostername] = []
                        links = re.findall('href="(.*?)"', part)
                        for link in links:
                            groups[gid]["ep"][ename][hostername].append(link + "#hasName")

    links = []
    for g in groups.values():
        for ename in g["ep"]:
            links.extend(self.getpreferred(g["ep"][ename]))
            if self.config.get("changeNameSJ") == "Episode":
                self.packages.append((ename, links, ename))
                links = []
        package = "%s (%s, %s)" % (seasonName, g["opts"]["Format"], g["opts"]["Sprache"])
        if self.config.get("changeNameSJ") == "Format":
            self.packages.append((package, links, package))
            links = []

    if (self.config.get("changeNameSJ") == "Packagename") or re.search("#hasName", url):
        self.packages.append((self.pyfile.package().name, links, self.pyfile.package().name))
    elif (self.config.get("changeNameSJ") == "Season") or not re.search("#hasName", url):
        self.packages.append((seasonName, links, seasonName))
def fixurl(url):
    """Unescape and unquote *url*, trimming whitespace and trailing slashes."""
    cleaned = url.decode('unicode-escape')
    cleaned = html_unescape(urllib.unquote(cleaned))
    return cleaned.strip().rstrip('/')
def process(self, pyfile):
    """Select a YouTube stream per config, download it, optionally trim with ffmpeg."""
    html = self.load(pyfile.url, decode=True)

    if re.search(r'<div id="player-unavailable" class="\s*player-width player-height\s*">', html):
        self.offline()

    if "We have been receiving a large volume of requests from your network." in html:
        self.tempOffline()

    #get config
    use3d = self.getConfig("3d")
    if use3d:
        quality = {"sd": 82, "hd": 84, "fullhd": 85, "240p": 83, "360p": 82,
                   "480p": 82, "720p": 84, "1080p": 85, "3072p": 85}
    else:
        quality = {"sd": 18, "hd": 22, "fullhd": 37, "240p": 5, "360p": 18,
                   "480p": 35, "720p": 22, "1080p": 37, "3072p": 38}

    desired_fmt = self.getConfig("fmt")
    if desired_fmt and desired_fmt not in self.formats:
        self.logWarning("FMT %d unknown - using default." % desired_fmt)
        desired_fmt = 0
    if not desired_fmt:
        desired_fmt = quality.get(self.getConfig("quality"), 18)

    #parse available streams
    streams = re.search(r'"url_encoded_fmt_stream_map": "(.*?)",', html).group(1)
    streams = [x.split('\u0026') for x in streams.split(',')]
    streams = [dict((y.split('=', 1)) for y in x) for x in streams]
    streams = [(int(x['itag']), unquote(x['url'])) for x in streams]
    #self.logDebug("Found links: %s" % streams)
    self.logDebug("AVAILABLE STREAMS: %s" % [x[0] for x in streams])

    #build dictionary of supported itags (3D/2D)
    allowed = lambda x: self.getConfig(self.formats[x][0])
    streams = [x for x in streams if x[0] in self.formats and allowed(x[0])]
    if not streams:
        self.fail("No available stream meets your preferences")
    fmt_dict = dict([x for x in streams if self.formats[x[0]][4] == use3d] or streams)

    self.logDebug("DESIRED STREAM: ITAG:%d (%s) %sfound, %sallowed" %
                  (desired_fmt, "%s %dx%d Q:%d 3D:%s" % self.formats[desired_fmt],
                   "" if desired_fmt in fmt_dict else "NOT ",
                   "" if allowed(desired_fmt) else "NOT "))

    #return fmt nearest to quality index
    if desired_fmt in fmt_dict and allowed(desired_fmt):
        fmt = desired_fmt
    else:
        sel = lambda x: self.formats[x][3]  # select quality index
        comp = lambda x, y: abs(sel(x) - sel(y))

        self.logDebug("Choosing nearest fmt: %s" %
                      [(x, allowed(x), comp(x, desired_fmt)) for x in fmt_dict.keys()])
        fmt = reduce(lambda x, y: x if comp(x, desired_fmt) <= comp(y, desired_fmt)
                     and sel(x) > sel(y) else y, fmt_dict.keys())

    self.logDebug("Chosen fmt: %s" % fmt)
    url = fmt_dict[fmt]
    self.logDebug("URL: %s" % url)

    #set file name
    file_suffix = self.formats[fmt][0] if fmt in self.formats else ".flv"
    file_name_pattern = '<meta name="title" content="(.+?)">'
    name = re.search(file_name_pattern, html).group(1).replace("/", "")

    # Cleaning invalid characters from the file name
    name = name.encode('ascii', 'replace')

    pyfile.name = html_unescape(name)

    time = re.search(r"t=((\d+)m)?(\d+)s", pyfile.url)
    ffmpeg = which("ffmpeg")
    if ffmpeg and time:
        m, s = time.groups()[1:]
        if not m:
            m = "0"
        pyfile.name += " (starting at %s:%s)" % (m, s)
    pyfile.name += file_suffix

    filename = self.download(url)

    # Optionally cut the video so it starts at the requested timestamp.
    if ffmpeg and time:
        inputfile = filename + "_"
        os.rename(filename, inputfile)

        subprocess.call([
            ffmpeg,
            "-ss", "00:%s:%s" % (m, s),
            "-i", inputfile,
            "-vcodec", "copy",
            "-acodec", "copy",
            filename])
        os.remove(inputfile)
def handle_captcha(self, inputs):
    """Try each supported captcha type in order and fill *inputs* in place."""
    m = re.search(self.CAPTCHA_PATTERN, self.html)
    if m:
        inputs['code'] = self.captcha.decrypt(m.group(1))
        return

    m = re.search(self.CAPTCHA_BLOCK_PATTERN, self.html, re.S)
    if m:
        captcha_div = m.group(1)
        # Digits are <span>s ordered by their padding-left offset.
        numerals = re.findall(r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>',
                              html_unescape(captcha_div))
        self.log_debug(captcha_div)
        ordered = sorted(numerals, key=lambda pair: int(pair[0]))
        inputs['code'] = "".join(pair[1] for pair in ordered)
        self.log_debug("Captcha code: %s" % inputs['code'], numerals)
        return

    recaptcha = ReCaptcha(self)
    try:
        captcha_key = re.search(self.RECAPTCHA_PATTERN, self.html).group(1)
    except Exception:
        captcha_key = recaptcha.detect_key()
    else:
        self.log_debug("ReCaptcha key: %s" % captcha_key)

    if captcha_key:
        inputs['recaptcha_response_field'], inputs['recaptcha_challenge_field'] = \
            recaptcha.challenge(captcha_key)
        return

    solvemedia = SolveMedia(self)
    try:
        captcha_key = re.search(self.SOLVEMEDIA_PATTERN, self.html).group(1)
    except Exception:
        captcha_key = solvemedia.detect_key()
    else:
        self.log_debug("SolveMedia key: %s" % captcha_key)

    if captcha_key:
        inputs['adcopy_response'], inputs['adcopy_challenge'] = \
            solvemedia.challenge(captcha_key)
def process(self, pyfile):
    """Resolve a YouTube watch page to a direct stream URL and download it.

    Chooses the itag ("fmt") nearest the configured quality, sets
    ``pyfile.name`` from the page title, and starts the download.
    """
    html = self.load(pyfile.url, decode=True)

    if "watch-player-unavailable" in html:
        self.offline()
    if "We have been receiving a large volume of requests from your network." in html:
        self.tempOffline()

    file_name_pattern = '<meta name="title" content="(.+?)">'

    # Map the quality setting to a default itag; anything else falls back to SD (18).
    desired_fmt = {"sd": 18, "hd": 22, "fullhd": 37}.get(self.getConf("quality"), 18)
    # An explicit "fmt" config value overrides the quality-derived default.
    if self.getConfig("fmt"):
        desired_fmt = self.getConf("fmt")

    flashvars = re.search(r'flashvars=\\"(.*?)\\"', html)
    if flashvars is None:
        # Page layout changed or video is restricted: fail explicitly
        # instead of crashing with AttributeError on .group().
        self.fail("Player flashvars not found")
    flashvars = unquote(flashvars.group(1))

    fmt_dict = {}
    for url, fmt in re.findall(r'url=(.*?)%3B.*?itag=(\d+)', flashvars):
        fmt_dict[int(fmt)] = unquote(url)
    self.logDebug("Found links: %s" % fmt_dict)

    # Drop unsupported itags. Iterate over a snapshot of the keys:
    # deleting from a dict while iterating its live key view raises
    # RuntimeError on Python 3 and is fragile even on Python 2.
    for fmt in list(fmt_dict.keys()):
        if fmt not in self.formats:
            self.logDebug("FMT not supported: %s" % fmt)
            del fmt_dict[fmt]

    if not fmt_dict:
        # reduce() below raises TypeError on an empty sequence.
        self.fail("No supported video format found")

    allowed = lambda x: self.getConfig(self.formats[x][0])
    sel = lambda x: self.formats[x][3]          #select quality index
    comp = lambda x, y: abs(sel(x) - sel(y))    #distance to the desired quality

    #return fmt nearest to quali index
    fmt = reduce(lambda x, y: x if comp(x, desired_fmt) <= comp(y, desired_fmt)
                 and sel(x) > sel(y) and allowed(x) else y, fmt_dict.keys())
    self.logDebug("Choose fmt: %s" % fmt)

    # formats[fmt][0] doubles as the file suffix for this plugin's table.
    file_suffix = self.formats[fmt][0] if fmt in self.formats else ".flv"
    name = re.search(file_name_pattern, html).group(1).replace("/", "") + file_suffix
    pyfile.name = html_unescape(name)

    self.download(fmt_dict[fmt])
def process(self, pyfile):
    """Resolve a YouTube watch page to a direct stream URL and download it.

    Selects the stream itag ("fmt") closest to the configured quality
    (honoring the 3D preference), names the file from the page title, and
    optionally trims a "t=XmYs" start offset with ffmpeg after download.
    """
    html = self.load(pyfile.url, decode=True)

    # Hard failure pages.
    if '<h1 id="unavailable-message" class="message">' in html:
        self.offline()
    if "We have been receiving a large volume of requests from your network." in html:
        self.tempOffline()

    #get config
    use3d = self.getConf("3d")
    # quality-name -> itag tables, one for 3D streams, one for 2D.
    if use3d:
        quality = {"sd": 82, "hd": 84, "fullhd": 85, "240p": 83, "360p": 82,
                   "480p": 82, "720p": 84, "1080p": 85, "3072p": 85}
    else:
        quality = {"sd": 18, "hd": 22, "fullhd": 37, "240p": 5, "360p": 18,
                   "480p": 35, "720p": 22, "1080p": 37, "3072p": 38}
    desired_fmt = self.getConf("fmt")
    # An unknown explicit fmt is discarded with a warning; 0/falsy means
    # "derive from the quality setting" (defaulting to itag 18, SD).
    if desired_fmt and desired_fmt not in self.formats:
        self.logWarning("FMT %d unknown - using default." % desired_fmt)
        desired_fmt = 0
    if not desired_fmt:
        desired_fmt = quality.get(self.getConf("quality"), 18)

    #parse available streams
    # NOTE(review): '\u0026' here is a plain 6-char string on Python 2 —
    # it matches the JSON-escaped "&" separator inside the player config.
    streams = re.search(r'"url_encoded_fmt_stream_map": "(.*?)",', html).group(1)
    streams = [x.split('\u0026') for x in streams.split(',')]
    streams = [dict((y.split('=', 1)) for y in x) for x in streams]
    streams = [(int(x['itag']), "%s&signature=%s" % (unquote(x['url']), x['sig'])) for x in streams]
    #self.logDebug("Found links: %s" % streams)
    self.logDebug("AVAILABLE STREAMS: %s" % [x[0] for x in streams])

    #build dictionary of supported itags (3D/2D)
    allowed = lambda x: self.getConfig(self.formats[x][0])
    streams = [x for x in streams if x[0] in self.formats and allowed(x[0])]
    if not streams:
        self.fail("No available stream meets your preferences")
    # Prefer streams whose 3D flag (formats[itag][4]) matches the config;
    # fall back to all allowed streams when none match.
    fmt_dict = dict([x for x in streams if self.formats[x[0]][4] == use3d] or streams)

    self.logDebug("DESIRED STREAM: ITAG:%d (%s) %sfound, %sallowed" %
                  (desired_fmt, "%s %dx%d Q:%d 3D:%s" % self.formats[desired_fmt],
                   "" if desired_fmt in fmt_dict else "NOT ",
                   "" if allowed(desired_fmt) else "NOT "))

    #return fmt nearest to quality index
    if desired_fmt in fmt_dict and allowed(desired_fmt):
        fmt = desired_fmt
    else:
        sel = lambda x: self.formats[x][3]  #select quality index
        comp = lambda x, y: abs(sel(x) - sel(y))  #distance to desired quality

        self.logDebug("Choosing nearest fmt: %s" % [(x, allowed(x), comp(x, desired_fmt)) for x in fmt_dict.keys()])
        fmt = reduce(lambda x, y: x if comp(x, desired_fmt) <= comp(y, desired_fmt)
                     and sel(x) > sel(y) else y, fmt_dict.keys())

    self.logDebug("Chosen fmt: %s" % fmt)
    url = fmt_dict[fmt]
    self.logDebug("URL: %s" % url)

    #set file name
    # formats[fmt][0] doubles as the file suffix in this plugin's table.
    file_suffix = self.formats[fmt][0] if fmt in self.formats else ".flv"
    file_name_pattern = '<meta name="title" content="(.+?)">'
    name = re.search(file_name_pattern, html).group(1).replace("/", "")
    pyfile.name = html_unescape(name)

    # Optional "t=XmYs" start offset in the URL, honored only when ffmpeg
    # is installed (download the whole file, then cut the head off).
    time = re.search(r"t=((\d+)m)?(\d+)s", pyfile.url)
    ffmpeg = which("ffmpeg")
    if ffmpeg and time:
        m, s = time.groups()[1:]
        if not m:
            m = "0"
        pyfile.name += " (starting at %s:%s)" % (m, s)
    pyfile.name += file_suffix

    filename = self.download(url)

    if ffmpeg and time:
        # Rename the download aside, then remux from the offset back onto
        # the original name without re-encoding (stream copy).
        inputfile = filename + "_"
        os.rename(filename, inputfile)
        subprocess.call([
            ffmpeg,
            "-ss", "00:%s:%s" % (m, s),
            "-i", inputfile,
            "-vcodec", "copy",
            "-acodec", "copy",
            filename])
        os.remove(inputfile)
def process(self, pyfile):
    """Resolve a YouTube watch page to a direct stream URL and download it.

    Picks the itag ("fmt") nearest the configured quality, derives the
    file name from the page title, and starts the download.
    """
    html = self.load(pyfile.url, decode=True)

    if "watch-player-unavailable" in html:
        self.offline()
    if "We have been receiving a large volume of requests from your network." in html:
        self.tempOffline()

    file_name_pattern = '<meta name="title" content="(.+?)">'

    # quality setting -> default itag; unknown values fall back to SD (18).
    desired_fmt = {"sd": 18, "hd": 22, "fullhd": 37}.get(self.getConf("quality"), 18)
    # A non-zero "fmt" config value wins over the quality-derived default.
    if self.getConfig("fmt"):
        desired_fmt = self.getConf("fmt")

    flashvars = re.search(r'flashvars=\\"(.*?)\\"', html)
    if flashvars is None:
        # Restricted video or changed page layout — fail with a clear
        # message rather than an AttributeError on .group().
        self.fail("Player flashvars not found")
    flashvars = unquote(flashvars.group(1))

    fmt_dict = {}
    for url, fmt in re.findall(r'url=(.*?)%3B.*?itag=(\d+)', flashvars):
        fmt_dict[int(fmt)] = unquote(url)
    self.logDebug("Found links: %s" % fmt_dict)

    # Remove itags we have no format entry for. Snapshot the keys first:
    # deleting during iteration of the live key view raises RuntimeError
    # on Python 3 and is fragile even on Python 2.
    for fmt in list(fmt_dict.keys()):
        if fmt not in self.formats:
            self.logDebug("FMT not supported: %s" % fmt)
            del fmt_dict[fmt]

    if not fmt_dict:
        # reduce() below raises TypeError on an empty sequence.
        self.fail("No supported video format found")

    allowed = lambda x: self.getConfig(self.formats[x][0])
    sel = lambda x: self.formats[x][3]          #select quality index
    comp = lambda x, y: abs(sel(x) - sel(y))    #distance to the desired quality

    #return fmt nearest to quali index
    fmt = reduce(lambda x, y: x if comp(x, desired_fmt) <= comp(y, desired_fmt)
                 and sel(x) > sel(y) and allowed(x) else y, fmt_dict.keys())
    self.logDebug("Choose fmt: %s" % fmt)

    # formats[fmt][0] doubles as the file suffix in this plugin's table.
    file_suffix = self.formats[fmt][0] if fmt in self.formats else ".flv"
    name = re.search(file_name_pattern, html).group(1).replace("/", "") + file_suffix
    pyfile.name = html_unescape(name)

    self.download(fmt_dict[fmt])