def get_file_info(self):
    """Parse name/size/status from the loaded page and update ``self.pyfile``.

    Triggers ``temp_offline()`` when TEMP_OFFLINE_PATTERN matches the page,
    ``offline()`` on status 1, and a parse error for any status other than 2.

    :return: ``self.file_info`` (presumably populated as a side effect of
             ``parseFileInfo`` — confirm against that helper).
    """
    self.log_debug("URL: {0}".format(self.pyfile.url))

    if hasattr(self, "TEMP_OFFLINE_PATTERN") and re.search(
            self.TEMP_OFFLINE_PATTERN, self.html):
        self.temp_offline()

    name, size, status = parseFileInfo(self)[:3]

    if status == 1:
        self.offline()
    elif status != 2:
        self.log_debug(self.file_info)
        self.parse_error(_('File info'))

    # Fall back to the escaped last URL path segment when no name was parsed.
    if not name:
        name = webpurge.escape(urlparse(self.pyfile.url).path.split("/")[-1])
    self.pyfile.name = name

    if size:
        self.pyfile.size = size
    else:
        self.log_error(_("File size not parsed"))

    self.log_debug("FILE NAME: {0} FILE SIZE: {1}".format(
        self.pyfile.name, self.pyfile.size))
    return self.file_info
def get_file_info(self):
    """Extract file name, size and status from ``self.html`` into ``self.pyfile``.

    Side effects: may call ``temp_offline()`` / ``offline()`` / ``parse_error()``
    depending on the parsed status.  Returns ``self.file_info`` (presumably
    filled in by ``parseFileInfo`` — confirm against that helper).
    """
    self.log_debug("URL: {0}".format(self.pyfile.url))

    temp_offline_hit = hasattr(self, "TEMP_OFFLINE_PATTERN") and re.search(
        self.TEMP_OFFLINE_PATTERN, self.html)
    if temp_offline_hit:
        self.temp_offline()

    name, size, status = parseFileInfo(self)[:3]

    if status == 1:
        self.offline()
    elif status != 2:
        self.log_debug(self.file_info)
        self.parse_error(_('File info'))

    # Parsed name wins; otherwise derive one from the URL's last path segment.
    self.pyfile.name = name if name else webpurge.escape(
        urlparse(self.pyfile.url).path.split("/")[-1])

    if not size:
        self.log_error(_("File size not parsed"))
    else:
        self.pyfile.size = size

    self.log_debug("FILE NAME: {0} FILE SIZE: {1}".format(
        self.pyfile.name, self.pyfile.size))
    return self.file_info
def process(self, pyfile):
    """Main plugin hook: normalise the URL, parse file info and dispatch to
    the direct / premium / free download handler.

    URLs not matching ``__pattern__`` belong to a foreign hoster and are only
    accepted for premium accounts (multi-hoster override).
    """
    self.prepare()
    pyfile.url = replace_patterns(pyfile.url, self.FILE_URL_REPLACEMENTS)

    if not re.match(self.__pattern__, pyfile.url):
        # Foreign-hoster URL: premium-only override, otherwise abort.
        if self.premium:
            self.handle_overriden()
        else:
            self.fail(_("Only premium users can download from other hosters with {0}").format(
                self.HOSTER_NAME))
    else:
        try:
            # Due to a 0.4.9 core bug self.load would use cookies even if
            # cookies=False. Workaround using get_url to avoid cookies.
            # Can be reverted in 0.5 as the cookies bug has been fixed.
            self.html = get_url(pyfile.url, decode=True)
            self.file_info = self.get_file_info()
        except PluginParseError:
            self.file_info = None

        self.location = self.get_direct_download_link()

        if not self.file_info:
            # Parsing failed: derive a name from the download/page URL path.
            source_url = self.location if self.location else pyfile.url
            pyfile.name = webpurge.escape(
                unquote(urlparse(source_url).path.split("/")[-1]))

        if self.location:
            self.start_download(self.location)
        elif self.premium:
            self.handle_premium()
        else:
            self.handle_free()
def process(self, pyfile):
    """Entry point for a queued download.

    Prepares the plugin, rewrites the URL via FILE_URL_REPLACEMENTS, then
    either rejects/overrides foreign-hoster URLs or parses the page and
    hands off to the appropriate download path.
    """
    self.prepare()
    pyfile.url = replace_patterns(pyfile.url, self.FILE_URL_REPLACEMENTS)

    if not re.match(self.__pattern__, pyfile.url):
        # URL is for another hoster: only premium accounts may route it here.
        if self.premium:
            self.handle_overriden()
        else:
            self.fail(_("Only premium users can download from other hosters with {0}").format(
                self.HOSTER_NAME))
        return

    try:
        # Due to a 0.4.9 core bug self.load would use cookies even if
        # cookies=False. Workaround using get_url to avoid cookies.
        # Can be reverted in 0.5 as the cookies bug has been fixed.
        self.html = get_url(pyfile.url, decode=True)
        self.file_info = self.get_file_info()
    except PluginParseError:
        self.file_info = None

    self.location = self.get_direct_download_link()

    if not self.file_info:
        # No parsed info: fall back to the URL path's last segment.
        pyfile.name = webpurge.escape(unquote(urlparse(
            self.location or pyfile.url).path.split("/")[-1]))

    if self.location:
        self.start_download(self.location)
    elif self.premium:
        self.handle_premium()
    else:
        self.handle_free()
def get_package_name(self):
    """Return the package title scraped from ``self.html``, or ``None``.

    Only plugins that define ``TITLE_PATTERN`` (with a ``title`` named
    group) can yield a name; everything else falls through to ``None``.
    """
    if hasattr(self, 'TITLE_PATTERN'):
        found = re.search(self.TITLE_PATTERN, self.html)
        if found is not None:
            title = webpurge.escape(found.group('title').strip())
            self.log_debug("Found name [{0}] in package info".format(title))
            return title
    return None
def get_package_name(self):
    """Scrape the package title via the plugin's ``TITLE_PATTERN``.

    :return: escaped, stripped ``title`` group, or ``None`` when the plugin
             defines no pattern or the pattern does not match the page.
    """
    if not hasattr(self, 'TITLE_PATTERN'):
        return None

    match_obj = re.search(self.TITLE_PATTERN, self.html)
    if not match_obj:
        return None

    name = webpurge.escape(match_obj.group('title').strip())
    self.log_debug("Found name [{0}] in package info".format(name))
    return name
def download_file(self, pyfile):
    """Follow redirects, derive the final filename and start the download.

    Resolves up to 5 HTTP ``location`` redirects by hand, preferring a
    filename from the final Content-Disposition header, then the URL path,
    then the URL itself.

    :param pyfile: pyLoad file object; ``pyfile.name`` is updated in place.
    :raises ResponseException: when the server answers with a 404 code.
    """
    url = pyfile.url
    # '_i' rather than '_': '_' is the gettext alias used by sibling methods.
    for _i in range(5):
        header = self.load(url, just_header=True)

        # self.load does not raise a BadHeader on 404 responses, do it here
        if 'code' in header and header['code'] == 404:
            raise ResponseException(404)

        if 'location' not in header:
            break

        self.log_debug("Location: {0}".format(header['location']))
        base = re.match(r'https?://[^/]+', url).group(0)
        location = header['location']
        if location.startswith("http"):
            url = unquote(location)
        elif location.startswith("/"):
            url = base + unquote(location)
        else:
            url = "{0}/{1}".format(base, unquote(location))

    name = webpurge.escape(unquote(urlparse(url).path.split("/")[-1]))

    if 'content-disposition' in header:
        self.log_debug("Content-Disposition: {0}".format(
            header['content-disposition']))
        # Raw string: the original non-raw pattern relied on '\*' surviving
        # as a literal backslash, which is a deprecated escape.
        m = re.search(r"filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)",
                      header['content-disposition'])
        if m:
            disp = m.groupdict()
            self.log_debug(disp)
            if not disp['enc']:
                disp['enc'] = 'utf-8'
            # purge.chars presumably strips the given characters — confirm.
            name = purge.chars(disp['name'], "\"';").strip()
            # BUG FIX: str(unquote(name), enc) raises TypeError on Python 3
            # (str-to-str conversion takes no encoding); decode the percent
            # escapes with the declared charset via unquote() instead.
            name = unquote(name, encoding=disp['enc'])

    if not name:
        name = url
    pyfile.name = name
    self.log_debug("Filename: {0}".format(pyfile.name))
    self.download(url, disposition=True)
def download_file(self, pyfile):
    """Resolve up to 5 redirects, pick the best filename and download.

    Filename preference order: Content-Disposition header of the final
    response, last URL path segment, full URL.

    :param pyfile: pyLoad file object; ``pyfile.name`` is updated in place.
    :raises ResponseException: on a 404 response code.
    """
    url = pyfile.url
    # Loop variable renamed from '_' so the gettext alias is not shadowed.
    for _attempt in range(5):
        header = self.load(url, just_header=True)

        # self.load does not raise a BadHeader on 404 responses, do it here
        if 'code' in header and header['code'] == 404:
            raise ResponseException(404)

        if 'location' not in header:
            break

        self.log_debug("Location: {0}".format(header['location']))
        base = re.match(r'https?://[^/]+', url).group(0)
        redirect = header['location']
        if redirect.startswith("http"):
            url = unquote(redirect)
        elif redirect.startswith("/"):
            url = base + unquote(redirect)
        else:
            url = "{0}/{1}".format(base, unquote(redirect))

    name = webpurge.escape(unquote(urlparse(url).path.split("/")[-1]))

    if 'content-disposition' in header:
        self.log_debug(
            "Content-Disposition: {0}".format(header['content-disposition']))
        # Raw string fixes the deprecated '\*' escape of the original.
        m = re.search(r"filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)",
                      header['content-disposition'])
        if m:
            disp = m.groupdict()
            self.log_debug(disp)
            if not disp['enc']:
                disp['enc'] = 'utf-8'
            # purge.chars presumably strips the listed characters — confirm.
            name = purge.chars(disp['name'], "\"';").strip()
            # BUG FIX: str(unquote(name), enc) is a TypeError on Python 3;
            # unquote() itself decodes percent escapes with the declared
            # charset.
            name = unquote(name, encoding=disp['enc'])

    if not name:
        name = url
    pyfile.name = name
    self.log_debug("Filename: {0}".format(pyfile.name))
    self.download(url, disposition=True)
def handle_captcha(self, inputs):
    """Detect which captcha flavour the page uses, solve it, fill *inputs*.

    :param inputs: form-field dict updated in place with the solution.
    :return: 1 ReCaptcha, 2 image captcha, 3 styled-span digit captcha,
             4 SolveMedia, 0 when no captcha was found.
    """
    m = re.search(self.RECAPTCHA_URL_PATTERN, self.html)
    if m:
        recaptcha_key = unquote(m.group(1))
        self.log_debug("RECAPTCHA KEY: {0}".format(recaptcha_key))
        recaptcha = ReCaptcha(self)
        challenge, response = recaptcha.challenge(recaptcha_key)
        inputs['recaptcha_challenge_field'] = challenge
        inputs['recaptcha_response_field'] = response
        return 1

    m = re.search(self.CAPTCHA_URL_PATTERN, self.html)
    if m:
        inputs['code'] = self.decrypt_captcha(m.group(1))
        return 2

    m = re.search(self.CAPTCHA_DIV_PATTERN, self.html, flags=re.S)
    if m:
        captcha_div = m.group(1)
        self.log_debug(captcha_div)
        # Digits are positioned via CSS padding; sort by that offset to
        # recover left-to-right reading order.
        numerals = re.findall(
            r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>',
            webpurge.escape(captcha_div))
        ordered = sorted(numerals, key=lambda num: int(num[0]))
        inputs['code'] = "".join(pair[1] for pair in ordered)
        self.log_debug("CAPTCHA", inputs['code'], numerals)
        return 3

    m = re.search(self.SOLVEMEDIA_PATTERN, self.html)
    if m:
        captcha = SolveMedia(self)
        challenge, response = captcha.challenge(m.group(1))
        inputs['adcopy_challenge'] = challenge
        inputs['adcopy_response'] = response
        return 4

    return 0
def handle_captcha(self, inputs):
    """Try each supported captcha type against the page in priority order.

    Mutates *inputs* with the solver's answer fields and returns a small
    integer tag identifying the captcha kind (0 = none found).
    """
    found = re.search(self.RECAPTCHA_URL_PATTERN, self.html)
    if found:
        key = unquote(found.group(1))
        self.log_debug("RECAPTCHA KEY: {0}".format(key))
        solver = ReCaptcha(self)
        (inputs['recaptcha_challenge_field'],
         inputs['recaptcha_response_field']) = solver.challenge(key)
        return 1

    found = re.search(self.CAPTCHA_URL_PATTERN, self.html)
    if found:
        captcha_url = found.group(1)
        inputs['code'] = self.decrypt_captcha(captcha_url)
        return 2

    found = re.search(self.CAPTCHA_DIV_PATTERN, self.html, re.S)
    if found:
        div = found.group(1)
        self.log_debug(div)
        # Each digit span carries a CSS padding-left offset; sorting by it
        # reconstructs the visual left-to-right order.
        digits = re.findall(
            r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>',
            webpurge.escape(div))
        inputs['code'] = "".join(
            d[1] for d in sorted(digits, key=lambda d: int(d[0])))
        self.log_debug("CAPTCHA", inputs['code'], digits)
        return 3

    found = re.search(self.SOLVEMEDIA_PATTERN, self.html)
    if found:
        solver = SolveMedia(self)
        (inputs['adcopy_challenge'],
         inputs['adcopy_response']) = solver.challenge(found.group(1))
        return 4

    return 0