def handle_web_links(self):
    """
    Extract and decrypt "Web links" from the current page data.

    Scans ``self.data`` for linkcrypt.ws out-forms, posts each captured file
    id back to ``http://linkcrypt.ws/out.html`` and pulls the real target URL
    out of the ``doNotTrack('...')`` snippet in the response.

    :return: list of decrypted link URLs (possibly empty).
    """
    self.log_debug("Search for Web links ")

    pack_links = []
    pattern = r'<form action="http://linkcrypt.ws/out.html"[^>]*?>.*?<input[^>]*?value="(.+?)"[^>]*?name="file"'
    ids = re.findall(pattern, self.data, re.I | re.S)

    self.log_debug(f"Decrypting {len(ids)} Web links")

    #: Marker that precedes the real link in the out.html response
    marker = "href=doNotTrack('"
    for weblink_id in ids:
        try:
            res = self.load(
                "http://linkcrypt.ws/out.html", post={"file": weblink_id}
            )

            start = res.find(marker)
            if start < 0:
                # Marker missing: previously find() == -1 plus a hard-coded
                # +17 offset produced a garbage slice that was appended as a
                # "link" — skip the entry instead
                self.log_debug(f"Web link marker not found for {weblink_id}")
                continue

            start += len(marker)
            end = res.find("'", start)

            link = html_unescape(res[start:end])
            pack_links.append(link)

        except Exception as detail:
            self.log_debug(f"Error decrypting Web link {weblink_id}, {detail}")

    return pack_links
def handle_captcha(self, inputs):
    """
    Detect and solve whichever captcha type the page uses.

    Tries, in order: a plain image captcha, a "positioned digits" captcha
    block (digit ``<span>``s ordered by their CSS ``padding-left``),
    ReCaptcha, and SolveMedia. The solved response is stored into *inputs*
    under the field name the respective service expects.

    :param inputs: dict of form fields, updated in place.
    """
    #: Plain image captcha
    m = search_pattern(self.CAPTCHA_PATTERN, self.data)
    if m is not None:
        captcha_url = urljoin(self.pyfile.url, m.group(1))
        inputs["code"] = self.captcha.decrypt(captcha_url)
        return

    #: Positioned-digits captcha: each digit's horizontal position is its
    #: CSS padding-left, so the code is read off in padding order
    m = search_pattern(self.CAPTCHA_BLOCK_PATTERN, self.data, flags=re.S)
    if m is not None:
        captcha_div = m.group(1)
        numerals = re.findall(
            r"<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>",
            html_unescape(captcha_div),
        )

        self.log_debug(captcha_div)

        # NOTE: sort numerically, not lexicographically -- the captured
        # padding values are strings, so a plain itemgetter(0) sort would
        # place "100" before "20" and scramble the captcha code
        code = inputs["code"] = "".join(
            digit for _, digit in sorted(numerals, key=lambda a: int(a[0]))
        )

        self.log_debug(f"Captcha code: {code}", numerals)
        return

    #: ReCaptcha
    recaptcha = ReCaptcha(self.pyfile)
    try:
        captcha_key = search_pattern(self.RECAPTCHA_PATTERN, self.data).group(1)
    except (AttributeError, IndexError):
        captcha_key = recaptcha.detect_key()
    else:
        self.log_debug(f"ReCaptcha key: {captcha_key}")

    if captcha_key:
        self.captcha = recaptcha
        inputs["g-recaptcha-response"], _ = recaptcha.challenge(captcha_key)
        return

    #: SolveMedia
    solvemedia = SolveMedia(self.pyfile)
    try:
        captcha_key = search_pattern(self.SOLVEMEDIA_PATTERN, self.data).group(1)
    except (AttributeError, IndexError):
        captcha_key = solvemedia.detect_key()
    else:
        self.log_debug(f"SolveMedia key: {captcha_key}")

    if captcha_key:
        self.captcha = solvemedia
        (
            inputs["adcopy_response"],
            inputs["adcopy_challenge"],
        ) = solvemedia.challenge(captcha_key)
def get_file_name(self):
    """
    Parse the file name from the page's global heading.

    Slashes are stripped from the title (they are invalid in file names)
    and a ``.flv`` extension is appended.

    :return: unescaped file name string.
    """
    heading = re.search(r"<h1 class=\'globalHd\'>(.*)</h1>", self.data)
    title = heading.group(1).replace("/", "")
    return html_unescape(title + ".flv")
def upload(
    self,
    path,
    url,
    get={},  # NOTE(review): mutable default — safe only because it is never mutated here
    ref=True,
    cookies=True,
    just_header=False,
    decode=True,
    redirect=True,
    req=None,
):
    # TODO: This should really go to HTTPRequest.py
    """
    Uploads a file at url and returns response content.

    :param path: local path of the file to upload
    :param url:
    :param get:
    :param ref:
    :param cookies:
    :param just_header: If True only the header will be retrieved and returned as dict
    :param decode: Whether to decode the output according to http header, should be True in most cases
    :param redirect: False disables redirects, an int caps the redirect count
    :param req: request object to use; False forces a fresh one, None uses self.req
    :return: Response content (or header dict if just_header is True)
    """
    if self.pyload.debug:
        # Dump every argument except self/url for troubleshooting
        self.log_debug(
            "UPLOAD URL " + url,
            *[
                "{}={}".format(key, value)
                for key, value in locals().items()
                if key not in ("self", "url", "_[1]")
            ],
        )

    # Keep the file open for the whole transfer: pycurl reads it lazily
    # through READFUNCTION during perform()
    with open(path, mode="rb") as fp:
        url = fixurl(url, unquote=True)  #: Recheck in 0.6.x

        if req is False:
            req = get_request()
            req.set_option("timeout", 60)  # TODO: Remove in 0.6.x
        elif not req:
            req = self.req

        if isinstance(cookies, list):
            set_cookies(req.cj, cookies)

        # NOTE(review): options below are set on self.req even when a fresh
        # req was created via get_request() above — confirm this is intended
        http_req = self.req.http if hasattr(self.req, "http") else self.req

        if not redirect:
            # NOTE: req can be a HTTPRequest or a Browser object
            http_req.c.setopt(pycurl.FOLLOWLOCATION, 0)

        elif isinstance(redirect, int):
            # NOTE: req can be a HTTPRequest or a Browser object
            http_req.c.setopt(pycurl.MAXREDIRS, redirect)

        if isinstance(ref, str):
            http_req.last_url = ref

        http_req.set_request_context(url, get, {}, bool(ref), bool(cookies), False)
        http_req.header = ""
        http_req.c.setopt(pycurl.HTTPHEADER, http_req.headers)

        # Switch the curl handle into upload mode, streaming the file body
        http_req.c.setopt(pycurl.UPLOAD, 1)
        http_req.c.setopt(pycurl.READFUNCTION, fp.read)
        http_req.c.setopt(pycurl.INFILESIZE, os.path.getsize(path))

        if just_header:
            # Temporarily disable redirects/body so only headers come back
            http_req.c.setopt(pycurl.FOLLOWLOCATION, 0)
            http_req.c.setopt(pycurl.NOBODY, 1)

            http_req.c.perform()
            html = http_req.header

            # Restore the handle's normal settings afterwards
            http_req.c.setopt(pycurl.FOLLOWLOCATION, 1)
            http_req.c.setopt(pycurl.NOBODY, 0)

        else:
            http_req.c.perform()
            html = http_req.get_response()

        # Reset upload-related options so the shared handle can be reused
        http_req.c.setopt(pycurl.UPLOAD, 0)
        http_req.c.setopt(pycurl.INFILESIZE, 0)
        http_req.c.setopt(pycurl.POSTFIELDS, "")

        http_req.last_effective_url = http_req.c.getinfo(
            pycurl.EFFECTIVE_URL)
        http_req.add_cookies()

        try:
            http_req.code = http_req.verify_header()

        finally:
            # Always release the response buffer, even if verify_header raises
            http_req.rep.close()
            http_req.rep = None

        if decode is True:
            html = http_req.decode_response(html)

    # Undo the redirect tweaks applied before the transfer
    if not redirect:
        http_req.c.setopt(pycurl.FOLLOWLOCATION, 1)

    elif isinstance(redirect, int):
        maxredirs = (int(
            self.pyload.api.get_config_value("UserAgentSwitcher", "maxredirs", "plugin")) or 5)  # TODO: Remove `int` in 0.6.x
        # NOTE: req can be a HTTPRequest or a Browser object
        http_req.c.setopt(pycurl.MAXREDIRS, maxredirs)

    if decode:
        html = html_unescape(html)

    # TODO: Move to network in 0.6.x
    html = _decode(html, decode)

    self.last_html = html

    if self.pyload.debug:
        self.dump_html()

    # TODO: Move to network in 0.6.x
    header = {"code": req.code, "url": req.last_effective_url}
    # NOTE: req can be a HTTPRequest or a Browser object
    header.update(parse_html_header(http_req.header))

    self.last_header = header

    if just_header:
        return header
    else:
        return html
def load(
    self,
    url,
    get={},  # NOTE(review): mutable defaults — safe only because they are never mutated here
    post={},
    ref=True,
    cookies=True,
    just_header=False,
    decode=True,
    multipart=False,
    redirect=True,
    req=None,
):
    """
    Load content at url and returns it.

    :param url:
    :param get:
    :param post:
    :param ref:
    :param cookies:
    :param just_header: If True only the header will be retrieved and returned as dict
    :param decode: Whether to decode the output according to http header, should be True in most cases
    :param multipart: use multipart/form-data encoding for post
    :param redirect: False disables redirects, an int caps the redirect count
    :param req: request object to use; False forces a fresh one, None uses self.req
    :return: Loaded content (or header dict if just_header is True)
    """
    if self.pyload.debug:
        # Dump every argument except self/url for troubleshooting
        self.log_debug(
            "LOAD URL " + url,
            *[
                "{}={}".format(key, value)
                for key, value in locals().items()
                if key not in ("self", "url", "_[1]")
            ],
        )

    url = fixurl(url, unquote=True)  #: Recheck in 0.6.x

    if req is False:
        req = get_request()
        req.set_option("timeout", 60)  # TODO: Remove in 0.6.x

    elif not req:
        req = self.req

    # TODO: Move to network in 0.6.x
    if isinstance(cookies, list):
        set_cookies(req.cj, cookies)

    # NOTE(review): options below are set on self.req even when a fresh req
    # was created via get_request() above — confirm this is intended
    http_req = self.req.http if hasattr(self.req, "http") else self.req

    # TODO: Move to network in 0.6.x
    if not redirect:
        # NOTE: req can be a HTTPRequest or a Browser object
        http_req.c.setopt(pycurl.FOLLOWLOCATION, 0)

    elif isinstance(redirect, int):
        # NOTE: req can be a HTTPRequest or a Browser object
        http_req.c.setopt(pycurl.MAXREDIRS, redirect)

    # TODO: Move to network in 0.6.x
    if isinstance(ref, str):
        req.last_url = ref

    html = req.load(
        url,
        get,
        post,
        bool(ref),
        bool(cookies),
        just_header,
        multipart,
        decode is True,
    )  # TODO: Fix network multipart in 0.6.x

    # Undo the redirect tweaks applied before the transfer
    # TODO: Move to network in 0.6.x
    if not redirect:
        # NOTE: req can be a HTTPRequest or a Browser object
        http_req.c.setopt(pycurl.FOLLOWLOCATION, 1)

    elif isinstance(redirect, int):
        maxredirs = (int(
            self.pyload.api.get_config_value("UserAgentSwitcher", "maxredirs", "plugin")) or 5)  # TODO: Remove `int` in 0.6.x
        # NOTE: req can be a HTTPRequest or a Browser object
        http_req.c.setopt(pycurl.MAXREDIRS, maxredirs)

    # TODO: Move to network in 0.6.x
    if decode:
        html = html_unescape(html)

    # TODO: Move to network in 0.6.x
    html = _decode(html, decode)

    self.last_html = html

    if self.pyload.debug:
        self.dump_html()

    # TODO: Move to network in 0.6.x
    header = {"code": req.code, "url": req.last_effective_url}
    # NOTE: req can be a HTTPRequest or a Browser object
    header.update(parse_html_header(http_req.header))

    self.last_header = header

    if just_header:
        return header
    else:
        return html