def fixurl(self, url, baseurl=None, unquote=True):
    """
    Normalize ``url`` and resolve it against a base url when it has no scheme.

    Both ``url`` and the base are first run through the ``fixurl`` helper
    function (the free function, not this method) with ``unquote=True``.

    :param url: Url to normalize; may lack a scheme
    :param baseurl: Base url to resolve against; ``self.pyfile.url`` is used
        when this is falsy
    :param unquote: Forwarded to the final ``fixurl`` helper call
    :return: Result of the final ``fixurl`` helper call
    """
    target = fixurl(url, unquote=True)
    base = fixurl(baseurl or self.pyfile.url, unquote=True)

    # A url that already carries a scheme needs no joining.
    if urllib.parse.urlparse(target).scheme:
        return fixurl(target, unquote)

    # Only the "scheme://netloc" root of the base is used for the join.
    base_parts = urllib.parse.urlparse(base)
    site_root = "{}://{}".format(base_parts.scheme, base_parts.netloc)
    return fixurl(urllib.parse.urljoin(site_root, target), unquote)
def get_info(cls, url="", html=""):
    """
    Build the initial info dict for ``url``.

    :param url: Url to describe; normalized with ``fixurl`` first
    :param html: Not used by this implementation
    :return: Dict with the keys ``name``, ``hash``, ``pattern``, ``size``,
        ``status`` (7 when the normalized url is non-empty, otherwise 8)
        and ``url`` (after applying ``cls.URL_REPLACEMENTS``)
    """
    url = fixurl(url, unquote=True)

    info = dict(
        name=parse.name(url),
        hash={},
        pattern={},
        size=0,
        status=7 if url else 8,
        url=replace_patterns(url, cls.URL_REPLACEMENTS),
    )

    try:
        groups = re.match(cls.__pattern__, url).groupdict()
    except Exception:
        # Best effort: any failure to match leaves "pattern" as an empty dict.
        return info

    info["pattern"] = groups
    return info
def upload(
    self,
    path,
    url,
    get={},
    ref=True,
    cookies=True,
    just_header=False,
    decode=True,
    redirect=True,
    req=None,
):
    """
    Upload the file at ``path`` to ``url`` and return the response.

    Side effects: stores the response body in ``self.last_html`` and the
    header dict in ``self.last_header``.

    :param path: Path of the local file to upload
    :param url: Target url; normalized with ``fixurl`` before use
    :param get: Query parameters handed to ``set_request_context``
    :param ref: Truthy to send a referer; a ``str`` is used as the referer url
    :param cookies: Truthy to use cookies; a ``list`` is passed to
        ``set_cookies`` first
    :param just_header: If True only the header will be retrieved and
        returned as dict
    :param decode: Whether to decode the output according to http header,
        should be True in most cases. Only ``decode is True`` triggers
        ``decode_response``; any truthy value triggers ``purge.unescape``
    :param redirect: Falsy disables following redirects for this request; an
        ``int`` (not a ``bool``) is used as the redirect limit for this
        request
    :param req: ``False`` requests a fresh object from ``get_request()``; any
        other falsy value falls back to ``self.req``
    :return: Header dict if ``just_header`` is truthy, otherwise the response
        content
    """
    # TODO: This should really go to HTTPRequest.py
    if self.pyload.debug:
        self.log_debug(
            "UPLOAD URL " + url,
            *[
                "{}={}".format(key, value)
                for key, value in locals().items()
                if key not in ("self", "url", "_[1]")
            ],
        )

    # ``bool`` is a subclass of ``int``: with ``isinstance`` the default
    # ``redirect=True`` was treated as a redirect limit of 1. An exact type
    # check (as used by ``load``) only accepts real integers.
    limit_redirects = type(redirect) is int

    # The file has to stay open while curl pulls data through ``fp.read``.
    with open(path, mode="rb") as fp:
        url = fixurl(url, unquote=True)  #: Recheck in 0.6.x

        if req is False:
            req = get_request()

        elif not req:
            req = self.req

        if isinstance(cookies, list):
            set_cookies(req.cj, cookies)

        # NOTE: req can be a HTTPRequest or a Browser object
        # NOTE(review): the curl handle is always taken from ``self.req``,
        # even when another ``req`` was selected above -- confirm intended.
        http_req = self.req.http if hasattr(self.req, "http") else self.req

        if not redirect:
            http_req.c.setopt(pycurl.FOLLOWLOCATION, 0)

        elif limit_redirects:
            http_req.c.setopt(pycurl.MAXREDIRS, redirect)

        if isinstance(ref, str):
            http_req.last_url = ref

        http_req.set_request_context(url, get, {}, bool(ref), bool(cookies), False)
        http_req.c.setopt(pycurl.HTTPHEADER, http_req.request_headers)
        http_req.response_header = b""

        http_req.c.setopt(pycurl.UPLOAD, 1)
        http_req.c.setopt(pycurl.READFUNCTION, fp.read)
        http_req.c.setopt(pycurl.INFILESIZE, os.path.getsize(path))

        if just_header:
            http_req.c.setopt(pycurl.FOLLOWLOCATION, 0)
            http_req.c.setopt(pycurl.NOBODY, 1)
            http_req.c.perform()

            http_req.c.setopt(pycurl.FOLLOWLOCATION, 1)
            http_req.c.setopt(pycurl.NOBODY, 0)

        else:
            http_req.c.perform()

        # Put the handle back into non-upload mode for later requests.
        http_req.c.setopt(pycurl.UPLOAD, 0)
        http_req.c.setopt(pycurl.INFILESIZE, 0)

        http_req.c.setopt(pycurl.POSTFIELDS, "")
        http_req.last_effective_url = http_req.c.getinfo(pycurl.EFFECTIVE_URL)

        http_req.add_cookies()

        http_req.code = http_req.verify_header()

        html = http_req.response_header if just_header else http_req.get_response()
        http_req.rep.close()
        http_req.rep = None

        if decode is True:
            html = http_req.decode_response(html)

    # Undo the per-request redirect settings applied above.
    if not redirect:
        http_req.c.setopt(pycurl.FOLLOWLOCATION, 1)

    elif limit_redirects:
        maxredirs = (
            self.pyload.api.get_config_value(
                "UserAgentSwitcher", "maxredirs", "plugin"
            )
            or 5
        )
        # NOTE: req can be a HTTPRequest or a Browser object
        http_req.c.setopt(pycurl.MAXREDIRS, maxredirs)

    if decode:
        html = purge.unescape(html)

    self.last_html = html

    if self.pyload.debug:
        self.dump_html()

    # TODO: Move to network in 0.6.x
    header = {"code": req.code, "url": req.last_effective_url}
    # NOTE: req can be a HTTPRequest or a Browser object
    header.update(parse_html_header(http_req.response_header))

    self.last_header = header

    if just_header:
        return header
    else:
        return html
def load(
    self,
    url,
    get={},
    post={},
    ref=True,
    cookies=True,
    just_header=False,
    decode=True,
    multipart=False,
    redirect=True,
    req=None,
):
    """
    Fetch ``url`` and return its content (or only its header).

    Side effects: stores the response body in ``self.last_html`` and the
    header dict in ``self.last_header``.

    :param url: Url to fetch; normalized with ``fixurl`` before use
    :param get: Query parameters forwarded to ``req.load``
    :param post: Post data forwarded to ``req.load``
    :param ref: Truthy to send a referer; a ``str`` is used as the referer url
    :param cookies: Truthy to use cookies; a ``list`` is passed to
        ``set_cookies`` first
    :param just_header: If True only the header is retrieved and returned as
        a dict
    :param decode: Whether to decode the output according to the http header,
        should be True in most cases. Only ``decode is True`` is forwarded as
        the decode flag; any truthy value triggers ``purge.unescape``
    :param multipart: Forwarded to ``req.load``
    :param redirect: Falsy disables following redirects for this request; an
        ``int`` (not a ``bool``) is used as the redirect limit for this
        request
    :param req: ``False`` requests a fresh object from ``get_request()``; any
        other falsy value falls back to ``self.req``
    :return: Header dict if ``just_header`` is truthy, otherwise the loaded
        content
    """
    if self.pyload.debug:
        self.log_debug(
            "LOAD URL " + url,
            *[
                "{}={}".format(arg_name, arg_value)
                for arg_name, arg_value in locals().items()
                if arg_name not in ("self", "url", "_[1]")
            ],
        )

    url = fixurl(url, unquote=True)  #: Recheck in 0.6.x

    if req is False:
        req = get_request()
    elif not req:
        req = self.req

    # TODO: Move to network in 0.6.x
    if isinstance(cookies, list):
        set_cookies(req.cj, cookies)

    # NOTE: req can be a HTTPRequest or a Browser object
    if hasattr(self.req, "http"):
        curl_owner = self.req.http
    else:
        curl_owner = self.req

    # Decide once how redirects are handled; the same decision drives both
    # the setup before the request and the restore after it.
    follow_disabled = not redirect
    custom_limit = not follow_disabled and type(redirect) is int

    # TODO: Move to network in 0.6.x
    if follow_disabled:
        curl_owner.c.setopt(pycurl.FOLLOWLOCATION, 0)
    elif custom_limit:
        curl_owner.c.setopt(pycurl.MAXREDIRS, redirect)

    # TODO: Move to network in 0.6.x
    if isinstance(ref, str):
        req.last_url = ref

    # TODO: Fix network multipart in 0.6.x
    html = req.load(
        url,
        get,
        post,
        bool(ref),
        bool(cookies),
        just_header,
        multipart,
        decode is True,
    )

    # TODO: Move to network in 0.6.x
    # Undo the per-request redirect settings applied above.
    if follow_disabled:
        curl_owner.c.setopt(pycurl.FOLLOWLOCATION, 1)
    elif custom_limit:
        default_limit = (
            self.pyload.api.get_config_value(
                "UserAgentSwitcher", "maxredirs", "plugin"
            )
            or 5
        )
        curl_owner.c.setopt(pycurl.MAXREDIRS, default_limit)

    # TODO: Move to network in 0.6.x
    if decode:
        html = purge.unescape(html)

    self.last_html = html

    if self.pyload.debug:
        self.dump_html()

    # TODO: Move to network in 0.6.x
    header = {"code": req.code, "url": req.last_effective_url}
    header.update(parse_html_header(curl_owner.response_header))
    self.last_header = header

    return header if just_header else html