Esempio n. 1
0
    def fixurl(self, url, baseurl=None, unquote=True):
        """
        Normalize ``url`` and make it absolute against a base URL.

        Both ``url`` and the base are first passed through the module-level
        ``fixurl`` helper with ``unquote=True``. If the normalized ``url`` has
        no scheme, it is joined onto ``scheme://netloc`` of the base, so the
        base's own path is not used for resolution.

        :param url: URL to normalize; may lack a scheme.
        :param baseurl: Base to resolve against; falls back to ``self.pyfile.url`` when falsy.
        :param unquote: Forwarded only to the final ``fixurl`` call on the result.
        :return: Whatever the module-level ``fixurl`` returns for the resolved URL.
        """
        cleaned = fixurl(url, unquote=True)
        base = fixurl(baseurl or self.pyfile.url, unquote=True)

        # Already carries a scheme: nothing to resolve against the base.
        if urllib.parse.urlparse(cleaned).scheme:
            return fixurl(cleaned, unquote)

        base_parts = urllib.parse.urlparse(base)
        site_root = "{}://{}".format(base_parts.scheme, base_parts.netloc)
        return fixurl(urllib.parse.urljoin(site_root, cleaned), unquote)
Esempio n. 2
0
    def get_info(cls, url="", html=""):
        """
        Build the basic info dict for ``url``.

        The dict holds the keys ``name``, ``hash``, ``pattern``, ``size``,
        ``status`` and ``url``. ``status`` is 7 for a non-empty URL and 8
        otherwise (NOTE(review): meaning of these codes is defined elsewhere).
        ``pattern`` receives the named groups of ``cls.__pattern__`` matched
        against the normalized URL, and stays empty when matching fails.

        :param url: URL to describe; normalized with ``fixurl`` first.
        :param html: Not used by this implementation.
        :return: The info dict described above.
        """
        url = fixurl(url, unquote=True)

        file_name = parse.name(url)
        status_code = 7 if url else 8
        rewritten_url = replace_patterns(url, cls.URL_REPLACEMENTS)

        info = {
            "name": file_name,
            "hash": {},
            "pattern": {},
            "size": 0,
            "status": status_code,
            "url": rewritten_url,
        }

        # Best effort: any failure here just leaves "pattern" empty.
        try:
            matched = re.match(cls.__pattern__, url)
            if matched is not None:
                info["pattern"] = matched.groupdict()

        except Exception:
            pass

        return info
Esempio n. 3
0
    def upload(
        self,
        path,
        url,
        get={},
        ref=True,
        cookies=True,
        just_header=False,
        decode=True,
        redirect=True,
        req=None,
    ):
        # TODO: This should really go to HTTPRequest.py
        """
        Upload the file at ``path`` to ``url`` and return the response.

        Side effects: stores the result in ``self.last_html`` and the parsed
        header in ``self.last_header``; dumps the HTML when debug is enabled.

        :param path: Path of the file to send; opened in binary mode.
        :param url: Target URL; normalized with ``fixurl`` before use.
        :param get: Passed to ``set_request_context`` as the GET arguments.
        :param ref: If a string, stored as ``last_url`` on the request first;
            its truthiness is passed to ``set_request_context``.
        :param cookies: If a list, installed into the request's cookie jar via
            ``set_cookies``; its truthiness is passed to ``set_request_context``.
        :param just_header: If True only the header will be retrieved and returned as dict
        :param decode: Whether to decode the output according to http header, should be True in most cases
        :param redirect: Falsy disables following redirects for this call; a
            plain ``int`` caps the number of redirects for this call; ``True``
            leaves the handle's redirect settings untouched.
        :param req: ``False`` creates a new request via ``get_request()``; any
            other falsy value uses ``self.req``; otherwise the given object.
        :return: The header dict if ``just_header`` is set, else the response content
        """
        if self.pyload.debug:
            self.log_debug(
                "UPLOAD URL " + url,
                *[
                    "{}={}".format(key, value)
                    for key, value in locals().items()
                    if key not in ("self", "url", "_[1]")
                ],
            )

        with open(path, mode="rb") as fp:
            url = fixurl(url, unquote=True)  #: Recheck in 0.6.x

            if req is False:
                req = get_request()

            elif not req:
                req = self.req

            if isinstance(cookies, list):
                set_cookies(req.cj, cookies)

            # NOTE: req can be a HTTPRequest or a Browser object.
            # Derive the low-level handle from the request actually in use, so
            # cookies, curl options and the returned header all refer to the
            # same object even when a non-default ``req`` was chosen.
            http_req = req.http if hasattr(req, "http") else req

            # bool is a subclass of int: an isinstance() check would treat the
            # default ``redirect=True`` as "at most 1 redirect". Only a real
            # int is a redirect cap (same rule as ``load``).
            cap_redirects = type(redirect) is int

            if not redirect:
                http_req.c.setopt(pycurl.FOLLOWLOCATION, 0)

            elif cap_redirects:
                http_req.c.setopt(pycurl.MAXREDIRS, redirect)

            try:
                if isinstance(ref, str):
                    http_req.last_url = ref

                http_req.set_request_context(
                    url, get, {}, bool(ref), bool(cookies), False
                )
                http_req.c.setopt(pycurl.HTTPHEADER, http_req.request_headers)
                http_req.response_header = b""

                http_req.c.setopt(pycurl.UPLOAD, 1)
                http_req.c.setopt(pycurl.READFUNCTION, fp.read)
                http_req.c.setopt(pycurl.INFILESIZE, os.path.getsize(path))

                try:
                    if just_header:
                        http_req.c.setopt(pycurl.FOLLOWLOCATION, 0)
                        http_req.c.setopt(pycurl.NOBODY, 1)

                    http_req.c.perform()

                finally:
                    # Always leave upload mode, even when the transfer fails:
                    # the handle is reused and ``fp`` is about to be closed.
                    if just_header:
                        http_req.c.setopt(pycurl.FOLLOWLOCATION, 1)
                        http_req.c.setopt(pycurl.NOBODY, 0)

                    http_req.c.setopt(pycurl.UPLOAD, 0)
                    http_req.c.setopt(pycurl.INFILESIZE, 0)

                http_req.c.setopt(pycurl.POSTFIELDS, "")
                http_req.last_effective_url = http_req.c.getinfo(pycurl.EFFECTIVE_URL)

                http_req.add_cookies()

                http_req.code = http_req.verify_header()

                html = (
                    http_req.response_header if just_header else http_req.get_response()
                )

                http_req.rep.close()
                http_req.rep = None

                if decode is True:
                    html = http_req.decode_response(html)

            finally:
                # Undo the per-call redirect settings whether or not the
                # request succeeded, so later requests are unaffected.
                if not redirect:
                    http_req.c.setopt(pycurl.FOLLOWLOCATION, 1)

                elif cap_redirects:
                    maxredirs = (
                        self.pyload.api.get_config_value(
                            "UserAgentSwitcher", "maxredirs", "plugin"
                        )
                        or 5
                    )
                    http_req.c.setopt(pycurl.MAXREDIRS, maxredirs)

            if decode:
                html = purge.unescape(html)

            self.last_html = html

            if self.pyload.debug:
                self.dump_html()

            # TODO: Move to network in 0.6.x
            header = {"code": req.code, "url": req.last_effective_url}
            header.update(parse_html_header(http_req.response_header))

            self.last_header = header

            if just_header:
                return header
            else:
                return html
Esempio n. 4
0
    def load(
        self,
        url,
        get={},
        post={},
        ref=True,
        cookies=True,
        just_header=False,
        decode=True,
        multipart=False,
        redirect=True,
        req=None,
    ):
        """
        Load content at url and returns it.

        Side effects: stores the result in ``self.last_html`` and the parsed
        header in ``self.last_header``; dumps the HTML when debug is enabled.

        :param url: URL to load; normalized with ``fixurl`` before use.
        :param get: Passed to ``req.load`` as the GET arguments.
        :param post: Passed to ``req.load`` as the POST arguments.
        :param ref: If a string, stored as ``req.last_url`` first; its
            truthiness is passed to ``req.load``.
        :param cookies: If a list, installed into the request's cookie jar via
            ``set_cookies``; its truthiness is passed to ``req.load``.
        :param just_header: If True only the header will be retrieved and returned as dict
        :param decode: Whether to decode the output according to http header, should be True in most cases
        :param multipart: Passed through to ``req.load``.
        :param redirect: Falsy disables following redirects for this call; a
            plain ``int`` caps the number of redirects for this call; ``True``
            leaves the handle's redirect settings untouched.
        :param req: ``False`` creates a new request via ``get_request()``; any
            other falsy value uses ``self.req``; otherwise the given object.
        :return: The header dict if ``just_header`` is set, else the loaded content
        """
        if self.pyload.debug:
            self.log_debug(
                "LOAD URL " + url,
                *[
                    "{}={}".format(key, value)
                    for key, value in locals().items()
                    if key not in ("self", "url", "_[1]")
                ],
            )

        url = fixurl(url, unquote=True)  #: Recheck in 0.6.x

        if req is False:
            req = get_request()

        elif not req:
            req = self.req

        # TODO: Move to network in 0.6.x
        if isinstance(cookies, list):
            set_cookies(req.cj, cookies)

        # NOTE: req can be a HTTPRequest or a Browser object.
        # Derive the low-level handle from the request that performs the load,
        # so the redirect options and the response header belong to it even
        # when a non-default ``req`` was chosen.
        http_req = req.http if hasattr(req, "http") else req

        # Exact type check on purpose: bool is a subclass of int and
        # ``redirect=True`` must not be read as "at most 1 redirect".
        cap_redirects = type(redirect) is int

        # TODO: Move to network in 0.6.x
        if not redirect:
            http_req.c.setopt(pycurl.FOLLOWLOCATION, 0)

        elif cap_redirects:
            http_req.c.setopt(pycurl.MAXREDIRS, redirect)

        try:
            # TODO: Move to network in 0.6.x
            if isinstance(ref, str):
                req.last_url = ref

            html = req.load(
                url,
                get,
                post,
                bool(ref),
                bool(cookies),
                just_header,
                multipart,
                decode is True,
            )  # TODO: Fix network multipart in 0.6.x

        finally:
            # Undo the per-call redirect settings even when the request
            # raises, so the reused handle is not left misconfigured.
            # TODO: Move to network in 0.6.x
            if not redirect:
                http_req.c.setopt(pycurl.FOLLOWLOCATION, 1)

            elif cap_redirects:
                maxredirs = (
                    self.pyload.api.get_config_value(
                        "UserAgentSwitcher", "maxredirs", "plugin"
                    )
                    or 5
                )
                http_req.c.setopt(pycurl.MAXREDIRS, maxredirs)

        # TODO: Move to network in 0.6.x
        if decode:
            html = purge.unescape(html)

        self.last_html = html

        if self.pyload.debug:
            self.dump_html()

        # TODO: Move to network in 0.6.x
        header = {"code": req.code, "url": req.last_effective_url}
        header.update(parse_html_header(http_req.response_header))

        self.last_header = header

        if just_header:
            return header
        else:
            return html