def handle_web_links(self):
    """
    Extract and decrypt "Web links" from the current page data.

    Scans ``self.data`` for linkcrypt.ws out-forms, posts each captured file
    id back to ``http://linkcrypt.ws/out.html`` and pulls the real target URL
    out of the ``doNotTrack('...')`` snippet in the response.

    :return: list of decrypted link URLs (possibly empty).
    """
    self.log_debug("Search for Web links ")

    pack_links = []
    pattern = r'<form action="http://linkcrypt.ws/out.html"[^>]*?>.*?<input[^>]*?value="(.+?)"[^>]*?name="file"'
    ids = re.findall(pattern, self.data, re.I | re.S)

    self.log_debug(f"Decrypting {len(ids)} Web links")

    #: Marker that precedes the real link in the out.html response
    marker = "href=doNotTrack('"
    for weblink_id in ids:
        try:
            res = self.load(
                "http://linkcrypt.ws/out.html", post={"file": weblink_id}
            )

            start = res.find(marker)
            if start < 0:
                # Marker missing: previously find() == -1 plus a hard-coded
                # +17 offset produced a garbage slice that was appended as a
                # "link" — skip the entry instead
                self.log_debug(f"Web link marker not found for {weblink_id}")
                continue

            start += len(marker)
            end = res.find("'", start)

            link = html_unescape(res[start:end])
            pack_links.append(link)

        except Exception as detail:
            self.log_debug(f"Error decrypting Web link {weblink_id}, {detail}")

    return pack_links
def handle_captcha(self, inputs):
    """
    Detect and solve whichever captcha type the page uses.

    Tries, in order: a plain image captcha, a "positioned digits" captcha
    block (digit ``<span>``s ordered by their CSS ``padding-left``),
    ReCaptcha, and SolveMedia. The solved response is stored into *inputs*
    under the field name the respective service expects.

    :param inputs: dict of form fields, updated in place.
    """
    #: Plain image captcha
    m = search_pattern(self.CAPTCHA_PATTERN, self.data)
    if m is not None:
        captcha_url = urljoin(self.pyfile.url, m.group(1))
        inputs["code"] = self.captcha.decrypt(captcha_url)
        return

    #: Positioned-digits captcha: each digit's horizontal position is its
    #: CSS padding-left, so the code is read off in padding order
    m = search_pattern(self.CAPTCHA_BLOCK_PATTERN, self.data, flags=re.S)
    if m is not None:
        captcha_div = m.group(1)
        numerals = re.findall(
            r"<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>",
            html_unescape(captcha_div),
        )

        self.log_debug(captcha_div)

        # NOTE: sort numerically, not lexicographically -- the captured
        # padding values are strings, so a plain itemgetter(0) sort would
        # place "100" before "20" and scramble the captcha code
        code = inputs["code"] = "".join(
            digit for _, digit in sorted(numerals, key=lambda a: int(a[0]))
        )

        self.log_debug(f"Captcha code: {code}", numerals)
        return

    #: ReCaptcha
    recaptcha = ReCaptcha(self.pyfile)
    try:
        captcha_key = search_pattern(self.RECAPTCHA_PATTERN, self.data).group(1)
    except (AttributeError, IndexError):
        captcha_key = recaptcha.detect_key()
    else:
        self.log_debug(f"ReCaptcha key: {captcha_key}")

    if captcha_key:
        self.captcha = recaptcha
        inputs["g-recaptcha-response"], _ = recaptcha.challenge(captcha_key)
        return

    #: SolveMedia
    solvemedia = SolveMedia(self.pyfile)
    try:
        captcha_key = search_pattern(self.SOLVEMEDIA_PATTERN, self.data).group(1)
    except (AttributeError, IndexError):
        captcha_key = solvemedia.detect_key()
    else:
        self.log_debug(f"SolveMedia key: {captcha_key}")

    if captcha_key:
        self.captcha = solvemedia
        (
            inputs["adcopy_response"],
            inputs["adcopy_challenge"],
        ) = solvemedia.challenge(captcha_key)
def get_file_name(self):
    """
    Parse the file name from the page's global heading.

    Slashes are stripped from the title (they are invalid in file names)
    and a ``.flv`` extension is appended.

    :return: unescaped file name string.
    """
    heading = re.search(r"<h1 class=\'globalHd\'>(.*)</h1>", self.data)
    title = heading.group(1).replace("/", "")
    return html_unescape(title + ".flv")
def upload(
    self,
    path,
    url,
    get={},  # NOTE(review): mutable default — safe only because it is never mutated here
    ref=True,
    cookies=True,
    just_header=False,
    decode=True,
    redirect=True,
    req=None,
):
    # TODO: This should really go to HTTPRequest.py
    """
    Uploads a file at url and returns response content.

    :param path: local path of the file to upload
    :param url:
    :param get:
    :param ref:
    :param cookies:
    :param just_header: If True only the header will be retrieved and returned as dict
    :param decode: Whether to decode the output according to http header, should be True in most cases
    :param redirect: False disables redirects, an int caps the redirect count
    :param req: request object to use; False forces a fresh one, None uses self.req
    :return: Response content (or header dict if just_header is True)
    """
    if self.pyload.debug:
        # Dump every argument except self/url for troubleshooting
        self.log_debug(
            "UPLOAD URL " + url,
            *[
                "{}={}".format(key, value)
                for key, value in locals().items()
                if key not in ("self", "url", "_[1]")
            ],
        )

    # Keep the file open for the whole transfer: pycurl reads it lazily
    # through READFUNCTION during perform()
    with open(path, mode="rb") as fp:
        url = fixurl(url, unquote=True)  #: Recheck in 0.6.x

        if req is False:
            req = get_request()
            req.set_option("timeout", 60)  # TODO: Remove in 0.6.x
        elif not req:
            req = self.req

        if isinstance(cookies, list):
            set_cookies(req.cj, cookies)

        # NOTE(review): options below are set on self.req even when a fresh
        # req was created via get_request() above — confirm this is intended
        http_req = self.req.http if hasattr(self.req, "http") else self.req

        if not redirect:
            # NOTE: req can be a HTTPRequest or a Browser object
            http_req.c.setopt(pycurl.FOLLOWLOCATION, 0)

        elif isinstance(redirect, int):
            # NOTE: req can be a HTTPRequest or a Browser object
            http_req.c.setopt(pycurl.MAXREDIRS, redirect)

        if isinstance(ref, str):
            http_req.last_url = ref

        http_req.set_request_context(url, get, {}, bool(ref), bool(cookies), False)
        http_req.header = ""
        http_req.c.setopt(pycurl.HTTPHEADER, http_req.headers)

        # Switch the curl handle into upload mode, streaming the file body
        http_req.c.setopt(pycurl.UPLOAD, 1)
        http_req.c.setopt(pycurl.READFUNCTION, fp.read)
        http_req.c.setopt(pycurl.INFILESIZE, os.path.getsize(path))

        if just_header:
            # Temporarily disable redirects/body so only headers come back
            http_req.c.setopt(pycurl.FOLLOWLOCATION, 0)
            http_req.c.setopt(pycurl.NOBODY, 1)

            http_req.c.perform()
            html = http_req.header

            # Restore the handle's normal settings afterwards
            http_req.c.setopt(pycurl.FOLLOWLOCATION, 1)
            http_req.c.setopt(pycurl.NOBODY, 0)

        else:
            http_req.c.perform()
            html = http_req.get_response()

        # Reset upload-related options so the shared handle can be reused
        http_req.c.setopt(pycurl.UPLOAD, 0)
        http_req.c.setopt(pycurl.INFILESIZE, 0)
        http_req.c.setopt(pycurl.POSTFIELDS, "")

        http_req.last_effective_url = http_req.c.getinfo(
            pycurl.EFFECTIVE_URL)
        http_req.add_cookies()

        try:
            http_req.code = http_req.verify_header()

        finally:
            # Always release the response buffer, even if verify_header raises
            http_req.rep.close()
            http_req.rep = None

        if decode is True:
            html = http_req.decode_response(html)

    # Undo the redirect tweaks applied before the transfer
    if not redirect:
        http_req.c.setopt(pycurl.FOLLOWLOCATION, 1)

    elif isinstance(redirect, int):
        maxredirs = (int(
            self.pyload.api.get_config_value("UserAgentSwitcher", "maxredirs", "plugin")) or 5)  # TODO: Remove `int` in 0.6.x
        # NOTE: req can be a HTTPRequest or a Browser object
        http_req.c.setopt(pycurl.MAXREDIRS, maxredirs)

    if decode:
        html = html_unescape(html)

    # TODO: Move to network in 0.6.x
    html = _decode(html, decode)

    self.last_html = html

    if self.pyload.debug:
        self.dump_html()

    # TODO: Move to network in 0.6.x
    header = {"code": req.code, "url": req.last_effective_url}
    # NOTE: req can be a HTTPRequest or a Browser object
    header.update(parse_html_header(http_req.header))

    self.last_header = header

    if just_header:
        return header
    else:
        return html
def load(
    self,
    url,
    get={},  # NOTE(review): mutable defaults — safe only because they are never mutated here
    post={},
    ref=True,
    cookies=True,
    just_header=False,
    decode=True,
    multipart=False,
    redirect=True,
    req=None,
):
    """
    Load content at url and returns it.

    :param url:
    :param get:
    :param post:
    :param ref:
    :param cookies:
    :param just_header: If True only the header will be retrieved and returned as dict
    :param decode: Whether to decode the output according to http header, should be True in most cases
    :param multipart: use multipart/form-data encoding for post
    :param redirect: False disables redirects, an int caps the redirect count
    :param req: request object to use; False forces a fresh one, None uses self.req
    :return: Loaded content (or header dict if just_header is True)
    """
    if self.pyload.debug:
        # Dump every argument except self/url for troubleshooting
        self.log_debug(
            "LOAD URL " + url,
            *[
                "{}={}".format(key, value)
                for key, value in locals().items()
                if key not in ("self", "url", "_[1]")
            ],
        )

    url = fixurl(url, unquote=True)  #: Recheck in 0.6.x

    if req is False:
        req = get_request()
        req.set_option("timeout", 60)  # TODO: Remove in 0.6.x

    elif not req:
        req = self.req

    # TODO: Move to network in 0.6.x
    if isinstance(cookies, list):
        set_cookies(req.cj, cookies)

    # NOTE(review): options below are set on self.req even when a fresh req
    # was created via get_request() above — confirm this is intended
    http_req = self.req.http if hasattr(self.req, "http") else self.req

    # TODO: Move to network in 0.6.x
    if not redirect:
        # NOTE: req can be a HTTPRequest or a Browser object
        http_req.c.setopt(pycurl.FOLLOWLOCATION, 0)

    elif isinstance(redirect, int):
        # NOTE: req can be a HTTPRequest or a Browser object
        http_req.c.setopt(pycurl.MAXREDIRS, redirect)

    # TODO: Move to network in 0.6.x
    if isinstance(ref, str):
        req.last_url = ref

    html = req.load(
        url,
        get,
        post,
        bool(ref),
        bool(cookies),
        just_header,
        multipart,
        decode is True,
    )  # TODO: Fix network multipart in 0.6.x

    # Undo the redirect tweaks applied before the transfer
    # TODO: Move to network in 0.6.x
    if not redirect:
        # NOTE: req can be a HTTPRequest or a Browser object
        http_req.c.setopt(pycurl.FOLLOWLOCATION, 1)

    elif isinstance(redirect, int):
        maxredirs = (int(
            self.pyload.api.get_config_value("UserAgentSwitcher", "maxredirs", "plugin")) or 5)  # TODO: Remove `int` in 0.6.x
        # NOTE: req can be a HTTPRequest or a Browser object
        http_req.c.setopt(pycurl.MAXREDIRS, maxredirs)

    # TODO: Move to network in 0.6.x
    if decode:
        html = html_unescape(html)

    # TODO: Move to network in 0.6.x
    html = _decode(html, decode)

    self.last_html = html

    if self.pyload.debug:
        self.dump_html()

    # TODO: Move to network in 0.6.x
    header = {"code": req.code, "url": req.last_effective_url}
    # NOTE: req can be a HTTPRequest or a Browser object
    header.update(parse_html_header(http_req.header))

    self.last_header = header

    if just_header:
        return header
    else:
        return html