Python html_unescape 예제들, pyload.utils.html_unescape Python 예제들

예제 #1

0

파일 보기

파일: LinkCryptWs.py 프로젝트: PaddyPat/pyload

    def handleWebLinks(self):
        """Decrypt the "Web links" listed on the loaded linkcrypt.ws page.

        Every ``file`` id found in ``self.html`` is posted to
        ``http://linkcrypt.ws/out.html``.  The text following the
        ``window.location =`` assignment in the response (up to the next
        double quote) is HTML-unescaped and collected.  A link that cannot
        be decrypted is logged and skipped, so one bad link does not abort
        the whole package.

        Returns the list of decrypted links (empty when none were found).
        """
        self.logDebug("Search for Web links ")

        package_links = []
        pattern = r'<form action="http://linkcrypt.ws/out.html"[^>]*?>.*?<input[^>]*?value="(.+?)"[^>]*?name="file"'
        ids = re.findall(pattern, self.html, re.I | re.S)

        self.logDebug("Decrypting %d Web links" % len(ids))

        marker = "window.location ="

        for idx, weblink_id in enumerate(ids):
            try:
                self.logDebug("Decrypting Web link %d, %s" % (idx + 1, weblink_id))

                res = self.load("http://linkcrypt.ws/out.html", post={'file': weblink_id})

                # str.find() returns -1 on a miss; without this check the
                # slice below would silently start at offset 18 and yield
                # garbage instead of a link.
                marker_pos = res.find(marker)
                if marker_pos == -1:
                    raise ValueError("redirect target not found in response")

                # The marker is 17 characters long; the offset of 19 skips two
                # more characters (presumably a space and the opening quote).
                indexs = marker_pos + 19
                indexe = res.find('"', indexs)
                if indexe == -1:
                    raise ValueError("unterminated redirect target in response")

                link2 = res[indexs:indexe]

                self.logDebug(link2)

                link2 = html_unescape(link2)
                package_links.append(link2)

            except Exception as detail:
                # Best effort: report the failing link and carry on.
                self.logDebug("Error decrypting Web link %s, %s" % (weblink_id, detail))

        return package_links

예제 #2

0

파일 보기

파일: GoogledriveCom.py 프로젝트: PaddyPat/pyload

    def handleFree(self, pyfile):
        """Follow the two intermediate Google Drive pages to the download URL.

        Hop #1 is taken from the unicode-escape-decoded ``self.html``; hop #2
        from the page loaded for hop #1.  The ``location`` header returned
        for hop #2 is stored in ``self.link``.  ``self.error`` is called when
        a hop cannot be found (presumably it aborts the download -- confirm
        against the plugin base class).
        """
        first_hop_re = r'"(https://docs.google.com/uc\?id.*?export=download)",'
        second_hop_re = r'href="(/uc\?export=download.*?)">'

        try:
            decoded_page = self.html.decode('unicode-escape')
            first_hop = re.search(first_hop_re, decoded_page).group(1)

        except AttributeError:
            # re.search() returned None (or self.html has no decode()).
            self.error(_("Hop #1 not found"))

        else:
            self.logDebug("Next hop: %s" % first_hop)

        self.html = self.load(first_hop).decode('unicode-escape')

        try:
            escaped_hop = re.search(second_hop_re, self.html).group(1)
            second_hop = html_unescape(escaped_hop)

        except AttributeError:
            self.error(_("Hop #2 not found"))

        else:
            self.logDebug("Next hop: %s" % second_hop)

        # Only the headers are needed: the redirect target is the file URL.
        header = self.load("https://docs.google.com" + second_hop, just_header=True)
        self.logDebug("DL-Link: %s" % header['location'])

        self.link = header['location']

예제 #3

0

파일 보기

파일: OneKhDe.py 프로젝트: Bobbaone/pyload

 def proceed(self, url, location):
     """Collect the iframe targets of every download link on the 1kh.de page.

     The ``url`` argument is replaced by ``self.parent.url`` and ``location``
     is not used.  Each resolved link is appended to ``self.urls``.
     """
     url = self.parent.url
     self.html = self.load(url)

     id_pattern = r"<a id=\"DownloadLink_(\d*)\" href=\"http://1kh.de/"
     frame_pattern = "width=\"100%\" src=\"(.*)\"></iframe>"

     for link_id in re.findall(id_pattern, self.html):
         # Every link id has its own page embedding the real target in an iframe.
         frame_page = self.load("http://1kh.de/l/" + link_id)
         frame_src = re.search(frame_pattern, frame_page).group(1)
         self.urls.append(html_unescape(frame_src))

예제 #4

0

파일 보기

파일: GoogledriveCom.py 프로젝트: torrero007/pyload

    def handleFree(self, pyfile):
        """Resolve the final Google Drive download link in two hops.

        The first hop URL is searched in ``self.html`` (unicode-escape
        decoded), the second one in the page that first hop returns.  The
        ``location`` header answered for the second hop ends up in
        ``self.link``.  A missing hop is reported through ``self.error``
        (presumably aborting -- confirm against the plugin base class).
        """
        try:
            page_text = self.html.decode('unicode-escape')
            hop1_match = re.search(
                r'"(https://docs.google.com/uc\?id.*?export=download)",',
                page_text)
            hop1 = hop1_match.group(1)

        except AttributeError:
            # Raised when the pattern did not match (hop1_match is None).
            self.error(_("Hop #1 not found"))

        else:
            self.logDebug("Next hop: %s" % hop1)

        self.html = self.load(hop1).decode('unicode-escape')

        try:
            hop2_match = re.search(r'href="(/uc\?export=download.*?)">',
                                   self.html)
            hop2 = html_unescape(hop2_match.group(1))

        except AttributeError:
            self.error(_("Hop #2 not found"))

        else:
            self.logDebug("Next hop: %s" % hop2)

        # Header-only request; its redirect target is the download link.
        redirect = self.load("https://docs.google.com" + hop2,
                             just_header=True)
        self.logDebug("DL-Link: %s" % redirect['location'])

        self.link = redirect['location']

예제 #5

0

파일 보기

파일: LinkCryptWs.py 프로젝트: Arisharr/Download-Manager

    def handleWebLinks(self):
        """Decrypt the "Web links" listed on the loaded linkcrypt.ws page.

        Every ``file`` id found in ``self.html`` is posted to
        ``http://linkcrypt.ws/out.html``.  The text following the
        ``window.location =`` assignment in the response (up to the next
        double quote) is HTML-unescaped and collected.  A link that cannot
        be decrypted is logged and skipped, so one bad link does not abort
        the whole package.

        Returns the list of decrypted links (empty when none were found).
        """
        self.logDebug("Search for Web links ")

        package_links = []
        pattern = r'<form action="http://linkcrypt.ws/out.html"[^>]*?>.*?<input[^>]*?value="(.+?)"[^>]*?name="file"'
        ids = re.findall(pattern, self.html, re.I | re.S)

        self.logDebug("Decrypting %d Web links" % len(ids))

        marker = "window.location ="

        for idx, weblink_id in enumerate(ids):
            try:
                self.logDebug("Decrypting Web link %d, %s" %
                              (idx + 1, weblink_id))

                res = self.load("http://linkcrypt.ws/out.html",
                                post={'file': weblink_id})

                # str.find() returns -1 on a miss; without this check the
                # slice below would silently start at offset 18 and yield
                # garbage instead of a link.
                marker_pos = res.find(marker)
                if marker_pos == -1:
                    raise ValueError("redirect target not found in response")

                # The marker is 17 characters long; the offset of 19 skips two
                # more characters (presumably a space and the opening quote).
                indexs = marker_pos + 19
                indexe = res.find('"', indexs)
                if indexe == -1:
                    raise ValueError(
                        "unterminated redirect target in response")

                link2 = res[indexs:indexe]

                self.logDebug(link2)

                link2 = html_unescape(link2)
                package_links.append(link2)

            except Exception as detail:
                # Best effort: report the failing link and carry on.
                self.logDebug("Error decrypting Web link %s, %s" %
                              (weblink_id, detail))

        return package_links

예제 #6

0

파일 보기

    def get_file_url(self):
        """Return the HTML-unescaped URL found after ``hashlink=`` in the page.

        The page is fetched through ``self.download_html()`` first when
        ``self.html`` is still empty.
        """
        if not self.html:
            self.download_html()

        hashlink = re.search(r'hashlink=(http.*?)"', self.html)
        return html_unescape(hashlink.group(1))

예제 #7

0

파일 보기

    def getPackageName(self):
        """Return the package title found via ``TITLE_PATTERN``, or None.

        None is returned when the plugin defines no ``TITLE_PATTERN`` or the
        pattern does not match ``self.html``.  The pattern must expose a
        named group ``title``; its stripped, HTML-unescaped text is the name.
        """
        if not hasattr(self, 'TITLE_PATTERN'):
            return None

        found = re.search(self.TITLE_PATTERN, self.html)
        if not found:
            return None

        name = html_unescape(found.group('title').strip())
        self.logDebug("Found name [%s] in package info" % (name))
        return name

예제 #8

0

파일 보기

파일: SimpleCrypter.py 프로젝트: BlackSmith/pyload

    def getPackageName(self):
        """Extract the package name from the page, if a title pattern exists.

        Looks up ``TITLE_PATTERN`` (when the plugin defines one) in
        ``self.html`` and returns the stripped, HTML-unescaped ``title``
        group.  Returns None when there is no pattern or no match.
        """
        title_match = None
        if hasattr(self, 'TITLE_PATTERN'):
            title_match = re.search(self.TITLE_PATTERN, self.html)

        if title_match:
            raw_title = title_match.group('title').strip()
            name = html_unescape(raw_title)
            self.logDebug("Found name [%s] in package info" % (name))
            return name

        return None

예제 #9

0

파일 보기

 def proceed(self, url, location):
     """Resolve every ``DownloadLink_<id>`` anchor of the page to its target.

     ``url`` is overwritten with ``self.parent.url`` and ``location`` is
     unused.  For each link id the page ``http://1kh.de/l/<id>`` is loaded
     and the ``src`` of its iframe is HTML-unescaped and appended to
     ``self.urls``.
     """
     url = self.parent.url
     self.html = self.load(url)

     link_ids = re.findall(
         r"<a id=\"DownloadLink_(\d*)\" href=\"http://1kh.de/", self.html)

     for current_id in link_ids:
         detail_page = self.load("http://1kh.de/l/" + current_id)
         iframe = re.search("width=\"100%\" src=\"(.*)\"></iframe>",
                            detail_page)
         new_link = html_unescape(iframe.group(1))
         self.urls.append(new_link)

예제 #10

0

파일 보기

파일: XFSHoster.py 프로젝트: Arisharr/Download-Manager

    def handleCaptcha(self, inputs):
        """Detect the captcha used by the page and store its answer in *inputs*.

        The checks run in this order and the first hit wins:

        1. image captcha (``CAPTCHA_PATTERN``) -> ``inputs['code']``
        2. digits positioned via ``padding-left`` inside the block matched
           by ``CAPTCHA_BLOCK_PATTERN`` -> ``inputs['code']``
        3. ReCaptcha -> ``recaptcha_response_field`` /
           ``recaptcha_challenge_field``
        4. SolveMedia -> ``adcopy_response`` / ``adcopy_challenge``

        Returns the number of the matching case, or 0 when no captcha was
        found.
        """
        image_match = re.search(self.CAPTCHA_PATTERN, self.html)
        if image_match:
            inputs['code'] = self.decryptCaptcha(image_match.group(1))
            return 1

        block_match = re.search(self.CAPTCHA_BLOCK_PATTERN, self.html, re.S)
        if block_match:
            captcha_div = block_match.group(1)
            numerals = re.findall(
                r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>',
                html_unescape(captcha_div))

            self.logDebug(captcha_div)

            # Each entry is (padding-left, digit); ordering by the padding
            # value puts the digits in their displayed order.
            ordered = sorted(numerals, key=lambda num: int(num[0]))
            inputs['code'] = "".join(pair[1] for pair in ordered)

            self.logDebug("Captcha code: %s" % inputs['code'], numerals)
            return 2

        recaptcha = ReCaptcha(self)
        try:
            captcha_key = re.search(self.RECAPTCHA_PATTERN, self.html).group(1)

        except Exception:
            # No plugin-specific key found; let the helper look for one.
            captcha_key = recaptcha.detect_key()

        else:
            self.logDebug("ReCaptcha key: %s" % captcha_key)

        if captcha_key:
            response, challenge = recaptcha.challenge(captcha_key)
            inputs['recaptcha_response_field'] = response
            inputs['recaptcha_challenge_field'] = challenge
            return 3

        solvemedia = SolveMedia(self)
        try:
            captcha_key = re.search(self.SOLVEMEDIA_PATTERN,
                                    self.html).group(1)

        except Exception:
            captcha_key = solvemedia.detect_key()

        else:
            self.logDebug("SolveMedia key: %s" % captcha_key)

        if captcha_key:
            response, challenge = solvemedia.challenge(captcha_key)
            inputs['adcopy_response'] = response
            inputs['adcopy_challenge'] = challenge
            return 4

        return 0

예제 #11

0

파일 보기

파일: BasePlugin.py 프로젝트: starfighter77895/pyload

    def downloadFile(self, pyfile):
        """Follow up to five redirects, name the file and start the download.

        The file name defaults to the last path segment of the final URL
        (URL-unquoted and HTML-unescaped).  A ``filename`` given in a
        ``content-disposition`` header takes precedence, and the URL itself
        is used when no name could be derived.

        Raises ResponseException(404) when a header reports status code 404.
        """
        url = pyfile.url

        for _attempt in range(5):
            header = self.load(url, just_header=True)

            # self.load does not raise a BadHeader on 404 responses, do it here
            if 'code' in header and header['code'] == 404:
                raise ResponseException(404)

            if 'location' not in header:
                break

            location = header['location']
            self.logDebug("Location: " + location)

            # scheme://host of the current URL, used for relative redirects
            base = search(r'https?://[^/]+', url).group(0)

            if location.startswith("http"):
                url = unquote(location)
            elif location.startswith("/"):
                url = base + unquote(location)
            else:
                url = '%s/%s' % (base, unquote(location))

        last_segment = urlparse(url).path.split("/")[-1]
        name = html_unescape(unquote(last_segment))

        if 'content-disposition' in header:
            disposition = header['content-disposition']
            self.logDebug("Content-Disposition: " + disposition)
            m = search("filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)",
                       disposition)
            if m:
                disp = m.groupdict()
                self.logDebug(disp)
                # The plain "filename=" form carries no charset.
                if not disp['enc']:
                    disp['enc'] = 'utf-8'
                cleaned = remove_chars(disp['name'], "\"';").strip()
                name = unicode(unquote(cleaned), disp['enc'])

        pyfile.name = name or url
        self.logDebug("Filename: %s" % pyfile.name)
        self.download(url, disposition=True)

예제 #12

0

파일 보기

파일: BasePlugin.py 프로젝트: JeRiKo1/pyload

    def downloadFile(self, pyfile):
        """Resolve redirects (at most five), pick a file name and download.

        The name is taken from the last path segment of the final URL unless
        a ``content-disposition`` header supplies a ``filename``; if both
        give nothing the URL itself becomes the name.

        Raises ResponseException(404) when a header reports status code 404.
        """
        url = pyfile.url

        for _redirect in xrange(5):
            header = self.load(url, just_header=True)

            # self.load does not raise a BadHeader on 404 responses, do it here
            if "code" in header and header["code"] == 404:
                raise ResponseException(404)

            if "location" not in header:
                break

            target = header["location"]
            self.logDebug("Location: " + target)

            # scheme://host prefix of the URL that issued the redirect
            base = match(r"https?://[^/]+", url).group(0)

            if target.startswith("http"):
                url = unquote(target)
            elif target.startswith("/"):
                url = base + unquote(target)
            else:
                url = "%s/%s" % (base, unquote(target))

        path = urlparse(url).path
        name = html_unescape(unquote(path.split("/")[-1]))

        if "content-disposition" in header:
            content_disposition = header["content-disposition"]
            self.logDebug("Content-Disposition: " + content_disposition)
            m = search("filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)", content_disposition)
            if m:
                disp = m.groupdict()
                self.logDebug(disp)
                # No charset is given with the plain "filename=" form.
                if not disp["enc"]:
                    disp["enc"] = "utf-8"
                stripped = remove_chars(disp["name"], "\"';").strip()
                name = unicode(unquote(stripped), disp["enc"])

        if not name:
            name = url

        pyfile.name = name
        self.logDebug("Filename: %s" % pyfile.name)
        self.download(url, disposition=True)

예제 #13

0

파일 보기

파일: XFSHoster.py 프로젝트: PaddyPat/pyload

    def handleCaptcha(self, inputs):
        """Solve whichever captcha the page presents and fill *inputs*.

        Return codes (first matching check wins):

        * 1 -- image captcha matched by ``CAPTCHA_PATTERN``
        * 2 -- digit captcha inside ``CAPTCHA_BLOCK_PATTERN``
        * 3 -- ReCaptcha
        * 4 -- SolveMedia
        * 0 -- no captcha detected
        """
        url_match = re.search(self.CAPTCHA_PATTERN, self.html)
        if url_match:
            captcha_url = url_match.group(1)
            inputs['code'] = self.decryptCaptcha(captcha_url)
            return 1

        div_match = re.search(self.CAPTCHA_BLOCK_PATTERN, self.html, re.S)
        if div_match:
            captcha_div = div_match.group(1)
            span_pattern = r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>'
            numerals = re.findall(span_pattern, html_unescape(captcha_div))

            self.logDebug(captcha_div)

            # Sort the (padding-left, digit) pairs by padding to recover the
            # order in which the digits are displayed.
            by_offset = sorted(numerals, key=lambda num: int(num[0]))
            digits = [entry[1] for entry in by_offset]
            inputs['code'] = "".join(digits)

            self.logDebug("Captcha code: %s" % inputs['code'], numerals)
            return 2

        recaptcha = ReCaptcha(self)
        try:
            captcha_key = re.search(self.RECAPTCHA_PATTERN, self.html).group(1)

        except Exception:
            # Pattern missing or not matching: fall back to auto-detection.
            captcha_key = recaptcha.detect_key()

        else:
            self.logDebug("ReCaptcha key: %s" % captcha_key)

        if captcha_key:
            solved = recaptcha.challenge(captcha_key)
            inputs['recaptcha_response_field'], inputs['recaptcha_challenge_field'] = solved
            return 3

        solvemedia = SolveMedia(self)
        try:
            captcha_key = re.search(self.SOLVEMEDIA_PATTERN, self.html).group(1)

        except Exception:
            captcha_key = solvemedia.detect_key()

        else:
            self.logDebug("SolveMedia key: %s" % captcha_key)

        if captcha_key:
            solved = solvemedia.challenge(captcha_key)
            inputs['adcopy_response'], inputs['adcopy_challenge'] = solved
            return 4

        return 0

예제 #14

0

파일 보기

 def get_file_name(self):
     """Build the file name from the page's ``globalHd`` heading.

     Slashes are removed from the heading text, ``.flv`` is appended and the
     result is HTML-unescaped.
     """
     heading = re.search(r"<h1 class='globalHd'>(.*)</h1>", self.html)
     title = heading.group(1).replace("/", "")
     return html_unescape(title + '.flv')

예제 #15

0

파일 보기

파일: YoutubeCom.py 프로젝트: Bobbaone/pyload

    def process(self, pyfile):
        """Pick a stream for the video page, name the file and download it.

        Steps, in order:

        1. Normalise ``pyfile.url`` and load the page.
        2. Work out the desired itag from the ``fmt`` / ``quality`` settings.
        3. Parse ``url_encoded_fmt_stream_map`` into (itag, url) pairs and
           keep the ones that are known and enabled in the configuration.
        4. Use the desired itag if available, otherwise choose among the
           remaining ones by comparing their quality index.
        5. Derive the file name from the page's ``title`` meta tag.
        6. Download; if the URL carries a ``t=..m..s`` start time and ffmpeg
           is available, cut the file from that position with ffmpeg.

        ``self.formats[itag]`` is read at index 0 (configuration key and file
        suffix), index 3 (quality index) and index 4 (compared against the
        ``3d`` setting).
        """
        pyfile.url = replace_patterns(pyfile.url, self.URL_REPLACEMENTS)
        html       = self.load(pyfile.url, decode=True)

        if re.search(r'<div id="player-unavailable" class="\s*player-width player-height\s*">', html):
            self.offline()

        if "We have been receiving a large volume of requests from your network." in html:
            self.tempOffline()

        # get config
        use3d = self.getConfig('3d')

        # Map the configured quality name to an itag number; a different
        # table is used when 3D is enabled.
        if use3d:
            quality = {"sd": 82, "hd": 84, "fullhd": 85, "240p": 83, "360p": 82,
                       "480p": 82, "720p": 84, "1080p": 85, "3072p": 85}
        else:
            quality = {"sd": 18, "hd": 22, "fullhd": 37, "240p": 5, "360p": 18,
                       "480p": 35, "720p": 22, "1080p": 37, "3072p": 38}

        desired_fmt = self.getConfig('fmt')

        if not desired_fmt:
            # No explicit itag configured: derive it from the quality name,
            # falling back to itag 18.
            desired_fmt = quality.get(self.getConfig('quality'), 18)

        elif desired_fmt not in self.formats:
            self.logWarning(_("FMT %d unknown, using default") % desired_fmt)
            # NOTE(review): 0 is later used as a key in self.formats (see the
            # "DESIRED STREAM" log call below); that lookup fails unless
            # self.formats has an entry for 0 -- confirm.
            desired_fmt = 0

        # parse available streams
        streams = re.search(r'"url_encoded_fmt_stream_map":"(.+?)",', html).group(1)
        # NOTE(review): '\u0026' is the six-character escape text in a
        # Python 2 byte string but a plain '&' in a unicode/Python 3 literal
        # -- confirm which form the page data contains.
        streams = [x.split('\u0026') for x in streams.split(',')]
        # Each stream becomes a dict of its key=value fields ...
        streams = [dict((y.split('=', 1)) for y in x) for x in streams]
        # ... and is then reduced to an (itag, unquoted url) pair.
        streams = [(int(x['itag']), urllib.unquote(x['url'])) for x in streams]

        # self.logDebug("Found links: %s" % streams)

        self.logDebug("AVAILABLE STREAMS: %s" % [x[0] for x in streams])

        # build dictionary of supported itags (3D/2D)
        # An itag is allowed when the config option named by its formats[0]
        # entry is enabled.
        allowed = lambda x: self.getConfig(self.formats[x][0])
        streams = [x for x in streams if x[0] in self.formats and allowed(x[0])]

        if not streams:
            self.fail(_("No available stream meets your preferences"))

        # Prefer streams whose 3D flag equals the setting; if there are none,
        # fall back to every allowed stream.
        fmt_dict = dict([x for x in streams if self.formats[x[0]][4] == use3d] or streams)

        self.logDebug("DESIRED STREAM: ITAG:%d (%s) %sfound, %sallowed" %
                      (desired_fmt, "%s %dx%d Q:%d 3D:%s" % self.formats[desired_fmt],
                       "" if desired_fmt in fmt_dict else "NOT ", "" if allowed(desired_fmt) else "NOT "))

        # return fmt nearest to quality index
        if desired_fmt in fmt_dict and allowed(desired_fmt):
            fmt = desired_fmt
        else:
            sel  = lambda x: self.formats[x][3]  #: select quality index
            # Distance between the quality indexes of two itags.
            comp = lambda x, y: abs(sel(x) - sel(y))

            self.logDebug("Choosing nearest fmt: %s" % [(x, allowed(x), comp(x, desired_fmt)) for x in fmt_dict.keys()])

            # Pairwise fold: the running candidate x is kept only when it is
            # at least as close to the desired format as y AND has the higher
            # quality index; otherwise y replaces it.
            fmt = reduce(lambda x, y: x if comp(x, desired_fmt) <= comp(y, desired_fmt) and
                         sel(x) > sel(y) else y, fmt_dict.keys())

        self.logDebug("Chosen fmt: %s" % fmt)

        url = fmt_dict[fmt]

        self.logDebug("URL: %s" % url)

        # set file name
        file_suffix = self.formats[fmt][0] if fmt in self.formats else ".flv"
        file_name_pattern = '<meta name="title" content="(.+?)">'
        name = re.search(file_name_pattern, html).group(1).replace("/", "")

        # Cleaning invalid characters from the file name
        # (non-ASCII characters are replaced by the 'replace' error handler
        # first, then every character of self.invalidChars becomes '_')
        name = name.encode('ascii', 'replace')
        for c in self.invalidChars:
            name = name.replace(c, '_')

        pyfile.name = html_unescape(name)

        # Optional start offset in the URL: "t=<minutes>m<seconds>s" with the
        # minutes part optional.
        time = re.search(r"t=((\d+)m)?(\d+)s", pyfile.url)
        ffmpeg = which("ffmpeg")
        if ffmpeg and time:
            # groups() is (whole minutes part, minutes, seconds); the first
            # entry is skipped.
            m, s = time.groups()[1:]
            if m is None:
                m = "0"

            pyfile.name += " (starting at %s:%s)" % (m, s)

        pyfile.name += file_suffix
        filename     = self.download(url)

        if ffmpeg and time:
            # Move the download aside, let ffmpeg copy both streams from the
            # start offset into the original file name, then drop the
            # temporary input file.
            inputfile = filename + "_"
            os.rename(filename, inputfile)

            subprocess.call([
                ffmpeg,
                "-ss", "00:%s:%s" % (m, s),
                "-i", inputfile,
                "-vcodec", "copy",
                "-acodec", "copy",
                filename])

            os.remove(inputfile)

예제 #16

0

파일 보기

    def process(self, pyfile):
        """Pick a stream for the video page, name the file and download it.

        Steps, in order:

        1. Normalise ``pyfile.url`` and load the page.
        2. Work out the desired itag from the ``fmt`` / ``quality`` settings.
        3. Parse ``url_encoded_fmt_stream_map`` into (itag, url) pairs and
           keep the ones that are known and enabled in the configuration.
        4. Use the desired itag if available, otherwise choose among the
           remaining ones by comparing their quality index.
        5. Derive the file name from the page's ``title`` meta tag.
        6. Download; if the URL carries a ``t=..m..s`` start time and ffmpeg
           is available, cut the file from that position with ffmpeg.

        ``self.formats[itag]`` is read at index 0 (configuration key and file
        suffix), index 3 (quality index) and index 4 (compared against the
        ``3d`` setting).
        """
        pyfile.url = replace_patterns(pyfile.url, self.URL_REPLACEMENTS)
        html = self.load(pyfile.url, decode=True)

        if re.search(
                r'<div id="player-unavailable" class="\s*player-width player-height\s*">',
                html):
            self.offline()

        if "We have been receiving a large volume of requests from your network." in html:
            self.tempOffline()

        # get config
        use3d = self.getConfig('3d')

        # Map the configured quality name to an itag number; a different
        # table is used when 3D is enabled.
        if use3d:
            quality = {
                "sd": 82,
                "hd": 84,
                "fullhd": 85,
                "240p": 83,
                "360p": 82,
                "480p": 82,
                "720p": 84,
                "1080p": 85,
                "3072p": 85
            }
        else:
            quality = {
                "sd": 18,
                "hd": 22,
                "fullhd": 37,
                "240p": 5,
                "360p": 18,
                "480p": 35,
                "720p": 22,
                "1080p": 37,
                "3072p": 38
            }

        desired_fmt = self.getConfig('fmt')

        if not desired_fmt:
            # No explicit itag configured: derive it from the quality name,
            # falling back to itag 18.
            desired_fmt = quality.get(self.getConfig('quality'), 18)

        elif desired_fmt not in self.formats:
            self.logWarning(_("FMT %d unknown, using default") % desired_fmt)
            # NOTE(review): 0 is later used as a key in self.formats (see the
            # "DESIRED STREAM" log call below); that lookup fails unless
            # self.formats has an entry for 0 -- confirm.
            desired_fmt = 0

        # parse available streams
        streams = re.search(r'"url_encoded_fmt_stream_map":"(.+?)",',
                            html).group(1)
        # NOTE(review): '\u0026' is the six-character escape text in a
        # Python 2 byte string but a plain '&' in a unicode/Python 3 literal
        # -- confirm which form the page data contains.
        streams = [x.split('\u0026') for x in streams.split(',')]
        # Each stream becomes a dict of its key=value fields ...
        streams = [dict((y.split('=', 1)) for y in x) for x in streams]
        # ... and is then reduced to an (itag, unquoted url) pair.
        streams = [(int(x['itag']), unquote(x['url'])) for x in streams]

        # self.logDebug("Found links: %s" % streams)

        self.logDebug("AVAILABLE STREAMS: %s" % [x[0] for x in streams])

        # build dictionary of supported itags (3D/2D)
        # An itag is allowed when the config option named by its formats[0]
        # entry is enabled.
        allowed = lambda x: self.getConfig(self.formats[x][0])
        streams = [
            x for x in streams if x[0] in self.formats and allowed(x[0])
        ]

        if not streams:
            self.fail(_("No available stream meets your preferences"))

        # Prefer streams whose 3D flag equals the setting; if there are none,
        # fall back to every allowed stream.
        fmt_dict = dict([x for x in streams if self.formats[x[0]][4] == use3d]
                        or streams)

        self.logDebug(
            "DESIRED STREAM: ITAG:%d (%s) %sfound, %sallowed" %
            (desired_fmt, "%s %dx%d Q:%d 3D:%s" % self.formats[desired_fmt],
             "" if desired_fmt in fmt_dict else "NOT ",
             "" if allowed(desired_fmt) else "NOT "))

        # return fmt nearest to quality index
        if desired_fmt in fmt_dict and allowed(desired_fmt):
            fmt = desired_fmt
        else:
            sel = lambda x: self.formats[x][3]  #: select quality index
            # Distance between the quality indexes of two itags.
            comp = lambda x, y: abs(sel(x) - sel(y))

            self.logDebug("Choosing nearest fmt: %s" %
                          [(x, allowed(x), comp(x, desired_fmt))
                           for x in fmt_dict.keys()])

            # Pairwise fold: the running candidate x is kept only when it is
            # at least as close to the desired format as y AND has the higher
            # quality index; otherwise y replaces it.
            fmt = reduce(
                lambda x, y: x if comp(x, desired_fmt) <= comp(y, desired_fmt)
                and sel(x) > sel(y) else y, fmt_dict.keys())

        self.logDebug("Chosen fmt: %s" % fmt)

        url = fmt_dict[fmt]

        self.logDebug("URL: %s" % url)

        # set file name
        file_suffix = self.formats[fmt][0] if fmt in self.formats else ".flv"
        file_name_pattern = '<meta name="title" content="(.+?)">'
        name = re.search(file_name_pattern, html).group(1).replace("/", "")

        # Cleaning invalid characters from the file name
        # (non-ASCII characters are replaced by the 'replace' error handler
        # first, then every character of self.invalidChars becomes '_')
        name = name.encode('ascii', 'replace')
        for c in self.invalidChars:
            name = name.replace(c, '_')

        pyfile.name = html_unescape(name)

        # Optional start offset in the URL: "t=<minutes>m<seconds>s" with the
        # minutes part optional.
        time = re.search(r"t=((\d+)m)?(\d+)s", pyfile.url)
        ffmpeg = which("ffmpeg")
        if ffmpeg and time:
            # groups() is (whole minutes part, minutes, seconds); the first
            # entry is skipped.
            m, s = time.groups()[1:]
            if m is None:
                m = "0"

            pyfile.name += " (starting at %s:%s)" % (m, s)

        pyfile.name += file_suffix
        filename = self.download(url)

        if ffmpeg and time:
            # Move the download aside, let ffmpeg copy both streams from the
            # start offset into the original file name, then drop the
            # temporary input file.
            inputfile = filename + "_"
            os.rename(filename, inputfile)

            subprocess.call([
                ffmpeg, "-ss",
                "00:%s:%s" % (m, s), "-i", inputfile, "-vcodec", "copy",
                "-acodec", "copy", filename
            ])

            os.remove(inputfile)