Python decode_html_entities Exemples, svtplay_dl.utils.text.decode_html_entities Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : __init__.py Projet : olof/debian-svtplay-dl

    def sami(self, subdata):
        """Convert a SAMI-style XML subtitle response into SRT-formatted text.

        ``subdata`` must expose the raw document as ``subdata.text``. Every
        ``Subtitle`` element becomes one cue: its number, a
        ``TimeIn --> TimeOut`` line and the decoded contents of its ``Text``
        descendants, one per line.
        """
        # Escape every ampersand up front so the XML parser accepts the
        # document; the escaping is undone on the finished output below.
        root = ET.fromstring(re.sub(r'&', '&amp;', subdata.text))
        cues = []
        offset = 0
        for entry in root.findall(".//Subtitle"):
            spot = entry.attrib["SpotNumber"]
            try:
                base = int(spot)
            except ValueError:
                # Non-numeric spot number: use its first run of digits and
                # shift this and every later cue number up by one.
                base = int(re.search(r"(\d+)", spot).group(1))
                offset += 1
            index = base + offset

            body = "".join(
                "{}\n".format(decode_html_entities("".join(node.itertext()).lstrip()))
                for node in entry.findall(".//Text")
            )
            cues.append(
                "{}\n{} --> {}\n{}\n".format(
                    index, timecolon(entry.attrib["TimeIn"]), timecolon(entry.attrib["TimeOut"]), body
                )
            )
        return re.sub('&amp;', r'&', "".join(cues))

Exemple #2

0

Afficher le fichier

Fichier : expressen.py Projet : olof/debian-svtplay-dl

    def get(self):
        """Yield the HLS streams found on an Expressen video page.

        Locates the embedded player URL in the page, fetches it and reads the
        ``window.Player.settings`` JSON object for its ``iPad`` and
        ``hashHls`` stream URLs. Logs an error and yields nothing when either
        the player URL or the settings JSON cannot be found.
        """
        data = self.get_urldata()

        match = re.search('="(https://www.expressen.se/tvspelare[^"]+)"', data)
        if not match:
            log.error("Can't find video id")
            return
        url = decode_html_entities(match.group(1))
        data = self.http.request("get", url)

        match = re.search("window.Player.settings = ({.*});", data.text)
        if not match:
            log.error("Can't find json info.")
            # Stop here: falling through would call .group() on None and
            # raise AttributeError right after logging the real problem.
            return

        dataj = json.loads(match.group(1))
        if "streams" not in dataj:
            return
        # Same handling for both stream kinds, in the original order.
        for kind in ("iPad", "hashHls"):
            if kind not in dataj["streams"]:
                continue
            stream_url = dataj["streams"][kind]
            streams = hlsparse(self.config, self.http.request("get", stream_url),
                               stream_url, output=self.output)
            for n in list(streams.keys()):
                yield streams[n]

Exemple #3

0

Afficher le fichier

    def sami(self, subdata):
        """Convert a SAMI-style XML subtitle response into SRT-formatted text.

        Walks the ``Font`` element of the document: every ``Subtitle``
        element starts a new cue (number plus ``TimeIn --> TimeOut`` line) and
        the text of every other element visited after a cue with a positive
        number is appended as a cue line.

        ``subdata`` must expose the raw document as ``subdata.text``.
        """
        text = subdata.text
        # Escape ampersands so the XML parser accepts the document; the
        # escaping is undone on the finished output below.
        text = re.sub(r'&', '&amp;', text)
        tree = ET.fromstring(text)
        subt = tree.find("Font")
        subs = ""
        n = 0
        # Element.getiterator() was removed in Python 3.9; iter() is the
        # drop-in equivalent and also exists on older interpreters.
        for i in subt.iter():
            if i.tag == "Subtitle":
                n = i.attrib["SpotNumber"]
                # Every cue except the one numbered "1" is preceded by a
                # blank separator line.
                separator = "" if n == "1" else "\n"
                subs += "%s%s\n%s --> %s\n" % (
                    separator, n, timecolon(i.attrib["TimeIn"]),
                    timecolon(i.attrib["TimeOut"]))
            elif int(n) > 0 and i.text:
                subs += "%s\n" % decode_html_entities(i.text)

        subs = re.sub('&amp;', r'&', subs)
        return subs

Exemple #4

0

Afficher le fichier

Fichier : __init__.py Projet : toran4/svtplay-dl

    def sami(self, subdata):
        """Build SRT-formatted text from a SAMI-style XML subtitle response.

        Reads the document from ``subdata.text`` and emits, for each
        ``Subtitle`` element, its cue number, a ``TimeIn --> TimeOut`` line
        and one decoded line per ``Text`` descendant.
        """
        # Ampersands are escaped for the XML parser and restored at the end.
        document = re.sub(r"&", "&amp;", subdata.text)
        pieces = []
        bump = 0
        for cue in ET.fromstring(document).findall(".//Subtitle"):
            raw_number = cue.attrib["SpotNumber"]
            try:
                parsed = int(raw_number)
            except ValueError:
                # Fall back to the first digit run; each such cue pushes
                # its own and all following cue numbers up by one.
                parsed = int(re.search(r"(\d+)", raw_number).group(1))
                bump += 1

            rows = []
            for node in cue.findall(".//Text"):
                joined = "".join(node.itertext())
                rows.append("{}\n".format(decode_html_entities(joined.lstrip())))

            pieces.append(
                "{}\n{} --> {}\n{}\n".format(
                    parsed + bump,
                    timecolon(cue.attrib["TimeIn"]),
                    timecolon(cue.attrib["TimeOut"]),
                    "".join(rows),
                )
            )
        return re.sub("&amp;", r"&", "".join(pieces))

Exemple #5

0

Afficher le fichier

Fichier : __init__.py Projet : toran4/svtplay-dl

 def smi(self, subdata):
     """Convert a SAMI (.smi) subtitle response into SRT-formatted text.

     The response body is decoded as Latin-1. Each ``<SYNC Start=...>`` line
     whose start differs from the previous one closes the pending
     ``<P Class=SVCC>`` paragraph as a cue running from the previous start to
     the new one; empty and ``&nbsp;`` paragraphs produce no cue.
     """
     if requests_version < 0x20300:
         raw = subdata.content.decode("latin")
     else:
         subdata.encoding = "ISO-8859-1"
         raw = subdata.text

     # Removes every tag except <i> and </i>.
     strip_tags = re.compile(r"<(?!\/?i).*?>")
     previous_start = 0
     counter = 1
     pending = None
     chunks = []
     for row in StringIO(raw).readlines():
         row = row.rstrip()
         sync = re.search(r"<SYNC Start=(\d+)>", row)
         if sync:
             start = sync.group(1)
             if int(start) != int(previous_start) and pending and pending != "&nbsp;":
                 chunks.append("{}\n{} --> {}\n".format(
                     counter, timestr(previous_start), timestr(start)))
                 cue = "%s\n" % strip_tags.sub("", pending.replace("<br>", "\n"))
                 cue = decode_html_entities(cue)
                 # Make sure the cue ends with a blank separator line.
                 if cue[len(cue) - 2] != "\n":
                     cue += "\n"
                 chunks.append(cue)
                 counter += 1
             previous_start = start
         paragraph = re.search("<P Class=SVCC>(.*)", row)
         if paragraph:
             pending = paragraph.group(1)

     without_cr = re.sub(r"\r", "", "".join(chunks))
     return re.sub(r"\x96", "-", without_cr)

Exemple #6

0

Afficher le fichier

Fichier : __init__.py Projet : olof/debian-svtplay-dl

 def smi(self, subdata):
     """Turn a SAMI (.smi) subtitle response into SRT-formatted text.

     The body is read as Latin-1 and scanned line by line. A ``<SYNC
     Start=...>`` marker with a new start value flushes the most recent
     ``<P Class=SVCC>`` paragraph as a cue spanning the previous and the new
     start; paragraphs that are empty or just ``&nbsp;`` are skipped.
     """
     if requests_version < 0x20300:
         body = subdata.content.decode("latin")
     else:
         subdata.encoding = "ISO-8859-1"
         body = subdata.text

     # Matches any tag other than <i> / </i>.
     tag_pattern = re.compile(r'<(?!\/?i).*?>')
     last_start = 0
     cue_no = 1
     paragraph = None
     out = []
     for line in StringIO(body).readlines():
         line = line.rstrip()
         marker = re.search(r"<SYNC Start=(\d+)>", line)
         if marker:
             new_start = marker.group(1)
             changed = int(new_start) != int(last_start)
             if changed and paragraph and paragraph != "&nbsp;":
                 out.append("%s\n%s --> %s\n" % (cue_no, timestr(last_start), timestr(new_start)))
                 rendered = decode_html_entities(
                     "%s\n" % tag_pattern.sub('', paragraph.replace("<br>", "\n")))
                 # Pad so every cue is followed by a blank line.
                 if rendered[len(rendered) - 2] != "\n":
                     rendered += "\n"
                 out.append(rendered)
                 cue_no += 1
             last_start = new_start
         found = re.search("<P Class=SVCC>(.*)", line)
         if found:
             paragraph = found.group(1)

     joined = "".join(out)
     return re.sub(r'\x96', '-', re.sub(r'\r', '', joined))

Exemple #7

0

Afficher le fichier

Fichier : oppetarkiv.py Projet : spaam/svtplay-dl

    def outputfilename(self, data):
        """Fill ``self.output`` with an id and, when available, a title.

        The id is the first seven hex digits of the SHA-256 of
        ``data["programVersionId"]``. The title comes from the page's
        ``data-title`` attribute; when it is absent only the id is set.
        Always returns ``None``.
        """
        digest = hashlib.sha256(data["programVersionId"].encode("utf-8")).hexdigest()
        self.output["id"] = digest[:7]

        title_match = re.search('data-title="([^"]+)"', self.get_urldata())
        if title_match:
            title = decode_html_entities(title_match.group(1))
            self.output["title"] = self.name(title)
            self.seasoninfo(title)
        return None

Exemple #8

0

Afficher le fichier

    def outputfilename(self, data):
        """Record the output id and title for this programme.

        ``self.output["id"]`` is set to a seven-character prefix of the
        SHA-256 hex digest of ``data["programVersionId"]``. If the page has a
        ``data-title`` attribute, its decoded value is passed to
        ``self.name`` for the title and to ``self.seasoninfo``.
        """
        version_id = data["programVersionId"].encode("utf-8")
        short_hash = hashlib.sha256(version_id).hexdigest()[:7]
        self.output["id"] = short_hash

        found = re.search('data-title="([^"]+)"', self.get_urldata())
        if not found:
            return None
        decoded_title = decode_html_entities(found.group(1))
        self.output["title"] = self.name(decoded_title)
        self.seasoninfo(decoded_title)

Exemple #9

0

Afficher le fichier

def filename(stream):
    """Derive ``stream.output["title"]`` from the page's ``<title>`` tag.

    Does nothing when a title is already set. Returns ``False`` only when the
    page data is unavailable (``None``); otherwise returns ``True``, whether
    or not a ``<title>`` tag was found.
    """
    if stream.output["title"] is not None:
        return True

    page = ensure_unicode(stream.get_urldata())
    if page is None:
        return False

    title = re.search(r"(?i)<title[^>]*>\s*(.*?)\s*</title>", page, re.S)
    if title:
        stream.config.set("output_auto", True)
        stream.output["title"] = filenamify(decode_html_entities(title.group(1)))
    return True

Exemple #10

0

Afficher le fichier

Fichier : output.py Projet : olof/debian-svtplay-dl

def filename(stream):
    """Set a missing output title from the HTML ``<title>`` element.

    Returns ``False`` when the title is unset and the page data is ``None``;
    returns ``True`` in every other case, including when a title was already
    present or no ``<title>`` element could be found.
    """
    needs_title = stream.output["title"] is None
    if needs_title:
        html = ensure_unicode(stream.get_urldata())
        if html is None:
            return False
        found = re.search(r"(?i)<title[^>]*>\s*(.*?)\s*</title>", html, re.S)
        if found:
            stream.config.set("output_auto", True)
            decoded = decode_html_entities(found.group(1))
            stream.output["title"] = filenamify(decoded)
    return True

Exemple #11

0

Afficher le fichier

Fichier : aftonbladet.py Projet : olof/debian-svtplay-dl

    def get(self):
        """Yield the HLS streams described by the page's player config.

        The config is the entity-encoded JSON held in the
        ``data-player-config`` attribute, or ``data-svpPlayer-video`` as a
        fallback. Yields a single ``ServiceError`` when neither is present.
        """
        page = self.get_urldata()

        found = re.search('data-player-config="([^"]+)"', page) or re.search('data-svpPlayer-video="([^"]+)"', page)
        if not found:
            yield ServiceError("Can't find video info")
            return

        config = json.loads(decode_html_entities(found.group(1)))
        hls_url = config["streamUrls"]["hls"]
        streams = hlsparse(self.config, self.http.request("get", hls_url), hls_url, output=self.output)
        for key in list(streams.keys()):
            yield streams[key]

Exemple #12

0

Afficher le fichier

Fichier : aftonbladet.py Projet : olof/debian-svtplay-dl

    def get(self):
        """Yield HLS streams for the video embedded in the page.

        Tries the ``data-player-config`` attribute first and then
        ``data-svpPlayer-video``; the attribute value is entity-decoded and
        parsed as JSON. When neither attribute exists a ``ServiceError`` is
        yielded and the generator ends.
        """
        html = self.get_urldata()

        attribute = None
        for pattern in ('data-player-config="([^"]+)"', 'data-svpPlayer-video="([^"]+)"'):
            attribute = re.search(pattern, html)
            if attribute:
                break
        if not attribute:
            yield ServiceError("Can't find video info")
            return

        info = json.loads(decode_html_entities(attribute.group(1)))
        playlist_url = info["streamUrls"]["hls"]
        playlist = self.http.request("get", playlist_url)
        streams = hlsparse(self.config, playlist, playlist_url, output=self.output)
        for quality in list(streams.keys()):
            yield streams[quality]

Exemple #13

0

Afficher le fichier

Fichier : expressen.py Projet : spaam/svtplay-dl

    def get(self):
        """Yield the HLS streams listed in the page's article data.

        The ``data-article-data`` attribute holds entity-encoded JSON whose
        ``isLive`` value is copied into the config and whose ``stream`` value
        is the HLS playlist URL. Yields a ``ServiceError`` when the attribute
        is missing.
        """
        page = self.get_urldata()

        found = re.search('data-article-data="([^"]+)"', page)
        if not found:
            yield ServiceError("Cant find video file info")
            return

        janson = json.loads(decode_html_entities(found.group(1)))
        self.config.set("live", janson["isLive"])

        stream_url = janson["stream"]
        streams = hlsparse(self.config, self.http.request("get", stream_url), stream_url, output=self.output)
        for key in list(streams.keys()):
            yield streams[key]

Exemple #14

0

Afficher le fichier

Fichier : lemonwhale.py Projet : wirretheman/svtplay-dl

    def get(self):
        """Yield the streams Lemonwhale reports for this page's video id.

        Queries the ``item.json`` endpoint and then the HLS ``video.json``
        endpoint, yielding every stream ``self.get_video`` extracts from each
        response that contains a ``videos`` key. Yields a ``ServiceError``
        when no video id can be found.
        """
        vid = self.get_vid()
        if not vid:
            yield ServiceError("Can't find video id")
            return

        decoded_vid = decode_html_entities(vid)
        urls = (
            "http://ljsp.lwcdn.com/web/public/item.json?type=video&%s" % decoded_vid,
            "http://ljsp.lwcdn.com/web/public/video.json?id={}&delivery=hls".format(decoded_vid),
        )
        for url in urls:
            jdata = json.loads(self.http.request("get", url).text)
            if "videos" not in jdata:
                continue
            streams = self.get_video(jdata)
            # The first request already guarded against a falsy result; the
            # second one did not and failed on `.keys()`. Guard both.
            if streams:
                for n in list(streams.keys()):
                    yield streams[n]

Exemple #15

0

Afficher le fichier

Fichier : expressen.py Projet : wirretheman/svtplay-dl

    def get(self):
        """Yield HLS streams for the article's video.

        Reads the entity-encoded JSON in the ``data-article-data`` attribute,
        stores its ``isLive`` flag in the config and parses the playlist at
        its ``stream`` URL. A ``ServiceError`` is yielded when the attribute
        cannot be found.
        """
        html = self.get_urldata()

        article = re.search('data-article-data="([^"]+)"', html)
        if not article:
            yield ServiceError("Cant find video file info")
            return

        decoded = decode_html_entities(article.group(1))
        info = json.loads(decoded)
        self.config.set("live", info["isLive"])

        playlist_url = info["stream"]
        playlist = self.http.request("get", playlist_url)
        streams = hlsparse(self.config, playlist, playlist_url, output=self.output)
        for quality in list(streams.keys()):
            yield streams[quality]

Exemple #16

0

Afficher le fichier

Fichier : lemonwhale.py Projet : olof/debian-svtplay-dl

    def get(self):
        """Yield the streams Lemonwhale reports for this page's video id.

        Requests ``item.json`` and then the HLS variant of ``video.json``;
        for each response containing a ``videos`` key, the streams returned
        by ``self.get_video`` are yielded. Yields a ``ServiceError`` when no
        video id can be found.
        """
        vid = self.get_vid()
        if not vid:
            yield ServiceError("Can't find video id")
            return

        url = "http://ljsp.lwcdn.com/web/public/item.json?type=video&%s" % decode_html_entities(vid)
        data = self.http.request("get", url).text
        jdata = json.loads(data)
        if "videos" in jdata:
            streams = self.get_video(jdata)
            if streams:
                for n in list(streams.keys()):
                    yield streams[n]

        url = "http://ljsp.lwcdn.com/web/public/video.json?id={0}&delivery=hls".format(decode_html_entities(vid))
        data = self.http.request("get", url).text
        jdata = json.loads(data)
        if "videos" in jdata:
            streams = self.get_video(jdata)
            # Same guard as the first request: a falsy get_video() result
            # previously crashed here on `.keys()`.
            if streams:
                for n in list(streams.keys()):
                    yield streams[n]

Exemple #17

0

Afficher le fichier

Fichier : __init__.py Projet : toran4/svtplay-dl

    def wrst(self, subdata):
        """Convert WebVTT-style subtitle text in ``subdata.text`` to SRT text.

        The input is processed line by line as a small state machine:
        header lines are dropped, timing lines are rewritten with ``,`` as
        the decimal separator, cue identifiers are kept (or generated when
        missing) and cue text has its tags either stripped or converted to
        ``<font color=...>`` tags, depending on the
        ``convert_subtitle_colors`` config option. HTML entities in the
        final result are decoded before it is returned.
        """
        ssubdata = StringIO(subdata.text)
        srt = ""
        subtract = False  # once set, 10 is subtracted from every hour value
        number_b = 1  # running cue counter, used when a cue has no identifier
        number = 0  # last cue identifier read from the input
        block = 0  # 1 while inside a cue's text, 0 between cues
        subnr = False  # True when the current cue's identifier is already written

        for i in ssubdata.readlines():
            match = re.search(r"^[\r\n]+", i)  # blank line
            match2 = re.search(r"([\d:\.]+ --> [\d:\.]+)", i)  # timing line
            match3 = re.search(r"^(\d+)\s", i)  # line starting with a number
            if i[:6] == "WEBVTT":
                continue
            elif "X-TIMESTAMP" in i:
                continue
            elif match and number_b == 1 and self.bom:
                # Blank line before the first timing line while self.bom is
                # set: dropped. NOTE(review): presumably self.bom marks input
                # that started with a byte-order mark — confirm.
                continue
            elif match and number_b > 1:
                # Blank line after at least one cue: ends the cue.
                block = 0
                srt += "\n"
            elif match2:
                if not subnr:
                    # No identifier line preceded this cue; use the counter.
                    srt += "%s\n" % number_b
                matchx = re.search(
                    r"(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)",
                    i)
                if matchx:
                    hour1 = int(matchx.group("h1"))
                    hour2 = int(matchx.group("h2"))
                    # If the cue identified as 1 starts at hour 10 or later,
                    # shift this and all later timestamps back by 10 hours.
                    if int(number) == 1:
                        if hour1 > 9:
                            subtract = True
                    if subtract:
                        hour1 -= 10
                        hour2 -= 10
                else:
                    # Timestamps without an hour field (mm:ss.fff).
                    matchx = re.search(
                        r"(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)",
                        i)
                    hour1 = 0
                    hour2 = 0
                time = "{:02d}:{}:{} --> {:02d}:{}:{}\n".format(
                    hour1,
                    matchx.group("m1"),
                    matchx.group("s1").replace(".", ","),
                    hour2,
                    matchx.group("m2"),
                    matchx.group("s2").replace(".", ","),
                )
                srt += time
                block = 1
                subnr = False
                number_b += 1

            elif match3 and block == 0:
                # A leading number outside cue text is a cue identifier.
                number = match3.group(1)
                srt += "%s\n" % number
                subnr = True
            else:
                # Anything else is written out as cue text.
                if self.config.get("convert_subtitle_colors"):
                    # Maps tag names to the colour used in the <font> tag.
                    colors = {
                        "30": "#000000",
                        "31": "#ff0000",
                        "32": "#00ff00",
                        "33": "#ffff00",
                        "34": "#0000ff",
                        "35": "#ff00ff",
                        "36": "#00ffff",
                        "37": "#ffffff",
                        "c.black": "#000000",
                        "c.red": "#ff0000",
                        "c.green": "#00ff00",
                        "c.yellow": "#ffff00",
                        "c.blue": "#0000ff",
                        "c.magenta": "#ff00ff",
                        "c.cyan": "#00ffff",
                        "c.gray": "#ffffff",
                    }
                    sub = i
                    for tag, color in colors.items():
                        regex1 = "<" + tag + ">"
                        replace = '<font color="' + color + '">'
                        sub = re.sub(regex1, replace, sub)

                    # Every closing tag becomes </font>.
                    sub = re.sub("</.+>", "</font>", sub)
                else:
                    # Colour conversion disabled: drop all tags.
                    sub = re.sub("<[^>]*>", "", i)
                srt += sub.strip()
                srt += "\n"
        srt = decode_html_entities(srt)
        return srt

Exemple #18

0

Afficher le fichier

    def wrst(self, subdata):
        """Convert WebVTT-style subtitle text in ``subdata.text`` to SRT text.

        Lines are handled one at a time: header lines are skipped, timing
        lines are rewritten with ``,`` as the decimal separator, cue
        identifiers are kept (or generated when absent) and cue text has its
        tags stripped or, when the ``convert_subtitle_colors`` config option
        is set, converted to ``<font color=...>`` tags. HTML entities in the
        result are decoded before returning.
        """
        ssubdata = StringIO(subdata.text)
        srt = ""
        subtract = False  # once set, 10 is subtracted from every hour value
        number_b = 1  # running cue counter, used when a cue has no identifier
        number = 0  # last cue identifier read from the input
        block = 0  # 1 while inside a cue's text, 0 between cues
        subnr = False  # True when the current cue's identifier is already written
        if self.bom:
            # Discard the first character of the input.
            # NOTE(review): presumably a byte-order mark — confirm.
            ssubdata.read(1)
        for i in ssubdata.readlines():
            match = re.search(r"^[\r\n]+", i)  # blank line
            match2 = re.search(r"([\d:\.]+ --> [\d:\.]+)", i)  # timing line
            match3 = re.search(r"^(\d+)\s", i)  # line starting with a number
            if i[:6] == "WEBVTT":
                continue
            elif "X-TIMESTAMP" in i:
                continue
            elif match and number_b == 1 and self.bom:
                # Blank line before the first timing line while self.bom is
                # set: dropped.
                continue
            elif match and number_b > 1:
                # Blank line after at least one cue: ends the cue.
                block = 0
                srt += "\n"
            elif match2:
                if not subnr:
                    # No identifier line preceded this cue; use the counter.
                    srt += "%s\n" % number_b
                matchx = re.search(
                    r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)',
                    i)
                if matchx:
                    hour1 = int(matchx.group("h1"))
                    hour2 = int(matchx.group("h2"))
                    # If the cue identified as 1 starts at hour 10 or later,
                    # shift this and all later timestamps back by 10 hours.
                    if int(number) == 1:
                        if hour1 > 9:
                            subtract = True
                    if subtract:
                        hour1 -= 10
                        hour2 -= 10
                else:
                    # Timestamps without an hour field (mm:ss.fff).
                    matchx = re.search(
                        r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)',
                        i)
                    hour1 = 0
                    hour2 = 0
                time = "{0:02d}:{1}:{2} --> {3:02d}:{4}:{5}\n".format(
                    hour1, matchx.group("m1"),
                    matchx.group("s1").replace(".", ","), hour2,
                    matchx.group("m2"),
                    matchx.group("s2").replace(".", ","))
                srt += time
                block = 1
                subnr = False
                number_b += 1

            elif match3 and block == 0:
                # A leading number outside cue text is a cue identifier.
                number = match3.group(1)
                srt += "%s\n" % number
                subnr = True
            else:
                # Anything else is written out as cue text.
                if self.config.get("convert_subtitle_colors"):
                    # Maps tag names to the colour used in the <font> tag.
                    colors = {
                        '30': '#000000',
                        '31': '#ff0000',
                        '32': '#00ff00',
                        '33': '#ffff00',
                        '34': '#0000ff',
                        '35': '#ff00ff',
                        '36': '#00ffff',
                        '37': '#ffffff'
                    }
                    sub = i
                    for tag, color in colors.items():
                        regex1 = '<' + tag + '>'
                        replace = '<font color="' + color + '">'
                        sub = re.sub(regex1, replace, sub)

                    # Every closing tag becomes </font>.
                    sub = re.sub('</.+>', '</font>', sub)
                else:
                    # Colour conversion disabled: drop all tags.
                    sub = re.sub('<[^>]*>', '', i)
                srt += sub.strip()
                srt += "\n"
        srt = decode_html_entities(srt)
        return srt

Exemple #19

0

Afficher le fichier

Fichier : test_text.py Projet : wirretheman/svtplay-dl

 def test_decode_html(self):
     """``&lt;`` and ``&amp;`` entities are decoded to ``<`` and ``&``."""
     decoded = decode_html_entities("&lt;3 &amp;")
     assert decoded == "<3 &"

Exemple #20

0

Afficher le fichier

Fichier : __init__.py Projet : olof/debian-svtplay-dl

    def wrst(self, subdata):
        """Convert WebVTT-style subtitle text in ``subdata.text`` to SRT text.

        Lines are handled one at a time: header lines are skipped, timing
        lines are rewritten with ``,`` as the decimal separator, cue
        identifiers are kept (or generated when absent) and cue text has its
        tags stripped or, when the ``convert_subtitle_colors`` config option
        is set, converted to ``<font color=...>`` tags. HTML entities in the
        result are decoded before returning.
        """
        # Tag name -> colour for the <font> tag. The key "c.magenta" was
        # previously misspelled "c.magneta", so <c.magenta> tags were never
        # converted. Built once here rather than on every cue-text line.
        colors = {
            '30': '#000000', '31': '#ff0000', '32': '#00ff00', '33': '#ffff00', '34': '#0000ff',
            '35': '#ff00ff', '36': '#00ffff', '37': '#ffffff', 'c.black': '#000000', 'c.red': '#ff0000',
            'c.green': '#00ff00', 'c.yellow': '#ffff00', 'c.blue': '#0000ff', 'c.magenta': '#ff00ff',
            'c.cyan': '#00ffff', 'c.gray': '#ffffff',
        }

        ssubdata = StringIO(subdata.text)
        srt = ""
        subtract = False  # once set, 10 is subtracted from every hour value
        number_b = 1  # running cue counter, used when a cue has no identifier
        number = 0  # last cue identifier read from the input
        block = 0  # 1 while inside a cue's text, 0 between cues
        subnr = False  # True when the current cue's identifier is already written
        if self.bom:
            # Discard the first character of the input.
            # NOTE(review): presumably a byte-order mark — confirm.
            ssubdata.read(1)
        for i in ssubdata.readlines():
            match = re.search(r"^[\r\n]+", i)  # blank line
            match2 = re.search(r"([\d:\.]+ --> [\d:\.]+)", i)  # timing line
            match3 = re.search(r"^(\d+)\s", i)  # line starting with a number
            if i[:6] == "WEBVTT":
                continue
            elif "X-TIMESTAMP" in i:
                continue
            elif match and number_b == 1 and self.bom:
                # Blank line before the first timing line while self.bom is
                # set: dropped.
                continue
            elif match and number_b > 1:
                # Blank line after at least one cue: ends the cue.
                block = 0
                srt += "\n"
            elif match2:
                if not subnr:
                    # No identifier line preceded this cue; use the counter.
                    srt += "%s\n" % number_b
                matchx = re.search(r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)', i)
                if matchx:
                    hour1 = int(matchx.group("h1"))
                    hour2 = int(matchx.group("h2"))
                    # If the cue identified as 1 starts at hour 10 or later,
                    # shift this and all later timestamps back by 10 hours.
                    if int(number) == 1:
                        if hour1 > 9:
                            subtract = True
                    if subtract:
                        hour1 -= 10
                        hour2 -= 10
                else:
                    # Timestamps without an hour field (mm:ss.fff).
                    matchx = re.search(r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)', i)
                    hour1 = 0
                    hour2 = 0
                time = "{0:02d}:{1}:{2} --> {3:02d}:{4}:{5}\n".format(hour1, matchx.group("m1"), matchx.group("s1").replace(".", ","),
                                                                      hour2, matchx.group("m2"), matchx.group("s2").replace(".", ","))
                srt += time
                block = 1
                subnr = False
                number_b += 1

            elif match3 and block == 0:
                # A leading number outside cue text is a cue identifier.
                number = match3.group(1)
                srt += "%s\n" % number
                subnr = True
            else:
                # Anything else is written out as cue text.
                if self.config.get("convert_subtitle_colors"):
                    sub = i
                    for tag, color in colors.items():
                        regex1 = '<' + tag + '>'
                        replace = '<font color="' + color + '">'
                        sub = re.sub(regex1, replace, sub)

                    # Every closing tag becomes </font>.
                    sub = re.sub('</.+>', '</font>', sub)
                else:
                    # Colour conversion disabled: drop all tags.
                    sub = re.sub('<[^>]*>', '', i)
                srt += sub.strip()
                srt += "\n"
        srt = decode_html_entities(srt)
        return srt