def sami(self, subdata):
    """Convert an XML subtitle document (``<Subtitle>`` elements) to SRT text.

    Args:
        subdata: response-like object whose ``text`` attribute holds the XML.

    Returns:
        str: one SRT cue per ``<Subtitle>`` element, each made of the cue
        number, a ``TimeIn --> TimeOut`` line (both passed through
        ``timecolon``) and one line per nested ``<Text>`` element.

    Raises:
        xml.etree.ElementTree.ParseError: if the document is still not
            well-formed after ampersands have been escaped.
    """
    # Escape every ampersand before parsing: a bare "&" or an HTML-only entity
    # would otherwise make the XML parser fail. The parser turns "&amp;" back
    # into "&", and decode_html_entities() below resolves the entities.
    xml_text = subdata.text.replace("&", "&amp;")
    tree = ET.fromstring(xml_text)

    subs = ""
    # Counts the cues whose SpotNumber was not a plain integer; every such cue
    # shifts the numbering of itself and of all following cues by one.
    increase = 0
    for sub in tree.findall(".//Subtitle"):
        spot = sub.attrib["SpotNumber"]
        try:
            number = int(spot)
        except ValueError:
            # Fall back to the first run of digits inside the attribute.
            number = int(re.search(r"(\d+)", spot).group(1))
            increase += 1
        n = number + increase

        cue_text = ""
        for node in sub.findall(".//Text"):
            # Concatenate the text of the element and all of its children.
            line = "".join(node.itertext())
            cue_text += "{}\n".format(decode_html_entities(line.lstrip()))

        subs += "{}\n{} --> {}\n{}\n".format(
            n,
            timecolon(sub.attrib["TimeIn"]),
            timecolon(sub.attrib["TimeOut"]),
            cue_text,
        )

    # Undo any escaped ampersand that survived parsing and entity decoding.
    return subs.replace("&amp;", "&")
def get(self):
    """Yield the HLS streams advertised by the embedded player page.

    The page is searched for a player URL; that URL is fetched and the
    ``window.Player.settings`` JSON object is read from it. For each of the
    ``iPad`` and ``hashHls`` entries under ``streams`` the playlist is fetched
    and handed to ``hlsparse``; every stream it returns is yielded.

    Nothing is yielded (an error is logged instead) when either the player
    URL or the settings JSON cannot be found.
    """
    data = self.get_urldata()

    match = re.search('="(https://www.expressen.se/tvspelare[^"]+)"', data)
    if not match:
        log.error("Can't find video id")
        return

    url = decode_html_entities(match.group(1))
    data = self.http.request("get", url)

    match = re.search("window.Player.settings = ({.*});", data.text)
    if not match:
        log.error("Can't find json info.")
        # Stop here: without a match there is no JSON to load, and calling
        # match.group() below would raise AttributeError on None.
        return

    dataj = json.loads(match.group(1))
    if "streams" in dataj:
        # Same handling for both playlist keys, in the original order.
        for key in ("iPad", "hashHls"):
            if key in dataj["streams"]:
                playlist = dataj["streams"][key]
                streams = hlsparse(self.config, self.http.request("get", playlist), playlist, output=self.output)
                for n in list(streams.keys()):
                    yield streams[n]
def sami(self, subdata):
    """Convert an XML subtitle document (cues under ``<Font>``) to SRT text.

    Args:
        subdata: response-like object whose ``text`` attribute holds the XML.

    Returns:
        str: SRT text. Each ``<Subtitle>`` element starts a cue (its
        ``SpotNumber`` plus a ``TimeIn --> TimeOut`` line, both passed through
        ``timecolon``); the text of every other element met after the first
        cue is appended as a cue line.

    Raises:
        xml.etree.ElementTree.ParseError: if the document is still not
            well-formed after ampersands have been escaped.
        AttributeError: if the document has no direct ``<Font>`` child.
    """
    # Escape every ampersand before parsing: a bare "&" or an HTML-only entity
    # would otherwise make the XML parser fail. The parser turns "&amp;" back
    # into "&", and decode_html_entities() below resolves the entities.
    xml_text = subdata.text.replace("&", "&amp;")
    tree = ET.fromstring(xml_text)
    subt = tree.find("Font")

    subs = ""
    n = 0  # SpotNumber of the current cue; 0 until the first cue is seen
    # Element.iter() replaces getiterator(), which was removed in Python 3.9.
    for i in subt.iter():
        if i.tag == "Subtitle":
            n = i.attrib["SpotNumber"]
            header = "%s\n%s --> %s\n" % (
                i.attrib["SpotNumber"],
                timecolon(i.attrib["TimeIn"]),
                timecolon(i.attrib["TimeOut"]),
            )
            # Every cue except the one numbered "1" is preceded by a blank line.
            if i.attrib["SpotNumber"] != "1":
                header = "\n" + header
            subs += header
        elif int(n) > 0 and i.text:
            subs += "%s\n" % decode_html_entities(i.text)

    # Undo any escaped ampersand that survived parsing and entity decoding.
    return subs.replace("&amp;", "&")
def sami(self, subdata):
    """Convert an XML subtitle document (``<Subtitle>`` elements) to SRT text.

    Args:
        subdata: response-like object whose ``text`` attribute holds the XML.

    Returns:
        str: one SRT cue per ``<Subtitle>`` element: the cue number, a
        ``TimeIn --> TimeOut`` line (both passed through ``timecolon``) and
        one line per nested ``<Text>`` element.

    Raises:
        xml.etree.ElementTree.ParseError: if the document is still not
            well-formed after ampersands have been escaped.
    """
    # A bare "&" (or an entity XML does not define) is fatal for the XML
    # parser, so escape all ampersands first. Parsing restores them and
    # decode_html_entities() then resolves whatever entities were present.
    tree = ET.fromstring(subdata.text.replace("&", "&amp;"))

    subs = ""
    # Number of cues seen so far whose SpotNumber was not a plain integer;
    # added to the number of each cue from that point on.
    increase = 0
    for sub in tree.findall(".//Subtitle"):
        raw_number = sub.attrib["SpotNumber"]
        try:
            number = int(raw_number)
        except ValueError:
            # Use the first run of digits found in the attribute instead.
            number = int(re.search(r"(\d+)", raw_number).group(1))
            increase += 1
        n = number + increase

        body = ""
        for node in sub.findall(".//Text"):
            line = "".join(node.itertext())
            body += f"{decode_html_entities(line.lstrip())}\n"

        time_in = timecolon(sub.attrib["TimeIn"])
        time_out = timecolon(sub.attrib["TimeOut"])
        subs += f"{n}\n{time_in} --> {time_out}\n{body}\n"

    # Undo any escaped ampersand that survived parsing and entity decoding.
    return subs.replace("&amp;", "&")
def smi(self, subdata):
    """Convert a SAMI (``<SYNC Start=...>`` based) subtitle response to SRT.

    Args:
        subdata: HTTP response object; its body is decoded as Latin-1.

    Returns:
        str: SRT text with carriage returns removed and the ``\\x96``
        character replaced by ``-``.

    A cue is written when a ``<SYNC>`` line with a new start time is reached:
    the text captured from the most recent ``<P Class=SVCC>`` line is shown
    from the previous sync time until the new one.
    """
    if requests_version < 0x20300:
        subdata = subdata.content.decode("latin")
    else:
        subdata.encoding = "ISO-8859-1"
        subdata = subdata.text

    # Strips every tag except those whose name starts with "i" (keeps <i>).
    tag_re = re.compile(r"<(?!\/?i).*?>")
    # Payloads that only clear the screen. ``data`` is still raw markup when
    # it is checked, so the undecoded "&nbsp;" entity has to be recognised as
    # well as a literal (non-breaking) space; otherwise empty cues are written.
    # NOTE(review): "&nbsp;" as the clearing marker follows the usual SAMI
    # convention - confirm against real feeds.
    blank_cues = (" ", "\xa0", "&nbsp;")

    timea = 0      # start time of the pending cue
    number = 1     # SRT sequence number of the next cue
    data = None    # raw markup of the pending cue
    subs = ""
    for i in StringIO(subdata).readlines():
        i = i.rstrip()
        sync = re.search(r"<SYNC Start=(\d+)>", i)
        if sync:
            if int(sync.group(1)) != int(timea):
                if data and data not in blank_cues:
                    subs += "{}\n{} --> {}\n".format(number, timestr(timea), timestr(sync.group(1)))
                    text = "%s\n" % tag_re.sub("", data.replace("<br>", "\n"))
                    text = decode_html_entities(text)
                    # Make sure the cue ends with an empty separator line.
                    if text[len(text) - 2] != "\n":
                        text += "\n"
                    subs += text
                    number += 1
            timea = sync.group(1)
        text = re.search("<P Class=SVCC>(.*)", i)
        if text:
            data = text.group(1)

    return subs.replace("\r", "").replace("\x96", "-")
def smi(self, subdata):
    """Convert a SAMI (``<SYNC Start=...>`` based) subtitle response to SRT.

    Args:
        subdata: HTTP response object; its body is decoded as Latin-1.

    Returns:
        str: SRT text with carriage returns removed and the ``\\x96``
        character replaced by ``-``.

    Each ``<SYNC>`` line carrying a new start time closes the pending cue:
    the text last captured from a ``<P Class=SVCC>`` line is emitted with the
    previous sync time as start and the new sync time as end.
    """
    if requests_version < 0x20300:
        subdata = subdata.content.decode("latin")
    else:
        subdata.encoding = "ISO-8859-1"
        subdata = subdata.text
    ssubdata = StringIO(subdata)

    # Removes all tags except those whose name starts with "i" (keeps <i>).
    TAG_RE = re.compile(r'<(?!\/?i).*?>')
    # ``data`` holds undecoded markup at the point it is tested, so a cue that
    # only clears the screen may arrive as the raw "&nbsp;" entity rather
    # than as a space; treat all of these as blank to avoid empty SRT cues.
    # NOTE(review): "&nbsp;" as the clearing marker follows the usual SAMI
    # convention - confirm against real feeds.
    BLANK_CUES = (" ", "\xa0", "&nbsp;")

    timea = 0      # start time of the pending cue
    number = 1     # SRT sequence number of the next cue
    data = None    # raw markup of the pending cue
    subs = ""
    for i in ssubdata.readlines():
        i = i.rstrip()
        sync = re.search(r"<SYNC Start=(\d+)>", i)
        if sync:
            new_time = sync.group(1)
            if int(new_time) != int(timea) and data and data not in BLANK_CUES:
                subs += "%s\n%s --> %s\n" % (number, timestr(timea), timestr(new_time))
                text = "%s\n" % TAG_RE.sub('', data.replace("<br>", "\n"))
                text = decode_html_entities(text)
                # Make sure the cue ends with an empty separator line.
                if text[len(text) - 2] != "\n":
                    text += "\n"
                subs += text
                number += 1
            timea = new_time
        text = re.search("<P Class=SVCC>(.*)", i)
        if text:
            data = text.group(1)

    return subs.replace("\r", "").replace("\x96", "-")
def outputfilename(self, data):
    """Fill ``self.output`` with an id and, when available, a title.

    The id is the first seven hex digits of the SHA-256 of
    ``data["programVersionId"]``. The title is taken from the first
    ``data-title="..."`` attribute on the page; if there is none, only the id
    is stored. Always returns ``None``.
    """
    digest = hashlib.sha256(data["programVersionId"].encode("utf-8")).hexdigest()
    self.output["id"] = digest[:7]

    found = re.search('data-title="([^"]+)"', self.get_urldata())
    if found is None:
        return None

    title = decode_html_entities(found.group(1))
    self.output["title"] = self.name(title)
    self.seasoninfo(title)
def outputfilename(self, data):
    """Record the short id and page title for the output file name.

    ``self.output["id"]`` becomes the first seven hex characters of the
    SHA-256 digest of ``data["programVersionId"]``. When the page contains a
    ``data-title="..."`` attribute, its decoded value is passed to
    ``self.name`` (stored as the title) and to ``self.seasoninfo``.
    Returns ``None`` in every case.
    """
    version_id = data["programVersionId"].encode("utf-8")
    short_id = hashlib.sha256(version_id).hexdigest()[:7]
    self.output["id"] = short_id

    page = self.get_urldata()
    title_match = re.search('data-title="([^"]+)"', page)
    if title_match:
        decoded = decode_html_entities(title_match.group(1))
        self.output["title"] = self.name(decoded)
        self.seasoninfo(decoded)
    return None
def filename(stream):
    """Ensure ``stream.output["title"]`` is set, using the page ``<title>``.

    Returns ``False`` only when a title is missing and the page data cannot
    be obtained; otherwise ``True`` (even if no ``<title>`` tag was found).
    When a title is taken from the page, ``output_auto`` is switched on.
    """
    if stream.output["title"] is not None:
        # A title is already known; nothing to derive.
        return True

    page = ensure_unicode(stream.get_urldata())
    if page is None:
        return False

    found = re.search(r"(?i)<title[^>]*>\s*(.*?)\s*</title>", page, re.S)
    if found:
        stream.config.set("output_auto", True)
        stream.output["title"] = filenamify(decode_html_entities(found.group(1)))
    return True
def filename(stream):
    """Derive a missing output title from the page's ``<title>`` tag.

    If ``stream.output["title"]`` is ``None`` the page is fetched; a missing
    page yields ``False``. A ``<title>`` match enables ``output_auto`` and
    stores the decoded, ``filenamify``-ed text as the title. Every other path
    returns ``True``.
    """
    needs_title = stream.output["title"] is None
    if needs_title:
        html = ensure_unicode(stream.get_urldata())
        if html is None:
            return False
        title_match = re.search(r"(?i)<title[^>]*>\s*(.*?)\s*</title>", html, re.S)
        if title_match is not None:
            stream.config.set("output_auto", True)
            raw_title = title_match.group(1)
            stream.output["title"] = filenamify(decode_html_entities(raw_title))
    return True
def get(self):
    """Yield the HLS streams described by the page's player configuration.

    The configuration is read from the ``data-player-config`` attribute, or
    from ``data-svpPlayer-video`` when the former is absent. If neither is
    present a single ``ServiceError`` is yielded instead.
    """
    page = self.get_urldata()
    found = re.search('data-player-config="([^"]+)"', page) or re.search('data-svpPlayer-video="([^"]+)"', page)
    if not found:
        yield ServiceError("Can't find video info")
        return

    player = json.loads(decode_html_entities(found.group(1)))
    playlist = self.http.request("get", player["streamUrls"]["hls"])
    streams = hlsparse(self.config, playlist, player["streamUrls"]["hls"], output=self.output)
    for key in list(streams.keys()):
        yield streams[key]
def get(self):
    """Yield every HLS stream referenced by the embedded player config.

    Looks for the JSON config in ``data-player-config`` first and falls back
    to ``data-svpPlayer-video``. Yields one ``ServiceError`` and stops when
    the page has neither attribute.
    """
    html = self.get_urldata()

    config_match = None
    for pattern in ('data-player-config="([^"]+)"', 'data-svpPlayer-video="([^"]+)"'):
        config_match = re.search(pattern, html)
        if config_match:
            break
    if not config_match:
        yield ServiceError("Can't find video info")
        return

    info = json.loads(decode_html_entities(config_match.group(1)))
    response = self.http.request("get", info["streamUrls"]["hls"])
    streams = hlsparse(self.config, response, info["streamUrls"]["hls"], output=self.output)
    names = list(streams.keys())
    for name in names:
        yield streams[name]
def get(self):
    """Yield the HLS streams listed in the page's ``data-article-data`` JSON.

    The JSON's ``isLive`` value is copied to the ``live`` config option
    before its ``stream`` URL is fetched and parsed. A single
    ``ServiceError`` is yielded when the attribute is missing.
    """
    page = self.get_urldata()
    found = re.search('data-article-data="([^"]+)"', page)
    if not found:
        yield ServiceError("Cant find video file info")
        return

    article = json.loads(decode_html_entities(found.group(1)))
    self.config.set("live", article["isLive"])

    playlist = self.http.request("get", article["stream"])
    streams = hlsparse(self.config, playlist, article["stream"], output=self.output)
    for key in list(streams.keys()):
        yield streams[key]
def get(self):
    """Yield the streams found for the page's video id.

    Two endpoints are queried in turn with the decoded video id: the
    ``item.json`` lookup and the ``video.json`` HLS lookup. For each response
    that contains ``videos``, the streams returned by ``self.get_video`` are
    yielded. A single ``ServiceError`` is yielded when no video id is found.
    """
    vid = self.get_vid()
    if not vid:
        yield ServiceError("Can't find video id")
        return

    query = decode_html_entities(vid)
    urls = (
        "http://ljsp.lwcdn.com/web/public/item.json?type=video&%s" % query,
        "http://ljsp.lwcdn.com/web/public/video.json?id={}&delivery=hls".format(query),
    )
    for url in urls:
        data = self.http.request("get", url).text
        jdata = json.loads(data)
        if "videos" not in jdata:
            continue
        streams = self.get_video(jdata)
        # get_video() may return nothing; the first lookup always guarded
        # against that, the second one did not and would fail on .keys().
        if not streams:
            continue
        for n in list(streams.keys()):
            yield streams[n]
def get(self):
    """Yield HLS streams for the article embedded in the page.

    Reads the JSON stored in the ``data-article-data`` attribute, sets the
    ``live`` config option from its ``isLive`` field, then fetches and parses
    the playlist at its ``stream`` URL. Yields one ``ServiceError`` and stops
    if the attribute cannot be found.
    """
    html = self.get_urldata()
    article_match = re.search('data-article-data="([^"]+)"', html)
    if article_match is None:
        yield ServiceError("Cant find video file info")
        return

    decoded = decode_html_entities(article_match.group(1))
    janson = json.loads(decoded)
    self.config.set("live", janson["isLive"])

    response = self.http.request("get", janson["stream"])
    streams = hlsparse(self.config, response, janson["stream"], output=self.output)
    names = list(streams.keys())
    for name in names:
        yield streams[name]
def get(self):
    """Yield the streams found for the page's video id.

    The decoded video id is used to query the ``item.json`` endpoint and then
    the ``video.json`` HLS endpoint. Whenever a response contains ``videos``,
    the streams returned by ``self.get_video`` are yielded. A single
    ``ServiceError`` is yielded when no video id is found.
    """
    vid = self.get_vid()
    if not vid:
        yield ServiceError("Can't find video id")
        return

    url = "http://ljsp.lwcdn.com/web/public/item.json?type=video&%s" % decode_html_entities(vid)
    data = self.http.request("get", url).text
    jdata = json.loads(data)
    if "videos" in jdata:
        streams = self.get_video(jdata)
        if streams:
            for n in list(streams.keys()):
                yield streams[n]

    url = "http://ljsp.lwcdn.com/web/public/video.json?id={0}&delivery=hls".format(decode_html_entities(vid))
    data = self.http.request("get", url).text
    jdata = json.loads(data)
    if "videos" in jdata:
        streams = self.get_video(jdata)
        # Same guard as the first lookup: get_video() may return nothing, in
        # which case calling .keys() on the result would raise.
        if streams:
            for n in list(streams.keys()):
                yield streams[n]
def wrst(self, subdata):
    """Convert a WebVTT subtitle response to SRT text.

    Args:
        subdata: response-like object whose ``text`` attribute holds the
            WebVTT document.

    Returns:
        str: the SRT document, with HTML entities decoded.

    Header lines (``WEBVTT``, ``X-TIMESTAMP``) are dropped, timing lines are
    rewritten to ``HH:MM:SS,mmm`` form, and cue text either has its tags
    removed or, when the ``convert_subtitle_colors`` option is set, has its
    colour tags turned into ``<font color=...>`` markup.
    """
    # Colour tags (numeric and WebVTT "c.<name>" classes) -> SRT font colours.
    colors = {
        "30": "#000000",
        "31": "#ff0000",
        "32": "#00ff00",
        "33": "#ffff00",
        "34": "#0000ff",
        "35": "#ff00ff",
        "36": "#00ffff",
        "37": "#ffffff",
        "c.black": "#000000",
        "c.red": "#ff0000",
        "c.green": "#00ff00",
        "c.yellow": "#ffff00",
        "c.blue": "#0000ff",
        "c.magenta": "#ff00ff",
        "c.cyan": "#00ffff",
        "c.gray": "#ffffff",
    }

    ssubdata = StringIO(subdata.text)
    srt = ""
    subtract = False  # True once the first cue was found to start at hour >= 10
    number_b = 1      # running cue counter, used when a cue has no number line
    number = 0        # last explicit cue number read from the input
    block = 0         # 1 while inside a cue's text, 0 between cues
    subnr = False     # True when the current cue's number was already written
    for i in ssubdata.readlines():
        match = re.search(r"^[\r\n]+", i)
        match2 = re.search(r"([\d:\.]+ --> [\d:\.]+)", i)
        match3 = re.search(r"^(\d+)\s", i)
        if i[:6] == "WEBVTT":
            continue
        elif "X-TIMESTAMP" in i:
            continue
        elif match and number_b == 1 and self.bom:
            continue
        elif match and number_b > 1:
            # Blank line after at least one cue: the cue has ended.
            block = 0
            srt += "\n"
        elif match2:
            if not subnr:
                srt += "%s\n" % number_b
            matchx = re.search(
                r"(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)", i)
            if matchx:
                hour1 = int(matchx.group("h1"))
                hour2 = int(matchx.group("h2"))
                # If cue number 1 starts at hour 10 or later, shift this and
                # every following timestamp back by ten hours.
                if int(number) == 1:
                    if hour1 > 9:
                        subtract = True
                if subtract:
                    hour1 -= 10
                    hour2 -= 10
            else:
                # Timestamps without an hour field.
                matchx = re.search(
                    r"(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)", i)
                hour1 = 0
                hour2 = 0
            time = "{:02d}:{}:{} --> {:02d}:{}:{}\n".format(
                hour1,
                matchx.group("m1"),
                matchx.group("s1").replace(".", ","),
                hour2,
                matchx.group("m2"),
                matchx.group("s2").replace(".", ","),
            )
            srt += time
            block = 1
            subnr = False
            number_b += 1
        elif match3 and block == 0:
            # A leading number outside a cue's text is the cue's own number.
            number = match3.group(1)
            srt += "%s\n" % number
            subnr = True
        else:
            if self.config.get("convert_subtitle_colors"):
                sub = i
                for tag, color in colors.items():
                    sub = sub.replace("<" + tag + ">", '<font color="' + color + '">')
                # Rewrite each closing tag on its own. The previous greedy
                # "</.+>" matched from the first "</" to the last ">" on the
                # line and deleted any text between two closing tags.
                sub = re.sub("</[^>]+>", "</font>", sub)
            else:
                sub = re.sub("<[^>]*>", "", i)
            srt += sub.strip()
            srt += "\n"
    srt = decode_html_entities(srt)
    return srt
def wrst(self, subdata):
    """Convert a WebVTT subtitle response to SRT text.

    Args:
        subdata: response-like object whose ``text`` attribute holds the
            WebVTT document.

    Returns:
        str: the SRT document, with HTML entities decoded.

    Header lines (``WEBVTT``, ``X-TIMESTAMP``) are dropped, timing lines are
    rewritten to ``HH:MM:SS,mmm`` form, and cue text either has its tags
    removed or, when the ``convert_subtitle_colors`` option is set, has its
    colour tags turned into ``<font color=...>`` markup.
    """
    # Colour tags -> SRT font colours. The WebVTT "c.<name>" classes are
    # included so that such a tag is not left unopened while its closing tag
    # is still rewritten to </font> below.
    colors = {
        '30': '#000000',
        '31': '#ff0000',
        '32': '#00ff00',
        '33': '#ffff00',
        '34': '#0000ff',
        '35': '#ff00ff',
        '36': '#00ffff',
        '37': '#ffffff',
        'c.black': '#000000',
        'c.red': '#ff0000',
        'c.green': '#00ff00',
        'c.yellow': '#ffff00',
        'c.blue': '#0000ff',
        'c.magenta': '#ff00ff',
        'c.cyan': '#00ffff',
        'c.gray': '#ffffff',
    }

    ssubdata = StringIO(subdata.text)
    srt = ""
    subtract = False  # True once the first cue was found to start at hour >= 10
    number_b = 1      # running cue counter, used when a cue has no number line
    number = 0        # last explicit cue number read from the input
    block = 0         # 1 while inside a cue's text, 0 between cues
    subnr = False     # True when the current cue's number was already written
    if self.bom:
        # Skip the first character when the instance's ``bom`` flag is set.
        ssubdata.read(1)
    for i in ssubdata.readlines():
        match = re.search(r"^[\r\n]+", i)
        match2 = re.search(r"([\d:\.]+ --> [\d:\.]+)", i)
        match3 = re.search(r"^(\d+)\s", i)
        if i[:6] == "WEBVTT":
            continue
        elif "X-TIMESTAMP" in i:
            continue
        elif match and number_b == 1 and self.bom:
            continue
        elif match and number_b > 1:
            # Blank line after at least one cue: the cue has ended.
            block = 0
            srt += "\n"
        elif match2:
            if not subnr:
                srt += "%s\n" % number_b
            matchx = re.search(
                r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)', i)
            if matchx:
                hour1 = int(matchx.group("h1"))
                hour2 = int(matchx.group("h2"))
                # If cue number 1 starts at hour 10 or later, shift this and
                # every following timestamp back by ten hours.
                if int(number) == 1:
                    if hour1 > 9:
                        subtract = True
                if subtract:
                    hour1 -= 10
                    hour2 -= 10
            else:
                # Timestamps without an hour field.
                matchx = re.search(
                    r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)', i)
                hour1 = 0
                hour2 = 0
            time = "{0:02d}:{1}:{2} --> {3:02d}:{4}:{5}\n".format(
                hour1,
                matchx.group("m1"),
                matchx.group("s1").replace(".", ","),
                hour2,
                matchx.group("m2"),
                matchx.group("s2").replace(".", ","))
            srt += time
            block = 1
            subnr = False
            number_b += 1
        elif match3 and block == 0:
            # A leading number outside a cue's text is the cue's own number.
            number = match3.group(1)
            srt += "%s\n" % number
            subnr = True
        else:
            if self.config.get("convert_subtitle_colors"):
                sub = i
                for tag, color in colors.items():
                    sub = sub.replace('<' + tag + '>', '<font color="' + color + '">')
                # Rewrite each closing tag on its own. The previous greedy
                # "</.+>" matched from the first "</" to the last ">" on the
                # line and deleted any text between two closing tags.
                sub = re.sub('</[^>]+>', '</font>', sub)
            else:
                sub = re.sub('<[^>]*>', '', i)
            srt += sub.strip()
            srt += "\n"
    srt = decode_html_entities(srt)
    return srt
def test_decode_html(self):
    """Named HTML entities must be decoded to their literal characters."""
    # The input has to contain real entities; feeding the already-decoded
    # text would pass even if decode_html_entities() did nothing at all.
    assert decode_html_entities("&lt;3 &amp;") == "<3 &"
def wrst(self, subdata):
    """Convert a WebVTT subtitle response to SRT text.

    Args:
        subdata: response-like object whose ``text`` attribute holds the
            WebVTT document.

    Returns:
        str: the SRT document, with HTML entities decoded.

    Header lines (``WEBVTT``, ``X-TIMESTAMP``) are dropped, timing lines are
    rewritten to ``HH:MM:SS,mmm`` form, and cue text either has its tags
    removed or, when the ``convert_subtitle_colors`` option is set, has its
    colour tags turned into ``<font color=...>`` markup.
    """
    # Colour tags (numeric and WebVTT "c.<name>" classes) -> SRT font colours.
    # "c.magenta" was previously misspelled "c.magneta", so magenta cues were
    # never converted.
    colors = {
        '30': '#000000',
        '31': '#ff0000',
        '32': '#00ff00',
        '33': '#ffff00',
        '34': '#0000ff',
        '35': '#ff00ff',
        '36': '#00ffff',
        '37': '#ffffff',
        'c.black': '#000000',
        'c.red': '#ff0000',
        'c.green': '#00ff00',
        'c.yellow': '#ffff00',
        'c.blue': '#0000ff',
        'c.magenta': '#ff00ff',
        'c.cyan': '#00ffff',
        'c.gray': '#ffffff',
    }

    ssubdata = StringIO(subdata.text)
    srt = ""
    subtract = False  # True once the first cue was found to start at hour >= 10
    number_b = 1      # running cue counter, used when a cue has no number line
    number = 0        # last explicit cue number read from the input
    block = 0         # 1 while inside a cue's text, 0 between cues
    subnr = False     # True when the current cue's number was already written
    if self.bom:
        # Skip the first character when the instance's ``bom`` flag is set.
        ssubdata.read(1)
    for i in ssubdata.readlines():
        match = re.search(r"^[\r\n]+", i)
        match2 = re.search(r"([\d:\.]+ --> [\d:\.]+)", i)
        match3 = re.search(r"^(\d+)\s", i)
        if i[:6] == "WEBVTT":
            continue
        elif "X-TIMESTAMP" in i:
            continue
        elif match and number_b == 1 and self.bom:
            continue
        elif match and number_b > 1:
            # Blank line after at least one cue: the cue has ended.
            block = 0
            srt += "\n"
        elif match2:
            if not subnr:
                srt += "%s\n" % number_b
            matchx = re.search(r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)', i)
            if matchx:
                hour1 = int(matchx.group("h1"))
                hour2 = int(matchx.group("h2"))
                # If cue number 1 starts at hour 10 or later, shift this and
                # every following timestamp back by ten hours.
                if int(number) == 1:
                    if hour1 > 9:
                        subtract = True
                if subtract:
                    hour1 -= 10
                    hour2 -= 10
            else:
                # Timestamps without an hour field.
                matchx = re.search(r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)', i)
                hour1 = 0
                hour2 = 0
            time = "{0:02d}:{1}:{2} --> {3:02d}:{4}:{5}\n".format(hour1, matchx.group("m1"), matchx.group("s1").replace(".", ","),
                                                                  hour2, matchx.group("m2"), matchx.group("s2").replace(".", ","))
            srt += time
            block = 1
            subnr = False
            number_b += 1
        elif match3 and block == 0:
            # A leading number outside a cue's text is the cue's own number.
            number = match3.group(1)
            srt += "%s\n" % number
            subnr = True
        else:
            if self.config.get("convert_subtitle_colors"):
                sub = i
                for tag, color in colors.items():
                    sub = sub.replace('<' + tag + '>', '<font color="' + color + '">')
                # Rewrite each closing tag on its own. The previous greedy
                # "</.+>" matched from the first "</" to the last ">" on the
                # line and deleted any text between two closing tags.
                sub = re.sub('</[^>]+>', '</font>', sub)
            else:
                sub = re.sub('<[^>]*>', '', i)
            srt += sub.strip()
            srt += "\n"
    srt = decode_html_entities(srt)
    return srt