class subtitle:
    """Fetch one subtitle resource and write it out as an ``srt`` file.

    ``subtype`` names the source format and selects the converter used by
    :meth:`download`.  Extra keyword arguments are kept in ``self.kwargs``;
    :meth:`wrstsegment` reads ``m3u8`` and :meth:`stpp` reads ``files`` from it.
    """

    # Raw SAMI cue bodies treated as "no text".  The entity form is presumably
    # what an unparsed line carries; the other two are its decoded variants.
    _SMI_BLANK = ("&nbsp;", "\xa0", " ")

    # WebVTT colour tags -> hex colour, built once instead of once per line.
    _COLORS = {
        "30": "#000000",
        "31": "#ff0000",
        "32": "#00ff00",
        "33": "#ffff00",
        "34": "#0000ff",
        "35": "#ff00ff",
        "36": "#00ffff",
        "37": "#ffffff",
        "c.black": "#000000",
        "c.red": "#ff0000",
        "c.green": "#00ff00",
        "c.yellow": "#ffff00",
        "c.blue": "#0000ff",
        "c.magenta": "#ff00ff",
        "c.cyan": "#00ffff",
        "c.gray": "#ffffff",
    }

    def __init__(self, config, subtype, url, subfix=None, **kwargs):
        self.url = url
        self.subtitle = None
        self.config = config
        self.subtype = subtype
        self.http = HTTP(config)
        self.subfix = subfix
        # Set by download() when the payload starts with a UTF-8 BOM.
        self.bom = False
        self.output = kwargs.pop("output", None)
        self.kwargs = kwargs

    def __repr__(self):
        return f"<Subtitle(type={self.subtype}, url={self.url})>"

    def download(self):
        """Fetch ``self.url``, convert it according to ``self.subtype`` and save it.

        Returns ``None`` in every case.  A non-200 response or a subtype
        without a converter is logged and nothing is written as ``srt``.
        """
        subdata = self.http.request("get", self.url)
        if subdata.status_code != 200:
            logging.warning("Can't download subtitle file")
            return

        if "mtgx" in self.url and subdata.content[:3] == b"\xef\xbb\xbf":
            subdata.encoding = "utf-8"
            self.bom = True

        if self.subtype == "wrst":
            if "tv4play" in self.url and subdata.content[:3] == b"\xef\xbb\xbf":
                self.bom = True
            subdata.encoding = subdata.apparent_encoding

        converters = {
            "tt": self.tt,
            "json": self.json,
            "sami": self.sami,
            "smi": self.smi,
            "wrst": self.wrst,
            "wrstsegment": self.wrstsegment,
            "raw": self.raw,
            "stpp": self.stpp,
        }
        converter = converters.get(self.subtype)
        data = converter(subdata) if converter is not None else None

        if self.subfix and self.config.get("get_all_subtitles"):
            if self.output["episodename"]:
                self.output["episodename"] = "{}-{}".format(self.output["episodename"], self.subfix)
            else:
                self.output["episodename"] = self.subfix

        if self.config.get("get_raw_subtitles"):
            subdata = self.raw(subdata)
            self.save_file(subdata, self.subtype)

        if data is None:
            # Without this guard file.write(None) raises TypeError below.
            logging.warning("No converter for subtitle type %s", self.subtype)
            return
        self.save_file(data, "srt")

    def save_file(self, data, subtype):
        """Write ``data`` to the file ``output()`` opens for ``subtype``.

        Does nothing when ``output()`` returns something without a ``read``
        attribute (i.e. not a file object).
        """
        file_d = output(self.output, self.config, subtype, mode="w", encoding="utf-8")
        if hasattr(file_d, "read") is False:
            return
        try:
            file_d.write(data)
        finally:
            # Close the handle even if the write fails.
            file_d.close()

    def raw(self, subdata):
        """Return the response body unchanged."""
        return subdata.text

    def tt(self, subdata):
        """Convert a TTML response to SRT text, numbering cues from 1."""
        return self._tt(subdata.text, 1)

    @staticmethod
    def _tt_end(begin, duration):
        """Return ``begin + duration`` formatted as ``HH:MM:SS.mmm``.

        Both arguments are colon-separated clock strings.  Overflowing
        seconds are carried into minutes and hours.  If the seconds of
        ``begin`` or any field of ``duration`` is not numeric, the seconds
        fall back to zero and only the hours/minutes of ``begin`` are kept.
        """
        begin_parts = begin.split(":")
        duration_parts = duration.split(":")
        try:
            seconds = float(begin_parts[2]) + float(duration_parts[2])
            seconds += 60 * int(duration_parts[1]) + 3600 * int(duration_parts[0])
        except ValueError:
            seconds = 0.0
        total = int(begin_parts[0]) * 3600 + int(begin_parts[1]) * 60 + seconds
        # Work in whole milliseconds so rounding can never print "60.000".
        total_ms = int(round(total * 1000))
        hours, rest = divmod(total_ms, 3600000)
        minutes, rest = divmod(rest, 60000)
        return "%02d:%02d:%06.3f" % (hours, minutes, rest / 1000.0)

    def _tt(self, subs, i):
        """Convert TTML markup ``subs`` to SRT text, numbering cues from ``i``."""
        data = ""
        # Drop the default namespace so plain tag names can be used in find().
        subdata = re.sub(' xmlns="[^"]+"', "", subs, count=1)
        tree = ET.XML(subdata)
        xml = tree.find("body").find("div")
        for node in xml.findall("p"):
            if norm(node.tag) not in ("p", "span"):
                continue
            begin = node.attrib["begin"]
            end = node.attrib.get("end")
            if end is None:
                # No explicit end: derive it from "dur" (or "duration").
                if "dur" in node.attrib:
                    duration = node.attrib["dur"]
                else:
                    duration = node.attrib["duration"]
                end = self._tt_end(begin, duration)
            data += "{}\n{} --> {}\n".format(i, begin.replace(".", ","), end.replace(".", ","))
            data = tt_text(node, data)
            data += "\n"
            i += 1
        return data

    def json(self, subdata):
        """Convert a JSON list of ``startMillis``/``endMillis``/``text`` items to SRT."""
        entries = json.loads(subdata.text)
        blocks = []
        for number, entry in enumerate(entries, 1):
            blocks.append(
                "{}\n{} --> {}\n{}\n\n".format(
                    number,
                    timestr(int(entry["startMillis"])),
                    timestr(int(entry["endMillis"])),
                    entry["text"],
                )
            )
        return "".join(blocks)

    def sami(self, subdata):
        """Convert an XML document of ``Subtitle`` elements to SRT text."""
        # Escape every ampersand so bare "&" and non-XML entities survive parsing.
        tree = ET.fromstring(subdata.text.replace("&", "&amp;"))
        subs = ""
        increase = 0
        for sub in tree.findall(".//Subtitle"):
            spot = sub.attrib["SpotNumber"]
            try:
                number = int(spot)
            except ValueError:
                # Non-numeric spot number: use its first digit run and shift
                # this and all following cue numbers by one.
                number = int(re.search(r"(\d+)", spot).group(1))
                increase += 1
            n = number + increase

            lines = ""
            for node in sub.findall(".//Text"):
                line = "".join(node.itertext())
                lines += "{}\n".format(decode_html_entities(line.lstrip()))
            subs += "{}\n{} --> {}\n{}\n".format(
                n, timecolon(sub.attrib["TimeIn"]), timecolon(sub.attrib["TimeOut"]), lines
            )
        # Undo any escaping that is still left in the text.
        return subs.replace("&amp;", "&")

    def smi(self, subdata):
        """Convert a SAMI (``<SYNC Start=...>``) response to SRT text."""
        if requests_version < 0x20300:
            subdata = subdata.content.decode("latin")
        else:
            subdata.encoding = "ISO-8859-1"
            subdata = subdata.text
        timea = 0
        number = 1
        data = None
        subs = ""
        TAG_RE = re.compile(r"<(?!\/?i).*?>")
        bad_char = re.compile(r"\x96")
        for line in StringIO(subdata):
            line = line.rstrip()
            sync = re.search(r"<SYNC Start=(\d+)>", line)
            if sync:
                if int(sync.group(1)) != int(timea):
                    # A new SYNC closes the previous cue unless that was blank.
                    if data and data not in self._SMI_BLANK:
                        subs += "{}\n{} --> {}\n".format(number, timestr(timea), timestr(sync.group(1)))
                        text = "%s\n" % TAG_RE.sub("", data.replace("<br>", "\n"))
                        text = decode_html_entities(text)
                        if text[len(text) - 2] != "\n":
                            text += "\n"
                        subs += text
                        number += 1
                timea = sync.group(1)
            text = re.search("<P Class=SVCC>(.*)", line)
            if text:
                data = text.group(1)
        recomp = re.compile(r"\r")
        return bad_char.sub("-", recomp.sub("", subs))

    def wrst(self, subdata):
        """Convert a WebVTT response to SRT text."""
        srt = ""
        subtract = False
        number_b = 1
        number = 0
        block = 0
        subnr = False
        for line in StringIO(subdata.text):
            match = re.search(r"^[\r\n]+", line)
            match2 = re.search(r"([\d:\.]+ --> [\d:\.]+)", line)
            match3 = re.search(r"^(\d+)\s", line)
            if line[:6] == "WEBVTT":
                continue
            elif "X-TIMESTAMP" in line:
                continue
            elif match and number_b == 1 and self.bom:
                continue
            elif match and number_b > 1:
                block = 0
                srt += "\n"
            elif match2:
                if not subnr:
                    srt += "%s\n" % number_b
                matchx = re.search(
                    r"(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)", line
                )
                if matchx:
                    hour1 = int(matchx.group("h1"))
                    hour2 = int(matchx.group("h2"))
                    # If cue number 1 starts at hour 10 or later, shift all
                    # following cues back by ten hours.
                    if int(number) == 1 and hour1 > 9:
                        subtract = True
                    if subtract:
                        hour1 -= 10
                        hour2 -= 10
                else:
                    # Timestamps without an hour field.
                    matchx = re.search(r"(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)", line)
                    hour1 = 0
                    hour2 = 0
                srt += "{:02d}:{}:{} --> {:02d}:{}:{}\n".format(
                    hour1,
                    matchx.group("m1"),
                    matchx.group("s1").replace(".", ","),
                    hour2,
                    matchx.group("m2"),
                    matchx.group("s2").replace(".", ","),
                )
                block = 1
                subnr = False
                number_b += 1
            elif match3 and block == 0:
                number = match3.group(1)
                srt += "%s\n" % number
                subnr = True
            else:
                if self.config.get("convert_subtitle_colors"):
                    sub = line
                    for tag, color in self._COLORS.items():
                        sub = re.sub("<" + tag + ">", '<font color="' + color + '">', sub)
                    sub = re.sub("</.+>", "</font>", sub)
                else:
                    sub = re.sub("<[^>]*>", "", line)
                srt += sub.strip()
                srt += "\n"
        return decode_html_entities(srt)

    def wrstsegment(self, subdata):
        """Download every segment in ``self.kwargs["m3u8"]`` and join them as SRT.

        ``subdata`` is not used; the segments are fetched through ``self.http``.
        """
        time = 0
        subs = []
        for segment in self.kwargs["m3u8"].media_segment:
            itemurl = get_full_url(segment["URI"], self.url)
            cont = self.http.get(itemurl)
            if "cmore" in self.url:
                cont.encoding = "utf-8"
            if "mtgx" in self.url:
                cont.encoding = "utf-8"
            text = cont.text.split("\n")
            for t in text:
                # is in text[1] for tv4play, but this should be more future proof
                if "X-TIMESTAMP-MAP=MPEGTS" in t:
                    time = float(re.search(r"X-TIMESTAMP-MAP=MPEGTS:(\d+)", t).group(1)) / 90000 - 10
            text = text[3 : len(text) - 2]
            itmes = []
            if len(text) > 1:
                # don't get the empty lines.
                itmes = [n for n in text if n]

            several_items = False
            skip = False
            pre_date_skip = True
            sub = []

            for x, item in enumerate(itmes):
                has_date = strdate(item)
                # Same text as the previous cue: extend that cue instead of
                # adding a new one.  The length checks stop a trailing
                # timestamp or a text-less previous cue from raising IndexError.
                if (
                    has_date
                    and len(subs) > 0
                    and x + 1 < len(itmes)
                    and len(subs[-1]) > 1
                    and itmes[x + 1] == subs[-1][1]
                ):
                    ha = strdate(subs[-1][0])
                    second = str2sec(has_date.group(2)) + time
                    subs[-1][0] = "{} --> {}".format(ha.group(1), sec2str(second))
                    skip = True
                    pre_date_skip = False
                    continue
                if has_date:
                    if several_items:
                        subs.append(sub)
                        sub = []
                    skip = False
                    first = str2sec(has_date.group(1)) + time
                    second = str2sec(has_date.group(2)) + time
                    sub.append("{} --> {}".format(sec2str(first), sec2str(second)))
                    several_items = True
                    pre_date_skip = False
                elif has_date is None and skip is False and pre_date_skip is False:
                    sub.append(item)

            if sub:
                subs.append(sub)

        return "".join("{}\n{}\n\n".format(nr, "\n".join(sub)) for nr, sub in enumerate(subs, 1))

    def stpp(self, subdata):
        """Build SRT text from the TTML found after ``mdat`` in each ``files`` URL.

        ``subdata`` is not used; the files listed in ``self.kwargs["files"]``
        are fetched through ``self.http``.
        """
        nr = 1
        entries = []
        for url in self.kwargs["files"]:
            res = self.http.get(url)
            # find() returns -1 when "mdat" is missing, which makes start == 3.
            start = res.content.find(b"mdat") + 4
            if start > 3:
                _data = self._tt(res.content[start:].decode(), nr)
                if _data:
                    entries.append(_data.split("\n\n"))
                    nr += 1

        entries = [cue.split("\n") for entry in entries for cue in entry if cue]

        changed = True
        while changed:
            changed, entries = _resolv(entries)

        return "".join("".join(f"{item}\n" for item in entry) + "\n" for entry in entries)
class subtitle(object):
    """Fetch one subtitle resource and write it out as an ``srt`` file.

    ``subtype`` names the source format and selects the converter used by
    :meth:`download`.  Extra keyword arguments are kept in ``self.kwargs``;
    :meth:`wrstsegment` reads ``m3u8`` from it.
    """

    # Raw SAMI cue bodies treated as "no text".  The entity form is presumably
    # what an unparsed line carries; the other two are its decoded variants.
    _SMI_BLANK = ("&nbsp;", "\xa0", " ")

    # WebVTT colour tags -> hex colour, built once instead of once per line.
    _COLORS = {
        "30": "#000000",
        "31": "#ff0000",
        "32": "#00ff00",
        "33": "#ffff00",
        "34": "#0000ff",
        "35": "#ff00ff",
        "36": "#00ffff",
        "37": "#ffffff",
        "c.black": "#000000",
        "c.red": "#ff0000",
        "c.green": "#00ff00",
        "c.yellow": "#ffff00",
        "c.blue": "#0000ff",
        "c.magenta": "#ff00ff",
        "c.cyan": "#00ffff",
        "c.gray": "#ffffff",
    }

    def __init__(self, config, subtype, url, subfix=None, **kwargs):
        self.url = url
        self.subtitle = None
        self.config = config
        self.subtype = subtype
        self.http = HTTP(config)
        self.subfix = subfix
        # Set by download() when the payload starts with a UTF-8 BOM.
        self.bom = False
        self.output = kwargs.pop("output", None)
        self.kwargs = kwargs

    def __repr__(self):
        return "<Subtitle(type={}, url={})>".format(self.subtype, self.url)

    def download(self):
        """Fetch ``self.url``, convert it according to ``self.subtype`` and save it.

        Returns ``None`` in every case.  A non-200 response or a subtype
        without a converter is logged and nothing is written as ``srt``.
        """
        subdata = self.http.request("get", self.url)
        if subdata.status_code != 200:
            log.warning("Can't download subtitle file")
            return

        if "mtgx" in self.url and subdata.content[:3] == b"\xef\xbb\xbf":
            subdata.encoding = "utf-8"
            self.bom = True

        if self.subtype == "wrst":
            if "tv4play" in self.url and subdata.content[:3] == b"\xef\xbb\xbf":
                subdata.encoding = "utf-8"
                self.bom = True
            if "dplay" in self.url:
                subdata.encoding = "utf-8"

        converters = {
            "tt": self.tt,
            "json": self.json,
            "sami": self.sami,
            "smi": self.smi,
            "wrst": self.wrst,
            "wrstsegment": self.wrstsegment,
            "raw": self.raw,
        }
        converter = converters.get(self.subtype)
        data = converter(subdata) if converter is not None else None

        if self.subfix and self.config.get("get_all_subtitles"):
            if self.output["episodename"]:
                self.output["episodename"] = "{}-{}".format(self.output["episodename"], self.subfix)
            else:
                self.output["episodename"] = self.subfix

        if self.config.get("get_raw_subtitles"):
            subdata = self.raw(subdata)
            self.save_file(subdata, self.subtype)

        if data is None:
            # Without this guard file.write(None) raises TypeError below.
            log.warning("No converter for subtitle type %s", self.subtype)
            return
        self.save_file(data, "srt")

    def save_file(self, data, subtype):
        """Write ``data`` to the file ``output()`` opens for ``subtype``.

        The file is always opened as UTF-8, so the result does not depend on
        the platform's locale encoding.  Does nothing when ``output()``
        returns something without a ``read`` attribute.
        """
        file_d = output(self.output, self.config, subtype, mode="wt", encoding="utf-8")
        if hasattr(file_d, "read") is False:
            return
        try:
            file_d.write(data)
        finally:
            # Close the handle even if the write fails.
            file_d.close()

    def raw(self, subdata):
        """Return the response body unchanged."""
        return subdata.text

    @staticmethod
    def _tt_end(begin, duration):
        """Return ``begin + duration`` formatted as ``HH:MM:SS.mmm``.

        Both arguments are colon-separated clock strings.  Overflowing
        seconds are carried into minutes and hours.  If the seconds of
        ``begin`` or any field of ``duration`` is not numeric, the seconds
        fall back to zero and only the hours/minutes of ``begin`` are kept.
        """
        begin_parts = begin.split(":")
        duration_parts = duration.split(":")
        try:
            seconds = float(begin_parts[2]) + float(duration_parts[2])
            seconds += 60 * int(duration_parts[1]) + 3600 * int(duration_parts[0])
        except ValueError:
            seconds = 0.0
        total = int(begin_parts[0]) * 3600 + int(begin_parts[1]) * 60 + seconds
        # Work in whole milliseconds so rounding can never print "60.000".
        total_ms = int(round(total * 1000))
        hours, rest = divmod(total_ms, 3600000)
        minutes, rest = divmod(rest, 60000)
        return "%02d:%02d:%06.3f" % (hours, minutes, rest / 1000.0)

    def tt(self, subdata):
        """Convert a TTML response to SRT text, numbering cues from 1."""
        i = 1
        data = ""
        # Drop the default namespace so plain tag names can be used in find().
        markup = re.sub(' xmlns="[^"]+"', '', subdata.text, count=1)
        tree = ET.XML(markup)
        xml = tree.find("body").find("div")
        for node in xml.findall("p"):
            if norm(node.tag) not in ("p", "span"):
                continue
            begin = node.attrib["begin"]
            end = node.attrib.get("end")
            if end is None:
                # Only look the duration up when it is needed, so a node with
                # an explicit "end" and no duration no longer raises KeyError.
                if "dur" in node.attrib:
                    duration = node.attrib["dur"]
                else:
                    duration = node.attrib["duration"]
                end = self._tt_end(begin, duration)
            data += '%s\n%s --> %s\n' % (i, begin.replace(".", ","), end.replace(".", ","))
            data = tt_text(node, data)
            data += "\n"
            i += 1
        return data

    def json(self, subdata):
        """Convert a JSON list of ``startMillis``/``endMillis``/``text`` items to SRT."""
        entries = json.loads(subdata.text)
        blocks = []
        for number, entry in enumerate(entries, 1):
            blocks.append(
                "{}\n{} --> {}\n{}\n\n".format(
                    number,
                    timestr(int(entry["startMillis"])),
                    timestr(int(entry["endMillis"])),
                    entry["text"],
                )
            )
        return "".join(blocks)

    def sami(self, subdata):
        """Convert the ``Subtitle`` elements below the ``Font`` element to SRT text."""
        # Escape every ampersand so bare "&" and non-XML entities survive parsing.
        tree = ET.fromstring(subdata.text.replace("&", "&amp;"))
        subt = tree.find("Font")
        subs = ""
        n = 0
        # Element.getiterator() was removed in Python 3.9; iter() walks the
        # same elements in the same document order.
        for node in subt.iter():
            if node.tag == "Subtitle":
                n = node.attrib["SpotNumber"]
                header = "%s\n%s --> %s\n" % (
                    n, timecolon(node.attrib["TimeIn"]), timecolon(node.attrib["TimeOut"]))
                # Every cue except number 1 is preceded by a blank line.
                subs += header if n == "1" else "\n" + header
            elif int(n) > 0 and node.text:
                subs += "%s\n" % decode_html_entities(node.text)
        # Undo any escaping that is still left in the text.
        return subs.replace("&amp;", "&")

    def smi(self, subdata):
        """Convert a SAMI (``<SYNC Start=...>``) response to SRT text."""
        if requests_version < 0x20300:
            subdata = subdata.content.decode("latin")
        else:
            subdata.encoding = "ISO-8859-1"
            subdata = subdata.text
        timea = 0
        number = 1
        data = None
        subs = ""
        TAG_RE = re.compile(r'<(?!\/?i).*?>')
        bad_char = re.compile(r'\x96')
        for line in StringIO(subdata):
            line = line.rstrip()
            sync = re.search(r"<SYNC Start=(\d+)>", line)
            if sync:
                if int(sync.group(1)) != int(timea):
                    # A new SYNC closes the previous cue unless that was blank.
                    if data and data not in self._SMI_BLANK:
                        subs += "%s\n%s --> %s\n" % (number, timestr(timea), timestr(sync.group(1)))
                        text = "%s\n" % TAG_RE.sub('', data.replace("<br>", "\n"))
                        text = decode_html_entities(text)
                        if text[len(text) - 2] != "\n":
                            text += "\n"
                        subs += text
                        number += 1
                timea = sync.group(1)
            text = re.search("<P Class=SVCC>(.*)", line)
            if text:
                data = text.group(1)
        recomp = re.compile(r'\r')
        return bad_char.sub('-', recomp.sub('', subs))

    def wrst(self, subdata):
        """Convert a WebVTT response to SRT text."""
        ssubdata = StringIO(subdata.text)
        srt = ""
        subtract = False
        number_b = 1
        number = 0
        block = 0
        subnr = False
        if self.bom:
            # Skip the decoded byte-order mark.
            ssubdata.read(1)
        for line in ssubdata:
            match = re.search(r"^[\r\n]+", line)
            match2 = re.search(r"([\d:\.]+ --> [\d:\.]+)", line)
            match3 = re.search(r"^(\d+)\s", line)
            if line[:6] == "WEBVTT":
                continue
            elif "X-TIMESTAMP" in line:
                continue
            elif match and number_b == 1 and self.bom:
                continue
            elif match and number_b > 1:
                block = 0
                srt += "\n"
            elif match2:
                if not subnr:
                    srt += "%s\n" % number_b
                matchx = re.search(
                    r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)', line)
                if matchx:
                    hour1 = int(matchx.group("h1"))
                    hour2 = int(matchx.group("h2"))
                    # If cue number 1 starts at hour 10 or later, shift all
                    # following cues back by ten hours.
                    if int(number) == 1 and hour1 > 9:
                        subtract = True
                    if subtract:
                        hour1 -= 10
                        hour2 -= 10
                else:
                    # Timestamps without an hour field.
                    matchx = re.search(r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)', line)
                    hour1 = 0
                    hour2 = 0
                srt += "{0:02d}:{1}:{2} --> {3:02d}:{4}:{5}\n".format(
                    hour1, matchx.group("m1"), matchx.group("s1").replace(".", ","),
                    hour2, matchx.group("m2"), matchx.group("s2").replace(".", ","))
                block = 1
                subnr = False
                number_b += 1
            elif match3 and block == 0:
                number = match3.group(1)
                srt += "%s\n" % number
                subnr = True
            else:
                if self.config.get("convert_subtitle_colors"):
                    sub = line
                    for tag, color in self._COLORS.items():
                        sub = re.sub('<' + tag + '>', '<font color="' + color + '">', sub)
                    sub = re.sub('</.+>', '</font>', sub)
                else:
                    sub = re.sub('<[^>]*>', '', line)
                srt += sub.strip()
                srt += "\n"
        return decode_html_entities(srt)

    @staticmethod
    def _flush_cue(subs, itmes, time, min_subs):
        """Merge cue ``itmes`` into the last entry of ``subs`` or append it.

        The cue is merged (only the end time of the previous cue is moved)
        when ``subs`` holds at least ``min_subs`` cues and both cues have the
        same first text line.  The length checks keep cues without a text
        line from raising IndexError.
        """
        repeated = (
            len(subs) >= min_subs
            and len(itmes) > 1
            and len(subs[-1]) > 1
            and itmes[1] == subs[-1][1]
        )
        if repeated:
            # This will happen when there are two sections in file
            ha = strdate(subs[-1][0])
            ha3 = strdate(itmes[0])
            second = str2sec(ha3.group(2)) + time
            subs[-1][0] = "{} --> {}".format(ha.group(1), sec2str(second))
        else:
            ha = strdate(itmes[0])
            first = str2sec(ha.group(1)) + time
            second = str2sec(ha.group(2)) + time
            itmes[0] = "{} --> {}".format(sec2str(first), sec2str(second))
            subs.append(itmes)

    def wrstsegment(self, subdata):
        """Download every segment in ``self.kwargs["m3u8"]`` and join them as SRT.

        ``subdata`` is not used; the segments are fetched through ``self.http``.
        """
        time = 0
        subs = []
        for segment in self.kwargs["m3u8"].media_segment:
            itemurl = get_full_url(segment["URI"], self.url)
            cont = self.http.get(itemurl)
            if "cmore" in self.url:
                cont.encoding = "utf-8"
            text = cont.text.split("\n")
            for t in text:
                # is in text[1] for tv4play, but this should be more future proof
                if 'X-TIMESTAMP-MAP=MPEGTS' in t:
                    time = float(re.search(r"X-TIMESTAMP-MAP=MPEGTS:(\d+)", t).group(1)) / 90000 - 10
            text = text[3:len(text) - 2]
            if len(text) > 1:
                itmes = []
                for n in text:
                    if n:
                        itmes.append(n)
                    elif len(itmes) < 2 and (len(subs) > 1 or not itmes):
                        # Ignore empty lines in unexpected places; these used
                        # to raise IndexError on itmes[0] / itmes[1].
                        pass
                    else:
                        self._flush_cue(subs, itmes, time, 2)
                        itmes = []
                if itmes:
                    self._flush_cue(subs, itmes, time, 1)

        return "".join("{}\n{}\n\n".format(nr, '\n'.join(sub)) for nr, sub in enumerate(subs, 1))
def select_quality(config, streams):
    """Return the stream from ``streams`` that best matches the configured quality.

    ``config["quality"]`` is a number or a ``"low-high"`` range string and
    ``config["flexibleq"]`` is the tolerance around it.  A quality of 0
    selects the highest available bitrate.

    Raises:
        error.UIException: the quality settings are not numeric, no bitrate is
            inside the wanted range, or no candidate URL answers with a
            status code below 404.
        error.NoRequestedProtocols: no stream uses one of the wanted protocols.
    """
    requested = config.get("quality")
    high = 0
    if isinstance(requested, str):
        parts = requested.split("-")
        try:
            quality = int(parts[0])
            if len(parts) > 1:
                high = int(parts[1])
        except ValueError:
            raise error.UIException(
                "Requested quality is invalid. use a number or range lowerNumber-higherNumber"
            )
    else:
        quality = requested
    try:
        optq = int(quality)
    except ValueError:
        raise error.UIException("Requested quality needs to be a number")

    try:
        optf = int(config.get("flexibleq"))
    except ValueError:
        raise error.UIException("Flexible-quality needs to be a number")

    if optf == 0 and high:
        # A range was given: aim at its midpoint and accept half its width.
        optf = (high - quality) / 2
        optq = quality + (high - quality) / 2

    # Extract protocol prio, in the form of "hls,hds,http",
    # we want it as a list
    if config.get("stream_prio"):
        proto_prio = config.get("stream_prio").split(",")
    elif config.get("live") or (streams and streams[0].config.get("live")):
        # The emptiness check keeps an empty list from raising IndexError.
        proto_prio = LIVE_PROTOCOL_PRIO
    else:
        proto_prio = DEFAULT_PROTOCOL_PRIO

    # Filter away any unwanted protocols, and prioritize
    # based on --stream-priority.
    candidates = streams
    streams = protocol_prio(candidates, proto_prio)

    if len(streams) == 0:
        # Report the protocols that were offered; the filtered list is empty
        # here, so building "found" from it always produced [].
        raise error.NoRequestedProtocols(
            requested=proto_prio, found=list({s.name for s in candidates})
        )

    # Build a dict indexed by bitrate, where each value
    # is the stream with the highest priority protocol.
    stream_hash = {}
    for s in streams:
        stream_hash.setdefault(s.bitrate, s)

    avail = sorted(stream_hash, reverse=True)

    # wanted_lim is a two element tuple defines lower/upper bounds
    # (inclusive). By default, we want only the best for you
    # (literally!).
    wanted_lim = (avail[0],) * 2
    if optq:
        wanted_lim = (optq - optf, optq + optf)

    # wanted is the filtered list of available streams, having
    # a bandwidth within the wanted_lim range.
    wanted = [a for a in avail if wanted_lim[0] <= a <= wanted_lim[1]]

    # If none remains, the bitrate filtering was too tight.
    if len(wanted) == 0:
        data = sort_quality(streams)
        quality = ", ".join("{} ({})".format(str(x), str(y)) for x, y in data)
        raise error.UIException("Can't find that quality. Try one of: %s (or "
                                "try --flexible-quality)" % quality)

    http = HTTP(config)
    # Test if the wanted stream is available. If not try with the second best and so on.
    for w in wanted:
        res = http.get(stream_hash[w].url, cookies=stream_hash[w].kwargs.get("cookies", None))
        if res is not None and res.status_code < 404:
            return stream_hash[w]

    raise error.UIException("Streams not available to download.")
class subtitle(object):
    """Fetch one subtitle resource and write it out as an ``srt`` file.

    ``subtype`` names the source format and selects the converter used by
    :meth:`download`.  Extra keyword arguments are kept in ``self.kwargs``;
    :meth:`wrstsegment` reads ``m3u8`` from it.
    """

    # Raw SAMI cue bodies treated as "no text".  The entity form is presumably
    # what an unparsed line carries; the other two are its decoded variants.
    _SMI_BLANK = ("&nbsp;", "\xa0", " ")

    # WebVTT colour tags -> hex colour, built once instead of once per line.
    # "c.magenta" was misspelled "c.magneta" and therefore never matched.
    _COLORS = {
        '30': '#000000',
        '31': '#ff0000',
        '32': '#00ff00',
        '33': '#ffff00',
        '34': '#0000ff',
        '35': '#ff00ff',
        '36': '#00ffff',
        '37': '#ffffff',
        'c.black': '#000000',
        'c.red': '#ff0000',
        'c.green': '#00ff00',
        'c.yellow': '#ffff00',
        'c.blue': '#0000ff',
        'c.magenta': '#ff00ff',
        'c.cyan': '#00ffff',
        'c.gray': '#ffffff',
    }

    def __init__(self, config, subtype, url, subfix=None, **kwargs):
        self.url = url
        self.subtitle = None
        self.config = config
        self.subtype = subtype
        self.http = HTTP(config)
        self.subfix = subfix
        # Set by download() when the payload starts with a UTF-8 BOM.
        self.bom = False
        self.output = kwargs.pop("output", None)
        self.kwargs = kwargs

    def __repr__(self):
        return "<Subtitle(type={}, url={})>".format(self.subtype, self.url)

    def download(self):
        """Fetch ``self.url``, convert it according to ``self.subtype`` and save it.

        Returns ``None`` in every case.  A non-200 response or a subtype
        without a converter is logged and nothing is written as ``srt``.
        """
        subdata = self.http.request("get", self.url)
        if subdata.status_code != 200:
            log.warning("Can't download subtitle file")
            return

        if "mtgx" in self.url and subdata.content[:3] == b"\xef\xbb\xbf":
            subdata.encoding = "utf-8"
            self.bom = True

        if self.subtype == "wrst":
            if "tv4play" in self.url and subdata.content[:3] == b"\xef\xbb\xbf":
                subdata.encoding = "utf-8"
                self.bom = True
            if "dplay" in self.url:
                subdata.encoding = "utf-8"

        converters = {
            "tt": self.tt,
            "json": self.json,
            "sami": self.sami,
            "smi": self.smi,
            "wrst": self.wrst,
            "wrstsegment": self.wrstsegment,
            "raw": self.raw,
        }
        converter = converters.get(self.subtype)
        data = converter(subdata) if converter is not None else None

        if self.subfix and self.config.get("get_all_subtitles"):
            if self.output["episodename"]:
                self.output["episodename"] = "{}-{}".format(self.output["episodename"], self.subfix)
            else:
                self.output["episodename"] = self.subfix

        if self.config.get("get_raw_subtitles"):
            subdata = self.raw(subdata)
            self.save_file(subdata, self.subtype)

        if data is None:
            # Without this guard file.write(None) raises TypeError below.
            log.warning("No converter for subtitle type %s", self.subtype)
            return
        self.save_file(data, "srt")

    def save_file(self, data, subtype):
        """Write ``data`` to the file ``output()`` opens for ``subtype``.

        The file is always opened as UTF-8, so the result does not depend on
        the platform's locale encoding.  Does nothing when ``output()``
        returns something without a ``read`` attribute.
        """
        file_d = output(self.output, self.config, subtype, mode="wt", encoding="utf-8")
        if hasattr(file_d, "read") is False:
            return
        try:
            file_d.write(data)
        finally:
            # Close the handle even if the write fails.
            file_d.close()

    def raw(self, subdata):
        """Return the response body unchanged."""
        return subdata.text

    @staticmethod
    def _tt_end(begin, duration):
        """Return ``begin + duration`` formatted as ``HH:MM:SS.mmm``.

        Both arguments are colon-separated clock strings.  Overflowing
        seconds are carried into minutes and hours.  If the seconds of
        ``begin`` or any field of ``duration`` is not numeric, the seconds
        fall back to zero and only the hours/minutes of ``begin`` are kept.
        """
        begin_parts = begin.split(":")
        duration_parts = duration.split(":")
        try:
            seconds = float(begin_parts[2]) + float(duration_parts[2])
            seconds += 60 * int(duration_parts[1]) + 3600 * int(duration_parts[0])
        except ValueError:
            seconds = 0.0
        total = int(begin_parts[0]) * 3600 + int(begin_parts[1]) * 60 + seconds
        # Work in whole milliseconds so rounding can never print "60.000".
        total_ms = int(round(total * 1000))
        hours, rest = divmod(total_ms, 3600000)
        minutes, rest = divmod(rest, 60000)
        return "%02d:%02d:%06.3f" % (hours, minutes, rest / 1000.0)

    def tt(self, subdata):
        """Convert a TTML response to SRT text, numbering cues from 1."""
        i = 1
        data = ""
        # Drop the default namespace so plain tag names can be used in find().
        markup = re.sub(' xmlns="[^"]+"', '', subdata.text, count=1)
        tree = ET.XML(markup)
        xml = tree.find("body").find("div")
        for node in xml.findall("p"):
            if norm(node.tag) not in ("p", "span"):
                continue
            begin = node.attrib["begin"]
            end = node.attrib.get("end")
            if end is None:
                # Only look the duration up when it is needed, so a node with
                # an explicit "end" and no duration no longer raises KeyError.
                if "dur" in node.attrib:
                    duration = node.attrib["dur"]
                else:
                    duration = node.attrib["duration"]
                end = self._tt_end(begin, duration)
            data += '%s\n%s --> %s\n' % (i, begin.replace(".", ","), end.replace(".", ","))
            data = tt_text(node, data)
            data += "\n"
            i += 1
        return data

    def json(self, subdata):
        """Convert a JSON list of ``startMillis``/``endMillis``/``text`` items to SRT."""
        entries = json.loads(subdata.text)
        blocks = []
        for number, entry in enumerate(entries, 1):
            blocks.append(
                "{}\n{} --> {}\n{}\n\n".format(
                    number,
                    timestr(int(entry["startMillis"])),
                    timestr(int(entry["endMillis"])),
                    entry["text"],
                )
            )
        return "".join(blocks)

    def sami(self, subdata):
        """Convert an XML document of ``Subtitle`` elements to SRT text."""
        # Escape every ampersand so bare "&" and non-XML entities survive parsing.
        tree = ET.fromstring(subdata.text.replace("&", "&amp;"))
        subs = ""
        increase = 0
        for sub in tree.findall(".//Subtitle"):
            spot = sub.attrib["SpotNumber"]
            try:
                number = int(spot)
            except ValueError:
                # Non-numeric spot number: use its first digit run and shift
                # this and all following cue numbers by one.
                number = int(re.search(r"(\d+)", spot).group(1))
                increase += 1
            n = number + increase

            lines = ""
            for node in sub.findall(".//Text"):
                line = "".join(node.itertext())
                lines += "{}\n".format(decode_html_entities(line.lstrip()))
            subs += "{}\n{} --> {}\n{}\n".format(
                n, timecolon(sub.attrib["TimeIn"]), timecolon(sub.attrib["TimeOut"]), lines)
        # Undo any escaping that is still left in the text.
        return subs.replace("&amp;", "&")

    def smi(self, subdata):
        """Convert a SAMI (``<SYNC Start=...>``) response to SRT text."""
        if requests_version < 0x20300:
            subdata = subdata.content.decode("latin")
        else:
            subdata.encoding = "ISO-8859-1"
            subdata = subdata.text
        timea = 0
        number = 1
        data = None
        subs = ""
        TAG_RE = re.compile(r'<(?!\/?i).*?>')
        bad_char = re.compile(r'\x96')
        for line in StringIO(subdata):
            line = line.rstrip()
            sync = re.search(r"<SYNC Start=(\d+)>", line)
            if sync:
                if int(sync.group(1)) != int(timea):
                    # A new SYNC closes the previous cue unless that was blank.
                    if data and data not in self._SMI_BLANK:
                        subs += "%s\n%s --> %s\n" % (number, timestr(timea), timestr(sync.group(1)))
                        text = "%s\n" % TAG_RE.sub('', data.replace("<br>", "\n"))
                        text = decode_html_entities(text)
                        if text[len(text) - 2] != "\n":
                            text += "\n"
                        subs += text
                        number += 1
                timea = sync.group(1)
            text = re.search("<P Class=SVCC>(.*)", line)
            if text:
                data = text.group(1)
        recomp = re.compile(r'\r')
        return bad_char.sub('-', recomp.sub('', subs))

    def wrst(self, subdata):
        """Convert a WebVTT response to SRT text."""
        ssubdata = StringIO(subdata.text)
        srt = ""
        subtract = False
        number_b = 1
        number = 0
        block = 0
        subnr = False
        if self.bom:
            # Skip the decoded byte-order mark.
            ssubdata.read(1)
        for line in ssubdata:
            match = re.search(r"^[\r\n]+", line)
            match2 = re.search(r"([\d:\.]+ --> [\d:\.]+)", line)
            match3 = re.search(r"^(\d+)\s", line)
            if line[:6] == "WEBVTT":
                continue
            elif "X-TIMESTAMP" in line:
                continue
            elif match and number_b == 1 and self.bom:
                continue
            elif match and number_b > 1:
                block = 0
                srt += "\n"
            elif match2:
                if not subnr:
                    srt += "%s\n" % number_b
                matchx = re.search(
                    r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)', line)
                if matchx:
                    hour1 = int(matchx.group("h1"))
                    hour2 = int(matchx.group("h2"))
                    # If cue number 1 starts at hour 10 or later, shift all
                    # following cues back by ten hours.
                    if int(number) == 1 and hour1 > 9:
                        subtract = True
                    if subtract:
                        hour1 -= 10
                        hour2 -= 10
                else:
                    # Timestamps without an hour field.
                    matchx = re.search(r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)', line)
                    hour1 = 0
                    hour2 = 0
                srt += "{0:02d}:{1}:{2} --> {3:02d}:{4}:{5}\n".format(
                    hour1, matchx.group("m1"), matchx.group("s1").replace(".", ","),
                    hour2, matchx.group("m2"), matchx.group("s2").replace(".", ","))
                block = 1
                subnr = False
                number_b += 1
            elif match3 and block == 0:
                number = match3.group(1)
                srt += "%s\n" % number
                subnr = True
            else:
                if self.config.get("convert_subtitle_colors"):
                    sub = line
                    for tag, color in self._COLORS.items():
                        sub = re.sub('<' + tag + '>', '<font color="' + color + '">', sub)
                    sub = re.sub('</.+>', '</font>', sub)
                else:
                    sub = re.sub('<[^>]*>', '', line)
                srt += sub.strip()
                srt += "\n"
        return decode_html_entities(srt)

    @staticmethod
    def _flush_cue(subs, itmes, time, min_subs):
        """Merge cue ``itmes`` into the last entry of ``subs`` or append it.

        The cue is merged (only the end time of the previous cue is moved)
        when ``subs`` holds at least ``min_subs`` cues and both cues have the
        same first text line.  The length checks keep cues without a text
        line from raising IndexError.
        """
        repeated = (
            len(subs) >= min_subs
            and len(itmes) > 1
            and len(subs[-1]) > 1
            and itmes[1] == subs[-1][1]
        )
        if repeated:
            # This will happen when there are two sections in file
            ha = strdate(subs[-1][0])
            ha3 = strdate(itmes[0])
            second = str2sec(ha3.group(2)) + time
            subs[-1][0] = "{} --> {}".format(ha.group(1), sec2str(second))
        else:
            ha = strdate(itmes[0])
            first = str2sec(ha.group(1)) + time
            second = str2sec(ha.group(2)) + time
            itmes[0] = "{} --> {}".format(sec2str(first), sec2str(second))
            subs.append(itmes)

    def wrstsegment(self, subdata):
        """Download every segment in ``self.kwargs["m3u8"]`` and join them as SRT.

        ``subdata`` is not used; the segments are fetched through ``self.http``.
        """
        time = 0
        subs = []
        for segment in self.kwargs["m3u8"].media_segment:
            itemurl = get_full_url(segment["URI"], self.url)
            cont = self.http.get(itemurl)
            if "cmore" in self.url:
                cont.encoding = "utf-8"
            text = cont.text.split("\n")
            for t in text:
                # is in text[1] for tv4play, but this should be more future proof
                if 'X-TIMESTAMP-MAP=MPEGTS' in t:
                    time = float(re.search(r"X-TIMESTAMP-MAP=MPEGTS:(\d+)", t).group(1)) / 90000 - 10
            text = text[3:len(text) - 2]
            if len(text) > 1:
                itmes = []
                for n in text:
                    if n:
                        itmes.append(n)
                    elif len(itmes) < 2 and (len(subs) > 1 or not itmes):
                        # Ignore empty lines in unexpected places.  An empty
                        # cue is now skipped regardless of how many cues were
                        # collected; it used to raise IndexError on itmes[0].
                        pass
                    else:
                        self._flush_cue(subs, itmes, time, 2)
                        itmes = []
                if itmes:
                    self._flush_cue(subs, itmes, time, 1)

        return "".join("{}\n{}\n\n".format(nr, '\n'.join(sub)) for nr, sub in enumerate(subs, 1))
def select_quality(config, streams):
    """Return the stream from ``streams`` that best matches the configured quality.

    ``config["quality"]`` is a number or a ``"low-high"`` range string and
    ``config["flexibleq"]`` is the tolerance around it.  A quality of 0
    selects the highest available bitrate.

    Raises:
        error.UIException: the quality settings are not numeric, no bitrate is
            inside the wanted range, or no candidate URL answers with a
            status code below 404.
        error.NoRequestedProtocols: no stream uses one of the wanted protocols.
    """
    requested = config.get("quality")
    high = 0
    if isinstance(requested, str):
        parts = requested.split("-")
        try:
            quality = int(parts[0])
            if len(parts) > 1:
                high = int(parts[1])
        except ValueError:
            raise error.UIException("Requested quality is invalid. use a number or range lowerNumber-higherNumber")
    else:
        quality = requested
    try:
        optq = int(quality)
    except ValueError:
        raise error.UIException("Requested quality needs to be a number")

    try:
        optf = int(config.get("flexibleq"))
    except ValueError:
        raise error.UIException("Flexible-quality needs to be a number")

    if optf == 0 and high:
        # A range was given: aim at its midpoint and accept half its width.
        optf = (high - quality) / 2
        optq = quality + (high - quality) / 2

    # Extract protocol prio, in the form of "hls,hds,http",
    # we want it as a list
    if config.get("stream_prio"):
        proto_prio = config.get("stream_prio").split(',')
    elif config.get("live") or (streams and streams[0].config.get("live")):
        # The emptiness check keeps an empty list from raising IndexError.
        proto_prio = LIVE_PROTOCOL_PRIO
    else:
        proto_prio = DEFAULT_PROTOCOL_PRIO

    # Filter away any unwanted protocols, and prioritize
    # based on --stream-priority.
    candidates = streams
    streams = protocol_prio(candidates, proto_prio)

    if len(streams) == 0:
        # Report the protocols that were offered; the filtered list is empty
        # here, so building "found" from it always produced [].
        raise error.NoRequestedProtocols(
            requested=proto_prio,
            found=list({s.name for s in candidates})
        )

    # Build a dict indexed by bitrate, where each value
    # is the stream with the highest priority protocol.
    stream_hash = {}
    for s in streams:
        stream_hash.setdefault(s.bitrate, s)

    avail = sorted(stream_hash, reverse=True)

    # wanted_lim is a two element tuple defines lower/upper bounds
    # (inclusive). By default, we want only the best for you
    # (literally!).
    wanted_lim = (avail[0],) * 2
    if optq:
        wanted_lim = (optq - optf, optq + optf)

    # wanted is the filtered list of available streams, having
    # a bandwidth within the wanted_lim range.
    wanted = [a for a in avail if wanted_lim[0] <= a <= wanted_lim[1]]

    # If none remains, the bitrate filtering was too tight.
    if len(wanted) == 0:
        data = sort_quality(streams)
        quality = ", ".join("%s (%s)" % (str(x), str(y)) for x, y in data)
        raise error.UIException("Can't find that quality. Try one of: %s (or "
                                "try --flexible-quality)" % quality)

    http = HTTP(config)
    # Test if the wanted stream is available. If not try with the second best and so on.
    for w in wanted:
        res = http.get(stream_hash[w].url, cookies=stream_hash[w].kwargs.get("cookies", None))
        if res is not None and res.status_code < 404:
            return stream_hash[w]

    raise error.UIException("Streams not available to download.")