def get(self):
    """Yield the streams found through the two lwcdn JSON endpoints.

    A ``ServiceError`` is yielded (and the generator stops) when the video
    is excluded or when ``self.get_vid()`` returns nothing.  Otherwise the
    ``item.json`` endpoint is queried first and the ``video.json`` endpoint
    (``delivery=hls``) second; for each response that contains a
    ``"videos"`` key, every stream returned by ``self.get_video()`` is
    yielded.
    """
    if self.exclude():
        yield ServiceError("Excluding video")
        return

    vid = self.get_vid()
    if not vid:
        yield ServiceError("Can't find video id")
        return

    def lookup(endpoint):
        # Download one endpoint and pass its decoded JSON to get_video().
        payload = json.loads(self.http.request("get", endpoint).text)
        if "videos" in payload:
            return self.get_video(payload)
        return None

    found = lookup("http://ljsp.lwcdn.com/web/public/item.json?type=video&%s" % decode_html_entities(vid))
    if found:
        for key in list(found):
            yield found[key]

    found = lookup("http://ljsp.lwcdn.com/web/public/video.json?id={0}&delivery=hls".format(decode_html_entities(vid)))
    if found:
        for key in list(found):
            yield found[key]
def outputfilename(self, data, filename, raw):
    """Build an output path of the form ``<name>[.<episode>]-<hash>-svtplay``.

    The hash is the first seven hex digits of the SHA-256 of
    ``data["programVersionId"]``.  The name and episode are derived from the
    page's ``data-title`` attribute via ``self.name()`` and
    ``self.seasoninfo()``.  The directory part of *filename* is kept.

    Returns ``None`` when the page has no ``data-title`` attribute.  *raw*
    is accepted but not used here.
    """
    directory = os.path.dirname(filename)

    version_id = data["programVersionId"]
    if not is_py2:
        # hashlib needs bytes on Python 3.
        version_id = version_id.encode("utf-8")
    short_hash = hashlib.sha256(version_id).hexdigest()[:7]

    title_match = re.search('data-title="([^"]+)"', self.get_urldata())
    if title_match is None:
        return None

    page_title = decode_html_entities(title_match.group(1))
    name = self.name(page_title)
    episode = self.seasoninfo(page_title)
    if is_py2:
        name = name.encode("utf8")

    if episode:
        title = "{0}.{1}-{2}-svtplay".format(name, episode, short_hash)
    else:
        title = "{0}-{1}-svtplay".format(name, short_hash)
    title = filenamify(title)

    if directory:
        return os.path.join(directory, title)
    return title
def get(self):
    """Yield the streams advertised by the embedded expressen.se player.

    The page is searched for a ``tvspelare`` player URL; that player page is
    fetched and its ``window.Player.settings`` JSON is decoded.  For each of
    the ``"iPad"`` and ``"hashHls"`` entries present under ``"streams"``,
    the playlist is passed to ``hlsparse`` and every resulting stream is
    yielded.

    Yields a ``ServiceError`` when the video is excluded.  When the player
    URL or the settings JSON cannot be found, the problem is logged and the
    generator ends without yielding anything.
    """
    data = self.get_urldata()

    if self.exclude():
        yield ServiceError("Excluding video")
        return

    match = re.search('="(http://www.expressen.se/tvspelare[^"]+)"', data)
    if not match:
        log.error("Can't find video id")
        return

    url = decode_html_entities(match.group(1))
    data = self.http.request("get", url)

    match = re.search("window.Player.settings = ({.*});", data.text)
    if not match:
        log.error("Can't find json info.")
        # Stop here: without a match there is nothing to decode, and
        # falling through would call .group() on None.
        return

    dataj = json.loads(match.group(1))
    if "streams" not in dataj:
        return

    for kind in ("iPad", "hashHls"):
        if kind not in dataj["streams"]:
            continue
        playlist = dataj["streams"][kind]
        streams = hlsparse(self.options, self.http.request("get", playlist), playlist)
        # Other callers of hlsparse in this code base check the result
        # before iterating it; do the same so an empty result is skipped.
        if streams:
            for n in list(streams.keys()):
                yield streams[n]
def sami(self, subdata):
    """Convert an XML subtitle response into numbered ``start --> end`` cues.

    *subdata* must expose the document as ``subdata.text``.  Every
    ``Subtitle`` element below the ``Font`` element starts a new cue built
    from its ``SpotNumber``, ``TimeIn`` and ``TimeOut`` attributes (times are
    formatted by ``timecolon``).  The text of every other element seen after
    a cue has started is appended as a cue line.

    Returns the assembled subtitle text (UTF-8 encoded on Python 2).
    """
    text = subdata.text
    if is_py2:
        text = text.encode("utf8")

    # Escape every ampersand before parsing: a bare "&" or an HTML-only
    # entity is not well-formed XML and makes ET.fromstring() fail.  The
    # escaped entities reach decode_html_entities() below as plain text.
    text = text.replace("&", "&amp;")
    tree = ET.fromstring(text)
    subt = tree.find("Font")

    subs = ""
    n = 0
    # Element.getiterator() was removed in Python 3.9; iter() is the
    # equivalent and is also available on Python 2.7.
    for i in subt.iter():
        if i.tag == "Subtitle":
            n = i.attrib["SpotNumber"]
            cue = "%s\n%s --> %s\n" % (
                i.attrib["SpotNumber"],
                timecolon(i.attrib["TimeIn"]),
                timecolon(i.attrib["TimeOut"]),
            )
            # Every cue except the first is preceded by a blank line.
            if i.attrib["SpotNumber"] != "1":
                cue = "\n" + cue
            subs += cue
        elif int(n) > 0 and i.text:
            subs += "%s\n" % decode_html_entities(i.text)

    if is_py2:
        subs = subs.encode('utf8')
    # Undo any escaping that survived entity decoding.
    subs = subs.replace("&amp;", "&")
    return subs
def get(self):
    """Yield the "auto" streams of the video embedded in the page.

    The video id is taken either directly from an ``embed.jsp?...`` link in
    the page, or, when the page has a ``video url-...`` reference, from the
    ``embed.jsp?...`` link found on that referenced svd.se page.  The id is
    then resolved through the lwcdn ``item.json`` endpoint and every stream
    named ``"auto"`` is handed to ``hlsparse``.

    Yields a ``ServiceError`` (and stops) when the video is excluded or no
    video id can be found.
    """
    vid = None
    data = self.get_urldata()

    if self.exclude(self.options):
        yield ServiceError("Excluding video")
        return

    match = re.search(r'video url-([^"]+)', data)
    if not match:
        match = re.search(r'embed.jsp\?([^"]+)"', self.get_urldata())
        if not match:
            yield ServiceError("Can't find video id")
            return
        vid = match.group(1)

    if not vid:
        # The page only references another page; fetch it and look there.
        path = unquote_plus(match.group(1))
        # Use the decoded text: searching the raw bytes of .content with a
        # str pattern raises TypeError on Python 3.
        data = self.http.request("get", "http://www.svd.se%s" % path).text
        match = re.search(r'embed.jsp\?([^"]+)', data)
        if not match:
            yield ServiceError("Can't find video id")
            return
        vid = match.group(1)

    url = "http://ljsp.lwcdn.com/web/public/item.json?type=video&%s" % decode_html_entities(vid)
    data = self.http.request("get", url).text
    jdata = json.loads(data)
    media = jdata["videos"][0]["media"]
    for i in media["streams"]:
        if i["name"] != "auto":
            continue
        hls = "%s%s" % (media["base"], i["url"])
        streams = hlsparse(self.options, self.http.request("get", hls), hls)
        if streams:
            for n in list(streams.keys()):
                yield streams[n]
def get_one_media(stream, options):
    """Choose an output name for *stream*, then fetch its subtitle and video.

    When ``options.output`` is empty or names a directory, the page's
    ``<title>`` is used to derive a file name.  The items produced by
    ``stream.get(options)`` are sorted into videos (optionally filtered by
    ``options.preferred``) and subtitles.  The first subtitle is downloaded
    when ``options.subtitle`` is set and output is not ``"-"``; with
    ``options.force_subtitle`` the function returns right after that.  The
    video picked by ``select_quality`` is then downloaded, followed by its
    thumbnail when requested.

    A ``UIException`` raised by the download is re-raised in verbose mode;
    otherwise it is logged and the process exits with status 2.
    """
    if not options.output or os.path.isdir(options.output):
        data = stream.get_urldata()
        match = re.search(r"(?i)<title[^>]*>\s*(.*?)\s*</title>", data, re.S)
        if match:
            options.output_auto = True
            title_tag = decode_html_entities(match.group(1))
            if not options.output:
                options.output = filenamify(title_tag)
            else:
                # output is a directory
                options.output = os.path.join(options.output, filenamify(title_tag))

    if platform.system() == "Windows":
        # Ugly hack: use forward slashes so that a path such as
        # c:\test\kalle.flv does not get its "\t" treated as a tab later on.
        # options.output may still be None here (no name given and no
        # <title> found), so check it before calling .find().
        if options.output and options.output.find("\\") > 0:
            options.output = options.output.replace("\\", "/")

    videos = []
    subs = []
    streams = stream.get(options)
    if streams:
        for i in streams:
            if isinstance(i, VideoRetriever):
                if options.preferred:
                    if options.preferred == i.name():
                        videos.append(i)
                else:
                    videos.append(i)
            if isinstance(i, subtitle):
                subs.append(i)

        if options.subtitle and options.output != "-":
            if subs:
                # Download with a copy of the options object.
                subs[0].download(copy.copy(options))
            if options.force_subtitle:
                return

        if len(videos) > 0:
            stream = select_quality(options, videos)
            try:
                stream.download()
            except UIException as e:
                if options.verbose:
                    raise
                # NOTE(review): relies on UIException providing .message;
                # plain Python 3 exceptions do not have it - confirm.
                log.error(e.message)
                sys.exit(2)

            if options.thumbnail:
                if hasattr(stream, "get_thumbnail"):
                    log.info("thumb requested")
                    if options.output != "-":
                        log.info("getting thumbnail")
                        stream.get_thumbnail(options)
            else:
                log.info("no thumb requested")
        else:
            log.error("Can't find any streams for that url")
    else:
        log.error("Can't find any streams for that url")
def filename(options, stream):
    """Normalise ``options.output``, deriving it from the page title if needed.

    A given output name is decoded on Python 2 (latin1 on Windows, UTF-8
    elsewhere), stripped of quote characters and of trailing backslashes.
    When no name is left, or the name is a directory, ``stream.get_urldata()``
    (an ``(error, data)`` pair) is consulted and the page's ``<title>`` is
    turned into a file name.

    Returns ``False`` when the page could not be fetched (error set, or data
    is ``None``), ``True`` otherwise.
    """
    on_windows = platform.system() == "Windows"

    if options.output:
        if is_py2:
            encoding = "latin1" if on_windows else "utf-8"
            options.output = options.output.decode(encoding)
        cleaned = options.output.replace('"', '')
        cleaned = cleaned.replace("'", "")
        options.output = cleaned.rstrip('\\')

    if not options.output or os.path.isdir(options.output):
        error, data = stream.get_urldata()
        if error:
            log.error("Cant find that page")
            return False
        if data is None:
            return False

        title = re.search(r"(?i)<title[^>]*>\s*(.*?)\s*</title>", data, re.S)
        if title:
            options.output_auto = True
            safe_name = filenamify(decode_html_entities(title.group(1)))
            if options.output:
                # output is a directory
                options.output = os.path.join(options.output, safe_name)
            else:
                options.output = safe_name

    # Ugly hack kept from the original: on Windows, switch to forward
    # slashes so "\t" in c:\test\kalle.flv is not read as a tab later.
    if on_windows and options.output and options.output.find("\\") > 0:
        options.output = options.output.replace("\\", "/")
    return True
def outputfilename(self, data, filename, raw):
    """Return the output path ``<name>[.<episode>]-<hash>-svtplay``.

    ``<hash>`` is the first seven hex digits of the SHA-256 digest of
    ``data["programVersionId"]``; ``<name>`` and ``<episode>`` come from
    ``self.name()`` and ``self.seasoninfo()`` applied to the page's
    ``data-title`` attribute.  The directory of *filename* is preserved.

    Returns ``None`` if the page carries no ``data-title``.  *raw* is not
    used by this implementation.
    """
    directory = os.path.dirname(filename)

    digest_input = data["programVersionId"]
    if not is_py2:
        # Python 3 hashes bytes only.
        digest_input = digest_input.encode("utf-8")
    digest = hashlib.sha256(digest_input).hexdigest()[:7]

    found = re.search('data-title="([^"]+)"', self.get_urldata())
    if found is None:
        return None

    decoded_title = decode_html_entities(found.group(1))
    name = self.name(decoded_title)
    episode = self.seasoninfo(decoded_title)
    if is_py2:
        name = name.encode("utf8")

    if episode:
        title = "{0}.{1}-{2}-svtplay".format(name, episode, digest)
    else:
        title = "{0}-{1}-svtplay".format(name, digest)
    title = filenamify(title)

    return os.path.join(directory, title) if directory else title
def sami(self, subdata):
    """Turn an XML subtitle response into numbered ``start --> end`` cues.

    The document is read from ``subdata.text``.  Each ``Subtitle`` element
    under the ``Font`` element opens a cue using its ``SpotNumber``,
    ``TimeIn`` and ``TimeOut`` attributes (times go through ``timecolon``);
    the text of any other element encountered once a cue is open becomes a
    line of that cue.

    Returns the subtitle text (UTF-8 encoded on Python 2).
    """
    text = subdata.text
    if is_py2:
        text = text.encode("utf8")

    # A bare "&" (or an HTML-only entity) is not well-formed XML and would
    # make ET.fromstring() raise, so escape all ampersands first.  The
    # original entities are decoded later by decode_html_entities().
    text = text.replace("&", "&amp;")
    tree = ET.fromstring(text)
    subt = tree.find("Font")

    subs = ""
    n = 0
    # iter() replaces getiterator(), which no longer exists on Python 3.9+.
    for i in subt.iter():
        if i.tag == "Subtitle":
            n = i.attrib["SpotNumber"]
            # A blank line separates every cue from the previous one.
            lead = "" if i.attrib["SpotNumber"] == "1" else "\n"
            subs += "%s%s\n%s --> %s\n" % (
                lead,
                i.attrib["SpotNumber"],
                timecolon(i.attrib["TimeIn"]),
                timecolon(i.attrib["TimeOut"]),
            )
        elif int(n) > 0 and i.text:
            subs += "%s\n" % decode_html_entities(i.text)

    if is_py2:
        subs = subs.encode('utf8')
    # Restore ampersands that are still escaped after entity decoding.
    subs = subs.replace("&amp;", "&")
    return subs
def get(self):
    """Yield the HLS streams described by the page's ``data-player-config``.

    The JSON in the ``data-player-config`` attribute supplies the video id,
    API base URL and vendor, and sets ``self.options.live``.  For non-live
    videos the asset description is downloaded and its
    ``streamUrls["hls"]`` playlist is parsed with ``hlsparse``; nothing is
    yielded for live videos.

    Yields a ``ServiceError`` (and stops) when the video is excluded or the
    attribute is missing.
    """
    page = self.get_urldata()

    if self.exclude():
        yield ServiceError("Excluding video")
        return

    config = re.search('data-player-config="([^"]+)"', page)
    if not config:
        yield ServiceError("Can't find video info")
        return

    janson = json.loads(decode_html_entities(config.group(1)))
    player = janson["playerOptions"]
    video_id = player["id"]
    api_base = player["api"]
    vendor = player["vendor"]
    self.options.live = janson["live"]

    if self.options.live:
        return

    asset_url = "{}{}/assets/{}?appName=svp-player".format(
        api_base, vendor, video_id)
    asset = json.loads(self.http.request("get", asset_url).text)
    streams = hlsparse(
        self.options,
        self.http.request("get", asset["streamUrls"]["hls"]),
        asset["streamUrls"]["hls"])
    if streams:
        for key in list(streams):
            yield streams[key]
def get(self):
    """Yield the streams advertised by the embedded expressen.se player.

    The page is searched for an ``https`` ``tvspelare`` player URL; that
    player page is fetched and its ``window.Player.settings`` JSON decoded.
    For each of the ``"iPad"`` and ``"hashHls"`` entries found under
    ``"streams"``, the playlist is parsed with ``hlsparse`` and every
    resulting stream is yielded.

    Yields a ``ServiceError`` when the video is excluded.  A missing player
    URL or missing settings JSON is logged and ends the generator without
    yielding anything.
    """
    data = self.get_urldata()

    if self.exclude():
        yield ServiceError("Excluding video")
        return

    match = re.search('="(https://www.expressen.se/tvspelare[^"]+)"', data)
    if not match:
        log.error("Can't find video id")
        return

    url = decode_html_entities(match.group(1))
    data = self.http.request("get", url)

    match = re.search("window.Player.settings = ({.*});", data.text)
    if not match:
        log.error("Can't find json info.")
        # Without this return the code would go on to call .group() on
        # None and raise AttributeError.
        return

    dataj = json.loads(match.group(1))
    if "streams" not in dataj:
        return

    for kind in ("iPad", "hashHls"):
        if kind not in dataj["streams"]:
            continue
        playlist = dataj["streams"][kind]
        streams = hlsparse(self.options, self.http.request("get", playlist), playlist)
        # Check the result before iterating, as the other hlsparse callers
        # in this code base do.
        if streams:
            for n in list(streams.keys()):
                yield streams[n]
def get(self):
    """Yield HLS streams for the player configured in the page.

    Reads the ``data-player-config`` attribute, decodes its JSON and takes
    the video id, API base and vendor from ``playerOptions``;
    ``self.options.live`` is set from the ``live`` field.  Only when the
    video is not live is the asset description fetched and its
    ``streamUrls["hls"]`` playlist passed to ``hlsparse``.

    Yields a ``ServiceError`` (and stops) when the video is excluded or the
    attribute cannot be found.
    """
    html = self.get_urldata()

    if self.exclude():
        yield ServiceError("Excluding video")
        return

    found = re.search('data-player-config="([^"]+)"', html)
    if not found:
        yield ServiceError("Can't find video info")
        return

    janson = json.loads(decode_html_entities(found.group(1)))
    video_id = janson["playerOptions"]["id"]
    api_base = janson["playerOptions"]["api"]
    vendor = janson["playerOptions"]["vendor"]
    self.options.live = janson["live"]

    if not self.options.live:
        asset_url = "{}{}/assets/{}?appName=svp-player".format(api_base, vendor, video_id)
        asset = json.loads(self.http.request("get", asset_url).text)
        hls_url = asset["streamUrls"]["hls"]
        # The URL is looked up twice in the original; a second lookup on
        # the same dict yields the same value.
        streams = hlsparse(self.options, self.http.request("get", hls_url), asset["streamUrls"]["hls"])
        if streams:
            for key in list(streams):
                yield streams[key]
def get(self):
    """Yield streams from the lwcdn ``item.json`` and ``video.json`` endpoints.

    Stops after yielding a ``ServiceError`` when the video is excluded or
    ``self.get_vid()`` returns no id.  Each endpoint response that has a
    ``"videos"`` key is passed to ``self.get_video()`` and the streams it
    returns are yielded, ``item.json`` results first.
    """
    if self.exclude():
        yield ServiceError("Excluding video")
        return

    vid = self.get_vid()
    if not vid:
        yield ServiceError("Can't find video id")
        return

    def streams_from(endpoint):
        # Fetch one endpoint; return get_video()'s result or None.
        response = json.loads(self.http.request("get", endpoint).text)
        return self.get_video(response) if "videos" in response else None

    result = streams_from("http://ljsp.lwcdn.com/web/public/item.json?type=video&%s" % decode_html_entities(vid))
    if result:
        for name in list(result):
            yield result[name]

    result = streams_from("http://ljsp.lwcdn.com/web/public/video.json?id={0}&delivery=hls".format(decode_html_entities(vid)))
    if result:
        for name in list(result):
            yield result[name]
def get(self):
    """Yield the HLS streams of an Aftonbladet (aptoma) video page.

    The video id comes from the ``data-aptomaId`` attribute (in which case
    ``data-isLive`` decides whether ``self.options.live`` is switched on) or,
    failing that, from the JSON in ``data-player-config``.  For non-live
    videos the id is first resolved through the metadata service.  The
    formats service then names an HLS host/path ("level3" preferred over
    "akamai", "csmil" preferred over "m3u8"), and one playlist per entry in
    its ``files`` list is parsed with ``hlsparse``.

    Yields a ``ServiceError`` (and stops) when the video is excluded or the
    required attributes are missing.
    """
    page = self.get_urldata()

    if self.exclude():
        yield ServiceError("Excluding video")
        return

    aptoma = re.search('data-aptomaId="([-0-9a-z]+)"', page)
    if aptoma:
        videoId = aptoma.group(1)
        live_flag = re.search(r'data-isLive="(\w+)"', page)
        if not live_flag:
            yield ServiceError("Can't find live info")
            return
        if live_flag.group(1) == "true":
            self.options.live = True
    else:
        config = re.search('data-player-config="([^"]+)"', page)
        if not config:
            yield ServiceError("Can't find video info")
            return
        videoId = json.loads(decode_html_entities(config.group(1)))["videoId"]

    if not self.options.live:
        meta_url = "http://aftonbladet-play-metadata.cdn.drvideo.aptoma.no/video/%s.json" % videoId
        videoId = json.loads(self.http.request("get", meta_url).text)["videoId"]

    formats_url = "http://aftonbladet-play-static-ext.cdn.drvideo.aptoma.no/actions/video/?id=%s&formats&callback=" % videoId
    formats = json.loads(self.http.request("get", formats_url).text)
    variants = formats["formats"]["hls"]
    cdn = variants["level3"] if "level3" in variants else variants["akamai"]
    hls = cdn["csmil"][0] if "csmil" in cdn else cdn["m3u8"][0]
    address = hls["address"]
    path = hls["path"]

    for entry in hls["files"]:
        if "filename" in entry:
            plist = "http://%s/%s/%s/master.m3u8" % (address, path, entry["filename"])
        else:
            plist = "http://%s/%s/%s" % (address, path, hls["filename"])

        streams = hlsparse(self.options, self.http.request("get", plist), plist)
        if streams:
            for key in list(streams):
                yield streams[key]
def get(self, options):
    """Yield the HLS streams of an Aftonbladet (aptoma) video page.

    *options* is the options object: it is passed to ``self.exclude()`` and
    ``hlsparse``, and its ``live`` attribute is set to ``True`` when the
    page's ``data-isLive`` attribute is ``"true"``.

    The video id is read from ``data-aptomaId`` or, failing that, from the
    JSON in ``data-player-config``.  For non-live videos the id is resolved
    through the metadata service first.  The formats service then supplies
    the HLS host/path ("level3" before "akamai", "csmil" before "m3u8"), and
    one playlist per ``files`` entry is parsed.

    Yields a ``ServiceError`` (and stops) when the video is excluded or the
    required attributes are missing.
    """
    page = self.get_urldata()

    if self.exclude(options):
        yield ServiceError("Excluding video")
        return

    aptoma = re.search('data-aptomaId="([-0-9a-z]+)"', page)
    if aptoma:
        videoId = aptoma.group(1)
        live_flag = re.search(r'data-isLive="(\w+)"', page)
        if not live_flag:
            yield ServiceError("Can't find live info")
            return
        if live_flag.group(1) == "true":
            options.live = True
    else:
        config = re.search('data-player-config="([^"]+)"', page)
        if not config:
            yield ServiceError("Can't find video info")
            return
        videoId = json.loads(decode_html_entities(config.group(1)))["videoId"]

    if not options.live:
        meta_url = "http://aftonbladet-play-metadata.cdn.drvideo.aptoma.no/video/%s.json" % videoId
        videoId = json.loads(self.http.request("get", meta_url).text)["videoId"]

    formats_url = "http://aftonbladet-play-static-ext.cdn.drvideo.aptoma.no/actions/video/?id=%s&formats&callback=" % videoId
    formats = json.loads(self.http.request("get", formats_url).text)
    variants = formats["formats"]["hls"]
    cdn = variants["level3"] if "level3" in variants else variants["akamai"]
    hls = cdn["csmil"][0] if "csmil" in cdn else cdn["m3u8"][0]
    address = hls["address"]
    path = hls["path"]

    for entry in hls["files"]:
        if "filename" in entry:
            plist = "http://%s/%s/%s/master.m3u8" % (address, path, entry["filename"])
        else:
            plist = "http://%s/%s/%s" % (address, path, hls["filename"])

        streams = hlsparse(options, self.http.request("get", plist), plist)
        if streams:
            for key in list(streams):
                yield streams[key]
def find_video_id(self):
    """Return the video id for the current page, or ``None``.

    Lookup order:

    1. a double-quoted ``data-video-id`` attribute in the page,
    2. ``/video/<digits>/`` or ``/klipp/<digits>/`` in the URL path,
    3. a single-quoted ``data-video-id`` attribute in the page,
    4. an id from ``/videoEpisod-<id>/`` in the path, a ``data-id="<digits>-``
       attribute, or the ``articleId`` query parameter; such an id is
       probed against svtplay.se as "video" then "klipp", and on an HTTP
       200 the object is re-pointed at that URL and the search restarts,
    5. an embedded ``//www.svt.se/wd...`` ``src`` URL, which likewise
       re-points the object and restarts the search.
    """
    found = re.search('data-video-id="([^"]+)"', self.get_urldata())
    if found:
        return found.group(1)

    parts = urlparse(self.url)
    params = parse_qs(parts.query)
    for pattern in ("/video/([0-9]+)/", "/klipp/([0-9]+)/"):
        found = re.search(pattern, parts.path)
        if found:
            return found.group(1)

    found = re.search("data-video-id='([^']+)'", self.get_urldata())
    if found:
        return found.group(1)

    found = re.search("/videoEpisod-([^/]+)/", parts.path)
    if not found:
        found = re.search(r'data-id="(\d+)-', self.get_urldata())

    vid = found.group(1) if found else None
    if not vid and "articleId" in params:
        vid = params["articleId"][0]

    if vid:
        vtype = None
        for candidate in ["video", "klipp"]:
            probe = self.http.request("get", "http://www.svtplay.se/%s/%s/" % (candidate, vid))
            if probe.status_code == 200:
                vtype = candidate
                break
        if vtype:
            # Re-point at the canonical page and start over on its content.
            self._url = "http://www.svtplay.se/%s/%s/" % (vtype, vid)
            self._urldata = None
            self.get_urldata()
            return self.find_video_id()

    if not found:
        embedded = re.search(r'src="(//www.svt.se/wd?[^"]+)"', self.get_urldata())
        if embedded:
            self._urldata = None
            self._url = "http:%s" % decode_html_entities(embedded.group(1))
            self.get_urldata()
            return self.find_video_id()

    return None
def wrst(self, subdata):
    """Convert a WebVTT-style subtitle response (``subdata.text``) to SRT.

    The input is processed line by line:

    * the ``WEBVTT`` header is dropped,
    * a blank line after the first cue ends the current cue,
    * a ``start --> end`` line becomes an SRT timing line, preceded by a
      running cue number unless the source supplied its own number line,
    * a leading number line outside a cue is copied as the cue number,
    * anything else is cue text, with ``<...>`` tags removed.

    If the cue numbered 1 starts at hour 10 or later, 10 hours are
    subtracted from all following timestamps.  HTML entities are decoded at
    the end; the result is UTF-8 encoded on Python 2.
    """
    # WebVTT allows the hours field to be omitted, so try "h:m:s" first and
    # fall back to "m:s".
    full_ts = re.compile(
        r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)')
    short_ts = re.compile(
        r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)')

    ssubdata = StringIO(subdata.text)
    srt = ""
    subtract = False
    number_b = 1   # running cue counter
    number = 0     # last cue number read from the source
    block = 0      # 1 while inside a cue (after its timing line)
    subnr = False  # True when the source supplied the cue number itself

    for i in ssubdata.readlines():
        match = re.search(r"^[\r\n]+", i)
        match3 = re.search(r"^(\d+)\s", i)
        timing = full_ts.search(i)
        has_hours = timing is not None
        if timing is None:
            timing = short_ts.search(i)

        if i[:6] == "WEBVTT":
            pass
        elif match and number_b > 1:
            block = 0
            srt += "\n"
        elif timing:
            if not subnr:
                srt += "%s\n" % number_b
            if has_hours:
                hour1 = int(timing.group("h1"))
                hour2 = int(timing.group("h2"))
                if int(number) == 1 and hour1 > 9:
                    subtract = True
                if subtract:
                    hour1 -= 10
                    hour2 -= 10
            else:
                hour1 = 0
                hour2 = 0
            # SRT timestamps are hh:mm:ss,mmm - pad the hours to two digits
            # and use a comma as the decimal separator.
            srt += "{0:02d}:{1}:{2} --> {3:02d}:{4}:{5}\n".format(
                hour1, timing.group("m1"), timing.group("s1").replace(".", ","),
                hour2, timing.group("m2"), timing.group("s2").replace(".", ","))
            block = 1
            subnr = False
            number_b += 1
        elif match3 and block == 0:
            number = match3.group(1)
            srt += "%s\n" % number
            subnr = True
        else:
            sub = re.sub('<[^>]*>', '', i)
            srt += sub.strip()
            srt += "\n"

    srt = decode_html_entities(srt)
    if is_py2:
        return srt.encode("utf-8")
    return srt
def smi(self, subdata):
    """Convert a SAMI (``<SYNC Start=...>``) subtitle response to SRT text.

    The response body is read as ISO-8859-1.  Each ``<SYNC Start=ms>`` line
    closes the text collected from the previous ``<P Class=SVCC>`` line: a
    numbered cue running from the previous sync time to this one is emitted,
    with ``<br>`` turned into newlines, all tags except ``<i>``/``</i>``
    removed and HTML entities decoded.  Blank placeholders are skipped.

    Carriage returns are removed and ``\\x96`` is replaced by ``-`` in the
    result, which is UTF-8 encoded on Python 2 when it is unicode.
    """
    if requests_version < 0x20300:
        if is_py2:
            subdata = subdata.content
        else:
            subdata = subdata.content.decode("latin")
    else:
        subdata.encoding = "ISO-8859-1"
        subdata = subdata.text

    ssubdata = StringIO(subdata)
    timea = 0
    number = 1
    data = None
    subs = ""
    TAG_RE = re.compile(r'<(?!\/?i).*?>')
    bad_char = re.compile(r'\x96')
    recomp = re.compile(r'\r')

    for i in ssubdata.readlines():
        i = i.rstrip()
        sync = re.search(r"<SYNC Start=(\d+)>", i)
        if sync:
            if int(sync.group(1)) != int(timea):
                # The collected text is still raw markup at this point, so a
                # blank cue appears as the "&nbsp;" entity (or as plain
                # whitespace); neither should become a numbered cue.
                if data and data.strip() not in ("", "&nbsp;"):
                    subs += "%s\n%s --> %s\n" % (number, timestr(timea), timestr(sync.group(1)))
                    text = "%s\n" % TAG_RE.sub('', data.replace("<br>", "\n"))
                    text = decode_html_entities(text)
                    # Make sure the cue ends with a blank line.
                    if text[len(text) - 2] != "\n":
                        text += "\n"
                    subs += text
                    number += 1
            timea = sync.group(1)
        text = re.search("<P Class=SVCC>(.*)", i)
        if text:
            data = text.group(1)

    text = bad_char.sub('-', recomp.sub('', subs))
    if is_py2 and isinstance(text, unicode):
        return text.encode("utf-8")
    return text
def filename(stream):
    """Make sure ``stream.options.output`` holds an output name.

    A given name is decoded on Python 2 (latin1 on Windows, UTF-8
    elsewhere).  When no name is set, or the name is a directory, the page's
    ``<title>`` is converted with ``filenamify`` and used as the file name
    (joined to the directory if one was given), and
    ``stream.options.output_auto`` is set.

    Returns ``False`` when the page data is ``None``, ``True`` otherwise.
    """
    options = stream.options

    if options.output and is_py2:
        encoding = "latin1" if platform.system() == "Windows" else "utf-8"
        options.output = options.output.decode(encoding)

    needs_title = not options.output or os.path.isdir(options.output)
    if not needs_title:
        return True

    data = ensure_unicode(stream.get_urldata())
    if data is None:
        return False

    title = re.search(r"(?i)<title[^>]*>\s*(.*?)\s*</title>", data, re.S)
    if title:
        options.output_auto = True
        safe_name = filenamify(decode_html_entities(title.group(1)))
        if options.output:
            # output is a directory
            options.output = os.path.join(options.output, safe_name)
        else:
            options.output = safe_name
    return True
def get(self):
    """Yield the "auto" streams of the video embedded in the page.

    The video id is taken either directly from an ``embed.jsp?...`` link in
    the page, or, when the page has a ``video url-...`` reference, from the
    ``embed.jsp?...`` link on that referenced svd.se page.  The id is then
    resolved through the lwcdn ``item.json`` endpoint and every stream named
    ``"auto"`` is passed to ``hlsparse``.

    Yields a ``ServiceError`` (and stops) when the video is excluded or no
    video id can be found.
    """
    vid = None
    data = self.get_urldata()

    if self.exclude():
        yield ServiceError("Excluding video")
        return

    match = re.search(r'video url-([^"]+)', data)
    if not match:
        match = re.search(r'embed.jsp\?([^"]+)"', self.get_urldata())
        if not match:
            yield ServiceError("Can't find video id")
            return
        vid = match.group(1)

    if not vid:
        # Only a reference to another page was found; look for the id there.
        path = unquote_plus(match.group(1))
        # .text, not .content: matching a str pattern against bytes raises
        # TypeError on Python 3.
        data = self.http.request("get", "http://www.svd.se%s" % path).text
        match = re.search(r'embed.jsp\?([^"]+)', data)
        if not match:
            yield ServiceError("Can't find video id")
            return
        vid = match.group(1)

    url = "http://ljsp.lwcdn.com/web/public/item.json?type=video&%s" % decode_html_entities(
        vid)
    data = self.http.request("get", url).text
    jdata = json.loads(data)
    media = jdata["videos"][0]["media"]
    for i in media["streams"]:
        if i["name"] != "auto":
            continue
        hls = "%s%s" % (media["base"], i["url"])
        streams = hlsparse(self.options, self.http.request("get", hls), hls)
        if streams:
            for n in list(streams.keys()):
                yield streams[n]
def wrst(self, subdata):
    """Convert a WebVTT-style subtitle response (``subdata.text``) to SRT.

    The input is processed line by line:

    * when ``self.bom`` is set, the first character is skipped, as are blank
      lines before the first cue,
    * the ``WEBVTT`` header and ``X-TIMESTAMP`` lines are dropped,
    * a blank line after the first cue ends the current cue,
    * a ``start --> end`` line (with or without hours) becomes an SRT timing
      line, preceded by a running cue number unless the source supplied its
      own number line,
    * a leading number line outside a cue is copied as the cue number,
    * anything else is cue text: tags are stripped, or, with
      ``self.options.convert_subtitle_colors``, ``<30>``..``<37>`` become
      ``<font color=...>`` and closing tags become ``</font>``.

    If the cue numbered 1 starts at hour 10 or later, 10 hours are
    subtracted from all following timestamps.  HTML entities are decoded at
    the end; the result is UTF-8 encoded on Python 2.
    """
    colors = {'30': '#000000', '31': '#ff0000', '32': '#00ff00', '33': '#ffff00',
              '34': '#0000ff', '35': '#ff00ff', '36': '#00ffff', '37': '#ffffff'}
    full_ts = re.compile(
        r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)')
    short_ts = re.compile(
        r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)')

    ssubdata = StringIO(subdata.text)
    srt = ""
    subtract = False
    number_b = 1   # running cue counter
    number = 0     # last cue number read from the source
    block = 0      # 1 while inside a cue (after its timing line)
    subnr = False  # True when the source supplied the cue number itself
    if self.bom:
        ssubdata.read(1)

    for i in ssubdata.readlines():
        match = re.search(r"^[\r\n]+", i)
        match3 = re.search(r"^(\d+)\s", i)
        # A line only counts as a timing line when it parses in one of the
        # two timestamp forms; otherwise it is handled as ordinary content
        # instead of failing on a missing match.
        timing = full_ts.search(i)
        has_hours = timing is not None
        if timing is None:
            timing = short_ts.search(i)

        if i[:6] == "WEBVTT":
            continue
        elif "X-TIMESTAMP" in i:
            continue
        elif match and number_b == 1 and self.bom:
            continue
        elif match and number_b > 1:
            block = 0
            srt += "\n"
        elif timing:
            if not subnr:
                srt += "%s\n" % number_b
            if has_hours:
                hour1 = int(timing.group("h1"))
                hour2 = int(timing.group("h2"))
                if int(number) == 1 and hour1 > 9:
                    subtract = True
                if subtract:
                    hour1 -= 10
                    hour2 -= 10
            else:
                hour1 = 0
                hour2 = 0
            srt += "{0:02d}:{1}:{2} --> {3:02d}:{4}:{5}\n".format(
                hour1, timing.group("m1"), timing.group("s1").replace(".", ","),
                hour2, timing.group("m2"), timing.group("s2").replace(".", ","))
            block = 1
            subnr = False
            number_b += 1
        elif match3 and block == 0:
            number = match3.group(1)
            srt += "%s\n" % number
            subnr = True
        else:
            if self.options.convert_subtitle_colors:
                sub = i
                for tag, color in colors.items():
                    sub = sub.replace('<' + tag + '>', '<font color="' + color + '">')
                # Match one closing tag at a time; a greedy ".+" would
                # swallow the text between two coloured segments on a line.
                sub = re.sub('</[^>]+>', '</font>', sub)
            else:
                sub = re.sub('<[^>]*>', '', i)
            srt += sub.strip()
            srt += "\n"

    srt = decode_html_entities(srt)
    if is_py2:
        return srt.encode("utf-8")
    return srt
def wrst(self, subdata):
    """Convert a WebVTT-style subtitle response (``subdata.text``) to SRT.

    The input is processed line by line:

    * the ``WEBVTT`` header is dropped,
    * a blank line after the first cue ends the current cue,
    * a ``start --> end`` line becomes an SRT timing line, preceded by a
      running cue number unless the source supplied its own number line,
    * a leading number line outside a cue is copied as the cue number,
    * anything else is cue text: tags are stripped, or, with
      ``self.options.convert_subtitle_colors``, ``<30>``..``<37>`` become
      ``<font color=...>`` and closing tags become ``</font>``.

    If the cue numbered 1 starts at hour 10 or later, 10 hours are
    subtracted from all following timestamps.  HTML entities are decoded at
    the end; the result is UTF-8 encoded on Python 2.
    """
    colors = {
        '30': '#000000',
        '31': '#ff0000',
        '32': '#00ff00',
        '33': '#ffff00',
        '34': '#0000ff',
        '35': '#ff00ff',
        '36': '#00ffff',
        '37': '#ffffff',
    }
    # WebVTT allows the hours field to be omitted, so try "h:m:s" first and
    # fall back to "m:s".
    full_ts = re.compile(
        r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)')
    short_ts = re.compile(
        r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)')

    ssubdata = StringIO(subdata.text)
    srt = ""
    subtract = False
    number_b = 1   # running cue counter
    number = 0     # last cue number read from the source
    block = 0      # 1 while inside a cue (after its timing line)
    subnr = False  # True when the source supplied the cue number itself

    for i in ssubdata.readlines():
        match = re.search(r"^[\r\n]+", i)
        match3 = re.search(r"^(\d+)\s", i)
        timing = full_ts.search(i)
        has_hours = timing is not None
        if timing is None:
            timing = short_ts.search(i)

        if i[:6] == "WEBVTT":
            pass
        elif match and number_b > 1:
            block = 0
            srt += "\n"
        elif timing:
            if not subnr:
                srt += "%s\n" % number_b
            if has_hours:
                hour1 = int(timing.group("h1"))
                hour2 = int(timing.group("h2"))
                if int(number) == 1 and hour1 > 9:
                    subtract = True
                if subtract:
                    hour1 -= 10
                    hour2 -= 10
            else:
                hour1 = 0
                hour2 = 0
            # SRT timestamps are hh:mm:ss,mmm - pad the hours to two digits
            # and use a comma as the decimal separator.
            srt += "{0:02d}:{1}:{2} --> {3:02d}:{4}:{5}\n".format(
                hour1, timing.group("m1"), timing.group("s1").replace(".", ","),
                hour2, timing.group("m2"), timing.group("s2").replace(".", ","))
            block = 1
            subnr = False
            number_b += 1
        elif match3 and block == 0:
            number = match3.group(1)
            srt += "%s\n" % number
            subnr = True
        else:
            if self.options.convert_subtitle_colors:
                sub = i
                for tag, color in colors.items():
                    sub = sub.replace('<' + tag + '>', '<font color="' + color + '">')
                # Match one closing tag at a time; a greedy ".+" would
                # swallow the text between two coloured segments on a line.
                sub = re.sub('</[^>]+>', '</font>', sub)
            else:
                sub = re.sub('<[^>]*>', '', i)
            srt += sub.strip()
            srt += "\n"

    srt = decode_html_entities(srt)
    if is_py2:
        return srt.encode("utf-8")
    return srt
def wrst(self, subdata):
    """Convert a WebVTT-style subtitle response (``subdata.text``) to SRT.

    The input is processed line by line:

    * when ``self.bom`` is set, the first character is skipped, as are blank
      lines before the first cue,
    * the ``WEBVTT`` header and ``X-TIMESTAMP`` lines are dropped,
    * a blank line after the first cue ends the current cue,
    * a ``start --> end`` line (with or without hours) becomes an SRT timing
      line, preceded by a running cue number unless the source supplied its
      own number line,
    * a leading number line outside a cue is copied as the cue number,
    * anything else is cue text: tags are stripped, or, with
      ``self.options.convert_subtitle_colors``, ``<30>``..``<37>`` become
      ``<font color=...>`` and closing tags become ``</font>``.

    If the cue numbered 1 starts at hour 10 or later, 10 hours are
    subtracted from all following timestamps.  HTML entities are decoded at
    the end; the result is UTF-8 encoded on Python 2.
    """
    colors = {
        '30': '#000000',
        '31': '#ff0000',
        '32': '#00ff00',
        '33': '#ffff00',
        '34': '#0000ff',
        '35': '#ff00ff',
        '36': '#00ffff',
        '37': '#ffffff',
    }
    full_ts = re.compile(
        r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)')
    short_ts = re.compile(
        r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)')

    ssubdata = StringIO(subdata.text)
    srt = ""
    subtract = False
    number_b = 1   # running cue counter
    number = 0     # last cue number read from the source
    block = 0      # 1 while inside a cue (after its timing line)
    subnr = False  # True when the source supplied the cue number itself
    if self.bom:
        ssubdata.read(1)

    for i in ssubdata.readlines():
        match = re.search(r"^[\r\n]+", i)
        match3 = re.search(r"^(\d+)\s", i)
        # Only treat the line as a timing line when one of the two
        # timestamp forms actually parses; anything else is handled as
        # ordinary content rather than failing on a missing match.
        timing = full_ts.search(i)
        has_hours = timing is not None
        if timing is None:
            timing = short_ts.search(i)

        if i[:6] == "WEBVTT":
            continue
        elif "X-TIMESTAMP" in i:
            continue
        elif match and number_b == 1 and self.bom:
            continue
        elif match and number_b > 1:
            block = 0
            srt += "\n"
        elif timing:
            if not subnr:
                srt += "%s\n" % number_b
            if has_hours:
                hour1 = int(timing.group("h1"))
                hour2 = int(timing.group("h2"))
                if int(number) == 1 and hour1 > 9:
                    subtract = True
                if subtract:
                    hour1 -= 10
                    hour2 -= 10
            else:
                hour1 = 0
                hour2 = 0
            srt += "{0:02d}:{1}:{2} --> {3:02d}:{4}:{5}\n".format(
                hour1, timing.group("m1"), timing.group("s1").replace(".", ","),
                hour2, timing.group("m2"), timing.group("s2").replace(".", ","))
            block = 1
            subnr = False
            number_b += 1
        elif match3 and block == 0:
            number = match3.group(1)
            srt += "%s\n" % number
            subnr = True
        else:
            if self.options.convert_subtitle_colors:
                sub = i
                for tag, color in colors.items():
                    sub = sub.replace('<' + tag + '>', '<font color="' + color + '">')
                # One closing tag per match: the greedy '</.+>' form would
                # delete everything between the first "</" and the last ">".
                sub = re.sub('</[^>]+>', '</font>', sub)
            else:
                sub = re.sub('<[^>]*>', '', i)
            srt += sub.strip()
            srt += "\n"

    srt = decode_html_entities(srt)
    if is_py2:
        return srt.encode("utf-8")
    return srt