def smi(self, subdata):
    """Convert SAMI (``.smi``) subtitle data into SubRip-style text.

    Every ``<SYNC Start=N>`` marker closes the caption most recently read
    from a ``<P Class=SVCC>`` line.  That caption is written as a numbered
    cue spanning the previous sync value to the new one, both rendered by
    ``timestr``.

    :param subdata: raw subtitle payload; decoded as latin1 when ``is_py3``.
    :returns: the converted subtitles, UTF-8 encoded when ``is_py3``.
    """
    if is_py3:
        subdata = subdata.decode("latin1")

    # Compile once instead of on every input line.
    sync_re = re.compile(r"<SYNC Start=(\d+)>")
    caption_re = re.compile("<P Class=SVCC>(.*)")
    tag_re = re.compile(r'<[^>]+>')
    bad_char = re.compile(r'\x96')

    cue_start = 0
    number = 1
    data = None
    chunks = []
    for line in StringIO(subdata).readlines():
        line = line.rstrip()
        sync = sync_re.search(line)
        if sync:
            cue_end = sync.group(1)
            if int(cue_end) != int(cue_start):
                # SAMI clears the screen with a caption that is just the
                # "&nbsp;" entity.  The line was rstrip()ed above, so a
                # comparison against a lone space can never match; test the
                # stripped caption against the entity itself instead.
                if data and data.strip() not in ("", "&nbsp;"):
                    chunks.append("%s\n%s --> %s\n" % (
                        number, timestr(cue_start), timestr(cue_end)))
                    text = "%s\n" % tag_re.sub('', data.replace("<br>", "\n"))
                    # Make sure the cue is followed by a blank separator line.
                    if text[len(text) - 2] != "\n":
                        text += "\n"
                    chunks.append(text)
                    number += 1
            cue_start = cue_end
        caption = caption_re.search(line)
        if caption:
            data = caption.group(1)

    text = "".join(chunks).replace("\r", "")
    # 0x96 is replaced with a plain hyphen; quote entities are unescaped
    # (replacing '"' with itself, as before, had no effect).
    text = bad_char.sub('-', text).replace('&quot;', '"')
    if is_py3:
        return text.encode("utf-8")
    return text
def wrst(self, subdata):
    """Convert a WebVTT subtitle response into SubRip (SRT) text.

    The text is read from ``subdata.text`` line by line.  The ``WEBVTT``
    header is dropped, cue timings are rewritten with ``,`` as the
    fractional separator, markup tags are stripped from cue text and HTML
    entities are decoded by ``decode_html_entities``.

    :param subdata: object exposing the subtitle document as ``.text``.
    :returns: the SRT document; UTF-8 encoded when ``is_py2``.
    """
    blank_re = re.compile(r"^[\r\n]+")
    timing_hint_re = re.compile(r"([\d:\.]+ --> [\d:\.]+)")
    cue_id_re = re.compile(r"^(\d+)\s")
    # WebVTT timestamps may omit the hours field, so try "hh:mm:ss.ttt"
    # first and fall back to "mm:ss.ttt".
    full_timing_re = re.compile(
        r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> '
        r'(?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)')
    short_timing_re = re.compile(
        r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)')
    tag_re = re.compile('<[^>]*>')

    out = []
    subtract = False   # True once the first cue starts at hour 10 or later
    number_b = 1       # running cue counter used when the input has no ids
    number = 0         # last cue id read from the input
    block = 0          # 1 while inside a cue's text lines
    subnr = False      # True when a cue id was just written
    for line in StringIO(subdata.text).readlines():
        if line[:6] == "WEBVTT":
            continue

        if blank_re.search(line) and number_b > 1:
            block = 0
            out.append("\n")
            continue

        if timing_hint_re.search(line):
            timing = full_timing_re.search(line)
            if timing:
                hour1 = int(timing.group("h1"))
                hour2 = int(timing.group("h2"))
                if int(number) == 1 and hour1 > 9:
                    subtract = True
                if subtract:
                    hour1 -= 10
                    hour2 -= 10
            else:
                timing = short_timing_re.search(line)
                hour1 = hour2 = 0
            # A line that only resembles a timing falls through and is
            # handled as ordinary text instead of raising AttributeError.
            if timing:
                if not subnr:
                    out.append("%s\n" % number_b)
                # SRT expects two-digit hours.
                out.append("%02d:%s:%s --> %02d:%s:%s\n" % (
                    hour1, timing.group("m1"),
                    timing.group("s1").replace(".", ","),
                    hour2, timing.group("m2"),
                    timing.group("s2").replace(".", ",")))
                block = 1
                subnr = False
                number_b += 1
                continue

        cue_id = cue_id_re.search(line)
        if cue_id and block == 0:
            number = cue_id.group(1)
            out.append("%s\n" % number)
            subnr = True
        else:
            out.append(tag_re.sub('', line).strip())
            out.append("\n")

    srt = decode_html_entities("".join(out))
    if is_py2:
        return srt.encode("utf-8")
    return srt
def smi(self, subdata):
    """Convert a SAMI (``.smi``) subtitle response into SubRip-style text.

    Every ``<SYNC Start=N>`` marker closes the caption most recently read
    from a ``<P Class=SVCC>`` line.  That caption is written as a numbered
    cue spanning the previous sync value to the new one, both rendered by
    ``timestr``.  All tags except those starting with ``i`` / ``/i`` are
    removed and HTML entities are decoded by ``decode_html_entities``.

    :param subdata: response object; read through ``.content`` when
        ``requests_version`` is below 0x20300, otherwise through ``.text``
        after forcing the encoding to ISO-8859-1.
    :returns: the converted subtitles; UTF-8 encoded when ``is_py2`` and the
        result is a ``unicode`` object.
    """
    if requests_version < 0x20300:
        if is_py2:
            subdata = subdata.content
        else:
            subdata = subdata.content.decode("latin")
    else:
        subdata.encoding = "ISO-8859-1"
        subdata = subdata.text

    # Compile once instead of on every input line.
    sync_re = re.compile(r"<SYNC Start=(\d+)>")
    caption_re = re.compile("<P Class=SVCC>(.*)")
    tag_re = re.compile(r'<(?!\/?i).*?>')
    bad_char = re.compile(r'\x96')

    cue_start = 0
    number = 1
    data = None
    chunks = []
    for line in StringIO(subdata).readlines():
        line = line.rstrip()
        sync = sync_re.search(line)
        if sync:
            cue_end = sync.group(1)
            if int(cue_end) != int(cue_start):
                # SAMI clears the screen with a caption that is just the
                # "&nbsp;" entity.  The caption is still undecoded here and
                # the line was rstrip()ed, so comparing against a lone space
                # never matched and empty cues were emitted.
                if data and data.strip() not in ("", "&nbsp;"):
                    chunks.append("%s\n%s --> %s\n" % (
                        number, timestr(cue_start), timestr(cue_end)))
                    text = "%s\n" % tag_re.sub('', data.replace("<br>", "\n"))
                    text = decode_html_entities(text)
                    # Make sure the cue is followed by a blank separator line.
                    if text[len(text) - 2] != "\n":
                        text += "\n"
                    chunks.append(text)
                    number += 1
            cue_start = cue_end
        caption = caption_re.search(line)
        if caption:
            data = caption.group(1)

    # Drop carriage returns and replace 0x96 with a plain hyphen.
    text = bad_char.sub('-', "".join(chunks).replace("\r", ""))
    if is_py2 and isinstance(text, unicode):
        return text.encode("utf-8")
    return text
def wrst(self, subdata):
    """Convert a WebVTT subtitle response into SubRip (SRT) text.

    The text is read from ``subdata.text`` line by line.  The ``WEBVTT``
    header is dropped and cue timings are rewritten with ``,`` as the
    fractional separator.  Cue text either has its markup stripped or, when
    ``self.options.convert_subtitle_colors`` is set, has the numeric tags
    ``<30>`` .. ``<37>`` turned into ``<font color=...>`` tags.  HTML
    entities are decoded by ``decode_html_entities``.

    :param subdata: object exposing the subtitle document as ``.text``.
    :returns: the SRT document; UTF-8 encoded when ``is_py2``.
    """
    blank_re = re.compile(r"^[\r\n]+")
    timing_hint_re = re.compile(r"([\d:\.]+ --> [\d:\.]+)")
    cue_id_re = re.compile(r"^(\d+)\s")
    # WebVTT timestamps may omit the hours field, so try "hh:mm:ss.ttt"
    # first and fall back to "mm:ss.ttt".
    full_timing_re = re.compile(
        r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> '
        r'(?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)')
    short_timing_re = re.compile(
        r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)')
    tag_re = re.compile('<[^>]*>')
    # One closing tag at a time: the greedy '</.+>' used previously removed
    # everything between the first closing tag and the last '>' on the line.
    close_tag_re = re.compile('</[^>]+>')
    colors = {
        '30': '#000000',
        '31': '#ff0000',
        '32': '#00ff00',
        '33': '#ffff00',
        '34': '#0000ff',
        '35': '#ff00ff',
        '36': '#00ffff',
        '37': '#ffffff',
    }
    font_tags = [('<' + tag + '>', '<font color="' + color + '">')
                 for tag, color in colors.items()]

    out = []
    subtract = False   # True once the first cue starts at hour 10 or later
    number_b = 1       # running cue counter used when the input has no ids
    number = 0         # last cue id read from the input
    block = 0          # 1 while inside a cue's text lines
    subnr = False      # True when a cue id was just written
    for line in StringIO(subdata.text).readlines():
        if line[:6] == "WEBVTT":
            continue

        if blank_re.search(line) and number_b > 1:
            block = 0
            out.append("\n")
            continue

        if timing_hint_re.search(line):
            timing = full_timing_re.search(line)
            if timing:
                hour1 = int(timing.group("h1"))
                hour2 = int(timing.group("h2"))
                if int(number) == 1 and hour1 > 9:
                    subtract = True
                if subtract:
                    hour1 -= 10
                    hour2 -= 10
            else:
                timing = short_timing_re.search(line)
                hour1 = hour2 = 0
            # A line that only resembles a timing falls through and is
            # handled as ordinary text instead of raising AttributeError.
            if timing:
                if not subnr:
                    out.append("%s\n" % number_b)
                # SRT expects two-digit hours.
                out.append("%02d:%s:%s --> %02d:%s:%s\n" % (
                    hour1, timing.group("m1"),
                    timing.group("s1").replace(".", ","),
                    hour2, timing.group("m2"),
                    timing.group("s2").replace(".", ",")))
                block = 1
                subnr = False
                number_b += 1
                continue

        cue_id = cue_id_re.search(line)
        if cue_id and block == 0:
            number = cue_id.group(1)
            out.append("%s\n" % number)
            subnr = True
            continue

        if self.options.convert_subtitle_colors:
            sub = line
            for open_tag, font_tag in font_tags:
                sub = sub.replace(open_tag, font_tag)
            sub = close_tag_re.sub('</font>', sub)
        else:
            sub = tag_re.sub('', line)
        out.append(sub.strip())
        out.append("\n")

    srt = decode_html_entities("".join(out))
    if is_py2:
        return srt.encode("utf-8")
    return srt
def wrst(self, subdata):
    """Convert a WebVTT subtitle response into SubRip (SRT) text.

    The text is read from ``subdata.text`` line by line.  The ``WEBVTT``
    header and any line containing ``X-TIMESTAMP`` are dropped, and cue
    timings are rewritten with ``,`` as the fractional separator and
    two-digit hours.  Cue text either has its markup stripped or, when
    ``self.options.convert_subtitle_colors`` is set, has the numeric tags
    ``<30>`` .. ``<37>`` turned into ``<font color=...>`` tags.  HTML
    entities are decoded by ``decode_html_entities``.

    :param subdata: object exposing the subtitle document as ``.text``.
    :returns: the SRT document; UTF-8 encoded when ``is_py2``.
    """
    blank_re = re.compile(r"^[\r\n]+")
    timing_hint_re = re.compile(r"([\d:\.]+ --> [\d:\.]+)")
    cue_id_re = re.compile(r"^(\d+)\s")
    full_timing_re = re.compile(
        r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> '
        r'(?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)')
    # Fallback for timestamps written without an hours field.
    short_timing_re = re.compile(
        r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)')
    tag_re = re.compile('<[^>]*>')
    # One closing tag at a time: the greedy '</.+>' used previously removed
    # everything between the first closing tag and the last '>' on the line.
    close_tag_re = re.compile('</[^>]+>')
    colors = {
        '30': '#000000',
        '31': '#ff0000',
        '32': '#00ff00',
        '33': '#ffff00',
        '34': '#0000ff',
        '35': '#ff00ff',
        '36': '#00ffff',
        '37': '#ffffff',
    }
    font_tags = [('<' + tag + '>', '<font color="' + color + '">')
                 for tag, color in colors.items()]

    ssubdata = StringIO(subdata.text)
    if self.bom:
        # Drop the first character (presumably the byte-order mark).
        ssubdata.read(1)

    out = []
    subtract = False   # True once the first cue starts at hour 10 or later
    number_b = 1       # running cue counter used when the input has no ids
    number = 0         # last cue id read from the input
    block = 0          # 1 while inside a cue's text lines
    subnr = False      # True when a cue id was just written
    for line in ssubdata.readlines():
        if line[:6] == "WEBVTT" or "X-TIMESTAMP" in line:
            continue

        if blank_re.search(line):
            if number_b == 1 and self.bom:
                # Blank lines ahead of the first cue are skipped.
                continue
            if number_b > 1:
                block = 0
                out.append("\n")
                continue

        if timing_hint_re.search(line):
            timing = full_timing_re.search(line)
            if timing:
                hour1 = int(timing.group("h1"))
                hour2 = int(timing.group("h2"))
                if int(number) == 1 and hour1 > 9:
                    subtract = True
                if subtract:
                    hour1 -= 10
                    hour2 -= 10
            else:
                timing = short_timing_re.search(line)
                hour1 = hour2 = 0
            # The fallback can fail as well; such a line falls through and
            # is handled as ordinary text instead of raising AttributeError.
            if timing:
                if not subnr:
                    out.append("%s\n" % number_b)
                out.append("{0:02d}:{1}:{2} --> {3:02d}:{4}:{5}\n".format(
                    hour1, timing.group("m1"),
                    timing.group("s1").replace(".", ","),
                    hour2, timing.group("m2"),
                    timing.group("s2").replace(".", ",")))
                block = 1
                subnr = False
                number_b += 1
                continue

        cue_id = cue_id_re.search(line)
        if cue_id and block == 0:
            number = cue_id.group(1)
            out.append("%s\n" % number)
            subnr = True
            continue

        if self.options.convert_subtitle_colors:
            sub = line
            for open_tag, font_tag in font_tags:
                sub = sub.replace(open_tag, font_tag)
            sub = close_tag_re.sub('</font>', sub)
        else:
            sub = tag_re.sub('', line)
        out.append(sub.strip())
        out.append("\n")

    srt = decode_html_entities("".join(out))
    if is_py2:
        return srt.encode("utf-8")
    return srt
def wrst(self, subdata):
    """Turn WebVTT subtitle text (``subdata.text``) into an SRT document.

    Header lines (``WEBVTT`` and anything containing ``X-TIMESTAMP``) are
    discarded.  Timing lines are rewritten with two-digit hours and ``,`` as
    the fractional separator.  Text lines are stripped of markup unless
    ``self.options.convert_subtitle_colors`` is set, in which case the
    numeric tags ``<30>`` .. ``<37>`` become ``<font color=...>`` tags and
    closing tags become ``</font>``.  The result is finally passed through
    ``decode_html_entities``.

    :param subdata: object exposing the subtitle document as ``.text``.
    :returns: the SRT document; UTF-8 encoded when ``is_py2``.
    """
    blank_re = re.compile(r"^[\r\n]+")
    timing_hint_re = re.compile(r"([\d:\.]+ --> [\d:\.]+)")
    cue_id_re = re.compile(r"^(\d+)\s")
    full_timing_re = re.compile(
        r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> '
        r'(?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)')
    # Fallback for timestamps written without an hours field.
    short_timing_re = re.compile(
        r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)')
    tag_re = re.compile('<[^>]*>')
    # Match a single closing tag.  The former greedy pattern '</.+>' ran to
    # the last '>' on the line and deleted the text in between.
    close_tag_re = re.compile('</[^>]+>')
    colors = {'30': '#000000', '31': '#ff0000', '32': '#00ff00',
              '33': '#ffff00', '34': '#0000ff', '35': '#ff00ff',
              '36': '#00ffff', '37': '#ffffff'}
    font_tags = [('<' + tag + '>', '<font color="' + color + '">')
                 for tag, color in colors.items()]

    ssubdata = StringIO(subdata.text)
    if self.bom:
        # Drop the first character (presumably the byte-order mark).
        ssubdata.read(1)

    pieces = []
    subtract = False   # True once the first cue starts at hour 10 or later
    number_b = 1       # running cue counter used when the input has no ids
    number = 0         # last cue id read from the input
    block = 0          # 1 while inside a cue's text lines
    subnr = False      # True when a cue id was just written
    for line in ssubdata.readlines():
        if line[:6] == "WEBVTT" or "X-TIMESTAMP" in line:
            continue

        if blank_re.search(line):
            if number_b == 1 and self.bom:
                # Blank lines ahead of the first cue are skipped.
                continue
            if number_b > 1:
                block = 0
                pieces.append("\n")
                continue

        if timing_hint_re.search(line):
            timing = full_timing_re.search(line)
            if timing:
                hour1 = int(timing.group("h1"))
                hour2 = int(timing.group("h2"))
                if int(number) == 1 and hour1 > 9:
                    subtract = True
                if subtract:
                    hour1 -= 10
                    hour2 -= 10
            else:
                timing = short_timing_re.search(line)
                hour1 = hour2 = 0
            # Neither pattern may match; then the line is treated as plain
            # text below rather than crashing on ``None.group``.
            if timing:
                if not subnr:
                    pieces.append("%s\n" % number_b)
                pieces.append("{0:02d}:{1}:{2} --> {3:02d}:{4}:{5}\n".format(
                    hour1, timing.group("m1"),
                    timing.group("s1").replace(".", ","),
                    hour2, timing.group("m2"),
                    timing.group("s2").replace(".", ",")))
                block = 1
                subnr = False
                number_b += 1
                continue

        cue_id = cue_id_re.search(line)
        if cue_id and block == 0:
            number = cue_id.group(1)
            pieces.append("%s\n" % number)
            subnr = True
            continue

        if self.options.convert_subtitle_colors:
            sub = line
            for open_tag, font_tag in font_tags:
                sub = sub.replace(open_tag, font_tag)
            sub = close_tag_re.sub('</font>', sub)
        else:
            sub = tag_re.sub('', line)
        pieces.append(sub.strip())
        pieces.append("\n")

    srt = decode_html_entities("".join(pieces))
    if is_py2:
        return srt.encode("utf-8")
    return srt