def convert(self, filename=None, keep_vtt=False):
    """Write the contents of the ``.vtt`` file *filename* out as ``.srt``.

    The output path is *filename* with ``".vtt"`` replaced by ``".srt"``.
    Lines are read through ``self._vttcontents``; everything from the first
    timecode line onwards is passed, one piece at a time, to
    ``self._write_srtcontent``.

    Args:
        filename: Path of the source subtitle file. When falsy nothing is
            done and the success status is returned.
        keep_vtt: When falsy, the source file is removed after conversion
            (removal errors are ignored).

    Returns:
        A dict with string-valued ``"status"`` (``"True"``/``"False"``) and
        ``"msg"`` keys, or, when ``self._vttcontents`` yields anything
        other than a non-empty list, that value unchanged.
    """
    if not filename:
        return {"status": "True", "msg": "successfully generated subtitle in srt ..."}

    srt_path = filename.replace(".vtt", ".srt")
    vtt_lines = self._vttcontents(fname=filename)
    if not (vtt_lines and isinstance(vtt_lines, list)):
        # Presumably an error/status value from the reader; hand it back
        # to the caller untouched.
        return vtt_lines

    located = self._locate_timecode(vtt_lines)
    if not located.get("status"):
        return {
            "status": "False",
            "msg": "subtitle file seems to have malfunction skipping conversion ..",
        }

    counter = 1
    for entry in vtt_lines[located.get("location"):]:
        if self._is_timecode(timecode=entry):
            # Timecode lines get a fresh sequence number in the output.
            cue_header = self._generate_timecode(counter, unescapeHTML(entry))
            self._write_srtcontent(srt_path, cue_header)
            counter += 1
        elif not re.match("^([0-9]{1,3})$", entry, flags=re.U):
            # Lines that are only a 1-3 digit number are dropped; all
            # other lines are copied through.
            self._write_srtcontent(srt_path, "{content}\r\n".format(content=entry))

    if not keep_vtt:
        try:
            os.unlink(filename)
        except Exception:  # pylint: disable=W
            # Best-effort cleanup: a failed delete does not fail the conversion.
            pass

    return {"status": "True", "msg": "successfully generated subtitle in srt ..."}
def _locate_timecode(self, content):
    """Find the index of the first line matching ``self._TIMECODE_REGEX``.

    Args:
        content: Iterable of text lines.

    Returns:
        ``{"status": True, "location": i}`` for the first matching index
        ``i``. When no line matches, ``"status"`` is ``False`` and
        ``"location"`` is the last index visited, or ``""`` if *content*
        was empty.
    """
    position = ""
    for position, text in enumerate(content):
        if re.match(self._TIMECODE_REGEX, text, flags=re.U):
            found = True
            break
    else:
        # Loop ran to completion (or never ran): nothing matched.
        found = False
    return {"status": found, "location": position}
def fix_kv(m):
    """Return the JSON-compatible replacement text for one matched token.

    Takes a regex match object and inspects its full text.
    NOTE(review): presumably used as a ``re.sub`` callback while turning
    JavaScript-style literals into JSON -- confirm against the caller.

    Handling, in order:
      * ``true`` / ``false`` / ``null`` are returned unchanged;
      * tokens starting with ``/*`` or ``//`` and a lone ``,`` become ``""``;
      * quoted tokens (single or double) are re-emitted as double-quoted
        strings with their escapes normalised;
      * tokens matching an ``INTEGER_TABLE`` pattern are emitted in decimal,
        double-quoted and followed by ``:`` when the token ends with ``:``;
      * anything else is wrapped in double quotes.

    ``INTEGER_TABLE`` is defined outside this block; it is iterated as
    ``(regex, base)`` pairs whose group 1 is parsed with ``int(..., base)``.

    Args:
        m: Match object; ``m.group(0)`` is the token text.

    Returns:
        The replacement string for the token.
    """
    token = m.group(0)
    if token in ("true", "false", "null"):
        return token
    if token.startswith(("/*", "//")) or token == ",":
        return ""

    if token[0] in ("'", '"'):
        escapes = {
            '"': '\\"',
            "\\'": "'",
            "\\\n": "",
            "\\x": "\\u00",
        }
        inner = re.sub(
            r'(?s)\\.|"',
            lambda esc: escapes.get(esc.group(0), esc.group(0)),
            token[1:-1],
        )
        # A quoted token is a string literal and must stay one. Previously
        # the unquoted text fell through to the integer conversion below,
        # so a string whose text looked like an integer was emitted as a
        # bare number.
        return '"%s"' % inner

    for regex, base in INTEGER_TABLE:
        im = re.match(regex, token)
        if im:
            i = int(im.group(1), base)
            return '"%d":' % i if token.endswith(":") else "%d" % i

    return '"%s"' % token
def _generate_timecode(self, sequence, timecode):
    """Build the numbered timing header for one subtitle entry.

    *timecode* is matched against ``self._TIMECODE``, which must expose
    the named groups ``appeartime`` and ``disappertime`` (spelled that way
    in the pattern). In each captured value every ``.`` or ``,`` is
    rewritten to ``,`` before it is passed through ``self._fix_timecode``.

    Args:
        sequence: Value printed on the first line of the header.
        timecode: Source timing line.

    Returns:
        ``"<sequence>\\r\\n<start> --> <end>\\r\\n"``, or ``""`` when
        *timecode* does not match.
    """
    parsed = re.match(self._TIMECODE, timecode, flags=re.U)
    if not parsed:
        return ""

    stamps = []
    for group_name in ("appeartime", "disappertime"):
        with_comma = re.sub(r"[\.,]", ",", parsed.group(group_name))
        stamps.append(self._fix_timecode(timecode=with_comma))
    begin, finish = stamps

    return "{seq}\r\n{appeartime} --> {disappertime}\r\n".format(
        seq=sequence, appeartime=begin, disappertime=finish
    )
def _is_timecode(self, timecode):
    """Tell whether *timecode* matches ``self._TIMECODE_REGEX``.

    The pattern is applied with ``re.match``, i.e. anchored at the start
    of the string.

    Returns:
        ``True`` on a match, ``False`` otherwise.
    """
    return re.match(self._TIMECODE_REGEX, timecode, flags=re.U) is not None