from babelsubs.generators.base import BaseGenerator, register


class TXTGenerator(BaseGenerator):
    """Plain-text generator: emits only the subtitle text, no timing."""

    file_type = 'txt'

    def __init__(self, subtitle_set, line_delimiter=u'\n\n', language=None):
        """Store the inputs on the instance.

        subtitle_set   -- object exposing ``subtitle_items()``
        line_delimiter -- string placed between consecutive subtitle texts
                          (a blank line by default)
        language       -- stored as-is; not used by this class itself
        """
        self.subtitle_set = subtitle_set
        self.line_delimiter = line_delimiter
        self.language = language

    def __unicode__(self):
        """Return the stripped text of every subtitle, joined by the delimiter.

        Subtitles whose content is falsy (``None`` or an empty string) are
        left out. Content that is only whitespace is truthy, so it still
        contributes an empty string after stripping.
        """
        # Only the third field of each item (the text) is needed here.
        texts = [
            content.strip()
            for _, _, content, _ in self.subtitle_set.subtitle_items()
            if content
        ]
        return self.line_delimiter.join(texts)


register(TXTGenerator)
}, ... ] """ file_type = 'json' MAPPINGS = dict(linebreaks="<br>", bold="<b>%s</b>", italics="<i>%s</i>", underline="<u>%s</u>", quote_text=escape) def __unicode__(self): output = [] # FIXME: allow formatting tags i = 1 for from_ms, to_ms, content, meta in self.subtitle_set.subtitle_items( mappings=self.MAPPINGS): output.append({ 'start': from_ms, 'end': to_ms, 'text': content, 'position': i, 'meta': meta }) i += 1 return json.dumps(output) register(JSONGenerator)
if len(subtitle_sets) == 0: raise TypeError( "DFXPGenerator.merge_subtitles: No subtitles given") if initial_ttml is None: tt = SubtitleSet('').as_etree_node() body = tt.find(TTML + 'body') body.remove(body.find(TTML + 'div')) else: tt = initial_ttml body = tt.find(TTML + 'body') if body is None: raise ValueError("no body tag") # set the default language to blank. We will create a div for each # subtitle set and set xml:lang on that. tt.set(XML + 'lang', '') # for each subtitle set we will append the body of tt for i, subtitle_set in enumerate(subtitle_sets): root_elt = subtitle_set.as_etree_node() language_code = root_elt.get(XML + 'lang') lang_div = etree.SubElement(body, TTML + 'div') lang_div.set(XML + 'lang', language_code) lang_div.extend(root_elt.find(TTML + 'body').findall(TTML + 'div')) utils.indent_ttml(tt) return etree.tostring(tt) register(DFXPGenerator)
"end": 40300, "text": "Hello there", "position": 1 }, ... ] """ file_type = 'json' MAPPINGS = dict(linebreaks="<br>", bold="<b>%s</b>", italics="<i>%s</i>", underline="<u>%s</u>", quote_text=escape) def __unicode__(self): output = [] # FIXME: allow formatting tags i = 1 for from_ms, to_ms, content, meta in self.subtitle_set.subtitle_items(mappings=self.MAPPINGS): output.append({ 'start': from_ms, 'end': to_ms, 'text': content, 'position': i, 'meta': meta }) i += 1 return json.dumps(output) register(JSONGenerator)
def __init__(self, subtitle_set, language=None):
    """Initialise through the parent class, then force a CRLF delimiter.

    NOTE(review): ``language`` is forwarded as the second positional
    argument -- confirm that the parent initializer expects it in that slot.
    """
    parent = super(WEBVTTGenerator, self)
    parent.__init__(subtitle_set, language)
    self.line_delimiter = '\r\n'

def __unicode__(self):
    """Render the subtitle set as WebVTT text.

    The output starts with the ``WEBVTT`` header entry. Each subtitle then
    adds four entries: a 1-based counter, a ``start --> end`` timing line,
    the content, and an empty string. All entries are joined with
    ``self.line_delimiter`` and the final character of the result is
    dropped.

    NOTE(review): with the two-character CRLF delimiter set in ``__init__``
    the slice removes only the last of the two characters -- confirm that
    is intended.
    """
    lines = ['WEBVTT\n']
    cues = self.subtitle_set.subtitle_items(mappings=self.MAPPINGS)
    for position, (from_ms, to_ms, content, _meta) in enumerate(cues, 1):
        timing = u'%s --> %s' % (
            self.format_time(from_ms),
            self.format_time(to_ms),
        )
        lines.extend((unicode(position), timing, content, u''))
    rendered = self.line_delimiter.join(lines)
    return rendered[:-1]

def format_time(self, milliseconds):
    """Format a millisecond offset as ``HH:MM:SS.mmm``.

    ``None`` is replaced by ``UNSYNCED_TIME_FULL`` before formatting.
    """
    total_ms = UNSYNCED_TIME_FULL if milliseconds is None else milliseconds
    total_seconds, millis = divmod(int(total_ms), 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hrs, mins = divmod(total_minutes, 60)
    return u"%02i:%02i:%02i.%03i" % (hrs, mins, secs, millis)


register(WEBVTTGenerator)
def __init__(self, subtitle_set, language=None):
    """Initialise through the parent class, then force a CRLF delimiter.

    NOTE(review): ``language`` is forwarded as the second positional
    argument -- confirm that the parent initializer expects it in that slot.
    """
    parent = super(HTMLGenerator, self)
    parent.__init__(subtitle_set, language)
    self.line_delimiter = '\r\n'

def __unicode__(self):
    """Render every subtitle as a numbered cue.

    Each subtitle adds four entries: a 1-based counter, a
    ``start --> end`` timing line, the content, and an empty string. The
    entries are joined with ``self.line_delimiter``.
    """
    lines = []
    cues = self.subtitle_set.subtitle_items(mappings=self.MAPPINGS)
    for position, (from_ms, to_ms, content, _meta) in enumerate(cues, 1):
        timing = u'%s --> %s' % (
            self.format_time(from_ms),
            self.format_time(to_ms),
        )
        lines.extend((unicode(position), timing, content, u''))
    return self.line_delimiter.join(lines)

def format_time(self, milliseconds):
    """Format a millisecond offset as ``HH:MM:SS,mmm``.

    ``None`` is replaced by ``UNSYNCED_TIME_FULL`` before formatting.
    """
    total_ms = UNSYNCED_TIME_FULL if milliseconds is None else milliseconds
    total_seconds, millis = divmod(int(total_ms), 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hrs, mins = divmod(total_minutes, 60)
    return u"%02i:%02i:%02i,%03i" % (hrs, mins, secs, millis)


register(HTMLGenerator)
quote_text=escape) def __init__(self, subtitle_set, language=None): super(HTMLGenerator, self).__init__(subtitle_set, language) self.line_delimiter = '\r\n' def __unicode__(self): output = [] i = 1 for from_ms, to_ms, content, meta in self.subtitle_set.subtitle_items( mappings=self.MAPPINGS): output.append(unicode(i)) output.append(u'%s --> %s' % (self.format_time(from_ms), self.format_time(to_ms))) output.append(content) output.append(u'') i += 1 return self.line_delimiter.join(output) def format_time(self, milliseconds): if milliseconds is None: milliseconds = UNSYNCED_TIME_FULL seconds, milliseconds = divmod(int(milliseconds), 1000) minutes, seconds = divmod(seconds, 60) hours, minutes = divmod(minutes, 60) return u"%02i:%02i:%02i,%03i" % (hours, minutes, seconds, milliseconds) register(HTMLGenerator)
def __init__(self, subtitle_set, language=None):
    """Initialise through the parent class, then force a CRLF delimiter.

    NOTE(review): ``language`` is forwarded as the second positional
    argument -- confirm that the parent initializer expects it in that slot.
    """
    parent = super(SRTGenerator, self)
    parent.__init__(subtitle_set, language)
    self.line_delimiter = '\r\n'

def __unicode__(self):
    """Render every subtitle as a numbered SRT-style cue.

    Each subtitle adds four entries: a 1-based counter, a
    ``start --> end`` timing line, the content, and an empty string. The
    entries are joined with ``self.line_delimiter``.
    """
    # FIX ME: allow formatting tags
    lines = []
    cues = self.subtitle_set.subtitle_items(mappings=self.MAPPINGS)
    for position, (from_ms, to_ms, content, _meta) in enumerate(cues, 1):
        timing = u'%s --> %s' % (
            self.format_time(from_ms),
            self.format_time(to_ms),
        )
        lines.extend((unicode(position), timing, content, u''))
    return self.line_delimiter.join(lines)

def format_time(self, milliseconds):
    """Format a millisecond offset as ``HH:MM:SS,mmm``.

    ``None`` is replaced by ``UNSYNCED_TIME_FULL`` before formatting.
    """
    total_ms = UNSYNCED_TIME_FULL if milliseconds is None else milliseconds
    total_seconds, millis = divmod(int(total_ms), 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hrs, mins = divmod(total_minutes, 60)
    return u"%02i:%02i:%02i,%03i" % (hrs, mins, secs, millis)


register(SRTGenerator)
"""Combine multiple subtitles sets into a single XML string. """ if len(subtitle_sets) == 0: raise TypeError("DFXPGenerator.merge_subtitles: No subtitles given") if initial_ttml is None: tt = SubtitleSet('').as_etree_node() body = tt.find(TTML + 'body') body.remove(body.find(TTML + 'div')) else: tt = initial_ttml body = tt.find(TTML + 'body') if body is None: raise ValueError("no body tag") # set the default language to blank. We will create a div for each # subtitle set and set xml:lang on that. tt.set(XML + 'lang', '') # for each subtitle set we will append the body of tt for i, subtitle_set in enumerate(subtitle_sets): root_elt = subtitle_set.as_etree_node() language_code = root_elt.get(XML + 'lang') lang_div = etree.SubElement(body, TTML + 'div') lang_div.set(XML + 'lang', language_code) lang_div.extend(root_elt.find(TTML + 'body').findall(TTML + 'div')) utils.indent_ttml(tt) return etree.tostring(tt) register(DFXPGenerator)
# Handed to subtitle_items(); maps the 'linebreaks' key to the "[br]" marker.
MAPPINGS = dict(linebreaks="[br]")

def __init__(self, subtitles_set, line_delimiter=u'\r\n', language=None):
    """Forward all three arguments, in order, to the parent initializer.

    The delimiter defaults to CRLF.
    """
    super(SBVGenerator, self).__init__(subtitles_set, line_delimiter,
                                       language)

def __unicode__(self):
    """Render the subtitle set as SBV text.

    Each subtitle adds three entries: a ``start,end`` timing line, the
    stripped content, and an empty string. The entries are joined with
    ``self.line_delimiter``.
    """
    output = []
    items = self.subtitle_set.subtitle_items(self.MAPPINGS)
    for from_ms, to_ms, content, meta in items:
        start = self.format_time(from_ms)
        end = self.format_time(to_ms)
        output.append(u'%s,%s' % (start, end))
        output.append(content.strip())
        output.append(u'')
    return self.line_delimiter.join(output)

def format_time(self, time):
    """Format a millisecond offset as ``H:MM:SS.mmm``.

    ``None`` is replaced by ``UNSYNCED_TIME_ONE_HOUR_DIGIT`` before
    formatting.
    """
    # Only None means "unsynced". The previous truthiness test also caught
    # 0, so a cue starting at 0 ms was written out as the unsynced marker.
    if time is None:
        time = UNSYNCED_TIME_ONE_HOUR_DIGIT
    seconds, milliseconds = divmod(int(time), 1000)
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return u'%01i:%02i:%02i.%03i' % (hours, minutes, seconds, milliseconds)


register(SBVGenerator)
else: components = utils.milliseconds_to_time_clock_components( milliseconds, unsynced_val=utils.UNSYNCED_TIME_ONE_HOUR_DIGIT, use_centiseconds=True) return u'%(hours)i:%(minutes)02i:%(seconds)02i.%(centiseconds)02i' % components def _clean_text(self, text): return text.replace('\n', ' ') def _content(self): dl = self.line_delimiter output = [] output.append(u'[Events]%s' % dl) output.append( u'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text%s' % dl) tpl = u'Dialogue: 0,%s,%s,Default,,0000,0000,0000,,%s%s' for from_ms, to_ms, content, meta in self.subtitle_set.subtitle_items( self.MAPPINGS): start = self.format_time(from_ms) end = self.format_time(to_ms) text = self._clean_text(content) output.append(tpl % (start, end, text, dl)) return ''.join(output) register(SSAGenerator)
def __init__(self, subtitle_set, language=None):
    """Initialise through the parent class, then force a CRLF delimiter.

    NOTE(review): ``language`` is forwarded as the second positional
    argument -- confirm that the parent initializer expects it in that slot.
    """
    parent = super(SRTGenerator, self)
    parent.__init__(subtitle_set, language)
    self.line_delimiter = '\r\n'

def __unicode__(self):
    """Render every subtitle as a numbered SRT-style cue.

    Each subtitle adds four entries: a 1-based counter, a
    ``start --> end`` timing line, the content, and an empty string. The
    entries are joined with ``self.line_delimiter``.
    """
    # FIX ME: allow formatting tags
    lines = []
    cues = self.subtitle_set.subtitle_items(mappings=self.MAPPINGS)
    for position, (from_ms, to_ms, content, _meta) in enumerate(cues, 1):
        begin = self.format_time(from_ms)
        finish = self.format_time(to_ms)
        lines += [unicode(position), u'%s --> %s' % (begin, finish),
                  content, u'']
    return self.line_delimiter.join(lines)

def format_time(self, milliseconds):
    """Format a millisecond offset as ``HH:MM:SS,mmm``.

    ``None`` is replaced by ``UNSYNCED_TIME_FULL`` before formatting.
    """
    total_ms = UNSYNCED_TIME_FULL if milliseconds is None else milliseconds
    total_seconds, millis = divmod(int(total_ms), 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hrs, mins = divmod(total_minutes, 60)
    return u"%02i:%02i:%02i,%03i" % (hrs, mins, secs, millis)


register(SRTGenerator)
def format_time(self, milliseconds):
    """Format a millisecond offset as ``H:MM:SS.cc`` (centiseconds).

    ``None`` takes its components from ``utils.unsynced_time_components``;
    any other value goes through
    ``utils.milliseconds_to_time_clock_components``.
    """
    pattern = u'%(hours)i:%(minutes)02i:%(seconds)02i.%(centiseconds)02i'
    if milliseconds is not None:
        parts = utils.milliseconds_to_time_clock_components(
            milliseconds,
            unsynced_val=utils.UNSYNCED_TIME_ONE_HOUR_DIGIT,
            use_centiseconds=True)
    else:
        parts = utils.unsynced_time_components(
            one_hour_digit=True, uses_centiseconds=True)
    return pattern % parts

def _clean_text(self, text):
    """Return ``text`` with every newline character turned into a space."""
    return ' '.join(text.split('\n'))

def _content(self):
    """Build the ``[Events]`` section: header, format line, dialogue lines.

    Every entry already ends with ``self.line_delimiter``, so the pieces
    are concatenated with no separator.
    """
    eol = self.line_delimiter
    pieces = [
        u'[Events]%s' % eol,
        u'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text%s' % eol,
    ]
    dialogue = u'Dialogue: 0,%s,%s,Default,,0000,0000,0000,,%s%s'
    cues = self.subtitle_set.subtitle_items(self.MAPPINGS)
    for from_ms, to_ms, content, _meta in cues:
        pieces.append(dialogue % (
            self.format_time(from_ms),
            self.format_time(to_ms),
            self._clean_text(content),
            eol,
        ))
    return ''.join(pieces)


register(SSAGenerator)
# Handed to subtitle_items(); maps the 'linebreaks' key to the "[br]" marker.
MAPPINGS = dict(linebreaks="[br]")

def __init__(self, subtitles_set, line_delimiter=u'\r\n', language=None):
    """Forward all three arguments, in order, to the parent initializer.

    The delimiter defaults to CRLF.
    """
    parent = super(SBVGenerator, self)
    parent.__init__(subtitles_set, line_delimiter, language)

def __unicode__(self):
    """Render the subtitle set as SBV text.

    Each subtitle adds three entries: a ``start,end`` timing line, the
    stripped content, and an empty string. The entries are joined with
    ``self.line_delimiter``.
    """
    lines = []
    cues = self.subtitle_set.subtitle_items(self.MAPPINGS)
    for from_ms, to_ms, content, _meta in cues:
        timing = u'%s,%s' % (
            self.format_time(from_ms),
            self.format_time(to_ms),
        )
        lines.extend((timing, content.strip(), u''))
    return self.line_delimiter.join(lines)

def format_time(self, time):
    """Format a millisecond offset as ``H:MM:SS.mmm``.

    ``None`` is replaced by ``UNSYNCED_TIME_ONE_HOUR_DIGIT`` before
    formatting.
    """
    total_ms = UNSYNCED_TIME_ONE_HOUR_DIGIT if time is None else time
    total_seconds, millis = divmod(int(total_ms), 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hrs, mins = divmod(total_minutes, 60)
    return u'%01i:%02i:%02i.%03i' % (hrs, mins, secs, millis)


register(SBVGenerator)
if sub.new_paragraph: output.append(u'NOTE Paragraph') output.append(u'') output.append(self.format_cue_header(sub)) output.append(sub.text) output.append(u'') return self.line_delimiter.join(output)[:-1] def format_cue_header(self, sub): parts = [] parts.append(u'%s --> %s' % ( self.format_time(sub.start_time), self.format_time(sub.end_time) )) if sub.region == 'top': parts.append('line:1') return ' '.join(parts) def format_time(self, milliseconds): if milliseconds is None: milliseconds = UNSYNCED_TIME_FULL seconds, milliseconds = divmod(int(milliseconds), 1000) minutes, seconds = divmod(seconds, 60) hours, minutes = divmod(minutes, 60) return u"%02i:%02i:%02i.%03i" % (hours, minutes, seconds, milliseconds) register(WEBVTTGenerator)