def read(self, content, lang=u'en-US', simulate_roll_up=False, offset=0):
    """Converts the unicode string into a CaptionSet

    :type content: six.text_type
    :param content: The SCC content to be converted to a CaptionSet

    :type lang: six.text_type
    :param lang: The language of the caption

    :type simulate_roll_up: bool
    :param simulate_roll_up: If True, when converting to other formats,
        the resulting captions will contain all the rows that were visible
        on the screen when the captions were rolling up.

    :type offset: int
    :param offset: Time offset applied through the time translator. The
        value is multiplied by 1,000,000 before being stored (presumably
        seconds converted to microseconds -- confirm against the time
        translator).

    :rtype: CaptionSet

    :raises InvalidInputError: if content is not a unicode string
    :raises ValueError: if a caption has a positive duration shorter
        than 50000
    :raises CaptionReadNoCaptions: if no captions were produced
    """
    # isinstance, unlike an exact type comparison, also accepts subclasses
    # of the text type
    if not isinstance(content, six.text_type):
        raise InvalidInputError(u'The content is not a unicode string.')

    self.simulate_roll_up = simulate_roll_up
    self.time_translator.offset = offset * 1000000

    # translate every line except the first one
    for line in content.splitlines()[1:]:
        self._translate_line(line)

    self._flush_implicit_buffers()

    captions = CaptionSet({lang: self.caption_stash.get_all()})

    # check captions for incorrect lengths
    for cap in captions.get_captions(lang):
        # a caption whose duration is positive but below 50000 (.05s) is
        # rejected with an error rather than silently dropped (this is
        # likely caused by a standalone EOC marker in the SCC file)
        if 0 < cap.end - cap.start < 50000:
            raise ValueError(
                'unsupported length found in SCC input file: ' +
                six.text_type(cap))

    if captions.is_empty():
        raise CaptionReadNoCaptions(u"empty caption file")

    # the caption set is not empty here, so indexing the last caption is safe
    last_caption = captions.get_captions(lang)[-1]
    last_caption.end = get_corrected_end_time(last_caption)

    return captions
def read(self, content, lang=u'en-US', simulate_roll_up=False, offset=0):
    """Converts the unicode string into a CaptionSet

    :type content: unicode
    :param content: The SCC content to be converted to a CaptionSet

    :type lang: unicode
    :param lang: The language of the caption

    :type simulate_roll_up: bool
    :param simulate_roll_up: If True, when converting to other formats,
        the resulting captions will contain all the rows that were visible
        on the screen when the captions were rolling up.

    :type offset: int
    :param offset: Time offset applied through the time translator. The
        value is multiplied by 1,000,000 before being stored (presumably
        seconds converted to microseconds -- confirm against the time
        translator).

    :rtype: CaptionSet

    :raises RuntimeError: if content is not a unicode string
    :raises CaptionReadNoCaptions: if no captions were produced
    """
    # isinstance, unlike an exact type comparison, also accepts subclasses
    # of unicode
    if not isinstance(content, unicode):
        raise RuntimeError(u'The content is not a unicode string.')

    # Preparation. Clear the cached positioning from when processing
    # other captions
    DefaultProvidingPositionTracer.reset_default_positioning()

    self.simulate_roll_up = simulate_roll_up
    self.time_translator.offset = offset * 1000000

    # translate every line except the first one
    for line in content.splitlines()[1:]:
        self._translate_line(line)

    self._flush_implicit_buffers()

    captions = CaptionSet()
    captions.set_captions(lang, self.caption_stash.get_all())

    if captions.is_empty():
        raise CaptionReadNoCaptions(u"empty caption file")

    return captions
def read(self, content, lang=u'en-US', simulate_roll_up=False, offset=0):
    """Converts the unicode string into a CaptionSet

    :type content: unicode
    :param content: The SCC content to be converted to a CaptionSet

    :type lang: unicode
    :param lang: The language of the caption

    :type simulate_roll_up: bool
    :param simulate_roll_up: If True, when converting to other formats,
        the resulting captions will contain all the rows that were visible
        on the screen when the captions were rolling up.

    :type offset: int
    :param offset: Time offset applied through the time translator. The
        value is multiplied by 1,000,000 before being stored (presumably
        seconds converted to microseconds -- confirm against the time
        translator).

    :rtype: CaptionSet

    :raises InvalidInputError: if content is not a unicode string
    :raises CaptionReadNoCaptions: if no captions were produced
    """
    # isinstance, unlike an exact type comparison, also accepts subclasses
    # of unicode
    if not isinstance(content, unicode):
        raise InvalidInputError(u'The content is not a unicode string.')

    self.simulate_roll_up = simulate_roll_up
    self.time_translator.offset = offset * 1000000

    # translate every line except the first one
    for line in content.splitlines()[1:]:
        self._translate_line(line)

    self._flush_implicit_buffers()

    captions = CaptionSet()
    captions.set_captions(lang, self.caption_stash.get_all())

    if captions.is_empty():
        raise CaptionReadNoCaptions(u"empty caption file")

    # the caption set is not empty here, so indexing the last caption is safe
    last_caption = captions.get_captions(lang)[-1]
    last_caption.end = get_corrected_end_time(last_caption)

    return captions