def _translate_tag(self, tag):
    """Append the caption nodes for ``tag`` and its descendants to ``self.line``.

    ``tag`` is either a parsed markup element (anything exposing ``name``
    and ``contents``) or a bare string.  Strings become text nodes, ``br``
    becomes a break node, ``i`` wraps its children in an italics style
    open/close pair, ``span`` is delegated to ``_translate_span``, and any
    other element is transparent: only its children are translated.
    """
    # A bare string has no ``name`` attribute; it is leaf text.
    try:
        tag_name = tag.name
    except AttributeError:
        self.line.append(CaptionData.create_text(tag.strip()))
        return

    if tag_name == 'br':
        self.line.append(CaptionData.create_break())
        return

    if tag_name == 'span':
        self._translate_span(tag)
        return

    # Everything else recurses into its children; only <i> additionally
    # brackets them with an italics open/close pair.
    is_italic = tag_name == 'i'
    if is_italic:
        self.line.append(CaptionData.create_style(True, {'italics': True}))
    for child in tag.contents:
        self._translate_tag(child)
    if is_italic:
        self.line.append(CaptionData.create_style(False, {'italics': True}))
def _convert_to_caption(self, buffer, start):
    """Build a caption from ``buffer`` and append it to ``self.scc``.

    ``buffer`` is a string of elements separated by ``'<$>'``.  Each
    element is one of the markers ``{break}``, ``{italic}``,
    ``{end-italic}``, or a run of text.  ``start`` becomes the start time
    of the new caption and, if the previous caption in ``self.scc`` still
    has an end time of 0, the end time of that previous caption.

    The new caption is created with ``end = 0`` ("not yet known"); it is
    only appended to ``self.scc`` when it ended up with at least one node.

    Side effects: resets ``self.open_italic`` and ``self.first_element``,
    which ``_translate_break`` also reads and updates.
    """
    # An end time of 0 marks a caption that is still waiting for its end;
    # the start of this caption closes it.
    if self.scc and self.scc[-1].end == 0:
        self.scc[-1].end = start

    caption = Caption()
    caption.start = start
    caption.end = 0  # not yet known; filled in by the next call
    self.open_italic = False
    self.first_element = True

    # split into elements (e.g. break, italics, text)
    for element in buffer.split('<$>'):
        # skip empty elements
        if element.strip() == '':
            continue

        if element == '{break}':
            self._translate_break(caption)

        elif element == '{italic}':
            caption.nodes.append(
                CaptionData.create_style(True, {'italics': True}))
            # italics are now open, and the caption has content
            self.open_italic = True
            self.first_element = False

        elif element == '{end-italic}':
            # Close italics only when they are actually open.  An
            # unmatched marker (e.g. after a break already closed the
            # italics) is dropped; it must never leak into the caption
            # as literal text.
            if self.open_italic:
                caption.nodes.append(
                    CaptionData.create_style(False, {'italics': True}))
                self.open_italic = False

        else:
            # Decode only byte strings; text that is already decoded is
            # used as-is instead of failing on ``.decode``.
            if isinstance(element, bytes):
                element = element.decode("utf-8")
            # collapse all internal whitespace runs to single spaces
            caption.nodes.append(
                CaptionData.create_text(' '.join(element.split())))
            self.first_element = False

    # close any italics left open at the end of the buffer
    if self.open_italic:
        caption.nodes.append(
            CaptionData.create_style(False, {'italics': True}))

    # remove extraneous italics tags in the same caption
    self._remove_italics(caption)

    # only keep captions that actually contain something
    if caption.nodes:
        self.scc.append(caption)
def _translate_break(self, caption):
    """Append a line break to ``caption.nodes`` unless it would be redundant.

    The break is skipped when nothing has been added to the caption yet
    (``self.first_element``) or when the most recent node is already a
    break.  If italics are currently open they are closed first and
    ``self.open_italic`` is cleared, so the style does not span the break.
    """
    # A leading break carries no meaning.
    if self.first_element:
        return

    # Never emit two breaks in a row.
    if caption.nodes[-1].type == CaptionData.BREAK:
        return

    # Close italics before breaking the line.
    if self.open_italic:
        closing = CaptionData.create_style(False, {'italics': True})
        caption.nodes.append(closing)
        self.open_italic = False

    caption.nodes.append(CaptionData.create_break())
def _translate_span(self, tag):
    """Translate a ``span`` element and its children into ``self.line``.

    The span's attributes are converted with ``_translate_attrs``.  When
    that yields anything other than the empty string, the children are
    wrapped in a style open/close node pair carrying the converted
    attributes; otherwise the span is transparent and only its children
    are translated.
    """
    style_args = self._translate_attrs(tag)
    # '' is what _translate_attrs hands back when there is nothing to style.
    has_style = style_args != ''

    if has_style:
        self.line.append(CaptionData.create_style(True, style_args))

    # Children are translated the same way whether or not the span is styled.
    for child in tag.contents:
        self._translate_tag(child)

    if has_style:
        self.line.append(CaptionData.create_style(False, style_args))
def _translate_tag(self, tag):
    """Append the caption nodes for ``tag`` and its descendants to ``self.nodes``.

    ``tag`` is either a parsed markup element (anything exposing ``name``
    and ``contents``) or a bare string.  Non-blank strings become text
    nodes (whitespace-only strings are dropped), ``br`` becomes a break
    node, ``span`` is delegated to ``_translate_span``, and any other
    element only has its children translated.
    """
    # A bare string has no ``name`` attribute; it is leaf text.
    try:
        tag_name = tag.name
    except AttributeError:
        text = tag.strip()
        if text:
            self.nodes.append(CaptionData.create_text(text))
        return

    if tag_name == 'br':
        # line break
        self.nodes.append(CaptionData.create_break())
    elif tag_name == 'span':
        # styled span: handled separately so attributes can be converted
        self._translate_span(tag)
    else:
        # any other element is transparent; descend into its children
        for child in tag.contents:
            self._translate_tag(child)
def _combine_matching_captions(self, captionset):
    """Merge consecutive captions that share both start and end time.

    For every language in ``captionset``, a caption whose ``start`` and
    ``end`` both equal those of the caption kept just before it is folded
    into that caption: a break node is appended, followed by the merged
    caption's nodes.  All other captions are kept as separate entries.

    The surviving caption objects are modified in place (their ``nodes``
    lists grow), and the per-language lists are replaced through
    ``captionset.set_captions``.

    Returns the same ``captionset`` object that was passed in.
    """
    for lang in captionset.get_languages():
        captions = captionset.get_captions(lang)
        # Seed with the first caption (if any); the rest are compared
        # against whatever was kept last.
        new_caps = captions[:1]

        for caption in captions[1:]:
            previous = new_caps[-1]
            # Compare against the last kept caption, not the list itself.
            if caption.start == previous.start and caption.end == previous.end:
                previous.nodes.append(CaptionData.create_break())
                previous.nodes.extend(caption.nodes)
            else:
                new_caps.append(caption)

        captionset.set_captions(lang, new_caps)

    return captionset
def read(self, content, lang="en"):
    """Parse SRT-style ``content`` into a ``CaptionSet``.

    ``content`` is split into lines and consumed cue by cue.  A cue starts
    on a line consisting only of digits; the following line holds the
    timing as ``<start> --> <end>``; the lines after that, up to
    ``end_line - 1``, are the cue text.  ``end_line`` comes from
    ``self._find_text_line`` and is also where the next cue is looked for.
    Parsing stops at the first cue position that is not an all-digit line.

    Each text line becomes a text node, with break nodes between (not
    after) consecutive lines.  Cues without any text lines are skipped.

    Returns a ``CaptionSet`` whose captions for ``lang`` are the parsed
    cues in file order.
    """
    captions = CaptionSet()
    inlines = content.splitlines()
    start_line = 0
    subdata = []

    while start_line < len(inlines):
        # Every cue must begin with its numeric counter line.
        if not inlines[start_line].isdigit():
            break

        end_line = self._find_text_line(start_line, inlines)

        timing = inlines[start_line + 1].split("-->")
        cue_start = self._srttomicro(timing[0].strip(" \r\n"))
        cue_end = self._srttomicro(timing[1].strip(" \r\n"))

        # Interleave breaks between text lines instead of appending a
        # trailing break and popping it, which fails on a cue with no text.
        # NOTE(review): the slice stops at end_line - 1, presumably to
        # drop the blank separator line - confirm against _find_text_line.
        nodes = []
        for line in inlines[start_line + 2:end_line - 1]:
            if nodes:
                nodes.append(CaptionData.create_break())
            nodes.append(CaptionData.create_text(line))

        # Only keep cues that actually contain text.
        if nodes:
            caption = Caption()
            caption.start = cue_start
            caption.end = cue_end
            caption.nodes.extend(nodes)
            subdata.append(caption)

        start_line = end_line

    captions.set_captions(lang, subdata)
    return captions