def test_copy_to(self):
    """Check that `copy_to()` transfers lang, space, id, animation steps and styles."""
    color_prop = styles.StyleProperties.Color

    # populate the original element with one of each copied attribute
    original = model.Br()
    original.set_lang("fr")
    original.set_space(model.WhiteSpaceHandling.PRESERVE)
    original.set_id("hello")
    animation_step = model.DiscreteAnimationStep(
        color_prop,
        0,
        1,
        styles.NamedColors.aqua.value
    )
    original.add_animation_step(animation_step)
    original.set_style(color_prop, styles.NamedColors.green.value)

    # copy into a fresh element
    clone = model.Br()
    original.copy_to(clone)

    # scalar attributes
    self.assertEqual(clone.get_lang(), original.get_lang())
    self.assertEqual(clone.get_space(), original.get_space())
    self.assertEqual(clone.get_id(), original.get_id())

    # collections, compared in iteration order
    self.assertSequenceEqual(
        list(clone.iter_animation_steps()),
        list(original.iter_animation_steps())
    )
    self.assertSequenceEqual(
        list(clone.iter_styles()),
        list(original.iter_styles())
    )
def from_xml(
    parent_ctx: typing.Optional[TTMLElement.ParsingContext],
    xml_elem: et.Element
) -> typing.Optional[BrElement.ParsingContext]:
    """Create a parsing context wrapping a new `model.Br` and process `xml_elem` with it.

    The `Br` is created in the document of `parent_ctx`, and the new context
    is returned after its `process()` method has been run on `xml_elem`.
    """
    # NOTE(review): `parent_ctx` is annotated Optional but is dereferenced
    # unconditionally below -- confirm callers never pass None.
    br_model = model.Br(parent_ctx.doc)
    new_ctx = BrElement.ParsingContext(BrElement, parent_ctx, br_model)
    new_ctx.process(parent_ctx, xml_elem)
    return new_ctx
def handle_data(self, data):
    """Append `data` to the parent element as one span per line.

    `data` is split on "\\n"; every line after the first is preceded by a
    `model.Br`, and each line (including empty ones) becomes a `model.Text`
    wrapped in its own `model.Span`.
    """
    target = self.parent

    for position, fragment in enumerate(data.split("\n")):
        # a line break separates consecutive lines, so none before the first
        if position != 0:
            line_break = model.Br(target.get_doc())
            target.push_child(line_break)

        wrapper = model.Span(target.get_doc())
        text_node = model.Text(target.get_doc(), fragment)
        wrapper.push_child(text_node)
        target.push_child(wrapper)
def test_push_child(self):
    """A Span accepts Br, Span and Text children and rejects a P child."""
    span = model.Span()

    # accepted child types, pushed in this order
    for child_type in (model.Br, model.Span, model.Text):
        span.push_child(child_type())

    # a paragraph is rejected
    with self.assertRaises(TypeError):
        span.push_child(model.P())
def test_push_child(self):
    """A P accepts Br, Span and Ruby children and rejects a bare Text child."""
    paragraph = model.P()

    # accepted child types, pushed in this order
    for child_type in (model.Br, model.Span, model.Ruby):
        paragraph.push_child(child_type())

    # a bare text node is rejected
    with self.assertRaises(TypeError):
        paragraph.push_child(model.Text())
def process(context, inherited_space, inherited_lang, ttml_element):
    """Build a `model.Br` from the XML element `ttml_element`.

    The element's own xml:space and xml:lang values are used when extracted
    values are truthy; otherwise `inherited_space` and `inherited_lang` apply.
    Child elements or a text node on `ttml_element` are logged as errors,
    and the `Br` is returned regardless.
    """
    br = model.Br(context.doc)

    # attributes: fall back to the inherited value when nothing is extracted
    own_space = XMLSpaceAttribute.extract(ttml_element)
    br.set_space(own_space if own_space else inherited_space)

    own_lang = XMLLangAttribute.extract(ttml_element)
    br.set_lang(own_lang if own_lang else inherited_lang)

    ContentElement.process_style_properties(context, ttml_element, br)

    # content: report anything found inside the element
    has_children = len(ttml_element) > 0
    if has_children:
        LOGGER.error("Br cannot contain children elements")

    has_text = ttml_element.text is not None
    if has_text:
        LOGGER.error("Br cannot contain text nodes")

    return br
def to_model(data_file: typing.IO, _config = None, progress_callback=lambda _: None):
    """Converts an SRT document to the data model

    A single region carrying the default styles is created and attached to the
    body; every subtitle becomes a `model.P` in a single `model.Div`.

    Args:
        data_file: file-like object whose `readlines()` yields the SRT lines.
        _config: not used by this function.
        progress_callback: called with `line_index / len(lines)` each time a
            subtitle counter line is accepted.

    Returns:
        The populated `model.ContentDocument`, or `None` if a subtitle counter
        or timecode line is missing (a fatal message is logged).
    """

    doc = model.ContentDocument()

    # default region, shared by all subtitles

    region = model.Region(_DEFAULT_REGION_ID, doc)
    region.set_style(
        styles.StyleProperties.Origin,
        styles.CoordinateType(
            x=styles.LengthType(5, styles.LengthType.Units.pct),
            y=styles.LengthType(5, styles.LengthType.Units.pct)
        )
    )
    region.set_style(
        styles.StyleProperties.Extent,
        styles.ExtentType(
            height=styles.LengthType(90, styles.LengthType.Units.pct),
            width=styles.LengthType(90, styles.LengthType.Units.pct)
        )
    )
    region.set_style(
        styles.StyleProperties.DisplayAlign,
        styles.DisplayAlignType.after
    )
    region.set_style(
        styles.StyleProperties.TextAlign,
        styles.TextAlignType.center
    )
    region.set_style(
        styles.StyleProperties.LineHeight,
        _DEFAULT_LINE_HEIGHT
    )
    region.set_style(
        styles.StyleProperties.FontFamily,
        _DEFAULT_FONT_STACK
    )
    region.set_style(
        styles.StyleProperties.FontSize,
        _DEFAULT_FONT_SIZE
    )
    region.set_style(
        styles.StyleProperties.Color,
        _DEFAULT_TEXT_COLOR
    )
    region.set_style(
        styles.StyleProperties.TextOutline,
        styles.TextOutlineType(
            _DEFAULT_OUTLINE_THICKNESS,
            _DEFAULT_OUTLINE_COLOR
        )
    )

    doc.put_region(region)

    body = model.Body(doc)
    body.set_region(region)

    doc.set_body(body)

    div = model.Div(doc)

    body.push_child(div)

    lines: typing.List[str] = data_file.readlines()

    state = _State.COUNTER
    current_p = None

    # Always bound, so that a timecode line followed directly by a blank line
    # or by the end of input does not read an unassigned variable.
    subtitle_text = ""

    # presumably `_none_terminated` yields every line and then a final None
    # marking the end of input -- verify against its definition
    for line_index, line in enumerate(_none_terminated(lines)):

        if state is _State.COUNTER:
            if line is None:
                break

            if _EMPTY_RE.fullmatch(line):
                continue

            if _COUNTER_RE.search(line) is None:
                LOGGER.fatal("Missing subtitle counter at line %s", line_index)
                return None

            progress_callback(line_index/len(lines))

            state = _State.TC

            continue

        if state is _State.TC:
            if line is None:
                break

            m = _TIMECODE_RE.search(line)

            if m is None:
                LOGGER.fatal("Missing timecode at line %s", line_index)
                return None

            current_p = model.P(doc)

            current_p.set_begin(
                int(m.group('begin_h')) * 3600 +
                int(m.group('begin_m')) * 60 +
                int(m.group('begin_s')) +
                int(m.group('begin_ms')) / 1000
            )

            current_p.set_end(
                int(m.group('end_h')) * 3600 +
                int(m.group('end_m')) * 60 +
                int(m.group('end_s')) +
                int(m.group('end_ms')) / 1000
            )

            # start every subtitle with empty text so that a subtitle without
            # any text line never sees the text of the previous subtitle
            subtitle_text = ""

            state = _State.TEXT

            continue

        if state in (_State.TEXT, _State.TEXT_MORE):

            if line is None or _EMPTY_RE.fullmatch(line):
                # end of the subtitle: normalise the accumulated text and parse it
                # NOTE(review): r"\n\r" is a raw string, so it matches the four
                # literal characters backslash-n-backslash-r, not a line break --
                # confirm that this is intended.
                subtitle_text = subtitle_text.strip('\r\n')\
                    .replace(r"\n\r", "\n")\
                    .replace(r"{bold}", r"<bold>")\
                    .replace(r"{/bold}", r"</bold>")\
                    .replace(r"{italic}", r"<italic>")\
                    .replace(r"{/italic}", r"</italic>")\
                    .replace(r"{underline}", r"<underline>")\
                    .replace(r"{/underline}", r"</underline>")

                parser = _TextParser(current_p, line_index)
                parser.feed(subtitle_text)
                parser.close()

                state = _State.COUNTER
                continue

            if state is _State.TEXT:
                # first text line: the paragraph is only added to the div once
                # it is known to have text
                div.push_child(current_p)

            if state is _State.TEXT_MORE:
                # every additional text line is preceded by a line break
                current_p.push_child(model.Br(current_p.get_doc()))

            subtitle_text += line

            state = _State.TEXT_MORE

            continue

    return doc
def to_model(element: model.ContentElement, is_teletext: bool, tti_cct: bytes, tti_tf: bytes):
    """Converts the EBU STL text field `tti_tf` to a sequence of span elements and
    appends them to the content element `element`.

    Args:
        element: content element that receives the generated children.
        is_teletext: indicates whether the text field is interpreted as
            teletext or open/undefined.
        tti_cct: text field code page as signaled in the TTI CCT field. If it
            is not found in `_CHAR_DECODER_MAP`, an error is logged and
            `iso6937.decode` is used.
        tti_tf: the raw text field.
    """

    tf_iter = _TextFieldIterator(tti_tf)

    decode_func = _CHAR_DECODER_MAP.get(tti_cct)

    if decode_func is None:
        # unknown code page: report it and fall back to ISO 6937
        decode_func = iso6937.decode
        LOGGER.error("Unknown Text Field character set: %s", str(tti_cct))

    context = _Context(element, is_teletext, decode_func)

    while True:

        c = tf_iter.cur()

        # the text field ends at the first code matched by `_is_unused_space_code`
        if _is_unused_space_code(c):
            break

        if _is_character_code(c):
            # keep a non-printable character code only when both of its
            # neighbours are printable
            if _is_printable_code(c) or (_is_printable_code(tf_iter.peek_next()) and _is_printable_code(tf_iter.peek_prev())):
                context.append_character(c)

        elif _is_newline_code(c):
            # emit a line break only for a newline that is not followed by
            # another newline or by the end of the text field
            if not _is_newline_code(tf_iter.peek_next()) and not _is_unused_space_code(tf_iter.peek_next()):
                context.end_span()
                element.push_child(model.Br(element.get_doc()))
                # NOTE(review): nesting reconstructed from flattened source --
                # confirm the teletext style reset belongs inside this branch
                if is_teletext:
                    context.reset_styles(is_teletext)

        elif _is_control_code(c):
            # a control code closes the current span before the style changes
            context.end_span()

            # background color
            if c == 0x1C:
                context.set_bg_color(styles.NamedColors.black.value)
            elif c == 0x85:
                context.set_bg_color(styles.NamedColors.transparent.value)
            elif c == 0x1D:
                # background takes the current foreground color
                context.set_bg_color(context.get_fg_color())
            # foreground color
            elif c == 0x00:
                context.set_fg_color(styles.NamedColors.black.value)
            elif c == 0x01:
                context.set_fg_color(styles.NamedColors.red.value)
            elif c == 0x02:
                context.set_fg_color(styles.NamedColors.lime.value)
            elif c == 0x03:
                context.set_fg_color(styles.NamedColors.yellow.value)
            elif c == 0x04:
                context.set_fg_color(styles.NamedColors.blue.value)
            elif c == 0x05:
                context.set_fg_color(styles.NamedColors.magenta.value)
            elif c == 0x06:
                context.set_fg_color(styles.NamedColors.cyan.value)
            elif c == 0x07:
                context.set_fg_color(styles.NamedColors.white.value)
            # italic and underline on/off
            elif c == 0x80:
                context.set_italic(True)
            elif c == 0x81:
                context.set_italic(False)
            elif c == 0x82:
                context.set_underline(True)
            elif c == 0x83:
                context.set_underline(False)

            # a control code sitting between two printable codes is replaced
            # by a space character (0x20)
            if (_is_printable_code(tf_iter.peek_next()) and _is_printable_code(tf_iter.peek_prev())):
                context.append_character(0X20)

        # advance to the next code of the text field
        next(tf_iter)

    # close the span that is still open once the text field is exhausted
    context.end_span()
def process_tti_block(self, tti_block: bytes):
    """Processes a single TTI block

    The block is unpacked with the 128-byte layout '<BHBBBBBBBBBBBBB112s'.
    Text fields of extension blocks are accumulated until the block with
    EBN 0xFF is reached; the accumulated text is then converted into the model
    under a `model.P` (or, in cumulative mode, a nested `model.Span`).

    Blocks are skipped when they are user data/reserved (0xEF < EBN < 0xFF),
    when a timecode is invalid, when the time in precedes the start offset, or
    when the time out precedes the time in.

    Args:
        tti_block: raw bytes of the TTI block.

    Raises:
        ValueError: if `tti_block` is None.
    """
    if tti_block is None:
        raise ValueError("tti_block should not be None")

    tti = _TTIBlock._make(struct.unpack('<BHBBBBBBBBBBBBB112s', tti_block))

    LOGGER.debug("Subtitle SN: %s", tti.SN)
    LOGGER.debug(" EBN: %s", tti.EBN)
    LOGGER.debug(" CS: %s", tti.CS)
    LOGGER.debug(" SGN: %s", tti.SGN)
    LOGGER.debug(" JC: %s", tti.JC)
    LOGGER.debug(" VP: %s", tti.VP)

    if 0xEF < tti.EBN < 0xFF:
        # skip user data and reserved blocks
        return

    # a block that does not continue an extension starts a fresh text field
    if not self.is_in_extension:
        self.tti_tf = b''

    self.tti_tf += tti.TF.strip(b'\x8f')

    is_double_height_characters = tf.has_double_height_char(self.tti_tf)

    # continue accumulating if we have an extension block

    if tti.EBN != 0xFF:
        self.is_in_extension = True
        return

    self.is_in_extension = False

    # apply program offset

    try:
        tci = SmpteTimeCode(tti.TCIh, tti.TCIm, tti.TCIs, tti.TCIf, self.get_fps())
        tco = SmpteTimeCode(tti.TCOh, tti.TCOm, tti.TCOs, tti.TCOf, self.get_fps())
    except ValueError:
        LOGGER.error("Invalid TTI timecode")
        return

    begin_time = tci.to_temporal_offset() - self.start_offset
    if begin_time < 0:
        LOGGER.debug(
            "Skipping subtitle because TCI is less than start time")
        return
    LOGGER.debug(" Time in: %s", tci)

    end_time = tco.to_temporal_offset() - self.start_offset
    if end_time < begin_time:
        LOGGER.error("Subtitle TCO is less than TCI")
        return
    LOGGER.debug(" Time out: %s", tco)

    # create a new subtitle if SN changes and we are not in cumulative mode
    # (SN is compared by value: identity comparison of integers depends on
    # the interpreter's caching of small integers)

    if tti.SN != self.last_sn and tti.CS in (0x00, 0x01):

        self.last_sn = tti.SN

        # find the div to which the subtitle belongs, based on SGN

        div_element = self.sgn_to_div_map.get(tti.SGN)

        # create the div if it does not exist

        if div_element is None:
            div_element = model.Div(self.doc)
            self.body.push_child(div_element)
            self.sgn_to_div_map[tti.SGN] = div_element

        # create the p that will hold the subtitle

        self.cur_p_element = model.P(self.doc)

        # JC 0x01 maps to start, 0x03 to end, and any other value to center

        if tti.JC == 0x01:
            self.cur_p_element.set_style(styles.StyleProperties.TextAlign, styles.TextAlignType.start)
        elif tti.JC == 0x03:
            self.cur_p_element.set_style(styles.StyleProperties.TextAlign, styles.TextAlignType.end)
        else:
            self.cur_p_element.set_style(styles.StyleProperties.TextAlign, styles.TextAlignType.center)

        self.cur_p_element.set_style(
            styles.StyleProperties.LineHeight,
            styles.LengthType(DEFAULT_LINE_HEIGHT_PCT, styles.LengthType.Units.pct))

        # the single-height font size is only used for teletext text fields
        # without double-height characters

        if self.is_teletext() and not is_double_height_characters:
            font_size = DEFAULT_SINGLE_HEIGHT_FONT_SIZE_PCT
        else:
            font_size = DEFAULT_DOUBLE_HEIGHT_FONT_SIZE_PCT

        self.cur_p_element.set_style(
            styles.StyleProperties.FontSize,
            styles.LengthType(font_size, styles.LengthType.Units.pct))

        safe_area_height = round(100 - DEFAULT_VERTICAL_SAFE_MARGIN_PCT * 2)
        safe_area_width = round(100 - DEFAULT_HORIZONTAL_SAFE_MARGIN_PCT * 2)

        # assume that VP < max number of rows/2 means top-aligned and otherwise bottom-aligned
        # probably should offer an option to override this

        if tti.VP < self.get_max_row_count() // 2:
            # top-aligned large region: starts at the row given by VP and
            # extends down to the bottom safe margin

            r_y = DEFAULT_VERTICAL_SAFE_MARGIN_PCT + (
                (tti.VP - 1) / self.get_max_row_count()) * safe_area_height
            r_height = 100 - DEFAULT_VERTICAL_SAFE_MARGIN_PCT - r_y

            region = _get_region_from_model(
                self.doc,
                round(DEFAULT_HORIZONTAL_SAFE_MARGIN_PCT),
                r_y,
                safe_area_width,
                r_height,
                styles.DisplayAlignType.before)

        else:
            # bottom-aligned region: starts at the top safe margin and its
            # height is derived from VP and the rows taken by the text

            line_count = tf.line_count(self.tti_tf, is_double_height_characters)
            vp = tti.VP
            line_height = 2 if is_double_height_characters else 1

            r_y = DEFAULT_VERTICAL_SAFE_MARGIN_PCT
            r_height = ((vp + line_count * line_height - 1) / self.get_max_row_count()) * safe_area_height

            region = _get_region_from_model(
                self.doc,
                round(DEFAULT_HORIZONTAL_SAFE_MARGIN_PCT),
                r_y,
                safe_area_width,
                r_height,
                styles.DisplayAlignType.after)

        self.cur_p_element.set_region(region)

        div_element.push_child(self.cur_p_element)

    if tti.CS in (0x01, 0x02, 0x03):
        # create a nested span if we are in cumulative mode
        sub_element = model.Span(self.doc)
        self.cur_p_element.push_child(sub_element)
    else:
        sub_element = self.cur_p_element

    sub_element.set_begin(begin_time)
    sub_element.set_end(end_time)

    LOGGER.debug(" TF: %s", self.tti_tf)

    tf.to_model(sub_element, self.is_teletext(), self.get_cct(), self.tti_tf)

    # for CS 0x01 and 0x02 the span is terminated by a line break

    if tti.CS in (0x01, 0x02):
        sub_element.push_child(model.Br(self.doc))