def _read_ttml(self, input_file):
    """
    Read sync map fragments from a TTML file.

    The whole content of ``input_file`` is read, encoded to UTF-8 bytes
    and parsed as XML. The ``xml:lang`` attribute of the root element is
    used as the language of every fragment. For each TTML ``p`` element
    found in the document, a ``SyncMapFragment`` is built from its
    ``xml:id``, ``begin`` and ``end`` attributes and its text lines,
    and appended to this object.

    :param input_file: object whose ``read()`` returns the TTML document
                       as text
    """
    # Clark-notation namespace prefixes used to qualify tag/attribute names
    ns_ttml = "{http://www.w3.org/ns/ttml}"
    ns_xml = "{http://www.w3.org/XML/1998/namespace}"

    # the parser is handed bytes, not text
    document = etree.fromstring(input_file.read().encode("utf-8"))

    # the language is declared once, on the root element
    language = document.get(ns_xml + "lang")

    for paragraph in document.iter(ns_ttml + "p"):
        fragment_id = paragraph.get(ns_xml + "id")
        start = gf.time_from_ttml(paragraph.get("begin"))
        stop = gf.time_from_ttml(paragraph.get("end"))
        text_lines = self._get_lines_from_node_text(paragraph)
        self.append(
            SyncMapFragment(
                TextFragment(
                    identifier=fragment_id,
                    language=language,
                    lines=text_lines
                ),
                start,
                stop
            )
        )
def parse(self, input_text, syncmap):
    """
    Parse a TTML document and add its fragments to the given sync map.

    ``input_text`` is converted with ``gf.safe_bytes`` and parsed as XML.
    The ``xml:lang`` attribute of the root element is used as the
    language of every fragment. For each TTML ``p`` element, its
    ``xml:id``, ``begin`` and ``end`` attributes and its text lines are
    passed to ``self._add_fragment`` together with ``syncmap``.

    :param input_text: the TTML document to be parsed
    :param syncmap: the sync map object forwarded to ``_add_fragment``
    """
    from lxml import etree

    # Clark-notation namespace prefixes used to qualify tag/attribute names
    ns_ttml = "{http://www.w3.org/ns/ttml}"
    ns_xml = "{http://www.w3.org/XML/1998/namespace}"

    document = etree.fromstring(gf.safe_bytes(input_text))

    # the language is declared once, on the root element
    language = document.get(ns_xml + "lang")

    for paragraph in document.iter(ns_ttml + "p"):
        fragment_id = gf.safe_unicode(paragraph.get(ns_xml + "id"))
        start = gf.time_from_ttml(paragraph.get("begin"))
        stop = gf.time_from_ttml(paragraph.get("end"))
        text_lines = self._get_lines_from_node_text(paragraph)
        self._add_fragment(
            syncmap=syncmap,
            identifier=fragment_id,
            language=language,
            lines=text_lines,
            begin=start,
            end=stop
        )
def parse(self, input_text, syncmap):
    """
    Read the fragments of a TTML document into ``syncmap``.

    The input is turned into bytes via ``gf.safe_bytes`` and parsed as
    XML. Every TTML ``p`` element yields one call to
    ``self._add_fragment``, carrying the element's ``xml:id``, its
    ``begin``/``end`` times converted by ``gf.time_from_ttml``, its text
    lines, and the document-wide language taken from the root element's
    ``xml:lang`` attribute.

    :param input_text: the TTML document to be parsed
    :param syncmap: the sync map object forwarded to ``_add_fragment``
    """
    from lxml import etree

    # namespace-qualified names are built by prefixing these strings
    ttml_prefix = "{http://www.w3.org/ns/ttml}"
    xml_prefix = "{http://www.w3.org/XML/1998/namespace}"
    lang_attribute = xml_prefix + "lang"
    id_attribute = xml_prefix + "id"
    paragraph_tag = ttml_prefix + "p"

    tree_root = etree.fromstring(gf.safe_bytes(input_text))
    language = tree_root.get(lang_attribute)

    for node in tree_root.iter(paragraph_tag):
        node_id = gf.safe_unicode(node.get(id_attribute))
        node_begin = gf.time_from_ttml(node.get("begin"))
        node_end = gf.time_from_ttml(node.get("end"))
        node_lines = self._get_lines_from_node_text(node)
        self._add_fragment(syncmap=syncmap, identifier=node_id, language=language,
                           lines=node_lines, begin=node_begin, end=node_end)
def test_time_from_ttml(self):
    # Each pair is (TTML time string, expected number of seconds);
    # None, the empty string and a bare "s" are all expected to yield 0.
    cases = (
        (None, 0),
        ("", 0),
        ("s", 0),
        ("0s", 0),
        ("000s", 0),
        ("1s", 1),
        ("001s", 1),
        ("1s", 1),
        ("001.234s", 1.234),
    )
    for ttml_string, expected in cases:
        self.assertEqual(gf.time_from_ttml(ttml_string), expected)
def test_time_from_ttml(self):
    # Each pair is (TTML time string, expected TimeValue);
    # None, the empty string and a bare "s" are all expected to yield
    # TimeValue("0").
    cases = [
        [None, TimeValue("0")],
        ["", TimeValue("0")],
        ["s", TimeValue("0")],
        ["0s", TimeValue("0")],
        ["000s", TimeValue("0")],
        ["1s", TimeValue("1")],
        ["001s", TimeValue("1")],
        ["1s", TimeValue("1")],
        ["001.234s", TimeValue("1.234")],
    ]
    for ttml_string, expected in cases:
        self.assertEqual(gf.time_from_ttml(ttml_string), expected)