Beispiel #1
0
    def _read_smil(self, input_file):
        """
        Read from SMIL file.

        Every ``<par>`` element in the SMIL namespace is visited in
        document order. The fragment identifier is the second item
        returned by ``gf.split_url`` for the ``src`` attribute of the
        ``<text>`` child; the interval comes from the ``clipBegin`` and
        ``clipEnd`` attributes of the ``<audio>`` child. For each complete
        ``<par>`` a ``SyncMapFragment`` holding a single empty text line
        is appended to ``self``.

        A ``<par>`` is skipped when it has no ``<text>`` child or no
        ``<audio>`` child carrying both clip attributes, so that values
        parsed from a previous ``<par>`` are never reused for it.

        Limitations:
        1. parses only <par> elements, in order
        2. timings must have hh:mm:ss.mmm or ss.mmm format (autodetected)
        3. both clipBegin and clipEnd attributes of <audio> must be populated

        :param input_file: object whose ``read()`` returns the SMIL
                           document as a string (it is encoded to UTF-8
                           before being parsed)
        """
        smil_ns = "{http://www.w3.org/ns/SMIL}"
        par_tag = smil_ns + "par"
        text_tag = smil_ns + "text"
        audio_tag = smil_ns + "audio"

        def parse_time(value):
            # Format autodetection: hh:mm:ss.mmm is tried first and, when
            # that helper yields None, the value is re-read as ss.mmm.
            parsed = gf.time_from_hhmmssmmm(value)
            if parsed is None:
                parsed = gf.time_from_ssmmm(value)
            return parsed

        contents = input_file.read()
        root = etree.fromstring(contents.encode("utf-8"))
        for par in root.iter(par_tag):
            # Reset the state for each <par>: without this, an incomplete
            # <par> would either hit unbound names (first iteration) or
            # silently inherit the values of the preceding <par>.
            has_text = False
            has_audio = False
            identifier = begin = end = None
            for child in par:
                if child.tag == text_tag:
                    identifier = gf.split_url(child.get("src"))[1]
                    has_text = True
                elif child.tag == audio_tag:
                    clip_begin = child.get("clipBegin")
                    clip_end = child.get("clipEnd")
                    if (clip_begin is None) or (clip_end is None):
                        # Limitation 3: an <audio> without both clip
                        # attributes cannot define an interval.
                        continue
                    begin = parse_time(clip_begin)
                    end = parse_time(clip_end)
                    has_audio = True
            if not (has_text and has_audio):
                continue
            # TODO read text from additional text_file?
            text = u""
            text_fragment = TextFragment(identifier=identifier, lines=[text])
            sm_fragment = SyncMapFragment(text_fragment, begin, end)
            self.append(sm_fragment)
Beispiel #2
0
    def parse(self, input_text, syncmap):
        """
        Parse SMIL text and add one fragment per ``<par>`` element.

        Every ``<par>`` element in the SMIL namespace is visited in
        document order. The fragment identifier is the second item
        returned by ``gf.split_url`` for the ``src`` attribute of the
        ``<text>`` child; the interval comes from the ``clipBegin`` and
        ``clipEnd`` attributes of the ``<audio>`` child. For each complete
        ``<par>`` the values are handed to ``self._add_fragment`` together
        with ``syncmap`` and a single empty text line.

        A ``<par>`` is skipped when it has no ``<text>`` child or no
        ``<audio>`` child carrying both clip attributes, so that values
        parsed from a previous ``<par>`` are never reused for it.

        Limitations:
        1. parses only ``<par>`` elements, in order
        2. timings must have ``hh:mm:ss.mmm`` or ``ss.mmm`` format (autodetected)
        3. both ``clipBegin`` and ``clipEnd`` attributes of ``<audio>`` must be populated

        :param input_text: the SMIL document; it is converted with
                           ``gf.safe_bytes`` before being parsed
        :param syncmap: forwarded unchanged to ``self._add_fragment``
        """
        from lxml import etree

        smil_ns = "{http://www.w3.org/ns/SMIL}"
        par_tag = smil_ns + "par"
        text_tag = smil_ns + "text"
        audio_tag = smil_ns + "audio"

        def parse_time(value):
            # Format autodetection: a colon means hh:mm:ss.mmm,
            # anything else is read as ss.mmm.
            if ":" in value:
                return gf.time_from_hhmmssmmm(value)
            return gf.time_from_ssmmm(value)

        root = etree.fromstring(gf.safe_bytes(input_text))
        for par in root.iter(par_tag):
            # Reset the state for each <par>: without this, an incomplete
            # <par> would either hit unbound names (first iteration) or
            # silently inherit the values of the preceding <par>.
            has_text = False
            has_audio = False
            identifier = begin = end = None
            for child in par:
                if child.tag == text_tag:
                    identifier = gf.safe_unicode(gf.split_url(child.get("src"))[1])
                    has_text = True
                elif child.tag == audio_tag:
                    begin_text = child.get("clipBegin")
                    end_text = child.get("clipEnd")
                    if (begin_text is None) or (end_text is None):
                        # Limitation 3: an <audio> without both clip
                        # attributes cannot define an interval; testing
                        # ``":" in None`` would raise TypeError.
                        continue
                    begin = parse_time(begin_text)
                    end = parse_time(end_text)
                    has_audio = True
            if not (has_text and has_audio):
                continue
            # TODO read text from additional text_file?
            self._add_fragment(
                syncmap=syncmap,
                identifier=identifier,
                lines=[u""],
                begin=begin,
                end=end
            )
Beispiel #3
0
 def test_split_url(self):
     """Check ``gf.split_url`` against a table of (url, expected) cases."""
     # Each case pairs an input with the list the result must equal.
     cases = (
         (None, [None, None]),
         ("", ["", None]),
         ("foo", ["foo", None]),
         ("foo.html", ["foo.html", None]),
         ("foo.html#", ["foo.html", ""]),
         ("foo.html#id", ["foo.html", "id"]),
         ("foo.html#id#bad", ["foo.html", "id"]),
     )
     for url, expected in cases:
         self.assertEqual(gf.split_url(url), expected)
Beispiel #4
0
 def test_split_url(self):
     """Check that ``gf.split_url`` equals the expected tuple for each URL."""
     urls = (
         None,
         "",
         "foo",
         "foo.html",
         "foo.html#",
         "foo.html#id",
         "foo.html#id#bad",
     )
     # Expected results, in the same order as ``urls``.
     wanted = (
         (None, None),
         ("", None),
         ("foo", None),
         ("foo.html", None),
         ("foo.html", ""),
         ("foo.html", "id"),
         ("foo.html", "id"),
     )
     for url, result in zip(urls, wanted):
         self.assertEqual(gf.split_url(url), result)
Beispiel #5
0
 def test_split_url(self):
     """Run ``gf.split_url`` over sample inputs and compare with lists."""
     def check(url, expected):
         # One comparison per sample; the first mismatch aborts the test.
         self.assertEqual(gf.split_url(url), expected)

     samples = [
         (None, [None, None]),
         ("", ["", None]),
         ("foo", ["foo", None]),
         ("foo.html", ["foo.html", None]),
         ("foo.html#", ["foo.html", ""]),
         ("foo.html#id", ["foo.html", "id"]),
         ("foo.html#id#bad", ["foo.html", "id"]),
     ]
     for url, expected in samples:
         check(url, expected)
 def test_split_url(self):
     """Verify ``gf.split_url`` on a fixed table of inputs and tuple results."""
     # (input, expected result) pairs, checked in this order.
     table = (
         (None, (None, None)),
         ("", ("", None)),
         ("foo", ("foo", None)),
         ("foo.html", ("foo.html", None)),
         ("foo.html#", ("foo.html", "")),
         ("foo.html#id", ("foo.html", "id")),
         ("foo.html#id#bad", ("foo.html", "id")),
     )
     for given, expected in table:
         actual = gf.split_url(given)
         self.assertEqual(actual, expected)