Beispiel #1
0
    def _load_ass(self):
        """Load ASS subtitles and register them as a text stream.

        Runs only when ``self.ontology["kind"]`` is ``"ass"`` and
        ``self._read()`` returns non-empty content.

        The line following the ``[Events]`` header is matched against the
        "ass formation line" expression to obtain the comma separated field
        names.  Every line matching the "ass subtitle line" expression is
        split on commas and turned into a ``Slide`` using the positions of
        the ``Start``, ``End`` and ``Text`` fields.  Matches of the
        "ass event command" expression are replaced in the text with the
        configured empty string, and the text is split into slide lines on
        the ASS line-break marker (a backslash followed by ``N``).

        When the normalized caption is valid, an ``Ontology`` with stream
        type ``text`` and format ``ASS`` is appended to
        ``self._execution["crawl"]["stream"]``.

        Malformed input is skipped instead of raising: an ``[Events]``
        header with no following line, a format line missing one of the
        required fields, and dialogue lines too short to carry both
        timecodes are ignored.
        """
        if self.ontology["kind"] != "ass":
            return

        content = self._read()
        if not content:
            return

        caption = Caption(self.env)
        rows = content.splitlines()

        # The field layout is declared on the line right after [Events].
        formation = None
        for index, row in enumerate(rows):
            if row == u"[Events]":
                # Guard against [Events] being the very last line.
                if index + 1 < len(rows):
                    match = self.env.expression["ass formation line"].search(rows[index + 1])
                    if match is not None:
                        formation = match.group(1).strip().replace(u" ", u"").split(u",")
                break

        required = ("Start", "End", "Text")
        if formation is not None and all(name in formation for name in required):
            start = formation.index("Start")
            stop = formation.index("End")
            text = formation.index("Text")
            # Both timecode fields must exist; a missing Text field simply
            # yields an empty text, as slicing past the end is harmless.
            last_timecode_field = max(start, stop)

            # Same value as the raw literal backslash + N, spelled so that
            # the module also parses on interpreters without ``ur`` strings.
            line_break = u"\\N"

            def as_line_break(_match):
                # A callable replacement is inserted verbatim, so the
                # backslash is never parsed as a template escape.
                return line_break

            subtitle_line = self.env.expression["ass subtitle line"]
            for row in rows:
                match = subtitle_line.search(row)
                if match is None:
                    continue

                fields = match.group(1).strip().split(",")
                if len(fields) <= last_timecode_field:
                    # Truncated dialogue line: no usable timecodes.
                    continue

                slide = Slide()
                slide.begin.timecode = fields[start]
                slide.end.timecode = fields[stop]

                # Commas inside the text were split above; stitch them back.
                subtitle_text = u",".join(fields[text:])
                subtitle_text = self.env.expression["ass event command"].sub(
                    self.env.constant["empty string"], subtitle_text
                )
                subtitle_text = subtitle_text.replace(u"\n", line_break)
                subtitle_text = self.env.expression["ass condense line breaks"].sub(
                    as_line_break, subtitle_text
                )
                for fragment in subtitle_text.split(line_break):
                    slide.add(fragment)
                caption.add(slide)

        caption.normalize()
        if caption.valid:
            mtype = self.env.enumeration["mediainfo stream type"].find("text")
            o = Ontology(self.env, mtype.node["namespace"])
            o["stream type"] = u"text"
            o["format"] = u"ASS"
            o["language"] = self.ontology["language"]
            o["content"] = caption.node
            self._execution["crawl"]["stream"].append(o)
Beispiel #2
0
    def _load_srt(self):
        """Load SRT subtitles and register them as a text stream.

        Runs only when ``self.ontology["kind"]`` is ``"srt"`` and
        ``self._read()`` returns non-empty content.

        A block starts at a line matching the "srt time line" expression
        whose preceding line consists of digits (the block counter).  The
        two groups of the match become the begin and end timecodes of a
        ``Slide``.  The lines between one timing line and the counter line
        of the next block (or the end of the file) are added to the slide.

        When the normalized caption is valid, an ``Ontology`` with stream
        type ``text`` and format ``UTF-8`` is appended to
        ``self._execution["crawl"]["stream"]``.
        """
        if self.ontology["kind"] != "srt":
            return

        content = self._read()
        if not content:
            return

        caption = Caption(self.env)
        rows = content.splitlines()
        time_line = self.env.expression["srt time line"]
        last_index = len(rows) - 1

        block_start = None  # index of the counter line of the open block
        block_end = None    # exclusive end of the open block, once known
        current = None      # slide collecting text for the open block
        upcoming = None     # slide of the block that was just discovered

        for index, row in enumerate(rows):
            if index == last_index and block_start is not None:
                # Last line of the file: the open block runs to the end.
                block_end = index + 1

            match = time_line.search(row)
            # ``index > 0`` keeps ``rows[index - 1]`` from wrapping around to
            # the last line of the file when the first line is a timing line.
            if match is not None and index > 0 and rows[index - 1].strip().isdigit():
                upcoming = Slide()
                upcoming.begin.timecode = match.group(1)
                upcoming.end.timecode = match.group(2)
                if block_start is not None:
                    # A new block begins; the open one ends at its counter.
                    block_end = index - 1
                else:
                    # First block in the file.
                    block_start = index - 1
                    current = upcoming
                    upcoming = None

            if block_end is not None:
                # Skip the counter and timing lines (+2) of the open block.
                for text in rows[block_start + 2:block_end]:
                    current.add(text)
                caption.add(current)
                block_start = block_end
                block_end = None
                current = upcoming
                upcoming = None

        caption.normalize()
        if caption.valid:
            mtype = self.env.enumeration["mediainfo stream type"].find("text")
            o = Ontology(self.env, mtype.node["namespace"])
            o["stream type"] = u"text"
            o["format"] = u"UTF-8"
            o["language"] = self.ontology["language"]
            o["content"] = caption.node
            self._execution["crawl"]["stream"].append(o)