Beispiel #1
0
    def get_markup(self, text):
        """Convert SRT-style inline tags in ``text`` into ``<span>`` markup.

        ``text`` is wrapped in a ``<p>`` element and parsed with ``etree``.
        Direct ``<b>``, ``<i>`` and ``<u>`` children are rewritten as
        ``<span>`` elements carrying a ``fontWeight``, ``fontStyle`` or
        ``textDecoration`` attribute. Any other child element is dropped and
        only the text following it is kept. Newlines in the result are
        replaced with ``<br />``.

        Whatever ``etree.fromstring`` raises for malformed markup propagates
        to the caller.
        """
        # create a simple element so we can parse using etree
        # since srt uses html like tags as markup
        el = etree.fromstring("<p>%s</p>" % text)

        span_attrs = {
            'b': 'fontWeight="bold"',
            'i': 'fontStyle="italic"',
            'u': 'textDecoration="underline"',
        }
        base_span = '<span %s>%s</span>'
        content = [el.text]

        # Iterate the element directly: getchildren() is deprecated and no
        # longer exists on xml.etree.ElementTree elements since Python 3.9.
        for child in el:
            attr = span_attrs.get(child.tag)
            if attr is not None:
                # child.text is None for an empty tag such as "<b></b>";
                # fall back to '' so the literal "None" is never rendered.
                content.append(base_span % (attr, child.text or ''))

            content.append(child.tail)

        if el.tail:
            content.append(el.tail.strip())

        # filter(None, ...) discards the None/empty pieces before joining.
        return "".join(filter(None, content)).replace("\n", "<br />")


# Pass SRTParser to register() at import time.
# NOTE(review): register() is defined outside this view; presumably it adds
# the class to a parser registry -- confirm against its definition.
register(SRTParser)
Beispiel #2
0
                has_subs = False
                total_items = len(xml)
                for i,item in enumerate(xml):
                    duration = 0
                    start = int(float(item.get('start')) * 1000)
                    if hasattr(item, 'duration'):
                        duration = int(float(item.get('dur', 0)) * 1000)
                    elif i+1 < total_items:
                        # youtube sometimes omits the duration attribute
                        # in this case we're displaying until the next sub
                        # starts
                        next_item = xml[i+1]
                        duration = int(float(next_item.get('start')) * 1000) - start
                    else:
                        # hardcode the last sub duration at 3 seconds
                        duration = 3000
                    end = start + duration
                    text = item.text and unescape_html(item.text) or u''
                    self.sub_set.append_subtitle(start, end, text)
                    has_subs = True
                if not has_subs:
                    raise ValueError("No subs")
            except Exception as e:
                raise SubtitleParserError(original_error=e)


        return self.sub_set


register(YoutubeParser)
Beispiel #3
0
        )
        return output

    def get_markup(self, text):
        """Translate SRT inline tags found in ``text`` into ``<span>`` markup.

        The text is wrapped in ``<p>...</p>`` and parsed with ``etree``. Each
        direct ``<b>``, ``<i>`` or ``<u>`` child becomes a ``<span>`` with a
        ``fontWeight``, ``fontStyle`` or ``textDecoration`` attribute; other
        child elements are dropped, keeping only their trailing text.
        Newlines in the joined result are replaced by ``<br />``.

        Errors raised by ``etree.fromstring`` for malformed markup are not
        caught here.
        """
        # create a simple element so we can parse using etree
        # since srt uses html like tags as markup
        el = etree.fromstring("<p>%s</p>" % text)

        span_attrs = {
            'b': 'fontWeight="bold"',
            'i': 'fontStyle="italic"',
            'u': 'textDecoration="underline"',
        }
        base_span = '<span %s>%s</span>'
        content = [el.text]

        # getchildren() is deprecated and was removed from
        # xml.etree.ElementTree in Python 3.9; iterating the element yields
        # the same direct children.
        for child in el:
            attr = span_attrs.get(child.tag)
            if attr is not None:
                # An empty tag ("<i></i>") has text None; use '' so the
                # output never contains the literal string "None".
                content.append(base_span % (attr, child.text or ''))

            content.append(child.tail)

        if el.tail:
            content.append(el.tail.strip())

        # Drop None/empty fragments before joining.
        return "".join(filter(None, content)).replace("\n", "<br />")

# Hand SRTParser to register() when the module is imported.
# NOTE(review): register() is not visible here; presumably it records the
# class in a parser registry -- verify against its definition.
register(SRTParser)
Beispiel #4
0
        else:
            return None

    def get_markup(self, text):
        """Convert WebVTT-style inline tags in ``text`` into ``<span>`` markup.

        ``text`` is wrapped in a ``<p>`` element and parsed with ``etree``.
        Direct ``<b>``, ``<i>`` and ``<u>`` children are emitted as ``<span>``
        elements with a ``fontWeight``, ``fontStyle`` or ``textDecoration``
        attribute. Other child elements are dropped and only the text that
        follows them is kept. Newlines in the result become ``<br />``.

        Exceptions from ``etree.fromstring`` (malformed markup) propagate.
        """
        # create a simple element so we can parse using etree
        # webvtt uses html like tags as markup
        el = etree.fromstring("<p>%s</p>" % text)

        span_attrs = {
            'b': 'fontWeight="bold"',
            'i': 'fontStyle="italic"',
            'u': 'textDecoration="underline"',
        }
        base_span = '<span %s>%s</span>'
        content = [el.text]

        # Iterate the element itself rather than calling getchildren(),
        # which is deprecated and gone from xml.etree.ElementTree since
        # Python 3.9.
        for child in el:
            attr = span_attrs.get(child.tag)
            if attr is not None:
                # Empty tags have text None; substitute '' so "None" is not
                # written into the span.
                content.append(base_span % (attr, child.text or ''))

            content.append(child.tail)

        if el.tail:
            content.append(el.tail.strip())

        # filter(None, ...) removes None/empty fragments before joining.
        return "".join(filter(None, content)).replace("\n", "<br />")

# Pass WEBVTTParser to register() at import time.
# NOTE(review): register() is defined outside this view; presumably it adds
# the class to a parser registry -- confirm against its definition.
register(WEBVTTParser)
Beispiel #5
0
class JSONParser(BaseTextParser):
    """Parser for subtitles supplied as a JSON document.

    ``to_internal`` reads the decoded JSON as a collection of mappings and
    uses the ``position``, ``start``, ``end`` and ``text`` keys of each one.
    """

    file_type = 'json'

    def __init__(self, input_string, pattern, language=None, flags=None,
                 eager_parse=True):
        """Store the raw input and delegate to the base parser.

        ``flags`` is accepted for signature compatibility but is not
        forwarded: the base class always receives an empty list. Its default
        is ``None`` rather than a shared mutable list.
        """
        self.input_string = input_string
        self.pattern = pattern
        self.language = language
        super(JSONParser, self).__init__(input_string, pattern, language=language,
            flags=[], eager_parse=eager_parse)

    def to_internal(self):
        """Return the parsed ``SubtitleSet``, building it on first use.

        The set is cached on ``self.sub_set`` only after the whole input has
        been processed, so a failed parse is retried (and fails again) on the
        next call instead of returning an empty or partial set.

        Raises:
            SubtitleParserError: if ``input_string`` is not valid JSON.
        """
        if not hasattr(self, 'sub_set'):
            # Build into a local and publish it only on success.
            sub_set = SubtitleSet(self.language)

            try:
                data = json.loads(self.input_string)
            except ValueError:
                raise SubtitleParserError("Invalid JSON data provided.")

            # Sort by the ``position`` key
            for sub in sorted(data, key=lambda k: k['position']):
                sub_set.append_subtitle(sub['start'], sub['end'],
                    sub['text'])

            self.sub_set = sub_set

        return self.sub_set


# Pass JSONParser to register() at import time.
# NOTE(review): register() is defined outside this view; presumably it adds
# the class to a parser registry -- confirm against its definition.
register(JSONParser)
                 eager_parse=True):
        self.input_string = input_string
        self.pattern = pattern
        self.language = language
        super(JSONParser, self).__init__(input_string,
                                         pattern,
                                         language=language,
                                         flags=[],
                                         eager_parse=eager_parse)

    def to_internal(self):
        """Return the parsed ``SubtitleSet``, building it on first use.

        Each decoded JSON entry is read through its ``position``, ``start``,
        ``end`` and ``text`` keys; entries are appended in ``position`` order.
        The result is cached on ``self.sub_set`` only once every entry has
        been appended, so a failed parse never leaves an empty or partial set
        behind for later calls to return.

        Raises:
            SubtitleParserError: if ``input_string`` is not valid JSON.
        """
        if not hasattr(self, 'sub_set'):
            # Work on a local; assign to self only after a complete parse.
            sub_set = SubtitleSet(self.language)

            try:
                data = json.loads(self.input_string)
            except ValueError:
                raise SubtitleParserError("Invalid JSON data provided.")

            # Sort by the ``position`` key
            for sub in sorted(data, key=lambda k: k['position']):
                sub_set.append_subtitle(sub['start'], sub['end'],
                                        sub['text'])

            self.sub_set = sub_set

        return self.sub_set


# Hand JSONParser to register() when the module is imported.
# NOTE(review): register() is not visible here; presumably it records the
# class in a parser registry -- verify against its definition.
register(JSONParser)