def get_markup(self, text):
    """Convert SRT inline markup in ``text`` to span-based markup.

    ``text`` is wrapped in a ``<p>`` element and parsed with
    ``etree.fromstring``, so it has to be well-formed once wrapped; any
    error raised by the parser propagates to the caller.

    Direct ``<b>``, ``<i>`` and ``<u>`` children are rewritten as ``<span>``
    elements carrying ``fontWeight="bold"``, ``fontStyle="italic"`` and
    ``textDecoration="underline"`` respectively.  For any other child tag
    only the text following the element (its tail) is kept.  Only the
    ``text`` of direct children is read, so markup nested inside a child
    is not carried over.

    Returns the pieces joined into one string with every newline replaced
    by ``<br />``.
    """
    span_attrs = {
        'b': 'fontWeight="bold"',
        'i': 'fontStyle="italic"',
        'u': 'textDecoration="underline"',
    }
    base_span = '<span %s>%s</span>'

    # create a simple element so we can parse using etree
    # since srt uses html like tags as markup
    el = etree.fromstring("<p>%s</p>" % text)

    content = [el.text]
    # Iterate the element itself rather than calling ``getchildren()``,
    # which is deprecated and absent from the standard library ElementTree
    # since Python 3.9.
    for child in el:
        attrs = span_attrs.get(child.tag)
        if attrs is not None:
            # ``child.text`` is None for an empty tag such as ``<b></b>``;
            # fall back to '' so the literal text "None" is never emitted.
            content.append(base_span % (attrs, child.text or ''))
        content.append(child.tail)
    if el.tail:
        content.append(el.tail.strip())

    # filter(None, ...) drops the None / empty pieces before joining.
    return "".join(filter(None, content)).replace("\n", "<br />")


register(SRTParser)
# NOTE(review): this is a fragment of a parsing loop; the enclosing definition and its
# ``try:`` start outside the visible source, so the code below is left untouched.
# ``hasattr(item, 'duration')`` tests for a Python attribute called ``duration``, while the
# value is then read with ``item.get('dur', 0)`` (the same accessor used for ``start``).
# Presumably the check was meant to look for the ``dur`` key instead -- confirm against the
# type of ``item``; if the check is always false, every duration comes from the fallbacks.
has_subs = False total_items = len(xml) for i,item in enumerate(xml): duration = 0 start = int(float(item.get('start')) * 1000) if hasattr(item, 'duration'): duration = int(float(item.get('dur', 0)) * 1000) elif i+1 < total_items: # youtube sometimes omits the duration attribute # in this case we're displaying until the next sub # starts next_item = xml[i+1] duration = int(float(next_item.get('start')) * 1000) - start else: # hardcode the last sub duration at 3 seconds duration = 3000 end = start + duration text = item.text and unescape_html(item.text) or u'' self.sub_set.append_subtitle(start, end, text) has_subs = True if not has_subs: raise ValueError("No subs") except Exception as e: raise SubtitleParserError(original_error=e) return self.sub_set register(YoutubeParser)
) return output def get_markup(self, text): # create a simple element so we can parse using etree # since srt uses html like tags as markup base = "<p>%s</p>" % text el = etree.fromstring(base) content = [el.text] base_span = '<span %s>%s</span>' for child in el.getchildren(): tag = child.tag if tag == 'b': content.append(base_span % ('fontWeight="bold"', child.text)) elif tag == 'i': content.append(base_span % ('fontStyle="italic"', child.text)) elif tag == 'u': content.append(base_span % ('textDecoration="underline"', child.text)) content.append(child.tail) if el.tail: content.append(el.tail.strip()) return "".join(filter(None, content)).replace("\n", "<br />") register(SRTParser)
else: return None def get_markup(self, text): # create a simple element so we can parse using etree # webvtt uses html like tags as markup base = "<p>%s</p>" % text el = etree.fromstring(base) content = [el.text] base_span = '<span %s>%s</span>' for child in el.getchildren(): tag = child.tag if tag == 'b': content.append(base_span % ('fontWeight="bold"', child.text)) elif tag == 'i': content.append(base_span % ('fontStyle="italic"', child.text)) elif tag == 'u': content.append(base_span % ('textDecoration="underline"', child.text)) content.append(child.tail) if el.tail: content.append(el.tail.strip()) return "".join(filter(None, content)).replace("\n", "<br />") register(WEBVTTParser)
class JSONParser(BaseTextParser):
    """Parser for subtitles supplied as JSON text.

    The decoded JSON is iterated as a collection of mappings; each one is
    read for the keys ``position`` (used only for ordering), ``start``,
    ``end`` and ``text``.
    """

    file_type = 'json'

    def __init__(self, input_string, pattern, language=None, flags=None,
                 eager_parse=True):
        """Store the raw input and delegate the rest to ``BaseTextParser``.

        ``flags`` defaults to ``None``, which is treated as an empty list,
        so no list object is shared between calls.  The flags supplied by
        the caller are forwarded to the base class; previously an empty
        list was always passed and the argument was silently ignored.
        """
        # Copy so the base class never holds the caller's own list.
        flags = list(flags) if flags else []
        self.input_string = input_string
        self.pattern = pattern
        self.language = language
        super(JSONParser, self).__init__(input_string, pattern,
                                         language=language, flags=flags,
                                         eager_parse=eager_parse)

    def to_internal(self):
        """Parse ``input_string`` into a ``SubtitleSet`` and cache it.

        The result is stored on ``self.sub_set`` and returned as-is on
        later calls.  Raises ``SubtitleParserError`` when the input is not
        valid JSON.  Errors caused by entries lacking one of the expected
        keys are not caught here.
        """
        if hasattr(self, 'sub_set'):
            return self.sub_set

        # Fill a local set and publish it only once parsing has succeeded.
        # Assigning ``self.sub_set`` up front would leave an empty or
        # half-filled set cached after a failure, and the next call would
        # return it without reporting the error.
        sub_set = SubtitleSet(self.language)
        try:
            data = json.loads(self.input_string)
        except ValueError:
            raise SubtitleParserError("Invalid JSON data provided.")

        # Sort by the ``position`` key
        for sub in sorted(data, key=lambda k: k['position']):
            sub_set.append_subtitle(sub['start'], sub['end'], sub['text'])

        self.sub_set = sub_set
        return self.sub_set


register(JSONParser)
eager_parse=True): self.input_string = input_string self.pattern = pattern self.language = language super(JSONParser, self).__init__(input_string, pattern, language=language, flags=[], eager_parse=eager_parse) def to_internal(self): if not hasattr(self, 'sub_set'): self.sub_set = SubtitleSet(self.language) try: data = json.loads(self.input_string) except ValueError: raise SubtitleParserError("Invalid JSON data provided.") # Sort by the ``position`` key data = sorted(data, key=lambda k: k['position']) for sub in data: self.sub_set.append_subtitle(sub['start'], sub['end'], sub['text']) return self.sub_set register(JSONParser)