(match.group(1), match.group(2).upper()), name="CM_uppercase_after_dot"), # remove spaces before punctuation NReProcessor(re.compile(r'(?u)(?:(?<=^)|(?<=\w)) +([!?.,](?![!?.,]))'), r"\1", name="CM_punctuation_space"), ] post_processors = empty_line_post_processors class RemoveTags(SubtitleModification): identifier = "remove_tags" description = "Remove all style tags" exclusive = True modifies_whole_file = True long_description = """\ Removes all possible style tags from the subtitle, such as font, bold, color etc. """ def modify(self, content, debug=False, parent=None, **kwargs): for entry in parent.f: # this actually plaintexts the entry and by re-assigning it to plaintext, it replaces \n with \N again entry.plaintext = entry.plaintext registry.register(CommonFixes) registry.register(RemoveTags)
# coding=utf-8

import logging

from subzero.modification.mods import SubtitleModification
from subzero.modification import registry

logger = logging.getLogger(__name__)


class ChangeFPS(SubtitleModification):
    """Whole-file modification that converts the subtitle from one framerate to another."""

    identifier = "change_FPS"
    description = "Change the FPS of the subtitle"
    exclusive = True
    advanced = True
    modifies_whole_file = True

    long_description = """\
    Re-syncs the subtitle to the framerate of the current media file.
    """

    def modify(self, content, debug=False, parent=None, **kwargs):
        """Call ``parent.f.transform_framerate`` with the requested framerates.

        :param content: not used by this modification
        :param debug: not used by this modification
        :param parent: object whose ``f`` attribute exposes ``transform_framerate``
        :param kwargs: ``from`` and ``to`` hold the source and target framerate;
                       both are converted with ``float()``
        """
        # "from" is a reserved word, so both values have to be looked up by key
        source_fps, target_fps = (float(kwargs.get(key)) for key in ("from", "to"))
        parent.f.transform_framerate(source_fps, target_fps)


registry.register(ChangeFPS)
("dark-red", "#800000"), ("dark-green", "#008000"), ("dark-yellow", "#808000"), ("dark-blue", "#000080"), ("dark-magenta", "#800080"), ("dark-cyan", "#008080"), ("dark-grey", "#808080"), ]) class Color(SubtitleModification): identifier = "color" description = "Change the color of the subtitle" exclusive = True advanced = True colors = COLOR_MAP long_description = """\ Adds the requested color to every line of the subtitle. Support depends on player. """ def modify(self, content, debug=False, parent=None, **kwargs): color = self.colors.get(kwargs.get("name")) if color: return u'<font color="%s">%s</font>' % (color, content) return content registry.register(Color)
class ShiftOffset(SubtitleModification):
    """Whole-file modification that shifts the subtitle by an h/m/s/ms offset."""

    identifier = "shift_offset"
    description = "Change the timing of the subtitle"
    exclusive = False
    advanced = True
    args_mergeable = True
    modifies_whole_file = True

    long_description = "Adds or substracts a certain amount of time from the whole subtitle to match your media"

    @classmethod
    def merge_args(cls, args1, args2):
        """Combine two argument dicts by summing the values of equal keys.

        :param args1: mapping of offset unit to an int-convertible value
        :param args2: mapping of offset unit to an int-convertible value
        :return: new dict holding every key of both inputs with its int total
        :raises ValueError: if a value cannot be converted with ``int()``
        """
        # .items() instead of .iteritems(): the latter does not exist on Python 3,
        # while .items() behaves the same for this purpose on both Python 2 and 3
        merged = dict((key, int(value)) for key, value in args1.items())
        for key, value in args2.items():
            merged[key] = merged.get(key, 0) + int(value)
        return merged

    def modify(self, content, debug=False, parent=None, **kwargs):
        """Call ``parent.f.shift`` with the requested offset.

        :param content: not used by this modification
        :param debug: not used by this modification
        :param parent: object whose ``f`` attribute exposes ``shift``
        :param kwargs: optional ``h``, ``m``, ``s`` and ``ms`` values; each defaults to 0
                       and is converted with ``int()``
        """
        parent.f.shift(h=int(kwargs.get("h", 0)), m=int(kwargs.get("m", 0)), s=int(kwargs.get("s", 0)),
                       ms=int(kwargs.get("ms", 0)))


registry.register(ShiftOffset)
identifier = "shift_offset" description = "Change the timing of the subtitle" exclusive = False advanced = True args_mergeable = True modifies_whole_file = True long_description = "Adds or substracts a certain amount of time from the whole subtitle to match your media" @classmethod def merge_args(cls, args1, args2): new_args = dict((key, int(value)) for key, value in args1.iteritems()) for key, value in args2.iteritems(): if not int(value): continue if key in new_args: new_args[key] += int(value) else: new_args[key] = int(value) return dict(filter(lambda (k, v): bool(v), new_args.iteritems())) def modify(self, content, debug=False, parent=None, **kwargs): parent.f.shift(h=int(kwargs.get("h", 0)), m=int(kwargs.get("m", 0)), s=int(kwargs.get("s", 0)), ms=int(kwargs.get("ms", 0))) registry.register(ShiftOffset)
name="CM_RTL_reverse") ] split_upper_re = re.compile(ur"(\s*[.!?♪\-]\s*)") class FixUppercase(SubtitleModification): identifier = "fix_uppercase" description = "Fixes all-uppercase subtitles" modifies_whole_file = True exclusive = True order = 41 only_uppercase = True apply_last = True long_description = "Some subtitles are in all-uppercase letters. This at least makes them readable." def capitalize(self, c): return u"".join([s.capitalize() for s in split_upper_re.split(c)]) def modify(self, content, debug=False, parent=None, **kwargs): for entry in parent.f: entry.plaintext = self.capitalize(entry.plaintext) registry.register(CommonFixes) registry.register(RemoveTags) registry.register(ReverseRTL) registry.register(FixUppercase)
re.compile(ur'(?u)(^.*(?<=[A-ZÀ-Ž]{2})[A-ZÀ-Ž-_\s0-9"\']+:\s*)'), "", name="HI_before_colon"), # text in brackets at start, after optional dash, before colon or at end of line # fixme: may be too aggressive NReProcessor(re.compile( ur'(?um)(^-?\s?[([][A-zÀ-ž-_\s]{3,}[)\]](?:(?=$)|:\s*))'), "", name="HI_brackets_special"), # all caps line (at least 4 consecutive uppercase chars) NReProcessor(re.compile(ur'(?u)(^(?=.*[A-ZÀ-Ž]{4,})[A-ZÀ-Ž-_\s]+$)'), "", name="HI_all_caps"), # dash in front # NReProcessor(re.compile(r'(?u)^\s*-\s*'), "", name="HI_starting_dash"), # all caps at start before new sentence NReProcessor(re.compile( ur'(?u)^(?=[A-ZÀ-Ž]{4,})[A-ZÀ-Ž-_\s]+\s([A-ZÀ-Ž][a-zà-ž].+)'), r"\1", name="HI_starting_upper_then_sentence"), ] post_processors = empty_line_post_processors registry.register(HearingImpaired)
data_dict = OCR_fix_data.get(parent.language.alpha3t) if not data_dict: logger.debug("No SnR-data available for language %s", parent.language) return self.data_dict = data_dict self.processors = self.get_processors() def get_processors(self): if not self.data_dict: return [] return [ WholeLineProcessor(self.data_dict["WholeLines"], name="SE_replace_line"), MultipleWordReProcessor(self.data_dict["WholeWords"], name="SE_replace_word"), MultipleWordReProcessor(self.data_dict["BeginLines"], name="SE_replace_beginline"), MultipleWordReProcessor(self.data_dict["EndLines"], name="SE_replace_endline"), MultipleWordReProcessor(self.data_dict["PartialLines"], name="SE_replace_partialline"), MultipleLineProcessor(self.data_dict["PartialWordsAlways"], name="SE_replace_partialwordsalways") ] registry.register(FixOCR)
def modify(self, content, debug=False, parent=None, **kwargs): for entry in parent.f: # this actually plaintexts the entry and by re-assigning it to plaintext, it replaces \n with \N again entry.plaintext = entry.plaintext class ReverseRTL(SubtitleModification): identifier = "reverse_rtl" description = "Reverse punctuation in RTL languages" exclusive = True order = 50 languages = [Language("heb")] long_description = """\ Some playback devices don't properly handle right-to-left markers for punctuation. Physically swap punctuation. Applicable to languages: hebrew """ processors = [ # new? (?u)(^([\s.!?]*)(.+?)(\s*)(-?\s*)$); \5\4\3\2 #NReProcessor(re.compile(ur"(?u)((?=(?<=\b|^)|(?<=\s))([.!?-]+)([^.!?-]+)(?=\b|$|\s))"), r"\3\2", # name="CM_RTL_reverse") NReProcessor(re.compile(ur"(?u)(^([\s.!?]*)(.+?)(\s*)(-?\s*)$)"), r"\5\4\3\2", name="CM_RTL_reverse") ] registry.register(CommonFixes) registry.register(RemoveTags) registry.register(ReverseRTL)
# coding=utf-8

import logging

from subzero.modification.mods import SubtitleModification
from subzero.modification import registry

logger = logging.getLogger(__name__)


class ChangeFPS(SubtitleModification):
    """Whole-file modification that converts the subtitle from one framerate to another."""

    identifier = "change_FPS"
    description = "Change the FPS of the subtitle"
    exclusive = True
    advanced = True
    modifies_whole_file = True

    long_description = "Re-syncs the subtitle to the framerate of the current media file."

    def modify(self, content, debug=False, parent=None, **kwargs):
        """Call ``parent.f.transform_framerate`` with the requested framerates.

        :param content: not used by this modification
        :param debug: not used by this modification
        :param parent: object whose ``f`` attribute exposes ``transform_framerate``
        :param kwargs: ``from`` and ``to`` hold the source and target framerate;
                       both are converted with ``float()``
        """
        # "from" is a reserved word, so both values have to be looked up by key
        source_fps, target_fps = (float(kwargs.get(key)) for key in ("from", "to"))
        parent.f.transform_framerate(source_fps, target_fps)


registry.register(ChangeFPS)
("dark-red", "#800000"), ("dark-green", "#008000"), ("dark-yellow", "#808000"), ("dark-blue", "#000080"), ("dark-magenta", "#800080"), ("dark-cyan", "#008080"), ("dark-grey", "#808080"), ]) class Color(SubtitleModification): identifier = "color" description = "Change the color of the subtitle" exclusive = True advanced = True modifies_whole_file = True apply_last = True colors = COLOR_MAP long_description = "Adds the requested color to every line of the subtitle. Support depends on player." def modify(self, content, debug=False, parent=None, **kwargs): color = self.colors.get(kwargs.get("name")) if color: for entry in parent.f: entry.text = u'<font color="%s">%s</font>' % (color, entry.text) registry.register(Color)
NReProcessor(re.compile(ur'(?u)([A-zÀ-ž][a-zà-ž]+)(I+)'), lambda match: ur'%s%s' % (match.group(1), "l" * len(match.group(2))), name="CM_uppercase_i_in_word"), # fix spaces in numbers (allows for punctuation: ,.:' (comma/dot only fixed if after space, those may be # countdowns otherwise); don't break up ellipses # fixme: maybe check whether it's a countdown (second part smaller than the first), otherwise handle default? NReProcessor(re.compile( r'(?u)([0-9]+[0-9:\']*(?<!\.\.)\s+(?!\.\.)[0-9,.:\']*(?=[0-9]+)[0-9,.:\'\s]+)(?=\s|$)' ), lambda match: match.group(1).replace(" ", ""), name="CM_spaces_in_numbers"), # uppercase after dot NReProcessor(re.compile(ur'(?u)((?:[^.\s])+\.\s+)([a-zà-ž])'), lambda match: ur'%s%s' % (match.group(1), match.group(2).upper()), name="CM_uppercase_after_dot"), # remove spaces before punctuation NReProcessor(re.compile(r'(?u)(?:(?<=^)|(?<=\w)) +([!?.,](?![!?.,]))'), r"\1", name="CM_punctuation_space"), ] post_processors = empty_line_post_processors registry.register(CommonFixes)
# text in brackets at start, after optional dash, before colon or at end of line # fixme: may be too aggressive #NReProcessor(re.compile(ur'(?um)(^-?\s?[([][A-zÀ-ž-_\s]{3,}[)\]](?:(?=$)|:\s*))'), "", # name="HI_brackets_special"), # all caps line (at least 4 consecutive uppercase chars) NReProcessor(re.compile(ur'(?u)(^(?=.*[A-ZÀ-Ž&+]{4,})[A-ZÀ-Ž-_\s&+]+$)'), "", name="HI_all_caps", supported=lambda p: not p.only_uppercase), # remove MAN: NReProcessor(re.compile(ur'(?suxi)(.*MAN:\s*)'), "", name="HI_remove_man"), # dash in front # NReProcessor(re.compile(r'(?u)^\s*-\s*'), "", name="HI_starting_dash"), # all caps at start before new sentence NReProcessor(re.compile(ur'(?u)^(?=[A-ZÀ-Ž]{4,})[A-ZÀ-Ž-_\s]+\s([A-ZÀ-Ž][a-zà-ž].+)'), r"\1", name="HI_starting_upper_then_sentence", supported=lambda p: not p.only_uppercase), # remove music symbols NReProcessor(re.compile(ur'(?u)(^%(t)s[*#¶♫♪\s]*%(t)s[*#¶♫♪\s]+%(t)s[*#¶♫♪\s]*%(t)s$)' % {"t": TAG}), "", name="HI_music_symbols_only"), ] post_processors = empty_line_post_processors registry.register(HearingImpaired)
data_dict = OCR_fix_data.get(parent.language.alpha3t) if not data_dict: logger.debug("No SnR-data available for language %s", parent.language) return self.data_dict = data_dict self.processors = self.get_processors() def get_processors(self): if not self.data_dict: return [] return [ # remove broken HI tag colons (ANNOUNCER'., ". instead of :) after at least 3 uppercase chars # don't modify stuff inside quotes NReProcessor(re.compile(ur'(?u)(^[^"\'’ʼ❜‘‛”“‟„]*(?<=[A-ZÀ-Ž]{3})[A-ZÀ-Ž-_\s0-9]+)' ur'(["\'’ʼ❜‘‛”“‟„]*[.,‚،⹁、;]+)(\s*)(?!["\'’ʼ❜‘‛”“‟„])'), r"\1:\3", name="OCR_fix_HI_colons", supported=lambda p: not p.only_uppercase), # fix F'bla NReProcessor(re.compile(ur'(?u)(\bF)(\')([A-zÀ-ž]*\b)'), r"\1\3", name="OCR_fix_F"), WholeLineProcessor(self.data_dict["WholeLines"], name="OCR_replace_line"), MultipleWordReProcessor(self.data_dict["WholeWords"], name="OCR_replace_word"), MultipleWordReProcessor(self.data_dict["BeginLines"], name="OCR_replace_beginline"), MultipleWordReProcessor(self.data_dict["EndLines"], name="OCR_replace_endline"), MultipleWordReProcessor(self.data_dict["PartialLines"], name="OCR_replace_partialline"), MultipleLineProcessor(self.data_dict["PartialWordsAlways"], name="OCR_replace_partialwordsalways") ] registry.register(FixOCR)