def test_len(self):
    """``len`` of a ``Line`` is compared against the text without style markup."""
    assert len(Line("- <i>It's working.</i>")) == len("- It's working.")
    # Raw string on purpose: in a normal literal "\a" is the BEL control
    # character, so the tag would not contain a real backslash.
    assert len(Line(r"{\an8}appears at top")) == len("appears at top")
    assert len(
        Line('<i>Previously on <font color="#ffff00">"TV Show"</font>...</i>')
    ) == len('Previously on "TV Show"...')
def test_fix_styles(self, processor: StyleProcessor):
    """Check ``fix_styles`` on lines containing empty or blank italic tag pairs."""
    expectations = (
        ("<i></i>", ""),
        ("<i> </i>", " "),
        ("</i> <i>", " "),
        ("<i></i><i></i>", ""),
        ("<i></i> <i> </i> <i>sentence</i>", " <i>sentence</i>"),
    )
    for raw, cleaned in expectations:
        assert processor.fix_styles(Line(raw)) == cleaned
def test_clean_parentheses(self, fake_processor: SDHProcessor):
    """Check that bracketed annotations are dropped by ``clean_parentheses``."""
    expectations = (
        ("that's for you. [sighs]", "that's for you. "),
        (
            "on my part, I mean, [laughs] utter idiocy.",
            "on my part, I mean, utter idiocy.",
        ),
        (
            "telling a joke [laughs], I mean, [continues laughing] you should've seen him.",
            "telling a joke , I mean, you should've seen him.",
        ),
    )
    for raw, cleaned in expectations:
        assert fake_processor.clean_parentheses(Line(raw)) == cleaned
def test_strip(self):
    """Check ``Line.strip`` on plain text and on text wrapped in ``<i>`` tags."""
    expectations = (
        ("", ""),
        (" ", ""),
        (" ", ""),
        ("test", "test"),
        (" test", "test"),
        (" test ", "test"),
        ("test <i>", "test<i>"),
        ("<i> test", "<i>test"),
        (" <i>test", "<i>test"),
        ("<i>test</i>", "<i>test</i>"),
    )
    for raw, stripped in expectations:
        assert Line(raw).strip() == stripped
def process_section(cls, section: Section) -> Section:
    """Merge short dialog chunks of a section into single lines.

    Sections with at most one line are returned untouched. Otherwise the
    lines are grouped with ``cls.split_dialog_chunks``; a multi-line chunk is
    replaced by ``Line.merge(chunk)`` when ``cls.is_short`` accepts the merged
    line, and kept line by line otherwise.

    Args:
        section: The section whose ``lines`` attribute is rewritten.

    Returns:
        The same ``section`` object that was passed in.
    """
    if len(section) <= 1:
        return section

    processed = []
    for chunk in cls.split_dialog_chunks(section.lines):
        if len(chunk) <= 1:
            # Nothing to merge; extend() also tolerates an empty chunk
            # instead of failing on chunk[0].
            processed.extend(chunk)
            continue
        # Merge once and reuse the result for both the check and the output.
        merged = Line.merge(chunk)
        if cls.is_short(merged):
            processed.append(merged)
        else:
            processed.extend(chunk)

    section.lines = processed
    return section
def clean_hi(cls, line: Line) -> Line:
    """Clean hearing-impaired annotations from a line.

    First removes a leading speaker label or bracketed prefix while keeping
    capture group 1 (the leading dash/whitespace run) and group 5 (an
    optional ``<i>``/``</i>`` tag), then delegates the remaining bracketed
    text to ``cls.clean_parentheses``.
    """
    leading_label = r"^([-‐\s<i>]+)?((\b[-A-Za-z.']+\s?#?\d?){1,2}(?!\.)([\[(][\w\s]*[\])])?:(?!\w)|[\[]+.*[\]:]+)(<\/?i>)?([\s])*"
    without_label = line.sub(leading_label, r"\1\5")
    return cls.clean_parentheses(without_label)
def test_contains_hi(self, fake_processor: SDHProcessor):
    """Check ``contains_hi`` on annotated lines and on lines with clock times."""
    with_hi = (
        "that's for you. [sighs]",
        "‐TEACHER: blabla...",
        "[Laura] sentence",
        "<i>[Laura]</i> <i>sentence</i>",
        "- CHRISTOPHER:<i> Hello?</i>",
    )
    without_hi = (
        "9:17 a.m., to be specific,",
        "between 4:00 and 6:00.",
        "I got some time between 4:00 and 6:00.",
    )
    for text in with_hi:
        assert fake_processor.contains_hi(Line(text))
    for text in without_hi:
        assert not fake_processor.contains_hi(Line(text))
def test_in_blacklist(self, fake_processor: BlacklistProcessor):
    """Check that advertisement and URL lines are reported by ``in_blacklist``."""
    blacklisted = (
        "Advertise your product or brand here",
        "contact www.OpenSubtitles.org today",
        '<font color="#ffff00">Provided by username</font>',
        "[http://example.com]",
        "http://foo.network",
        "Visit https://another-example.com",
        "find more under subs.link",
        "twitter.com/username",
    )
    for text in blacklisted:
        assert fake_processor.in_blacklist(Line(text))
def test_clean_dashes(self, processor: DialogProcessor):
    """Check that ``clean_dashes`` normalizes a leading dialog dash to ``"- "``."""
    expectations = (
        ("-dialog.", "- dialog."),
        ("- dialog.", "- dialog."),
        ("i-in", "i-in"),
        ("<i>-dialog.</i>", "<i>- dialog.</i>"),
        ("<i>-</i>dialog.", "<i>- </i>dialog."),
        ("-...dialog.", "- ...dialog."),
    )
    for raw, cleaned in expectations:
        assert processor.clean_dashes(Line(raw)) == cleaned
def test_is_parentheses(self, fake_processor: SDHProcessor):
    """Check ``is_parentheses`` on fully bracketed lines and on partial ones."""
    accepted = (
        "(distant shouting))",
        "[ distant shouting ]",
        "-[camera shutter]",
    )
    rejected = (
        "(distant shouting,",
        "weapons clashing))",
        "[laughing nervously]:",
    )
    for text in accepted:
        assert fake_processor.is_parentheses(Line(text))
    for text in rejected:
        # The negative cases must return the literal ``False``.
        assert fake_processor.is_parentheses(Line(text)) is False
def test_fix_space_punctuation(self, processor: ErrorProcessor):
    """Check spacing around punctuation and ellipses after ``fix_space_punctuation``."""
    expectations = (
        (
            "First sentence . Second sentence , blabla.",
            "First sentence. Second sentence, blabla.",
        ),
        (
            "First sentence... Second sentence.",
            "First sentence... Second sentence.",
        ),
        ("Whoa ...", "Whoa..."),
        ("Whoa...", "Whoa..."),
        ("Yeah. ..maybe.", "Yeah...maybe."),
        ("Begin... ...end.", "Begin... ...end."),
        ("- ...dialog", "- ...dialog"),
    )
    for raw, fixed in expectations:
        assert processor.fix_space_punctuation(Line(raw)) == fixed
def test_clean_hi(self, fake_processor: SDHProcessor):
    """Check ``clean_hi`` on speaker labels, bracketed names and clock times."""
    expectations = (
        ("that's for you. [sighs]", "that's for you. "),
        ("‐TEACHER: blabla...", "‐blabla..."),
        ("[Laura] sentence", "sentence"),
        ("<i>[Laura]</i> <i>sentence</i>", "<i></i><i>sentence</i>"),
        ("- CHRISTOPHER:<i> Hello?</i>", "- <i>Hello?</i>"),
        ("9:17 a.m., to be specific,", "9:17 a.m., to be specific,"),
    )
    for raw, cleaned in expectations:
        assert fake_processor.clean_hi(Line(raw)) == cleaned
def test_strip_styles(self):
    """``Line.strip_styles`` is expected to return the text without style markup."""
    assert Line("- <i>It's working.</i>").strip_styles() == "- It's working."
    # Raw string on purpose: in a normal literal "\a" is the BEL control
    # character, so the tag would not contain a real backslash.
    assert Line(r"{\an8}appears at top").strip_styles() == "appears at top"
def test_is_dialog(self):
    """Check ``Line.is_dialog`` on lines with and without a leading dash."""
    dialog_lines = (
        "- <i>This is a dialog.</i>",
        "-this is also a dialog",
    )
    for text in dialog_lines:
        assert Line(text).is_dialog()
    assert not Line("not a dialog").is_dialog()
def fix_spaces(line: Line) -> Line:
    """Add the missing space between two sentences.

    The pattern targets a run of ``.``, ``?`` or ``!`` that directly follows a
    word character and is directly followed by an uppercase letter plus a
    lowercase letter; a single space is inserted after the punctuation.
    """
    glued_sentences = r"\b([.?!]+)([A-Z][a-z])"
    with_space = r"\1 \2"
    return line.sub(glued_sentences, with_space)
def trim_whitespace(line: Line) -> Line:
    """Collapse whitespace runs between words, including runs that surround
    ``<i>``/``</i>`` tags, and strip the result."""
    # Each whitespace run (optionally containing italic tags) becomes one
    # space followed by the last captured tag, if any.
    collapsed = line.sub(r"\s+(<\/?i>)*\s*", r" \1")
    return collapsed.strip()
def test_split_dialog_chunks(self, processor: LineLengthProcessor):
    """Check how ``split_dialog_chunks`` groups lines around dash-prefixed lines."""
    scenarios = (
        (["hello", "there"], [["hello", "there"]]),
        (["- hello", "there"], [["- hello", "there"]]),
        (["hello", "- there"], [["hello"], ["- there"]]),
        (["hello", "- there", "man"], [["hello"], ["- there", "man"]]),
        (["- hi", "- bye"], [["- hi"], ["- bye"]]),
        (
            ["- hi", "bob", "- bye", "bob"],
            [["- hi", "bob"], ["- bye", "bob"]],
        ),
        (
            ["hi", "-bob", "- bye", "bob"],
            [["hi"], ["-bob"], ["- bye", "bob"]],
        ),
        (
            ["-I'm gonna call the police,", "this can't keep happening."],
            [["-I'm gonna call the police,", "this can't keep happening."]],
        ),
    )
    for texts, grouped_texts in scenarios:
        lines = [Line(text) for text in texts]
        expected = [[Line(text) for text in group] for group in grouped_texts]
        assert processor.split_dialog_chunks(lines) == expected
def fix_styles(line: Line) -> Line:
    """Remove leftover italic tag pairs that enclose nothing but whitespace.

    Two adjacent ``<i>``/``</i>`` tags (opening or closing, in any order) are
    replaced by the whitespace that stood between them.
    """
    empty_tag_pair = r"<\/?i>(\s*)<\/?i>"
    return line.sub(empty_tag_pair, r"\1")
def join(self) -> Line:
    """Return ``self.lines`` joined with single spaces, wrapped in a ``Line``."""
    joined_text = " ".join(self.lines)
    return Line(joined_text)
def test_is_simple_hi(self, fake_processor: SDHProcessor):
    """Check that lines made only of music notes count as simple HI."""
    for text in ("♪", "- ♪ ♪"):
        assert fake_processor.is_simple_hi(Line(text))
def fix_space_punctuation(line: Line) -> Line:
    """Normalize whitespace around ``.``, ``,``, ``!`` and ``?``.

    Two substitutions are applied in order: whitespace in front of
    punctuation is removed, then multiple whitespace characters after
    punctuation are reduced to one space.
    """
    # Whitespace before punctuation, unless it follows a leading "-" or "...".
    space_before = r"(?<!^-)(?<!\.{3})\s+([.,!?]+)"
    # Two or more whitespace characters after punctuation, not at end of line.
    spaces_after = r"([.,!?]+)\s{2,}(?!$)"
    return line.sub(space_before, r"\1").sub(spaces_after, r"\1 ")
def fix_hyphen(line: Line) -> Line:
    """Replace ``'`` immediately followed by ``’`` with a single ``'``."""
    # NOTE(review): the pattern is a two-character sequence, not a character
    # class, and the function name mentions hyphens - confirm the intent.
    doubled_apostrophe = r"'’"
    return line.sub(doubled_apostrophe, "'")
def test_is_hi(self, fake_processor: SDHProcessor):
    """Check ``is_hi`` on pure annotation lines and on lines with spoken text."""
    hi_only = (
        "[camera shutter]",
        "-[camera shutter]",
        "- [camera shutter]",
        "(distant shouting))",
        "[ distant shouting ]",
        "♪",
        "- ♪ ♪",
        "♪ (SOFT PIANO MUSIC PLAYS)) ♪",
    )
    not_hi = (
        "(distant shouting,",
        "weapons clashing))",
        "-[journalists] Christine!",
        "- TAMIKA: Yeah.",
    )
    for text in hi_only:
        assert fake_processor.is_hi(Line(text))
    for text in not_hi:
        # The negative cases must return the literal ``False``.
        assert fake_processor.is_hi(Line(text)) is False
def clean_dashes(line: Line) -> Line:
    """Normalize a dash run at the start of a line to ``"- "``.

    Leading ``<i>``/``</i>`` tags may precede the dash run; the substitution
    re-emits the last captured tag, then ``"- "``. Whitespace after the
    dashes is consumed.
    """
    leading_dashes = r"^(<\/?i>)*([-‐]+)(\s+)?"
    return line.sub(leading_dashes, r"\1- ")
def clean_parentheses(line: Line) -> Line:
    """Remove text enclosed in ``()``, ``[]`` or ``*`` markers.

    The match starts at ``(``, ``[`` or ``*`` and runs lazily up to the next
    run of ``)``, ``]``, ``*`` or ``:`` characters, which is removed as well.
    """
    enclosed_text = r"[(\[*].*?[)\]*:]+"
    return line.sub(enclosed_text, "")
def fix_quote(line: Line) -> Line:
    """Replace the HTML entity ``&quot;`` with a literal double quote.

    Args:
        line: The subtitle line to clean.

    Returns:
        The result of ``line.sub`` with every ``&quot;`` turned into ``"``.
    """
    # The pattern must name the entity explicitly: a bare double quote after
    # the ``r"`` prefix would start a triple-quoted string and never close.
    return line.sub(r"&quot;", '"')
def is_parenthesis_not_matching(line: Line) -> bool:
    """Tell whether the round or square brackets in ``line`` are unbalanced.

    Only the number of opening and closing characters is compared; their
    order is not checked. A line without any bracket yields ``False``.
    """
    if re.search(r"[()\[\]]", line) is None:
        return False
    round_mismatch = line.count("(") != line.count(")")
    square_mismatch = line.count("[") != line.count("]")
    return round_mismatch or square_mismatch
def test_is_music(self, fake_processor: SDHProcessor):
    """Check ``is_music`` on music descriptions and on one ordinary line."""
    music_lines = (
        "♪ ominous music ♪",
        "- ♪ mysterious music ♪",
        "♪ somber music ♪",
        "♪ foreboding music ♪",
        "♪ chilling music ♪",
        "♪ solemn music ♪",
        " ♪ dramatic music ♪",
        "♪ poignant music ♪",
        "♪ emotional music ♪",
        "♪ uneasy music ♪",
        "♪ harrowing music ♪",
        "♪ sinister music ♪",
        "♪ upbeat music plays ♪",
        "♪ gentle music ♪",
        "♪ light orchestral music ♪",
        "♪ upbeat folk music playing ♪",
        "♪ dramatic music playing ♪",
        "♪ ominous music playing ♪",
        "♪ dramatic music playing ♪",
        "♪ upbeat music playing ♪",
        "♪ dramatic music playing ♪",
        "♪ dramatic music playing ♪",
        "♪ smooth music playing ♪",
        "♪ dance music playing ♪",
        "♪ dramatic music playing ♪",
        "♪ dramatic music playing ♪",
        "♪ dramatic music playing ♪",
        "♪ dramatic music playing ♪",
        "♪ swelling orchestral music plays ♪",
        "♪ dramatic music playing ♪",
        "♪ upbeat song playing over speakers ♪",
        "♪ upbeat song playing over headphones ♪",
        "♪ soft, dramatic music ♪",
        "♪ soft, stirring music ♪",
        "♪ music intensifies ♪",
        "♪ music swells ♪",
        "♪ dark music swells ♪",
        "♪ dramatic musical sting ♪",
        "♪ uneasy musical crescendo ♪",
        "♪ up‐tempo percussive music playing ♪",
        "<i>quiet, eerie music </i>",
        "<i>droning music fades in </i>",
        "<i>women vocalizing eerily </i>",
    )
    for text in music_lines:
        assert fake_processor.is_music(Line(text))
    assert not fake_processor.is_music(Line("this is not a music subtitle"))
def fix_ampersand(line: Line) -> Line:
    """Replace the HTML entity ``&amp;`` with a literal ampersand.

    Args:
        line: The subtitle line to clean.

    Returns:
        The result of ``line.sub`` with every ``&amp;`` turned into ``&``.
    """
    # Match the escaped entity; substituting "&" for "&" would be a no-op.
    return line.sub(r"&amp;", "&")
def fix_music(line: Line) -> Line:
    """Replace a leading ``#`` plus one whitespace character with ``"♪ "``."""
    leading_hash = r"^#\s"
    return line.sub(leading_hash, "♪ ")