Example #1
0
 def test_len(self):
     """``len`` of a Line must equal the length of its text without style tags."""
     assert len(Line("- <i>It's working.</i>")) == len("- It's working.")
     # Raw string: in a normal literal "\a" is the BEL escape, so the tag
     # would silently lose the backslash it is supposed to contain.
     assert len(Line(r"{\an8}appears at top")) == len("appears at top")
     assert len(
         Line(
             '<i>Previously on <font color="#ffff00">"TV Show"</font>...</i>'
         )) == len('Previously on "TV Show"...')
Example #2
0
 def test_fix_styles(self, processor: StyleProcessor):
     """Check ``fix_styles`` on empty, whitespace-only and mixed italic tags."""
     cases = (
         ("<i></i>", ""),
         ("<i> </i>", " "),
         ("</i> <i>", " "),
         ("<i></i><i></i>", ""),
         ("<i></i> <i> </i> <i>sentence</i>", "   <i>sentence</i>"),
     )
     for raw, expected in cases:
         assert processor.fix_styles(Line(raw)) == expected
Example #3
0
 def test_clean_parentheses(self, fake_processor: SDHProcessor):
     """Bracketed annotations are removed; the surrounding text is kept."""
     cases = (
         ("that's for you. [sighs]", "that's for you. "),
         (
             "on my part, I mean, [laughs] utter idiocy.",
             "on my part, I mean,  utter idiocy.",
         ),
         (
             "telling a joke [laughs], I mean, [continues laughing] you should've seen him.",
             "telling a joke , I mean,  you should've seen him.",
         ),
     )
     for raw, expected in cases:
         assert fake_processor.clean_parentheses(Line(raw)) == expected
Example #4
0
 def test_strip(self):
     """``Line.strip`` drops outer whitespace, also next to italic tags."""
     cases = (
         ("", ""),
         (" ", ""),
         (" ", ""),
         ("test", "test"),
         (" test", "test"),
         (" test  ", "test"),
         ("test <i>", "test<i>"),
         ("<i>  test", "<i>test"),
         (" <i>test", "<i>test"),
         ("<i>test</i>", "<i>test</i>"),
     )
     for raw, expected in cases:
         assert Line(raw).strip() == expected
Example #5
0
 def process_section(cls, section: Section) -> Section:
     """Merge the dialog chunks of *section* that are short enough.

     A section with at most one line is returned untouched. Otherwise its
     lines are grouped with ``split_dialog_chunks`` and ``section.lines`` is
     rebuilt: a single-line chunk is kept as is, a multi-line chunk is
     replaced by its merged line when ``is_short`` accepts that line, and
     is kept line by line otherwise.

     Returns the same ``section`` object that was passed in.
     """
     if len(section) <= 1:
         return section
     chunks = cls.split_dialog_chunks(section.lines)
     section.lines = []
     for chunk in chunks:
         if len(chunk) <= 1:
             section.lines.append(chunk[0])
             continue
         # Merge once and reuse the result for both the check and the append.
         merged = Line.merge(chunk)
         if cls.is_short(merged):
             section.lines.append(merged)
         else:
             section.lines += chunk
     return section
Example #6
0
 def clean_hi(cls, line: Line) -> Line:
     """Strip a leading hearing-impaired marker, then bracketed annotations."""
     # Group 1 (leading dash/space/tag characters) and group 5 (an optional
     # italic tag after the marker) are kept; the rest of the match is dropped.
     leading_hi = r"^([-‐\s<i>]+)?((\b[-A-Za-z.']+\s?#?\d?){1,2}(?!\.)([\[(][\w\s]*[\])])?:(?!\w)|[\[]+.*[\]:]+)(<\/?i>)?([\s])*"
     without_prefix = line.sub(leading_hi, r"\1\5")
     return cls.clean_parentheses(without_prefix)
Example #7
0
 def test_contains_hi(self, fake_processor: SDHProcessor):
     """``contains_hi`` flags HI markers but not clock times such as 9:17."""
     cases = (
         ("that's for you. [sighs]", True),
         ("‐TEACHER: blabla...", True),
         ("[Laura] sentence", True),
         ("<i>[Laura]</i> <i>sentence</i>", True),
         ("- CHRISTOPHER:<i> Hello?</i>", True),
         ("9:17 a.m., to be specific,", False),
         ("between 4:00 and 6:00.", False),
         ("I got some time between 4:00 and 6:00.", False),
     )
     for text, has_hi in cases:
         found = fake_processor.contains_hi(Line(text))
         if has_hi:
             assert found
         else:
             assert not found
Example #8
0
 def test_in_blacklist(self, fake_processor: BlacklistProcessor):
     """Every sample below must be reported by ``in_blacklist``."""
     blacklisted = (
         "Advertise your product or brand here",
         "contact www.OpenSubtitles.org today",
         '<font color="#ffff00">Provided by username</font>',
         "[http://example.com]",
         "http://foo.network",
         "Visit https://another-example.com",
         "find more under subs.link",
         "twitter.com/username",
     )
     for text in blacklisted:
         assert fake_processor.in_blacklist(Line(text))
Example #9
0
 def test_clean_dashes(self, processor: DialogProcessor):
     """A leading dash gets exactly one space after it; inner dashes stay."""
     cases = (
         ("-dialog.", "- dialog."),
         ("- dialog.", "- dialog."),
         ("i-in", "i-in"),
         ("<i>-dialog.</i>", "<i>- dialog.</i>"),
         ("<i>-</i>dialog.", "<i>- </i>dialog."),
         ("-...dialog.", "- ...dialog."),
     )
     for raw, expected in cases:
         assert processor.clean_dashes(Line(raw)) == expected
Example #10
0
 def test_is_parentheses(self, fake_processor: SDHProcessor):
     """Only fully bracketed lines count; negatives must be exactly False."""
     cases = (
         ("(distant shouting))", True),
         ("[ distant shouting ]", True),
         ("-[camera shutter]", True),
         ("(distant shouting,", False),
         ("weapons clashing))", False),
         ("[laughing nervously]:", False),
     )
     for text, expected in cases:
         outcome = fake_processor.is_parentheses(Line(text))
         if expected:
             assert outcome
         else:
             assert outcome is False
Example #11
0
 def test_fix_space_punctuation(self, processor: ErrorProcessor):
     """Spaces around punctuation are normalised; ellipses are respected."""
     cases = (
         (
             "First sentence  . Second sentence ,  blabla.",
             "First sentence. Second sentence, blabla.",
         ),
         (
             "First sentence... Second sentence.",
             "First sentence... Second sentence.",
         ),
         ("Whoa ...", "Whoa..."),
         ("Whoa...", "Whoa..."),
         ("Yeah. ..maybe.", "Yeah...maybe."),
         ("Begin... ...end.", "Begin... ...end."),
         ("- ...dialog", "- ...dialog"),
     )
     for raw, expected in cases:
         assert processor.fix_space_punctuation(Line(raw)) == expected
Example #12
0
 def test_clean_hi(self, fake_processor: SDHProcessor):
     """``clean_hi`` removes speaker labels and bracketed annotations."""
     cases = (
         ("that's for you. [sighs]", "that's for you. "),
         ("‐TEACHER: blabla...", "‐blabla..."),
         ("[Laura] sentence", "sentence"),
         ("<i>[Laura]</i> <i>sentence</i>", "<i></i><i>sentence</i>"),
         ("- CHRISTOPHER:<i> Hello?</i>", "- <i>Hello?</i>"),
         ("9:17 a.m., to be specific,", "9:17 a.m., to be specific,"),
     )
     for raw, expected in cases:
         assert fake_processor.clean_hi(Line(raw)) == expected
Example #13
0
 def test_strip_styles(self):
     """``strip_styles`` removes italic and position tags, leaving plain text."""
     assert Line(
         "- <i>It's working.</i>").strip_styles() == "- It's working."
     # Raw string: in a normal literal "\a" is the BEL escape, so the tag
     # would silently lose the backslash it is supposed to contain.
     assert Line(r"{\an8}appears at top").strip_styles() == "appears at top"
Example #14
0
 def test_is_dialog(self):
     """Lines opening with a dash (tagged or not) count as dialog."""
     for text in ("- <i>This is a dialog.</i>", "-this is also a dialog"):
         assert Line(text).is_dialog()
     assert not Line("not a dialog").is_dialog()
Example #15
0
 def fix_spaces(line: Line) -> Line:
     """Insert the space missing between one sentence and the next."""
     # End punctuation directly followed by an upper- then lower-case letter.
     glued_sentences = r"\b([.?!]+)([A-Z][a-z])"
     return line.sub(glued_sentences, r"\1 \2")
Example #16
0
 def trim_whitespace(line: Line) -> Line:
     """Collapse whitespace runs, even when italic tags sit inside them, and strip."""
     collapsed = line.sub(r"\s+(<\/?i>)*\s*", r" \1")
     return collapsed.strip()
 def test_split_dialog_chunks(self, processor: LineLengthProcessor):
     """Each case maps input line texts to the expected chunks of texts."""
     cases = (
         (["hello", "there"], [["hello", "there"]]),
         (["- hello", "there"], [["- hello", "there"]]),
         (["hello", "- there"], [["hello"], ["- there"]]),
         (["hello", "- there", "man"], [["hello"], ["- there", "man"]]),
         (["- hi", "- bye"], [["- hi"], ["- bye"]]),
         (
             ["- hi", "bob", "- bye", "bob"],
             [["- hi", "bob"], ["- bye", "bob"]],
         ),
         (
             ["hi", "-bob", "- bye", "bob"],
             [["hi"], ["-bob"], ["- bye", "bob"]],
         ),
         (
             ["-I'm gonna call the police,", "this can't keep happening."],
             [["-I'm gonna call the police,", "this can't keep happening."]],
         ),
     )
     for texts, expected_chunks in cases:
         lines = [Line(text) for text in texts]
         wanted = [[Line(text) for text in chunk]
                   for chunk in expected_chunks]
         assert processor.split_dialog_chunks(lines) == wanted
Example #18
0
 def fix_styles(line: Line) -> Line:
     """Drop pairs of italic tags that enclose nothing but whitespace."""
     # The whitespace between the two tags (group 1) is kept.
     hollow_tags = r"<\/?i>(\s*)<\/?i>"
     return line.sub(hollow_tags, r"\1")
Example #19
0
 def join(self) -> Line:
     """Return all of ``self.lines`` joined by single spaces as one Line."""
     joined_text = " ".join(self.lines)
     return Line(joined_text)
Example #20
0
 def test_is_simple_hi(self, fake_processor: SDHProcessor):
     """Lines made only of music notes are simple HI lines."""
     for text in ("♪", "- ♪ ♪"):
         assert fake_processor.is_simple_hi(Line(text))
Example #21
0
 def fix_space_punctuation(line: Line) -> Line:
     """Normalise the whitespace around ``. , ! ?`` punctuation."""
     # remove space before punctuation
     space_before = r"(?<!^-)(?<!\.{3})\s+([.,!?]+)"
     # fix multiple spaces after punctuation
     spaces_after = r"([.,!?]+)\s{2,}(?!$)"
     return line.sub(space_before, r"\1").sub(spaces_after, r"\1 ")
Example #22
0
 def fix_hyphen(line: Line) -> Line:
     """Replace a straight apostrophe followed by a curly one with ``'``."""
     # NOTE(review): the name says "hyphen" but the pattern targets the
     # two-character sequence '’ - confirm this is the intended input.
     doubled_apostrophe = r"'’"
     return line.sub(doubled_apostrophe, "'")
Example #23
0
 def test_is_hi(self, fake_processor: SDHProcessor):
     """Pure HI lines are detected; mixed or unbalanced ones give False."""
     cases = (
         ("[camera shutter]", True),
         ("-[camera shutter]", True),
         ("-  [camera shutter]", True),
         ("(distant shouting))", True),
         ("[ distant shouting ]", True),
         ("♪", True),
         ("- ♪ ♪", True),
         ("(distant shouting,", False),
         ("weapons clashing))", False),
         ("-[journalists] Christine!", False),
         ("- TAMIKA: Yeah.", False),
         ("♪ (SOFT PIANO MUSIC PLAYS)) ♪", True),
     )
     for text, expected in cases:
         outcome = fake_processor.is_hi(Line(text))
         if expected:
             assert outcome
         else:
             assert outcome is False
Example #24
0
 def clean_dashes(line: Line) -> Line:
     """Rewrite a leading run of dashes as ``- ``, keeping a tag before it."""
     leading_dashes = r"^(<\/?i>)*([-‐]+)(\s+)?"
     return line.sub(leading_dashes, r"\1- ")
Example #25
0
 def clean_parentheses(line: Line) -> Line:
     """Remove annotations enclosed in ``()``, ``[]`` or ``*``."""
     # Non-greedy, so each annotation is removed on its own.
     enclosed = r"[(\[*].*?[)\]*:]+"
     return line.sub(enclosed, "")
Example #26
0
 def fix_quote(line: Line) -> Line:
     """Turn the HTML entity ``&quot;`` back into a double quote."""
     quote_entity = r"&quot;"
     return line.sub(quote_entity, '"')
Example #27
0
 def is_parenthesis_not_matching(line: Line) -> bool:
     """Return True when round or square brackets in *line* are unbalanced.

     Only the numbers of opening and closing characters are compared;
     their order is not checked.
     """
     # No bracket character at all: nothing can be unbalanced.
     if re.search(r"[()\[\]]", line) is None:
         return False
     if line.count("(") != line.count(")"):
         return True
     return line.count("[") != line.count("]")
Example #28
0
 def test_is_music(self, fake_processor: SDHProcessor):
     """Music descriptions are recognised; ordinary text is not."""
     music_lines = (
         "♪ ominous music ♪",
         "- ♪ mysterious music ♪",
         "♪ somber music ♪",
         "♪ foreboding music ♪",
         "♪ chilling music ♪",
         "♪ solemn music ♪",
         " ♪ dramatic music ♪",
         "♪ poignant music ♪",
         "♪ emotional music ♪",
         "♪ uneasy music ♪",
         "♪ harrowing music ♪",
         "♪ sinister music ♪",
         "♪ upbeat music plays ♪",
         "♪ gentle music ♪",
         "♪ light orchestral music ♪",
         "♪ upbeat folk music playing ♪",
         "♪ dramatic music playing ♪",
         "♪ ominous music playing ♪",
         "♪ dramatic music playing ♪",
         "♪ upbeat music playing ♪",
         "♪ dramatic music playing ♪",
         "♪ dramatic music playing ♪",
         "♪ smooth music playing ♪",
         "♪ dance music playing ♪",
         "♪ dramatic music playing ♪",
         "♪ dramatic music playing ♪",
         "♪ dramatic music playing ♪",
         "♪ dramatic music playing ♪",
         "♪ swelling orchestral music plays ♪",
         "♪ dramatic music playing ♪",
         "♪ upbeat song playing over speakers ♪",
         "♪ upbeat song playing over headphones ♪",
         "♪ soft, dramatic music ♪",
         "♪ soft, stirring music ♪",
         "♪ music intensifies ♪",
         "♪ music swells ♪",
         "♪ dark music swells ♪",
         "♪ dramatic musical sting ♪",
         "♪ uneasy musical crescendo ♪",
         "♪ up‐tempo percussive music playing ♪",
         "<i>quiet, eerie music </i>",
         "<i>droning music fades in </i>",
         "<i>women vocalizing eerily </i>",
     )
     for text in music_lines:
         assert fake_processor.is_music(Line(text))
     assert not fake_processor.is_music(
         Line("this is not a music subtitle"))
Example #29
0
 def fix_ampersand(line: Line) -> Line:
     """Turn the HTML entity ``&amp;`` back into a plain ampersand."""
     ampersand_entity = r"&amp;"
     return line.sub(ampersand_entity, "&")
Example #30
0
 def fix_music(line: Line) -> Line:
     """Swap a leading ``#`` plus whitespace for a music note and a space."""
     leading_hash = r"^#\s"
     return line.sub(leading_hash, "♪ ")