def test_1(self):
    """Smoke test: align three subtitles with two speeches using binary distance.

    Makes no assertions; it only runs the aligner in verbose mode and
    prints the resulting grid.
    """
    subtitle_lines = (
        "hello my name is micha",
        "bla my bla is micha",
        "he is up",
    )
    subs = [Subtitle(0, 0, line) for line in subtitle_lines]
    speeches = [
        ScriptEntity(character="CHAR0",
                     text="hello my name is michi",
                     type="speech"),
        ScriptEntity(character="CHAR1", text="who he is", type="speech"),
    ]
    grid = self._sut(speeches,
                     subs,
                     self._logger,
                     dtw_merger.binary_distance,
                     verbose=True)
    dtw_merger.pretty_print_grid(grid)
def test_character_detection(self):
    """Check that each subtitle receives the character of its script line.

    Two subtitles are aligned with two speeches; the first subtitle must be
    attributed to CHAR0 and the second to CHAR1.
    """
    subs = [
        Subtitle(0, 0, "Hello sir"),
        Subtitle(0, 0, "my name is micha"),
    ]
    speeches = [
        ScriptEntity(character="CHAR0", text="Hallo Mr", type="speech"),
        ScriptEntity(character="CHAR1",
                     text="my name is michi",
                     type="speech"),
    ]
    result = self._sut(speeches, subs, logger=self._logger)
    nw.pretty_print_grid(result)
    assert len(result.subtitles) == 2
    for index, expected in enumerate(("CHAR0", "CHAR1")):
        assert result.subtitles[index].character == expected
def persist(ctx, host, user, passwd, title, shots, subtitles, chapters):
    """Persists data into a MongoDB.

    Args:
        ctx: Context object whose ``obj`` mapping holds the project under
            ``PROJECT_KEY`` and may hold previously computed shots,
            subtitles and chapters under their respective keys.
        host: Database host; only used when ``host``, ``user`` and
            ``passwd`` are all truthy.
        user: Database user (see ``host``).
        passwd: Database password (see ``host``).
        title: When truthy, compute the keyframe sizes and update the title.
        shots: When truthy, persist shots taken from ``ctx.obj`` or, if
            absent/empty there, from the project's shots file when present.
        subtitles: When truthy, persist subtitles taken from ``ctx.obj`` or
            else from the merged-subtitles file, falling back to the plain
            subtitles file.
        chapters: When truthy, persist chapters taken from ``ctx.obj`` or
            else from the project's chapters file.
    """
    project = ctx.obj[PROJECT_KEY]

    if host and user and passwd:
        db = Database(host, user, passwd)
    else:
        db = Database()

    # Close the connection even if one of the reads or updates raises.
    try:
        if title:
            project.keyframe_size = shot_detection.keyframe_size(project)
            project.keyframe_montage_size = (
                shot_detection.keyframe_thumbnail_size(project))
            db.update_title(project)

        if shots:
            shots = SHOTS_KEY in ctx.obj and ctx.obj[SHOTS_KEY]
            if not shots:
                p = project.file_path(Project.File.shots)
                if Project.file_exists(p):
                    data = project.read(Project.File.shots)
                    shots = Shot.from_dicts(data)
            if shots:
                db.update_shots(project, shots)

        if subtitles:
            if SUBTITLES_KEY in ctx.obj:
                subtitles = ctx.obj[SUBTITLES_KEY]
            else:
                # Prefer the merged subtitles when they have been written.
                p = project.file_path(Project.File.merged_subtitles)
                if Project.file_exists(p):
                    data = project.read(Project.File.merged_subtitles)
                else:
                    data = project.read(Project.File.subtitles)
                subtitles = Subtitle.from_dicts(data)
            if subtitles:
                db.update_subtitles(project, subtitles)

        if chapters:
            if CHAPTERS_KEY in ctx.obj:
                chapters = ctx.obj[CHAPTERS_KEY]
            else:
                # The original checked for the file's existence but read the
                # same file in both branches, so the check is dropped.
                data = project.read(Project.File.chapters)
                chapters = Chapter.from_dicts(data)
            if chapters:
                db.update_chapters(project, chapters)
    finally:
        db.close()
def characters(ctx, subtitles_path, movie_script, algorithm, partial):
    """Merges movie scripts and subtitles.

    Subtitles come from ``subtitles_path`` if given, else from ``ctx.obj``,
    else from the project's subtitles file. Script entities come from
    ``movie_script`` if given, else from ``ctx.obj``, else from the project's
    script file (after cleaning). Only entities of type "speech" are aligned.
    The merged subtitles are stored in ``ctx.obj`` and, when the serialised
    form is non-empty, written to the project's merged-subtitles file.
    """
    shared = ctx.obj
    project = shared[PROJECT_KEY]
    logger = shared[LOGGER_KEY]
    if partial is None:
        partial = 1

    if subtitles_path:
        subtitles = Subtitle.from_dicts(path_utils.load_json(subtitles_path))
    elif SUBTITLES_KEY in shared:
        subtitles = shared[SUBTITLES_KEY]
    else:
        subtitles = Subtitle.from_dicts(project.read(Project.File.subtitles))

    if movie_script is not None:
        entities = ScriptEntity.from_dicts(path_utils.load_json(movie_script))
    elif SCRIPT_KEY in shared:
        entities = shared[SCRIPT_KEY]
    else:
        raw_script = project.read(Project.File.script)
        # remove when no longer required
        raw_script = script_parser.clean_script(raw_script)
        entities = ScriptEntity.from_dicts(raw_script)

    speeches = [entity for entity in entities if entity.type == "speech"]

    # Needleman-Wunsch only when explicitly requested; DTW is both the
    # explicit "dtw" choice and the fallback for any other value.
    if algorithm == CharactersAlgorithm.nw.value:
        merger = needleman_wunsch
    else:
        merger = dtw_merger
    alignment = merger.run(speeches, subtitles, partial, logger,
                           shared[VERBOSE_KEY])

    merged = alignment.subtitles
    shared[SUBTITLES_KEY] = merged
    payload = objects_as_dict(merged)
    if payload:
        project.write(payload, Project.File.merged_subtitles)
    logger.write()
def test_from_dict(self):
    """Round-trip the fixture through as_dict(camel=True) and from_dict."""
    restored = Subtitle.from_dict(self._sut.as_dict(camel=True))

    assert type(restored) is Subtitle
    for attr in ("t1", "t2"):
        assert type(getattr(restored, attr)) is Timestamp

    expected_fields = (
        ("original_text", "stub"),
        ("text", "stub"),
        ("character", "char"),
    )
    for attr, expected in expected_fields:
        assert getattr(restored, attr) == expected
def export(ctx, subtitles, movie_script):
    """Export merge script and subtitles. (Testing)

    When ``subtitles`` is truthy the project's merged subtitles are read and
    exported; when ``movie_script`` is truthy the project's script is read
    and exported.
    """
    project = ctx.obj[PROJECT_KEY]

    if subtitles:
        merged = Subtitle.from_dicts(
            project.read(Project.File.merged_subtitles))
        export_subtitles(project, merged)

    if movie_script:
        entities = ScriptEntity.from_dicts(project.read(Project.File.script))
        export_script(project, entities)
class SubtitleModelTests(unittest.TestCase):
    """Tests for the ``Subtitle`` model and its dict representations."""

    def setUp(self):
        """Create the subtitle under test: 0..10 with text "stub"."""
        self._sut = Subtitle(0, 10, "stub", character="char")

    def test_model(self):
        """Both bounds are exposed as Timestamps carrying the given millis."""
        # Exact type checks use identity (`is`), matching test_from_dict.
        assert type(self._sut.t1) is Timestamp
        assert type(self._sut.t2) is Timestamp
        assert self._sut.t1.millis == 0
        assert self._sut.t2.millis == 10

    def test_camel_case(self):
        """as_dict(camel=True) uses camelCase keys and plain time values."""
        result = self._sut.as_dict(camel=True)
        assert result is not None
        assert "originalText" in result
        assert "t1" in result
        assert "t2" in result
        assert result["t1"] == 0
        assert result["t2"] == 10

    def test_mongo_representation(self):
        """to_mongo_dict() exposes the time bounds as plain values."""
        result = self._sut.to_mongo_dict()
        assert result is not None
        assert result["t1"] == 0
        assert result["t2"] == 10

    def test_from_dict(self):
        """from_dict() restores a Subtitle from its camelCase dict form."""
        data = self._sut.as_dict(camel=True)
        result = Subtitle.from_dict(data)
        assert type(result) is Subtitle
        assert type(result.t1) is Timestamp
        assert type(result.t2) is Timestamp
        assert result.original_text == "stub"
        assert result.text == "stub"
        assert result.character == "char"
def test_punctuation_removal(self):
    """Align "m." with ".m" and expect one entry in each alignment index.

    NOTE(review): the subtitle list is passed first and the script entities
    second, the reverse of the sibling tests -- confirm this is intended.
    """
    subs = [Subtitle(0, 0, "m.")]
    speeches = [ScriptEntity(character="CHAR0", text=".m", type="speech")]

    result = self._sut(subs, speeches, logger=self._logger)

    for index_name in ("vertical_index", "horizontal_index"):
        assert len(getattr(result, index_name)) == 1
def test_bio_string(self):
    """Smoke test: align "GCATGCU" with "GATTACA" and print the grid.

    Makes no assertions.
    """
    subs = [Subtitle(0, 0, "GCATGCU")]
    speeches = [
        ScriptEntity(character="CHAR0", text="GATTACA", type="speech"),
    ]
    grid = self._sut(speeches, subs, logger=self._logger)
    nw.pretty_print_grid(grid)
def test_2(self):
    """The subtitle "It's cute?" must be attributed to CHAR0.

    Runs the aligner with Levenshtein distance in verbose mode and prints
    the grid before asserting.
    """
    subs = [Subtitle(0, 0, "It's cute?")]
    speeches = [
        ScriptEntity("CHAR0", "I sort of like it. I mean, it's cute."),
        ScriptEntity("CHAR1", "Cute?"),
    ]
    result = self._sut(speeches,
                       subs,
                       self._logger,
                       dtw.levenstein_distance,
                       verbose=True)
    dtw.pretty_print_grid(result)

    first_subtitle = result.subtitles[0]
    assert first_subtitle.character == "CHAR0"
def test_split_words(self):
    """A subtitle whose words are split differently still maps to CHAR0.

    Subtitle "Millage Ville, Georgia." is aligned with the script line
    "Milledgeville, Georgia".
    """
    subs = [Subtitle(0, 0, "Millage Ville, Georgia.")]
    speeches = [
        ScriptEntity(character="CHAR0",
                     text="Milledgeville, Georgia",
                     type="speech"),
    ]
    result = self._sut(speeches, subs, logger=self._logger)

    first_subtitle = result.subtitles[0]
    assert first_subtitle.character == "CHAR0"
def test_bio_string_2(self):
    """Smoke test: align "CNJRQCLU" with "CJRQDLN" using a custom weighting.

    Makes no assertions; the grid is only printed.
    """
    subs = [Subtitle(0, 0, "CNJRQCLU")]
    speeches = [
        ScriptEntity(character="CHAR0", text="CJRQDLN", type="speech"),
    ]
    grid = self._sut(
        speeches,
        subs,
        Weighting(1, -1, AdaptiveGapPenalty(-5, -1)),
        logger=self._logger,
    )
    nw.pretty_print_grid(grid)
def test_3(self):
    """Smoke test: align one subtitle with one speech using binary distance.

    Makes no assertions; runs in verbose mode and prints the grid.
    """
    subs = [Subtitle(0, 0, "hello what are we doing")]
    speeches = [
        ScriptEntity(character="CHAR0",
                     text="hello what is it we do",
                     type="speech"),
    ]
    grid = self._sut(speeches,
                     subs,
                     self._logger,
                     dtw_merger.binary_distance,
                     verbose=True)
    dtw_merger.pretty_print_grid(grid)
def test_1(self):
    """The subtitle "You're playing music?" must be attributed to CHAR1.

    Runs the aligner with Levenshtein distance in verbose mode and prints
    the grid before asserting.
    """
    subs = [Subtitle(0, 0, "You're playing music?")]
    speeches = [
        ScriptEntity(
            "CHAR0",
            "Yeah, they really want you... they really want you... they really do."
        ),
        ScriptEntity("CHAR1", "You guys are playing music?"),
    ]
    result = self._sut(speeches,
                       subs,
                       self._logger,
                       dtw.levenstein_distance,
                       verbose=True)
    dtw.pretty_print_grid(result)

    first_subtitle = result.subtitles[0]
    assert first_subtitle.character == "CHAR1"
def test_2(self):
    """Smoke test: align reordered words using Levenshtein distance.

    Makes no assertions; runs in verbose mode and prints the grid.
    """
    subs = [Subtitle(0, 0, "hello my name is micha")]
    speeches = [
        ScriptEntity(character="CHAR0",
                     text="hello my name micha is",
                     type="speech"),
    ]
    grid = self._sut(speeches,
                     subs,
                     self._logger,
                     dtw_merger.levenstein_distance,
                     verbose=True)
    dtw_merger.pretty_print_grid(grid)
def setUp(self):
    """Create the subtitle fixture used by the tests."""
    fixture = Subtitle(0, 10, "stub", character="char")
    self._sut = fixture
def mock_subtitles(string_list):
    """Build one ``Subtitle(0, 0, text)`` per string in ``string_list``."""
    mocked = []
    for text in string_list:
        mocked.append(Subtitle(0, 0, text))
    return mocked