Ejemplo n.º 1
0
    def test_1(self):
        """Run ``self._sut`` with ``dtw_merger.binary_distance`` and print the grid.

        Three subtitles are aligned against two speech entities. Nothing is
        asserted; the test only checks that the call and the pretty-printer
        run without raising.
        """
        subtitle_texts = (
            "hello my name is micha",
            "bla my bla is micha",
            "he is up",
        )
        subtitles = [Subtitle(0, 0, text) for text in subtitle_texts]

        speech_fields = (
            {
                "character": "CHAR0",
                "text": "hello my name is michi",
                "type": "speech"
            },
            {
                "character": "CHAR1",
                "text": "who he is",
                "type": "speech"
            },
        )
        script_entities = [ScriptEntity(**fields) for fields in speech_fields]

        alignment = self._sut(
            script_entities,
            subtitles,
            self._logger,
            dtw_merger.binary_distance,
            verbose=True,
        )
        dtw_merger.pretty_print_grid(alignment)
Ejemplo n.º 2
0
    def test_character_detection(self):
        """Aligned subtitles must carry the characters of the two speeches.

        Two subtitles are aligned against two similar (not identical) speech
        entities; the result must hold exactly two subtitles labelled
        ``CHAR0`` and ``CHAR1``, in that order.
        """
        subtitles = [
            Subtitle(0, 0, text) for text in ("Hello sir", "my name is micha")
        ]

        char0_fields = {
            "character": "CHAR0",
            "text": "Hallo Mr",
            "type": "speech"
        }
        char1_fields = {
            "character": "CHAR1",
            "text": "my name is michi",
            "type": "speech"
        }
        script_entities = [
            ScriptEntity(**char0_fields),
            ScriptEntity(**char1_fields),
        ]

        alignment = self._sut(script_entities, subtitles, logger=self._logger)
        nw.pretty_print_grid(alignment)

        assert len(alignment.subtitles) == 2
        for position, expected_character in enumerate(("CHAR0", "CHAR1")):
            assert alignment.subtitles[position].character == expected_character
Ejemplo n.º 3
0
def run(url):
    """Return the script found at ``url`` as a list of ``ScriptEntity`` objects.

    The content is fetched with ``load``, parsed with ``analyze_content``,
    passed through ``clean_script``, and every cleaned item is converted
    with ``ScriptEntity.from_dict``.
    """
    raw_text, text_encoding = load(url)
    cleaned_items = clean_script(analyze_content(raw_text, text_encoding))

    converted = []
    for item in cleaned_items:
        converted.append(ScriptEntity.from_dict(item))
    return converted
Ejemplo n.º 4
0
    def test_2(self):
        """The lone subtitle "It's cute?" must be attributed to ``CHAR0``.

        ``CHAR0``'s longer line ends in "it's cute." while ``CHAR1`` only says
        "Cute?"; the alignment is run with ``dtw.levenstein_distance``.
        """
        subtitles = [Subtitle(0, 0, "It's cute?")]

        char0_line = ScriptEntity("CHAR0",
                                  "I sort of like it. I mean, it's cute.")
        char1_line = ScriptEntity("CHAR1", "Cute?")
        script_entities = [char0_line, char1_line]

        alignment = self._sut(
            script_entities,
            subtitles,
            self._logger,
            dtw.levenstein_distance,
            verbose=True,
        )
        dtw.pretty_print_grid(alignment)

        only_subtitle = alignment.subtitles[0]
        assert only_subtitle.character == "CHAR0"
Ejemplo n.º 5
0
def characters(ctx, subtitles_path, movie_script, algorithm, partial):
    """Merges movie scripts and subtitles."""
    # NOTE(review): the ``ctx`` parameter suggests this is a CLI command whose
    # docstring may be shown as help text, so it is kept unchanged - confirm.
    project = ctx.obj[PROJECT_KEY]
    logger = ctx.obj[LOGGER_KEY]

    if partial is None:
        partial = 1

    # Subtitles: an explicit path wins, then whatever is already stored in
    # the context, and finally the project's subtitles file.
    if subtitles_path:
        subtitles = Subtitle.from_dicts(path_utils.load_json(subtitles_path))
    elif SUBTITLES_KEY in ctx.obj:
        subtitles = ctx.obj[SUBTITLES_KEY]
    else:
        stored_subtitles = project.read(Project.File.subtitles)
        subtitles = Subtitle.from_dicts(stored_subtitles)

    # Script entities follow the same precedence: path, context, project.
    if movie_script is not None:
        loaded_script = path_utils.load_json(movie_script)
        speeches = ScriptEntity.from_dicts(loaded_script)
    elif SCRIPT_KEY in ctx.obj:
        speeches = ctx.obj[SCRIPT_KEY]
    else:
        stored_script = project.read(Project.File.script)
        # remove when no longer required
        stored_script = script_parser.clean_script(stored_script)
        speeches = ScriptEntity.from_dicts(stored_script)

    # Only entities of type "speech" take part in the alignment.
    spoken = []
    for entity in speeches:
        if entity.type == "speech":
            spoken.append(entity)

    # Pick the aligner; any value other than the NW one ends up on DTW.
    if algorithm == CharactersAlgorithm.dtw.value:
        align = dtw_merger.run
    elif algorithm == CharactersAlgorithm.nw.value:
        align = needleman_wunsch.run
    else:
        align = dtw_merger.run
    alignment = align(spoken, subtitles, partial, logger,
                      ctx.obj[VERBOSE_KEY])

    merged_subtitles = alignment.subtitles
    ctx.obj[SUBTITLES_KEY] = merged_subtitles

    # Skip the write when the serialised result is empty/falsy.
    serialised = objects_as_dict(merged_subtitles)
    if serialised:
        project.write(serialised, Project.File.merged_subtitles)

    logger.write()
Ejemplo n.º 6
0
    def test_1(self):
        """The subtitle "You're playing music?" must be attributed to ``CHAR1``.

        ``CHAR0`` speaks a longer, unrelated line; ``CHAR1``'s line is a close
        variant of the subtitle. Alignment uses ``dtw.levenstein_distance``.
        """
        subtitles = [Subtitle(0, 0, "You're playing music?")]

        unrelated_line = ScriptEntity(
            "CHAR0",
            "Yeah, they really want you... they really want you... they really do."
        )
        matching_line = ScriptEntity("CHAR1", "You guys are playing music?")
        script_entities = [unrelated_line, matching_line]

        alignment = self._sut(
            script_entities,
            subtitles,
            self._logger,
            dtw.levenstein_distance,
            verbose=True,
        )
        dtw.pretty_print_grid(alignment)

        only_subtitle = alignment.subtitles[0]
        assert only_subtitle.character == "CHAR1"
Ejemplo n.º 7
0
def export(ctx, subtitles, movie_script):
    """Export merge script and subtitles. (Testing)"""
    # NOTE(review): the ``ctx`` parameter suggests this is a CLI command whose
    # docstring may be shown as help text, so it is kept unchanged - confirm.
    project = ctx.obj[PROJECT_KEY]

    # ``subtitles`` and ``movie_script`` act as on/off switches here: each
    # truthy one triggers a read from the project followed by an export.
    if subtitles:
        stored_subtitles = project.read(Project.File.merged_subtitles)
        merged = Subtitle.from_dicts(stored_subtitles)
        export_subtitles(project, merged)

    if movie_script:
        stored_script = project.read(Project.File.script)
        entities = ScriptEntity.from_dicts(stored_script)
        export_script(project, entities)
Ejemplo n.º 8
0
def screenplay(ctx, url):
    """Parses movie scripts."""
    # NOTE(review): the ``ctx`` parameter suggests this is a CLI command whose
    # docstring may be shown as help text, so it is kept unchanged - confirm.
    project = ctx.obj[PROJECT_KEY]

    if not url:
        # No URL given: fall back to the script already stored in the project.
        stored_script = project.read(Project.File.script)
        entities = ScriptEntity.from_dicts(stored_script)
    else:
        entities = script_parser.run(url)
        serialised = objects_as_dict(entities)
        # Skip the write when the serialised result is empty/falsy.
        if serialised:
            project.write(serialised, Project.File.script)

    if ctx.obj[VERBOSE_KEY]:
        pprint(entities)
    # Store the entities in the shared context object under SCRIPT_KEY.
    ctx.obj[SCRIPT_KEY] = entities
Ejemplo n.º 9
0
    def test_punctuation_removal(self):
        """Aligning "m." with ".m" must yield one entry in each alignment index.

        Presumably this relies on punctuation being stripped before the
        comparison, as the test name suggests - verify against the aligner.
        """
        s1 = [Subtitle(0, 0, "m.")]

        entity_fields = {
            "character": "CHAR0",
            "text": ".m",
            "type": "speech"
        }
        s2 = [ScriptEntity(**entity_fields)]

        # NOTE(review): the subtitles are passed as the first argument and the
        # script entities as the second - confirm this is the order that
        # ``self._sut`` expects.
        alignment = self._sut(s1, s2, logger=self._logger)

        assert len(alignment.vertical_index) == 1
        assert len(alignment.horizontal_index) == 1
Ejemplo n.º 10
0
    def test_bio_string(self):
        """Align "GCATGCU" against "GATTACA" and print the resulting grid.

        Nothing is asserted; the test only checks that the call and the
        pretty-printer run without raising.
        """
        subtitles = [Subtitle(0, 0, "GCATGCU")]

        entity_fields = {
            "character": "CHAR0",
            "text": "GATTACA",
            "type": "speech"
        }
        script_entities = [ScriptEntity(**entity_fields)]

        alignment = self._sut(script_entities, subtitles, logger=self._logger)
        nw.pretty_print_grid(alignment)
Ejemplo n.º 11
0
    def test_split_words(self):
        """A subtitle that splits a script word in two must still match.

        "Millage Ville, Georgia." is aligned against "Milledgeville, Georgia"
        and the subtitle must be attributed to ``CHAR0``.
        """
        subtitles = [Subtitle(0, 0, "Millage Ville, Georgia.")]

        entity_fields = {
            "character": "CHAR0",
            "text": "Milledgeville, Georgia",
            "type": "speech"
        }
        script_entities = [ScriptEntity(**entity_fields)]

        alignment = self._sut(script_entities, subtitles, logger=self._logger)

        only_subtitle = alignment.subtitles[0]
        assert only_subtitle.character == "CHAR0"
Ejemplo n.º 12
0
    def test_bio_string_2(self):
        """Align "CNJRQCLU" against "CJRQDLN" with an explicit ``Weighting``.

        The weighting is ``Weighting(1, -1, AdaptiveGapPenalty(-5, -1))``.
        Nothing is asserted; the resulting grid is only printed.
        """
        subtitles = [Subtitle(0, 0, "CNJRQCLU")]

        entity_fields = {
            "character": "CHAR0",
            "text": "CJRQDLN",
            "type": "speech"
        }
        script_entities = [ScriptEntity(**entity_fields)]

        alignment = self._sut(
            script_entities,
            subtitles,
            Weighting(1, -1, AdaptiveGapPenalty(-5, -1)),
            logger=self._logger,
        )
        nw.pretty_print_grid(alignment)
Ejemplo n.º 13
0
    def test_3(self):
        """Run ``self._sut`` with ``dtw_merger.binary_distance`` on near-matching text.

        The subtitle and the speech share some words but differ in others.
        Nothing is asserted; the resulting grid is only printed.
        """
        subtitles = [Subtitle(0, 0, "hello what are we doing")]

        entity_fields = {
            "character": "CHAR0",
            "text": "hello what is it we do",
            "type": "speech"
        }
        script_entities = [ScriptEntity(**entity_fields)]

        alignment = self._sut(
            script_entities,
            subtitles,
            self._logger,
            dtw_merger.binary_distance,
            verbose=True,
        )
        dtw_merger.pretty_print_grid(alignment)
Ejemplo n.º 14
0
    def test_2(self):
        """Run ``self._sut`` with ``dtw_merger.levenstein_distance`` on swapped words.

        The speech has the same words as the subtitle with the last two in
        reverse order. Nothing is asserted; the grid is only printed.
        """
        subtitles = [Subtitle(0, 0, "hello my name is micha")]

        entity_fields = {
            "character": "CHAR0",
            "text": "hello my name micha is",
            "type": "speech"
        }
        script_entities = [ScriptEntity(**entity_fields)]

        alignment = self._sut(
            script_entities,
            subtitles,
            self._logger,
            dtw_merger.levenstein_distance,
            verbose=True,
        )
        dtw_merger.pretty_print_grid(alignment)
def mock_speeches(char_text_list):
    """Build one ``ScriptEntity`` per ``(character, text)`` pair, in order.

    Each item of ``char_text_list`` is unpacked into exactly two values that
    are passed positionally to ``ScriptEntity``.
    """
    speeches = []
    for character, line in char_text_list:
        speeches.append(ScriptEntity(character, line))
    return speeches
Ejemplo n.º 16
0
def convert_to_script_entities(script):
    """Convert ``script`` items to ``ScriptEntity`` objects and keep the speeches.

    Every item is first converted with ``ScriptEntity.from_dict``; afterwards
    only the entities whose ``type`` equals ``"speech"`` are returned, as a
    list in their original order.
    """
    # Convert everything first, then filter, so conversion of all items
    # happens before any ``type`` attribute is inspected.
    converted = []
    for script_dict in script:
        converted.append(ScriptEntity.from_dict(script_dict))
    return [entity for entity in converted if entity.type == "speech"]