def test_add_describing_letters():
    """Check that every non-blank labelled line starts with a known label.

    Reads ``test_files/shrek_script.txt``, runs the three ``label_lines``
    steps over its lines and inspects the labelled lines one by one.
    """
    with open('test_files/shrek_script.txt', 'r') as inp:
        full_text = inp.readlines()

    first_list = label_lines.detect_amount_of_spaces(full_text)
    second_list = label_lines.give_spaces_label(full_text, first_list)
    labelled_lines = label_lines.add_describing_letters(full_text, second_list)

    labels = ('M|', 'C|', 'D|', 'S|', 'N|')

    # Walk the labelled lines themselves. Looping over the joined string
    # would visit single characters, and an empty-string prefix passed to
    # ``startswith`` matches everything, so neither may be used here.
    for line in labelled_lines:
        # NOTE(review): blank lines are assumed to stay unlabelled - confirm
        # against label_lines.add_describing_letters.
        assert not line.strip() or line.startswith(labels), line
def test_give_spaces_label():
    """Check the values stored under 'spaces_N', 'spaces_S' and 'spaces_C'.

    'spaces_N' and 'spaces_S' must both equal the first element of the list
    returned by ``detect_amount_of_spaces``; 'spaces_C' must equal the
    maximum of that list.
    """
    with open('test_files/shrek_script.txt', 'r') as script_file:
        script_lines = script_file.readlines()

    space_counts = label_lines.detect_amount_of_spaces(script_lines)
    label_widths = label_lines.give_spaces_label(script_lines, space_counts)

    # N and S share one value, and that value is the first detected count.
    assert label_widths['spaces_N'] == label_widths['spaces_S']
    assert label_widths['spaces_S'] == space_counts[0]

    # C is tied to the largest detected count.
    assert max(space_counts) == label_widths['spaces_C']
def main(argv):
    """Convert the script file named by ``argv[1]`` into ``script.json``.

    The file's lines are passed through the three ``label_lines`` steps,
    the labelled result is handed to ``converter``, and its return value is
    written as JSON with a four-space indent.

    Args:
        argv: Argument sequence; element 1 is the path of the script file.
    """
    with open(argv[1], 'r') as source:
        lines = source.readlines()

    space_counts = label_lines.detect_amount_of_spaces(lines)
    space_labels = label_lines.give_spaces_label(lines, space_counts)
    labelled = label_lines.add_describing_letters(lines, space_labels)

    # Convert before opening the output so a failing conversion does not
    # leave a freshly truncated script.json behind.
    converted = converter(labelled)

    with open('script.json', 'w') as target:
        json.dump(converted, target, indent=4)
# Example #4
def main(argv):
    """Read the script file named by ``argv[1]`` and label its lines.

    Takes the file name/path to the script file and applies the
    ``label_lines`` functions to its lines.

    NOTE(review): the original description says this also prints the number
    of scene descriptions in the movie. That counting/printing step is not
    present in this excerpt, so ``new_text`` is built but not used further.

    Args:
        argv: Argument sequence; element 1 is the path of the script file.
    """
    filename = argv[1]

    with open(filename, 'r') as inp:
        text = inp.readlines()

    # get the functions of program
    # NOTE(review): the right-hand side of this assignment was missing. It is
    # restored to detect_amount_of_spaces, whose result give_spaces_label
    # takes as its second argument - confirm against the original source.
    list_number_of_spaces = label_lines.detect_amount_of_spaces(text)

    dict_spaces_label = \
        label_lines.give_spaces_label(text, list_number_of_spaces)

    new_text = \
        "".join(label_lines.add_describing_letters(text, dict_spaces_label))
def compare_script_to_subtitles(script, subtitles):
    """Match subtitle sentences against script dialogue.

    Compares all the sentences of the subtitles to all the sentences
    of the script to find the best matches. Adds the character to the
    subtitles and the time to the script if the match is at least 70%.
    Also calculates the total similarity of the dialogue.

    Args:
        script(list): A list of the input script lines
        subtitles(str): A string of the subtitles file

    Returns:
        average_ratio(float): The similarity of the dialogue in percentage
            (0.0 when there are no subtitle entries)
        script_dict(dict): The new script, with timestamps
        subtitles_dict(dict): The new subtitles, with characters
    """
    subtitles_dict = OrderedDict(order_text(subtitles))

    # Remove the <tags> from the text
    for item in subtitles_dict:
        subtitles_dict[item]['text'] = \
            re.sub('<.*?>', '', subtitles_dict[item]['text'])

    # merge subtitles for complete lines
    subtitle_dict_length = len(subtitles_dict)
    i = 1
    while i < subtitle_dict_length:
        subtitles_dict, i = process_subtitle(subtitles_dict, i)

    # process the script
    no_spaces = label_lines.detect_amount_of_spaces(script)

    dict_spaces_label = \
        label_lines.give_spaces_label(script, no_spaces)

    labelled_script = \
        label_lines.add_describing_letters(script, dict_spaces_label)

    script_dict = script_to_json.converter(labelled_script)

    # loop to compare the texts
    # [sum of the best ratio per subtitle entry, number of entries]
    average_ratio = [0, 0]

    for item in subtitles_dict:

        time = ''

        # best ratio seen so far for this subtitle entry (all its sentences)
        highest_ratio = 0

        for sub_sentence in subtitles_dict[item]['text']:

            character = ''

            for index in script_dict:

                if 'dialogue' in script_dict[index]:

                    # NOTE(review): the right-hand side of this assignment
                    # was missing. This assumes the 'dialogue' entry is
                    # already an iterable of sentences - confirm against
                    # script_to_json.converter (a plain string would need
                    # sentence splitting first).
                    dialogue_text = script_dict[index]['dialogue']

                    for d_sentence in dialogue_text:

                        ratio = SequenceMatcher(
                            None, sub_sentence, d_sentence).ratio()

                        if ratio > highest_ratio:

                            highest_ratio = ratio
                            highest_D_match = index

                            # only a new best match of at least 70% links
                            # the subtitle and the script entry
                            if ratio >= 0.7:
                                time = subtitles_dict[item]['time']
                                character = script_dict[index]['character']

            if character != '':
                subtitles_dict[item]['character'] = character

            if time != '':
                script_dict[highest_D_match]['time'] = time

        average_ratio[0] += highest_ratio
        average_ratio[1] += 1

    for item in subtitles_dict:
        subtitles_dict[item]['text'] = ' '.join(subtitles_dict[item]['text'])

    if average_ratio[1]:
        average_ratio = (average_ratio[0] / average_ratio[1]) * 100
    else:
        # no subtitle entries at all: report 0% instead of dividing by zero
        average_ratio = 0.0

    return average_ratio, script_dict, subtitles_dict
def compare_script_to_subtitles(script, subtitles):
    """Match subtitle sentences against script dialogue, reporting progress.

    Every sentence of every subtitle entry is compared with every dialogue
    sentence of the script using ``SequenceMatcher``. When a new best match
    for an entry reaches a ratio of 0.7 or more, the matching script
    entry's character is stored on the subtitle entry and the subtitle's
    time is stored on the script entry. After each subtitle entry a
    ``done/total`` counter is printed to standard error.

    Args:
        script: The script lines, passed to the ``label_lines`` functions.
        subtitles: The subtitles input, passed to ``order_text``.

    Returns:
        A tuple ``(average_ratio, script_dict, subtitles_dict)`` where
        ``average_ratio`` is the mean best ratio per subtitle entry times
        100 (0.0 when there are no subtitle entries).
    """
    subtitles_dict = OrderedDict(order_text(subtitles))

    # Remove the <tags> from the text
    for item in subtitles_dict:
        subtitles_dict[item]['text'] = \
            re.sub('<.*?>', '', subtitles_dict[item]['text'])

    # merge subtitles for complete lines
    subtitle_dict_length = len(subtitles_dict)
    i = 1
    while i < subtitle_dict_length:
        subtitles_dict, i = process_subtitle(subtitles_dict, i)

    # label the script lines and convert them to the script dictionary
    no_spaces = label_lines.detect_amount_of_spaces(script)

    dict_spaces_label = \
        label_lines.give_spaces_label(script, no_spaces)

    labelled_script = \
        label_lines.add_describing_letters(script, dict_spaces_label)

    script_dict = script_to_json.converter(labelled_script)

    # [entries handled so far, total number of entries]
    progress = [0, len(subtitles_dict)]

    # [sum of the best ratio per subtitle entry, number of entries]
    average_ratio = [0, 0]

    for item in subtitles_dict:

        time = ''

        # best ratio seen so far for this subtitle entry (all its sentences)
        highest_ratio = 0

        for sub_sentence in subtitles_dict[item]['text']:

            character = ''

            for index in script_dict:

                if 'dialogue' in script_dict[index]:

                    # NOTE(review): the right-hand side of this assignment
                    # was missing. This assumes the 'dialogue' entry is
                    # already an iterable of sentences - confirm against
                    # script_to_json.converter (a plain string would need
                    # sentence splitting first).
                    dialogue_text = script_dict[index]['dialogue']

                    for d_sentence in dialogue_text:

                        ratio = SequenceMatcher(
                            None, sub_sentence, d_sentence).ratio()

                        if ratio > highest_ratio:

                            highest_ratio = ratio
                            highest_D_match = index

                            # only a new best match of at least 70% links
                            # the subtitle and the script entry
                            if ratio >= 0.7:
                                time = subtitles_dict[item]['time']
                                character = script_dict[index]['character']

            if character != '':
                subtitles_dict[item]['character'] = character

            if time != '':
                script_dict[highest_D_match]['time'] = time

        average_ratio[0] += highest_ratio
        average_ratio[1] += 1

        progress[0] += 1

        print(f'{progress[0]}/{progress[1]}', file=sys.stderr)

    for item in subtitles_dict:
        subtitles_dict[item]['text'] = ' '.join(subtitles_dict[item]['text'])

    if average_ratio[1]:
        average_ratio = (average_ratio[0] / average_ratio[1]) * 100
    else:
        # no subtitle entries at all: report 0% instead of dividing by zero
        average_ratio = 0.0

    return average_ratio, script_dict, subtitles_dict