Python add_describing_lettersの例

プログラミング言語: Python

名前空間/パッケージ名: label_lines

メソッド/関数: add_describing_letters

hotexamples.comのコード掲載数: 5

Python add_describing_letters - 5件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのlabel_lines.add_describing_lettersの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: test.py プロジェクト: bbjoverbeek/Error_Group_name_not_found

def test_add_describing_letters():
    """
    Check that every non-blank labelled line of the Shrek test script
    starts with one of the known label prefixes.

    The script is read from 'test_files/shrek_script.txt' and passed
    through label_lines.detect_amount_of_spaces, give_spaces_label and
    add_describing_letters; the joined result is then checked line by
    line.
    """

    with open('test_files/shrek_script.txt', 'r') as inp:
        full_text = inp.readlines()

    first_list = label_lines.detect_amount_of_spaces(full_text)
    second_list = label_lines.give_spaces_label(full_text, first_list)
    text = ''.join(label_lines.add_describing_letters(full_text, second_list))

    labels = ('M|', 'C|', 'D|', 'S|', 'N|')

    # Iterate over lines, not over the characters of the joined string:
    # looping over a str yields single characters, which can never be
    # meaningfully compared against two-character prefixes.
    for line in text.split('\n'):
        # The earlier version listed '' as an accepted prefix, which
        # matches every string and made the assertion always pass.
        # Presumably it was meant to tolerate blank lines, so those are
        # skipped explicitly instead -- TODO confirm blank lines are
        # indeed left unlabelled by add_describing_letters.
        if not line.strip():
            continue
        assert line.startswith(labels), f'unlabelled line: {line!r}'

コード例 #2

ファイルを表示

ファイル: script_to_json.py プロジェクト: bbjoverbeek/Error_Group_name_not_found

def main(argv):
    """
    Convert the script file named by argv[1] into 'script.json'.

    The file's lines are run through the label_lines functions
    (detect_amount_of_spaces, give_spaces_label, add_describing_letters),
    the labelled result is handed to converter, and whatever converter
    returns is written as JSON with an indent of 4.

    Parameters:
        argv: Argument sequence; argv[1] is the path of the script file.
    """

    with open(argv[1], 'r') as source:
        lines = list(source)

    space_counts = label_lines.detect_amount_of_spaces(lines)
    labels_by_spaces = label_lines.give_spaces_label(lines, space_counts)
    labelled_lines = label_lines.add_describing_letters(
        lines, labels_by_spaces)

    # Convert before opening the output so a failing conversion does not
    # leave a truncated 'script.json' behind.
    converted = converter(labelled_lines)

    with open('script.json', 'w') as target:
        json.dump(converted, target, indent=4)

コード例 #3

ファイルを表示

def main(argv):
    """
    Read the script file whose name/path is given in argv[1], label
    its lines with the label_lines functions, and print what
    count_scenes reports for the joined, labelled text (the number
    of scene descriptions in the movie).
    """

    with open(argv[1], 'r') as script_file:
        lines = list(script_file)

    # run the three steps of label_lines.py one after another
    space_counts = label_lines.detect_amount_of_spaces(lines)
    labels_by_spaces = label_lines.give_spaces_label(lines, space_counts)
    labelled_lines = label_lines.add_describing_letters(
        lines, labels_by_spaces)

    labelled_text = "".join(labelled_lines)
    print(count_scenes(labelled_text))

コード例 #4

ファイルを表示

ファイル: compare.py プロジェクト: bbjoverbeek/Error_Group_name_not_found

def compare_script_to_subtitles(script, subtitles):
    '''
    Compares all the sentences of the subtitles to all the sentences
    of the script to find the best matches. Will add the character to
    the subtitles and the time to the script if the match is at least
    70%. Also calculates the total similarity of the dialogue.

    Progress is printed to stdout as "<done>/<total>" after each
    subtitle entry.

    Parameters:
        script(list): A list of the input script lines
        subtitles(str): A string of the subtitles file

    Returns:
        average_ratio(float): The similarity of the dialogue in
            percentage; 0.0 when there are no subtitle entries
        script_dict(dict): The new script, with timestamps
        subtitles_dict(dict): The new subtitles, with characters
    '''

    subtitles_dict = OrderedDict(order_text(subtitles))

    # Remove the <tags> from the text
    for item in subtitles_dict:
        subtitles_dict[item]['text'] = \
            re.sub('<.*?>', '', subtitles_dict[item]['text'])

    # merge subtitles for complete lines
    subtitle_dict_length = len(subtitles_dict)
    i = 1
    while i < subtitle_dict_length:
        subtitles_dict, i = process_subtitle(subtitles_dict, i)

    # process the script
    no_spaces = label_lines.detect_amount_of_spaces(script)

    dict_spaces_label = \
        label_lines.give_spaces_label(script, no_spaces)

    labelled_script = \
        label_lines.add_describing_letters(script, dict_spaces_label)

    script_dict = script_to_json.converter(labelled_script)

    # Tokenize every dialogue once up front. The dialogue text is not
    # modified by the comparison loop, so re-tokenizing it for every
    # subtitle sentence would only repeat the same work.
    dialogue_sentences = [
        (index, nltk.sent_tokenize(script_dict[index]['dialogue']))
        for index in script_dict
        if 'dialogue' in script_dict[index]
    ]

    # loop to compare the texts
    total = len(subtitles_dict)
    ratio_sum = 0
    compared = 0

    for done, item in enumerate(subtitles_dict, start=1):

        time = ''
        highest_ratio = 0

        for sub_sentence in subtitles_dict[item]['text']:

            character = ''

            for index, sentences in dialogue_sentences:
                for d_sentence in sentences:

                    ratio = SequenceMatcher(None, sub_sentence,
                                            d_sentence).ratio()

                    # Only a new best match for this subtitle entry can
                    # update the bookkeeping below.
                    if ratio > highest_ratio:

                        highest_ratio = ratio
                        highest_d_match = index

                        if ratio >= 0.7:
                            time = subtitles_dict[item]['time']
                            character = script_dict[index]['character']

            if character != '':
                subtitles_dict[item]['character'] = character

            # time is only ever set together with highest_d_match, so
            # the name is always bound when this branch runs.
            if time != '':
                script_dict[highest_d_match]['time'] = time

        ratio_sum += highest_ratio
        compared += 1

        print(f'{done}/{total}')

    for item in subtitles_dict:
        subtitles_dict[item]['text'] = ' '.join(subtitles_dict[item]['text'])

    # Guard against an empty subtitles file, which would otherwise
    # raise ZeroDivisionError here.
    average_ratio = (ratio_sum / compared) * 100 if compared else 0.0

    return average_ratio, script_dict, subtitles_dict

コード例 #5

ファイルを表示

ファイル: compare.py プロジェクト: bbjoverbeek/Error_Group_name_not_found

def compare_script_to_subtitles(script, subtitles):
    '''
    Compares every subtitle sentence to every dialogue sentence of the
    script to find the best matches. When a match scores at least 0.7,
    the script character is added to the subtitle entry and the
    subtitle time is added to the matching script entry.

    Progress is written to stderr as "<done>/<total>" after each
    subtitle entry.

    Parameters:
        script: The script lines, passed on to the label_lines functions
        subtitles: The subtitles input, passed on to order_text

    Returns:
        average_ratio(float): Mean of the best ratio per subtitle
            entry, times 100; 0.0 when there are no subtitle entries
        script_dict: The converted script, with 'time' added on matches
        subtitles_dict: The subtitles, with 'character' added on matches
    '''

    subtitles_dict = OrderedDict(order_text(subtitles))

    # Remove the <tags> from the text
    for item in subtitles_dict:
        subtitles_dict[item]['text'] = \
            re.sub('<.*?>', '', subtitles_dict[item]['text'])

    # merge subtitles for complete lines
    subtitle_dict_length = len(subtitles_dict)
    i = 1
    while i < subtitle_dict_length:
        subtitles_dict, i = process_subtitle(subtitles_dict, i)

    # label the script lines and convert them
    no_spaces = label_lines.detect_amount_of_spaces(script)

    dict_spaces_label = \
        label_lines.give_spaces_label(script, no_spaces)

    labelled_script = \
        label_lines.add_describing_letters(script, dict_spaces_label)

    script_dict = script_to_json.converter(labelled_script)

    # Tokenize every dialogue once up front. The dialogue text is not
    # modified by the comparison loop, so re-tokenizing it for every
    # subtitle sentence would only repeat the same work.
    dialogue_sentences = [
        (index, nltk.sent_tokenize(script_dict[index]['dialogue']))
        for index in script_dict
        if 'dialogue' in script_dict[index]
    ]

    total = len(subtitles_dict)
    ratio_sum = 0
    compared = 0

    for done, item in enumerate(subtitles_dict, start=1):

        time = ''
        highest_ratio = 0

        for sub_sentence in subtitles_dict[item]['text']:

            character = ''

            for index, sentences in dialogue_sentences:
                for d_sentence in sentences:

                    ratio = SequenceMatcher(None, sub_sentence,
                                            d_sentence).ratio()

                    # Only a new best match for this subtitle entry can
                    # update the bookkeeping below.
                    if ratio > highest_ratio:

                        highest_ratio = ratio
                        highest_d_match = index

                        if ratio >= 0.7:
                            time = subtitles_dict[item]['time']
                            character = script_dict[index]['character']

            if character != '':
                subtitles_dict[item]['character'] = character

            # time is only ever set together with highest_d_match, so
            # the name is always bound when this branch runs.
            if time != '':
                # NOTE(review): this writes a blank line to stdout while
                # progress goes to stderr; it looks like leftover debug
                # output -- confirm before removing.
                print()
                script_dict[highest_d_match]['time'] = time

        ratio_sum += highest_ratio
        compared += 1

        print(f'{done}/{total}', file=sys.stderr)

    for item in subtitles_dict:
        subtitles_dict[item]['text'] = ' '.join(subtitles_dict[item]['text'])

    # Guard against an empty subtitles file, which would otherwise
    # raise ZeroDivisionError here.
    average_ratio = (ratio_sum / compared) * 100 if compared else 0.0

    return average_ratio, script_dict, subtitles_dict