Ejemplo n.º 1
0
def main():
    """Print shell commands that clean up "(1)" duplicate transcript files.

    Lists the transcript directory and, for every file whose name does not
    contain "(1)", asks get_duplicated_file_name for the name of its
    duplicate. When both files exist on disk, a ``git rm`` command for the
    duplicate is queued. If the first paragraph of the original has the
    speaker "Unknown Speaker", a ``cp`` command overwriting the original with
    the duplicate is queued before the ``git rm``.

    Nothing is executed here: the queued commands are only printed, one per
    line. They contain bare file names (not full paths), so they only resolve
    correctly when run from the transcript directory.

    File names are shell-quoted with shlex.quote so that names containing
    spaces or shell metacharacters produce valid commands.

    Raises:
        StopIteration: if either transcript of a pair yields no paragraphs
            (next() is called on paragraphs() without a default).
    """
    import shlex  # local import: only needed to quote names for the shell

    transcript_directory = get_transcript_directory()
    all_files = os.listdir(transcript_directory)

    non_duplicated_files = [f for f in all_files if "(1)" not in f]

    commands = []

    for file_name in non_duplicated_files:
        duplicated_file_name = get_duplicated_file_name(file_name)

        original_file_path = os.path.join(transcript_directory, file_name)
        duplicated_file_path = os.path.join(transcript_directory, duplicated_file_name)

        # Only pairs where both the original and its duplicate are present
        # need any clean-up.
        if not (
            os.path.exists(original_file_path)
            and os.path.exists(duplicated_file_path)
        ):
            continue

        t0: Transcript = get_transcript_by_filename(file_name)
        t1: Transcript = get_transcript_by_filename(duplicated_file_name)

        speaker0 = next(t0.paragraphs()).speaker
        speaker1 = next(t1.paragraphs()).speaker

        quoted_original = shlex.quote(file_name)
        quoted_duplicate = shlex.quote(duplicated_file_name)

        if speaker0 == "Unknown Speaker":
            print(
                f"Duplicate found: {file_name}->{speaker0}, {duplicated_file_name}->{speaker1}"
            )
            # Replace the original with the duplicate's content before the
            # duplicate is removed below.
            commands.append(f"cp {quoted_duplicate} {quoted_original}")

        # The duplicate is always removed once the pair has been handled.
        commands.append(f"git rm {quoted_duplicate}")

    print("\n".join(commands))
Ejemplo n.º 2
0
def test_get_last_paragraph_for_transcript():
    """The final paragraph of the 2016-06-16 transcript has the expected text and time code."""
    transcript: Transcript = get_transcript_by_filename(
        filename="hooting_yard_2016-06-16.txt"
    )

    # paragraphs() is consumed fully so the last element can be picked.
    every_paragraph = list(transcript.paragraphs())
    final = every_paragraph[-1]

    assert final.text.startswith("Finally this week"), f"Wrong text: {final.text}"

    # 27 minutes and 9 seconds into the recording.
    expected_time_code = datetime.timedelta(seconds=27 * 60 + 9)
    assert final.time_code == expected_time_code
Ejemplo n.º 3
0
def test_second_paragraph():
    """The second paragraph of the 2016-06-16 transcript starts at 43 seconds."""
    transcript: Transcript = get_transcript_by_filename(
        filename="hooting_yard_2016-06-16.txt"
    )
    paragraph_iter = transcript.paragraphs()

    next(paragraph_iter)  # discard the first paragraph
    second = next(paragraph_iter)

    assert second.time_code == datetime.timedelta(seconds=43)
Ejemplo n.º 4
0
def test_get_id_for_transcript():
    """get_id() of the 2016-06-16 transcript equals its file name without ".txt"."""
    transcript: Transcript = get_transcript_by_filename(
        filename="hooting_yard_2016-06-16.txt"
    )
    transcript_id = transcript.get_id()
    assert transcript_id == "hooting_yard_2016-06-16"
Ejemplo n.º 5
0
def test_get_words_for_paragraphs_for_transcript():
    """The first paragraph of the 2016-06-16 transcript starts at 13 seconds."""
    transcript: Transcript = get_transcript_by_filename(
        filename="hooting_yard_2016-06-16.txt"
    )
    paragraph_iter = transcript.paragraphs()
    opening = next(paragraph_iter)

    assert opening.time_code == datetime.timedelta(seconds=13)
Ejemplo n.º 6
0
def test_get_transcript_by_filename():
    """get_transcript_by_filename returns a Transcript instance for a known file."""
    loaded: Transcript = get_transcript_by_filename(
        filename="hooting_yard_2016-06-16.txt"
    )
    is_transcript = isinstance(loaded, Transcript)
    assert is_transcript, f"Expected a Transcript, got {loaded}"
Ejemplo n.º 7
0
def test_matching():
    """Run match_single_transcript on the 2008-04-24 transcript.

    No assertion is visible in this excerpt, so as shown the test only fails
    if one of the calls raises.
    NOTE(review): the function may continue beyond this excerpt - confirm.
    """
    t = get_transcript_by_filename("hooting_yard_2008-04-24.txt")
    # Presumably builds an n-gram -> script-index lookup (inferred from the
    # name only; verify against ngram_to_script_index).
    ngram_lookup_function = ngram_to_script_index()
    # The result is bound but not inspected in the lines visible here.
    match_result = match_single_transcript(ngram_lookup_function, t)