def test_write_to_sqlite(input_file):
    """
    Test production of sqlite output

    GIVEN an input file
    WHEN writing to sqlite
    THEN check output exists and contains content

    The output is written to a uniquely named file in the current working
    directory. The file is removed in a ``finally`` block so it does not
    linger when an assertion fails.
    """

    # GIVEN an input file
    output_filename = Path(f"{uuid4().hex}.db")

    try:
        # WHEN writing to sqlite
        tscribe.write(input_file, save_as=output_filename, format="sqlite")

        # THEN check output exists and contains content
        assert output_filename.is_file(), "Output file should exist"

        conn = sqlite3.connect(str(output_filename))
        try:
            query = conn.execute("SELECT * FROM transcript").fetchall()
        finally:
            # Close before the file is deleted: an open handle leaks the
            # connection and can block removal on platforms that lock
            # open files (e.g. Windows).
            conn.close()

        data = tscribe.load_json(input_file)
        df = tscribe.decode_transcript(data)

        assert len(query) == len(
            df), "Database table should be length of dataframe"
    finally:
        # Teardown. Guarded so that a failed write (no file produced) is
        # reported as itself rather than as FileNotFoundError.
        if output_filename.exists():
            os.remove(output_filename)
def test_write_to_csv(input_file):
    """
    Test production of csv output

    GIVEN an input file
    WHEN writing to csv
    THEN check output exists and contains content

    The output is written to a uniquely named file in the current working
    directory. The file is removed in a ``finally`` block so it does not
    linger when an assertion fails.
    """

    # GIVEN an input file
    output_filename = Path(f"{uuid4().hex}.csv")

    try:
        # WHEN writing to csv
        tscribe.write(input_file, save_as=output_filename, format="csv")

        # THEN check output exists and contains content
        assert output_filename.is_file(), "Output file should exist"

        with open(output_filename, "r") as file:
            lines = file.readlines()

        data = tscribe.load_json(input_file)
        df = tscribe.decode_transcript(data)

        # One physical line per dataframe row, plus the header line.
        assert len(lines) == len(
            df) + 1, "CSV should be length of dataframe + headers"
    finally:
        # Teardown. Guarded so that a failed write (no file produced) is
        # reported as itself rather than as FileNotFoundError.
        if output_filename.exists():
            os.remove(output_filename)
def test_decode_transcript(input_file):
    """
    Test transcript decoding function

    GIVEN a data dict loaded from the input file
    WHEN calling decode_transcript(...)
    THEN a four-column pandas DataFrame is returned and, for the
         multi-speaker sample, it holds one row per speaker segment
    """

    # GIVEN a data dict
    transcript_json = tscribe.load_json(input_file)

    # WHEN calling decode_transcript(...)
    decoded = tscribe.decode_transcript(transcript_json)

    # THEN
    is_dataframe = isinstance(decoded, pandas.DataFrame)
    assert is_dataframe, "decode_transcript should return a Pandas Data Frame"

    row_count, column_count = decoded.shape
    assert column_count == 4, "Dataframe should have four columns"

    if input_file == "sample_single.json":
        # TODO: no row-count expectation is checked for this sample yet
        pass

    if input_file == "sample_multiple.json":
        segments = transcript_json["results"]["speaker_labels"]["segments"]
        assert row_count == len(
            segments), "Rows should match number of segments"
def test_write_to_docx(input_file):
    """
    Test production of docx output

    GIVEN an input file
    WHEN writing to docx
    THEN check output exists and contains content

    Checks the stats table, the transcript table and the embedded chart.
    The output is written to a uniquely named file in the current working
    directory. The file is removed in a ``finally`` block so it does not
    linger when an assertion fails.
    """

    logging.info("test_write_to_docx")

    # GIVEN an input file
    output_filename = Path(f"{uuid4().hex}.docx")

    try:
        # WHEN writing to docx
        tscribe.write(input_file, save_as=output_filename, format="docx")

        # THEN check output exists and contains content
        assert output_filename.is_file(), "Output file should exist"

        document = Document(output_filename)

        assert (len(document.tables) == 2
                ), "Document should contain two tables, stats and transcript"

        # First table: confidence statistics.
        stats_table = document.tables[0]
        t_conf = stats_table.cell(0, 0).text
        t_count = stats_table.cell(0, 1).text
        t_perc = stats_table.cell(0, 2).text
        assert (t_conf, t_count, t_perc) == (
            "Confidence",
            "Count",
            "Percentage",
        ), "First table should be stats headers"
        assert len(stats_table.rows) == 12, "Stats table should hold 12 rows"

        # Second table: the transcript itself, one row per dataframe row.
        transcript_table = document.tables[1]
        t_time = transcript_table.cell(0, 0).text
        t_speaker = transcript_table.cell(0, 1).text
        t_content = transcript_table.cell(0, 2).text
        assert (t_time, t_speaker, t_content) == (
            "Time",
            "Speaker",
            "Content",
        ), "Second table should be transcript headers"

        data = tscribe.load_json(input_file)
        df = tscribe.decode_transcript(data)
        assert (len(transcript_table.rows) == len(df) +
                1), "Second table should be length of dataframe + headers"

        # The chart image is looked up by name in the raw XML of the
        # paragraph at index 6.
        assert (
            "chart.png"
            in document.paragraphs[6]._p.xml
        ), "Chart should be in paragraph six"
    finally:
        # Teardown. Guarded so that a failed write (no file produced) is
        # reported as itself rather than as FileNotFoundError.
        if output_filename.exists():
            os.remove(output_filename)
def test_write_to_vtt(input_file):
    """
    Test production of vtt format

    GIVEN an input file
    WHEN writing to vtt
    THEN check output exists and contains content

    The output is written to a uniquely named file in the current working
    directory. The file is removed in a ``finally`` block so it does not
    linger when an assertion fails.
    """

    logging.info("test_write_to_vtt")

    # GIVEN an input file
    output_filename = Path(f"{uuid4().hex}.vtt")

    try:
        # WHEN writing to vtt
        tscribe.write(input_file, save_as=output_filename, format="vtt")

        # THEN check output exists and contains content
        # Asserted explicitly, as the other writer tests do, so a missing
        # file fails with a clear message instead of a read error.
        assert output_filename.is_file(), "Output file should exist"

        vtt = webvtt.read(output_filename)

        data = tscribe.load_json(input_file)
        df = tscribe.decode_transcript(data)
        assert len(vtt.captions) == len(
            df), "vtt file should have equal captions to df rows"

        for caption in vtt.captions:
            assert hasattr(caption,
                           "start"), "each caption should have a start_time"
            assert hasattr(caption,
                           "end"), "each caption should have a end_time"
            assert hasattr(caption, "text"), "each caption should have text"
            # Lower bound on line count: text of N characters needs at
            # least N / 80 lines if no line is longer than 80 characters.
            assert len(caption.lines) >= len(
                caption.text
            ) / 80, "text should be split into max 80 long lines"
            if input_file != "sample_single.json":
                assert hasattr(
                    caption,
                    "identifier"), "each caption should have an identifier"
    finally:
        # Teardown. Guarded so that a failed write (no file produced) is
        # reported as itself rather than as FileNotFoundError.
        if output_filename.exists():
            os.remove(output_filename)