Exemple #1
0
def test_filter_files_orphan_ref():
    """Check that ``filter_files`` removes ``'orphan.txt'`` from the input.

    The input holds a ``blah.txt``/``blah.ogg`` pair plus ``orphan.txt``;
    only the pair is expected back, in its original order.
    """
    from utilities.utilities import Utilities

    # NOTE(review): presumably 'orphan.txt' is dropped because no other
    # entry shares its stem -- confirm against Utilities.filter_files.
    candidates = ['blah.txt', 'blah.ogg', 'orphan.txt']
    kept = Utilities().filter_files(candidates)

    assert kept == ['blah.txt', 'blah.ogg']
Exemple #2
0
def test_filter_files_doc():
    """Check that ``filter_files`` removes ``'hello.doc'`` from the input.

    Given ``file.txt``, ``hello.doc`` and ``file.wav``, only ``file.txt``
    and ``file.wav`` are expected back, in their original order.
    """
    from utilities.utilities import Utilities

    # NOTE(review): presumably '.doc' is not an accepted extension (or the
    # file has no same-stem partner) -- confirm against filter_files.
    candidates = ['file.txt', 'hello.doc', 'file.wav']
    kept = Utilities().filter_files(candidates)

    assert kept == ['file.txt', 'file.wav']
    raw_file_list = list()
    # Get either local files or cloud storage


    if not local_files_path:
        # Get list of all files in google cloud storage (gcs) bucket
        gcs = GCS()
        raw_file_list = gcs.get_file_list(cloud_store_uri)
    else:
        raw_file_list = utilities.local_files(local_files_path)

    logger.debug(f'RAW FILE LIST: {raw_file_list}')

    # Filter file list
    filtered_file_list = utilities.filter_files(raw_file_list, only_transcribe)

    if not local_files_path:
        final_file_list = [utilities.append_uri(cloud_store_uri, file) for file in filtered_file_list]
    else:
        final_file_list = filtered_file_list

    logger.debug(f'FINAL FILE LIST: {final_file_list}')

    # if only doing transcriptions, add diarization and punctuation?
    dia = False
    punct = False
    c = None
    if only_transcribe:
        dia = input('Add Diarization Y/N ')
        if dia.lower() == 'y':