Esempio n. 1
0
def combine_wavs_batch(audio_paths, method, **kargv):
    """Split the given files on silence and export merged runs of segments.

    Every path in ``audio_paths`` is first passed to the splitter selected by
    ``method``.  Afterwards, consecutive entries of the module-level ``wavs``
    sequence are concatenated (joined with ``silence``) for as long as the
    accumulated length stays within the limit.  Each growing concatenation is
    exported as its own wav file, and its joined transcript is stored in the
    alignment data under the exported file name.  Finally the alignment file
    at ``config.alignment_path`` is rewritten.

    NOTE(review): ``wavs``, ``paths``, ``silence`` and ``config`` are not
    defined in this function; they are expected to exist at module level.
    ``wavs[i]`` and ``paths[i]`` are assumed to describe the same segment --
    confirm against the code that fills them.

    Args:
        audio_paths: List of audio file paths.  It is sorted in place.  The
            first path after sorting supplies the directory and base name
            used for the exported files.
        method: ``"librosa"`` or ``"pydub"`` (case-insensitive).
        **kargv: Forwarded unchanged to the selected split function.

    Returns:
        Always ``0``.

    Raises:
        ValueError: If ``method`` does not name a supported splitter.
    """
    # Longest merged clip allowed, in the unit len(wavs[i]) reports
    # (presumably milliseconds if these are pydub segments -- confirm).
    max_length = 15000
    # Length budget charged for every `silence` spacer that is inserted.
    gap_length = 400

    # The in-place sort is kept: callers may rely on the list being sorted.
    audio_paths.sort()
    method = method.lower()

    if method not in ("librosa", "pydub"):
        # Previously an unknown method left `fn` unbound and crashed later
        # with an unrelated-looking UnboundLocalError.
        raise ValueError(
            "Unsupported method {!r}; expected 'librosa' or 'pydub'".format(method))

    if not audio_paths:
        # Nothing to split and no first path to derive output names from.
        return 0

    if method == "librosa":
        fn = partial(split_on_silence_with_librosa, **kargv)
    else:
        fn = partial(split_on_silence_with_pydub, **kargv)

    parallel_run(fn, audio_paths,
                 desc="Split on silence", parallel=False)

    # Output names are "<dir>/<stem>.<i>.<j>...wav", where <stem> is the part
    # of the first file name before its first dot.
    first_path = audio_paths[0]
    stem = os.path.basename(first_path).split('.', 1)[0]
    directory = os.path.dirname(first_path)
    # Only add the separator when there is a directory; otherwise a bare
    # file name would be turned into a path at the filesystem root.
    prefix = (directory + "/" if directory else "") + stem + "."

    data = load_json(config.alignment_path, encoding="utf8")

    for i in range(len(wavs) - 1):
        # A run can only start at a segment that fits and has a transcript.
        if len(wavs[i]) > max_length or paths[i] not in data:
            continue

        total_length = len(wavs[i])
        filename = prefix + str(i).zfill(4) + "."
        asr = data[paths[i]] + " "
        concated = wavs[i]
        for j in range(i + 1, len(wavs)):
            total_length += len(wavs[j]) + gap_length
            # Stop extending once the clip gets too long or a segment has
            # no transcript to append.
            if total_length > max_length or paths[j] not in data:
                break
            filename = filename + str(j).zfill(4) + "."
            asr = asr + data[paths[j]] + " "
            concated = concated + silence + wavs[j]
            # Every intermediate extension is exported, not just the longest.
            final_fn = filename + "wav"
            data[final_fn] = asr
            concated.export(final_fn, format="wav")
            print(filename + "wav | " + str(len(concated)))

    if os.path.exists(config.alignment_path):
        backup_file(config.alignment_path)

    write_json(config.alignment_path, data)
    get_durations(data.keys(), print_detail=False)
    return 0
Esempio n. 2
0
        results.update(item)

    found_count = sum([type(value) == str for value in results.values()])
    print(" [*] # found: {:.5f}% ({}/{})".format(
        len(results) / len(data), len(results), len(data)))
    print(" [*] # exact match: {:.5f}% ({}/{})".format(
        found_count / len(items), found_count, len(items)))

    return results


if __name__ == '__main__':
    # Command-line options, registered in the order they appear in --help.
    parser = argparse.ArgumentParser()
    for flag, settings in [
        ('--recognition_path', dict(required=True)),
        ('--alignment_filename', dict(default="alignment.json")),
        ('--score_threshold', dict(default=0.4, type=float)),
        ('--recognition_encoding', dict(default='utf-8')),
    ]:
        parser.add_argument(flag, **settings)
    # parse_known_args accepts unrecognised flags; they end up in `unparsed`.
    config, unparsed = parser.parse_known_args()

    results = align_text_batch(config)

    # The alignment file is written into the recognition file's directory.
    base_dir = os.path.dirname(config.recognition_path)
    alignment_path = os.path.join(base_dir, config.alignment_filename)

    # backup_file runs before an existing alignment file is overwritten.
    if os.path.exists(alignment_path):
        backup_file(alignment_path)

    write_json(alignment_path, results)
    duration = get_durations(results.keys(), print_detail=False)
Esempio n. 3
0
if __name__ == "__main__":
    # Command-line options, registered in the order they appear in --help.
    parser = argparse.ArgumentParser()
    for required_flag in ('--audio_pattern', '--alignment_path'):
        parser.add_argument(required_flag, required=True)
    parser.add_argument('--out_ext', default='wav')
    parser.add_argument('--method', required=True,
                        choices=['librosa', 'pydub'])
    config = parser.parse_args()

    data = load_json(config.alignment_path, encoding="utf8")

    audio_paths = glob(config.audio_pattern)

    for path in audio_paths:
        # Collect the per-segment files "<name>_sNN.wav" for this recording.
        single_path = path.replace('.wav', '_s??.wav')
        single_paths = glob(single_path)
        combine_wavs_batch(single_paths, config.method,
                           out_ext=config.out_ext)
        # NOTE(review): these module-level lists are reset after the call,
        # not before it -- presumably to clear buffers filled while
        # splitting; confirm against the code that appends to them.
        wavs, paths = [], []

    if os.path.exists(config.alignment_path):
        backup_file(config.alignment_path)

    # NOTE(review): `data` is not modified anywhere in this block, so this
    # writes back what was loaded above.  If combine_wavs_batch updates the
    # alignment file itself, this write would overwrite those updates --
    # confirm the intended behaviour.
    write_json(config.alignment_path, data)
    get_durations(data.keys(), print_detail=False)