def decode_files(
    client,
    audio_paths: List[str],
    model: str,
    language_code: str,
    sample_rate=8000,
    max_alternatives=10,
    raw: bool = False,
    pcm: bool = False,
    word_level: bool = False,
):
    """
    Run streaming transcription for several audio files, one thread per file.

    Every path in `audio_paths` is first turned into chunks with
    `chunks_from_file` (fixed `chunk_size=1`). Then one thread per file runs
    `transcribe_chunks_streaming` on those chunks. The call blocks until every
    thread has finished.

    Args:
        client: Passed through unchanged as the first argument of
            `transcribe_chunks_streaming`.
        audio_paths: Paths of the audio files to decode.
        model: Forwarded to `transcribe_chunks_streaming`.
        language_code: Forwarded to `transcribe_chunks_streaming`.
        sample_rate: Forwarded to both the chunker and the transcriber.
        max_alternatives: Forwarded to `transcribe_chunks_streaming`.
        raw: Forwarded to both the chunker and the transcriber.
        pcm: Forwarded to the chunker only.
        word_level: Forwarded to the transcriber only.

    Returns:
        None. Whatever the worker threads return is not collected.
    """
    # Chunk every file up front, before any thread is created.
    chunk_streams = []
    for path in audio_paths:
        chunk_streams.append(
            chunks_from_file(
                path,
                sample_rate=sample_rate,
                chunk_size=1,
                raw=raw,
                pcm=pcm,
            )
        )

    workers = []
    for stream in chunk_streams:
        worker_args = (
            client,
            stream,
            model,
            language_code,
            sample_rate,
            max_alternatives,
            raw,
            word_level,
        )
        workers.append(
            threading.Thread(target=transcribe_chunks_streaming, args=worker_args)
        )

    # Start everything first so the requests overlap, then wait for all of them.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
def decode_files(
    client,
    audio_paths: List[str],
    model: str,
    language_code: str,
    raw: bool = False,
):
    """
    Transcribe several audio files concurrently, one thread per file.

    Each file is chunked with `chunks_from_file` using a chunk size drawn
    independently per file from `random.randint(1, 3)`. One thread per file
    then runs `transcribe_chunks` on those chunks. The call blocks until every
    thread has finished.

    Args:
        client: Passed through unchanged as the first argument of
            `transcribe_chunks`.
        audio_paths: Paths of the audio files to decode.
        model: Forwarded to `transcribe_chunks`.
        language_code: Forwarded to `transcribe_chunks`.
        raw: Forwarded to both the chunker and the transcriber.

    Returns:
        None. Whatever the worker threads return is not collected.
    """
    # Chunk every file up front; the chunk size is re-drawn for each file.
    per_file_chunks = []
    for path in audio_paths:
        per_file_chunks.append(
            chunks_from_file(path, chunk_size=random.randint(1, 3), raw=raw)
        )

    workers = []
    for file_chunks in per_file_chunks:
        worker = threading.Thread(
            target=transcribe_chunks,
            args=(client, file_chunks, model, language_code, raw),
        )
        workers.append(worker)

    # Start all threads before joining any, so the requests run concurrently.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
def decode_audios(
    audio_paths: List[str],
    model: str,
    language_code: str,
    num_proc: int = 8,
    segment_long_utt: bool = False,
    raw: bool = False,
):
    """
    Transcribe audio files by distributing the work over a `Pool` of workers.

    Two modes are supported:

    * `segment_long_utt=False`: each file is chunked with `chunks_from_file`
      and a single `transcribe_chunks` call is made per file.
    * `segment_long_utt=True`: each file is first split with
      `non_silent_segments_from_file(..., segment_length=10)`. Every segment
      is chunked with `chunks_from_audio_segment`, and `transcribe_chunks` is
      called once per segment.

    Args:
        audio_paths: Paths of the audio files to decode.
        model: Forwarded to `transcribe_chunks`.
        language_code: Forwarded to `transcribe_chunks`.
        num_proc: Number of workers handed to `Pool`.
        segment_long_utt: Selects per-segment decoding as described above.
        raw: Forwarded to both the chunker and the transcriber.

    Returns:
        A dict keyed by audio path. The value is the `transcribe_chunks`
        result for that file, or a list of per-segment results in the same
        order as the segments when `segment_long_utt` is true. If a path
        appears more than once, the last result wins.
    """
    if segment_long_utt:
        # Segment every file first, then chunk every segment, all before any
        # pool is created.
        segmented_audios = [
            non_silent_segments_from_file(audio_file, segment_length=10)
            for audio_file in audio_paths
        ]
        # One argument list per file, with one argument tuple per segment.
        args = [
            [
                (
                    chunks_from_audio_segment(seg, chunk_size=1, raw=raw),
                    model,
                    language_code,
                    raw,
                )
                for seg in segments
            ]
            for segments in segmented_audios
        ]

        results = []
        # A single pool serves every file. Creating and tearing one down per
        # file is pure overhead. With nothing to decode, no pool is started.
        if args:
            with Pool(num_proc) as pool:
                results = [
                    pool.starmap(transcribe_chunks, audio_segs)
                    for audio_segs in args
                ]
    else:
        args = [
            (chunks_from_file(path, chunk_size=1, raw=raw), model, language_code, raw)
            for path in audio_paths
        ]
        with Pool(num_proc) as pool:
            results = pool.starmap(transcribe_chunks, args)

    # `results` is index-aligned with `audio_paths`.
    return dict(zip(audio_paths, results))
def __init__(self, name, parent, item, client):
    """
    Set up the instance from a collection of audio specs.

    `name` and `parent` are handed to the base-class initializer. Each entry
    of `item` is indexed by the keys "file" and "transcription". The file is
    converted to chunks with `chunks_from_file` and paired with its
    transcription in `self.audios`. `self.results` starts out with one `None`
    placeholder per entry of `item`.
    """
    super().__init__(name, parent)

    # One (chunks, transcription) pair per spec, in the order given.
    self.audios = [
        (chunks_from_file(spec["file"]), spec["transcription"])
        for spec in item
    ]
    self.client = client
    # Placeholder slots, one per spec.
    self.results = [None for _unused in item]