def OPENFILE():
    global BUFFER
    global CHUNK_TIMEOUT
    count = 0  # packet number
    try:
        with sf.SoundFile(args.filename) as f:
            filesize = os.path.getsize(args.filename)
            print(' [INFO] -> start Playing')
            print(' [OPENFILE] Samplerate: {} Channels: {} Blocksize: {} Filesize: {}'
                  .format(f.samplerate, f.channels, args.blocksize, filesize))
            data = f.buffer_read(args.blocksize, dtype='int16')
            while data:
                start = time.time()  # packet creation needs time
                paket = rtp.createPacket(1, 77777, 1, data)  # build packet from the current chunk
                # write it into the ring buffer
                # BUFFER.append(paket)
                index = count % args.buffersize
                BUFFER[index] = paket
                count += 1
                data = f.buffer_read(args.blocksize, dtype='int16')  # read the next chunk
                end = time.time()  # end of creation
                paket_creation_time = end - start
                # sleep for one block period, minus the time spent building the packet
                CHUNK_TIMEOUT = (float(args.blocksize) / f.samplerate) - paket_creation_time
                time.sleep(max(CHUNK_TIMEOUT, 0))
                # print(' [OPENFILE] Chunklength: {}'.format(CHUNK_TIMEOUT))
    except Exception:
        print(' [OPENFILE] -> [ERROR] ...')
def record_and_decode():
    if os.path.exists('demo.wav'):  # os.remove raises if the file is missing
        os.remove('demo.wav')
    try:
        with sf.SoundFile('demo.wav', mode='x', samplerate=16000,
                          channels=1, subtype='PCM_16') as file:
            with sd.InputStream(samplerate=16000, channels=1,
                                callback=record_callback):
                print('#' * 80)
                print('press Ctrl+C to stop the recording')
                print('#' * 80)
                while True:
                    file.write(q.get())
    except KeyboardInterrupt:
        test_wav('demo.wav')
def _setSndFromFile(self, fileName):
    # load the file
    if not path.isfile(fileName):
        msg = "Sound file %s could not be found." % fileName
        logging.error(msg)
        raise ValueError(msg)
    self.fileName = fileName
    # in case a tone with inf loops had been used before
    self.loops = self.requestedLoops
    try:
        self.sndFile = sndfile.SoundFile(fileName)
        sndArr = self.sndFile.read()
        self.sndFile.close()
        self._setSndFromArray(sndArr)
    except Exception:
        msg = "Sound file %s could not be opened using pysoundcard for sound."
        logging.error(msg % fileName)
        raise ValueError(msg % fileName)
def __call__(self, flo):
    with soundfile.SoundFile(flo) as f:
        samples = f.read()
        samples *= (2 ** 16) // 2  # scale float samples in [-1, 1] to the int16 range
        samples = samples.astype(np.int16)
        bio = BytesIO()
        # pipe raw PCM into ffmpeg and collect the encoded MP3 from stdout
        proc = subprocess.Popen(
            args=[
                'ffmpeg', '-y', '-loglevel', 'error',
                '-f', 's16le',
                '-ac', str(f.channels),
                '-ar', str(f.samplerate),
                '-i', '-',
                '-acodec', 'libmp3lame',
                '-f', 'mp3', '-'
            ],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        stdout, stderr = proc.communicate(input=samples.tobytes())
        bio.write(stdout)
        bio.seek(0)
        return bio
def addFrameWithTransitionAndPause(self, image_file, audio_file, transition_file, pause):
    media_info = MediaInfo.parse(transition_file)
    duration_in_ms = media_info.tracks[0].duration
    audio_file = r"%s" % audio_file
    f = sf.SoundFile(audio_file)
    try:
        audio_clip = AudioSegment.from_wav(audio_file)
    except Exception:
        print("error with frame audio transition pause for %s" % audio_file)
        audio_clip = AudioSegment.silent(duration=pause)
    duration = len(f) / f.samplerate
    f.close()
    audio_clip_with_pause = audio_clip
    self.imageframes.append(image_file)
    self.audiofiles.append(audio_clip_with_pause)
    self.durations.append(duration + (pause / 1000))
    self.transitions.append((transition_file, len(self.imageframes) - 1,
                             (duration_in_ms / 1000) + (pause / 1000)))
def read_audio(filename, scipy_=True, normalization=True):
    if scipy_:
        # scipy.io.wavfile.read returns (rate, data) in that order
        sample_rate, audio = wavread(filename)
        audio = audio.astype('float32')
        if audio.ndim >= 2:
            audio = np.mean(audio, 1)
    else:
        with soundfile.SoundFile(filename) as sound_file:
            audio = sound_file.read(dtype='float32')
            sample_rate = sound_file.samplerate
        if audio.ndim >= 2:
            audio = np.mean(audio, 1)
    if normalization:
        audio = normalize(audio, sample_rate)
    return audio, sample_rate
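# Example usage of read_audio (a minimal sketch; 'speech.wav' is a
# hypothetical path, and normalization is disabled so the `normalize`
# helper is not needed):
if __name__ == '__main__':
    audio, sample_rate = read_audio('speech.wav', scipy_=False, normalization=False)
    print('loaded %d samples at %d Hz' % (len(audio), sample_rate))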
def extract_feature(file_name, mfcc, chroma, mel):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
    if len(X.shape) > 1:
        X = X.mean(axis=1)  # downmix multi-channel audio to mono
    if chroma:
        stft = np.abs(librosa.stft(X))
    result = np.array([])
    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        result = np.hstack((result, mfccs))
    if chroma:
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, chroma))
    if mel:
        # the keyword form y=X is required by recent librosa releases
        mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
        result = np.hstack((result, mel))
    return result
def particion():
    for archivo in os.listdir(UPLOAD_FOLDER):
        name_file = archivo.split('.')
        print(name_file)
        if name_file[1] == 'wav':
            time = 30000  # chunk length in milliseconds
            j = 1
            with contextlib.closing(wave.open(UPLOAD_FOLDER + archivo, 'r')) as f:
                frames = f.getnframes()
                rate = f.getframerate()
                duration = int((frames / float(rate)) * 1000)
            for i in range(0, duration, time):
                newAudio = AudioSegment.from_wav(UPLOAD_FOLDER + archivo)
                newAudio = newAudio[i:i + 30000]
                newAudio.export(name_file[0] + '_' + str(j) + ".wav", format="wav")
                j = j + 1
    # return redirect('/velocidad')
    return ''  # the original returned an inline HTML string here (truncated in source)
def analyzeAudio():
    with open('audio.wav', 'wb') as infile:
        infile.write(request.data)
    tempoJson = getTempo('audio.wav')
    src = sr.AudioFile('audio.wav')
    with sf.SoundFile('audio.wav') as f:
        print('seconds = {}'.format(len(f) / f.samplerate))
    print('tempo = ', tempoJson['auftakt_result']['overall_tempo'])
    rap.make_music(round(tempoJson['auftakt_result']['overall_tempo']))
    with src as source:
        audio = r.record(source)
    print(type(audio))
    try:
        text = r.recognize_google(audio)
        print("lyrics: {}".format(text))
    except Exception:
        print("didn't recognize")
    return 'This is the analyze'
def select_speakers(dataset_path, max_speakers=100, min_len=20,
                    max_file_len=20, min_file_len=2):
    """Select speakers whose total audio length exceeds min_len (minutes).

    Args:
        dataset_path (str): wav path containing one subfolder per speaker

    Returns:
        poss_speakers (dict): {spk: filelist, ...}
        to_move (list): up to max_speakers speakers with duration > min_len
    """
    possible_map = {}
    for spk in os.listdir(dataset_path):
        spk_dur = 0
        spk_included = []
        for fn in os.listdir(os.path.join(dataset_path, spk)):
            if fn.endswith(".wav"):
                full_fn = os.path.join(dataset_path, spk, fn)
                with sf.SoundFile(full_fn) as wav:
                    dur = len(wav) / wav.samplerate
                if dur < min_file_len or dur > max_file_len:
                    print(f"Ignore {full_fn}")
                    continue
                spk_dur += dur
                spk_included.append(full_fn)
        possible_map[spk] = {"dur": spk_dur, "wav_path_included": spk_included}
    poss_speakers = [[k, v["wav_path_included"], v["dur"]]
                     for k, v in possible_map.items()
                     if v["dur"] / float(60) >= min_len]
    poss_speakers = sorted(poss_speakers, key=lambda x: x[2])[::-1]
    to_move = [s[0] for s in poss_speakers[:max_speakers]]
    print('selected speakers', len(to_move))
    poss_speakers = {s[0]: s[1] for s in poss_speakers}
    return poss_speakers, to_move
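# Example usage of select_speakers (a minimal sketch; '/data/wavs' is a
# hypothetical dataset root with one subfolder per speaker):
if __name__ == '__main__':
    poss_speakers, to_move = select_speakers('/data/wavs', max_speakers=50, min_len=20)
    print('kept %d speakers' % len(to_move))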
def play_wav(wav_file, buffersize=20, blocksize=1024, socketio=None):
    q = queue.Queue(maxsize=buffersize)
    event = threading.Event()

    def callback(outdata, frames, time, status):
        assert frames == blocksize
        if status.output_underflow:
            print('Output underflow: increase blocksize?', file=sys.stderr)
            raise sd.CallbackAbort
        assert not status
        try:
            data = q.get_nowait()
        except queue.Empty:
            print('Buffer is empty: increase buffersize?', file=sys.stderr)
            raise sd.CallbackAbort
        if len(data) < len(outdata):
            outdata[:len(data)] = data
            outdata[len(data):] = b'\x00' * (len(outdata) - len(data))
            raise sd.CallbackStop
        else:
            outdata[:] = data

    with sf.SoundFile(wav_file) as f:
        for _ in range(buffersize):
            data = f.buffer_read(blocksize, dtype='float32')
            if not data:
                break
            q.put_nowait(data)  # pre-fill queue
        stream = sd.RawOutputStream(samplerate=f.samplerate, blocksize=blocksize,
                                    channels=f.channels, dtype='float32',
                                    callback=callback, finished_callback=event.set)
        with stream:
            timeout = blocksize * buffersize / f.samplerate
            while data:
                data = f.buffer_read(blocksize, dtype='float32')
                q.put(data, timeout=timeout)
            event.wait()  # wait until playback is finished
    return stream
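# Example usage of play_wav (a minimal sketch; 'music.wav' is a hypothetical
# file and a working default output device is assumed; the call blocks until
# playback has finished):
if __name__ == '__main__':
    play_wav('music.wav', buffersize=20, blocksize=1024)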
def sampleNote(self, channel, note, velocity, fileprefix='', folder=None, subfolder=None):
    if folder is not None:
        if not os.path.exists(folder):
            os.mkdir(folder)
        if not os.path.exists('%s\\%s' % (folder, subfolder)):
            os.mkdir('%s\\%s' % (folder, subfolder))
        file = '%s\\%s\\%s_%s_%s_%s.wav' % (folder, subfolder, fileprefix, channel, note, velocity)
    else:
        file = '%s_%s_%s_%s.wav' % (fileprefix, channel, note, velocity)
    if os.path.exists(file):
        os.remove(file)
    self.q = queue.Queue()
    self.record = True
    self.slagTime = 35 * 3
    self.thresholdStart = False
    self.startRecord = False
    self.timeStart = time.time()

    def callback(indata, frames, timeCFFI, status):
        peak = np.average(np.abs(indata)) * 32 * 64
        if self.thresholdStart:
            # wait for the signal to cross the threshold before queueing audio
            if peak >= .1:
                self.startRecord = True
            if self.startRecord:
                self.q.put(indata.copy())
        else:
            self.q.put(indata.copy())
        bars = "#" * int(128 * 64 * peak / 2 ** 16)
        timeDiff = round((time.time() - self.timeStart) * 1000)
        print(' Preset %s - Channel %s - Note %s - Velocity %s - Device %s - Input Channels %s - Sample Rate %s - Bit Depth %s - Sample Time %sms - Used Frames %04d - Peak %05d %s'
              % (fileprefix, channel + 1, note, velocity, sd.default.device[0], self.inputChannels,
                 self.samplerate, self.subtype, timeDiff, frames, peak, bars))
        if peak < .1:
            # count down the trailing silence, then stop recording
            self.slagTime -= 1
            if self.slagTime == 0:
                self.record = False

    with sf.SoundFile(file, mode='x', samplerate=sd.default.samplerate,
                      channels=self.channels_in, subtype=self.subtype) as f:
        with sd.InputStream(callback=callback):
            self.md.sendNote(channel, note, velocity)
            while self.record:
                f.write(self.q.get())
def load_audio(self, filename):
    # called whenever a potential audio file is set as self.filename - via drag & drop or open_audio
    if filename:
        if filename != self.filename:
            # ask the user if it should really be opened, if another file was already open
            if self.filename:
                qm = QtWidgets.QMessageBox
                ret = qm.question(self, '',
                                  "Do you really want to load " + os.path.basename(filename)
                                  + "? You will lose unsaved work on "
                                  + os.path.basename(self.filename) + "!",
                                  qm.Yes | qm.No)
                if ret == qm.No:
                    return
            # is the (dropped) file an audio file, i.e. can it be read by pysoundfile?
            try:
                soundob = sf.SoundFile(filename)
                self.filename = filename
            except Exception:
                print(filename + " could not be read, is it a valid audio file?")
                return
            # cleanup of old data
            self.parent.canvas.init_fft_storages()
            self.delete_traces(not_only_selected=True)
            self.resampling_widget.refill(soundob.channels)
            soundob.close()
            # finally - proceed with spectrum stuff elsewhere
            self.parent.setWindowTitle('pyrespeeder ' + os.path.basename(self.filename))
            self.parent.canvas.set_file_or_fft_settings((filename,),
                                                        fft_size=self.display_widget.fft_size,
                                                        fft_overlap=self.display_widget.fft_overlap)
            # also force a cmap update here
            self.display_widget.update_cmap()
            # read any saved traces or regressions
            data = resampling.read_trace(self.filename)
            for offset, times, freqs in data:
                TraceLine(self.parent.canvas, times, freqs, offset=offset)
            self.parent.canvas.master_speed.update()
            data = resampling.read_regs(self.filename)
            for t0, t1, amplitude, omega, phase, offset in data:
                RegLine(self.parent.canvas, t0, t1, amplitude, omega, phase, offset)
            self.parent.canvas.master_reg_speed.update()
def __init__(self, infile):
    """
    Initialize Sound object.

    Parameters
    ----------
    infile : str
        Path of the sound file.

    Raises
    ------
    ValueError
        If the sound file can't be found.

    Returns
    -------
    Sound object.
    """
    if os.path.isfile(infile):
        myfile = sf.SoundFile(infile)
        self._file_duration_sample = myfile.seek(0, sf.SEEK_END)
        self._file_sampling_frequency = myfile.samplerate
        self._file_duration_sec = (self._file_duration_sample
                                   / self._file_sampling_frequency)
        self._channels = myfile.channels
        self._channel_selected = []
        self._file_dir = os.path.dirname(infile)
        self._file_name = os.path.basename(os.path.splitext(infile)[0])
        self._file_extension = os.path.splitext(infile)[1]
        self._filter_applied = False
        self._waveform = []
        self._waveform_start_sample = []
        self._waveform_stop_sample = []
        self._waveform_duration_sample = 0
        self._waveform_duration_sec = 0
        self._waveform_sampling_frequency = self._file_sampling_frequency
        self.detrended = []
        myfile.close()
    else:
        raise ValueError("The sound file can't be found. Please verify"
                         " the sound file name and path.")
def gen_json():
    with open("vocab.json", encoding="utf8") as v_file:
        v = v_file.readline()
    vocab = json.loads(v)
    partitions = ["test", "dev", "train"]
    main_path = "./data_thchs30/"
    jsons = []
    for part in partitions:
        path = os.path.abspath(os.path.join(main_path, part))
        os.chdir(path)
        files = glob.glob(os.path.abspath("*.wav"))
        js = []
        for f in files:
            d = {}
            suf = f.split("\\")[-1]
            audio_path = os.path.join(
                os.path.join(os.path.abspath("../../rnn/data"), part), suf)
            with sf.SoundFile(audio_path) as audio:
                # duration in microseconds
                duration = int(10 ** 6 * len(audio) / audio.samplerate)
            trn = os.path.abspath(f + ".trn")
            label = []
            with open(trn, encoding="utf8") as file:
                label_path = file.readline()[0:-1]
            abs_path = os.path.abspath(label_path)
            with open(abs_path, encoding="utf8") as label_file:
                _ = label_file.readline()
                label = label_file.readline()[0:-1].split()
            d["audio"] = audio_path
            d["text"] = label
            d["duration"] = duration
            js.append(d)
        if part != "train":
            jsons.append(js)
        else:
            js += jsons[0]
            js += jsons[1]
        os.chdir(os.path.abspath("../../rnn/data/partitions"))
        with open(part + ".json", "w") as outfile:
            for j in js:
                outfile.write(json.dumps(j) + '\n')
        os.chdir(os.path.abspath("../../../"))
def cqt_from_file(filename, step=10, max_freq=None, trim=False):
    sample_rate = 16000
    with soundfile.SoundFile(filename) as sound_file:
        n_cqt = 13
        y, sr = librosa.load(filename, sr=sample_rate)
        if trim:
            audio = trim_silence(y)
        else:
            audio = y
        if audio.size == 0:
            audio = y
        # librosa.load already resampled the audio to 16 kHz, so keep that rate
        # rather than overwriting it with the file's native sound_file.samplerate
        if audio.ndim >= 2:
            audio = np.mean(audio, 1)
        if max_freq is None:
            max_freq = sample_rate / 2
        if max_freq > sample_rate / 2:
            raise ValueError("max_freq must not be greater than half of sample rate")
        hop_length = int(0.001 * step * sample_rate)
        f_min = max_freq / 2 ** 9
        cqt = librosa.feature.chroma_cqt(y=audio, sr=sample_rate,
                                         hop_length=hop_length, fmin=f_min,
                                         n_chroma=n_cqt, n_octaves=5)
        # Unable to vstack cqt: ValueError: all the input array dimensions
        # except for the concatenation axis must match exactly
        return cqt
def record_audio(dir_name="", rec_itter=0):
    filename = WAVE_OUTPUT_FILE_HEADER + str(rec_itter) + ".wav"
    filename = os.path.join(dir_name, filename)
    stop_filename = os.path.join(dir_name, STOP_SIGNAL)
    device_info = sd.query_devices(DEVICE, 'input')
    # print(device_info)
    rate = int(device_info['default_samplerate'])
    # print(rate)
    q = queue.Queue()

    def callback(indata, frames, time, status):
        """This is called (from a separate thread) for each audio block."""
        del frames, time
        if status:
            print(status, file=sys.stderr)
        q.put(indata.copy())

    with sf.SoundFile(filename, mode='x', samplerate=rate, channels=CHANNELS) as file:
        # signal.signal(signal.SIGINT, lambda n, f: (file.close(),
        #                                            print("* done recording"),
        #                                            exit(0)))
        # signal.signal(signal.SIGTERM, lambda n, f: (file.close(),
        #                                             print("* done recording"),
        #                                             exit(0)))
        with sd.InputStream(samplerate=rate, device=DEVICE,
                            channels=CHANNELS, callback=callback):
            print("* recording")
            while True:
                file.write(q.get())
                if os.path.isfile(stop_filename):
                    file.close()
                    print("* done recording")
                    exit(0)
def play_audio(data, fs, volumn=1, speed=1):
    '''
    Play audio from a numpy array.

    Parameters
    ----------
    data : numpy.ndarray
        signal data
    fs : int
        sample rate
    volumn : float
        between 0 and 1
    speed : float
        > 1 means faster, < 1 means slower

    Note
    ----
    Only supports playing audio on macOS.
    '''
    import soundfile as sf
    import os
    data = numpy.asarray(data, dtype=numpy.int16)
    if data.ndim == 1:
        channels = 1
    else:
        channels = data.shape[1]
    with TemporaryDirectory() as temppath:
        path = os.path.join(temppath, 'tmp_play.wav')
        with sf.SoundFile(path, 'w', fs, channels,
                          subtype=None, endian=None, format=None) as f:
            f.write(data)
        os.system('afplay -v %f -q 1 -r %f %s &' % (volumn, speed, path))
        input('<enter> to stop audio.')
        os.system(
            "kill -9 `ps aux | grep -v 'grep' | grep afplay | awk '{print $2}'`"
        )
def extract_feature(file_name, **kwargs):
    """
    Extract feature from audio file `file_name`
    Features supported:
        - MFCC (mfcc)
        - Chroma (chroma)
        - MEL Spectrogram Frequency (mel)
        - Contrast (contrast)
        - Tonnetz (tonnetz)
    e.g.:
        `features = extract_feature(path, mel=True, mfcc=True)`
    """
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
    if chroma or contrast:
        stft = np.abs(librosa.stft(X))
    result = np.array([])
    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        result = np.hstack((result, mfccs))
    if chroma:
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, chroma))
    if mel:
        # the keyword form y=X is required by recent librosa releases
        mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
        result = np.hstack((result, mel))
    if contrast:
        contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, contrast))
    if tonnetz:
        tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
        result = np.hstack((result, tonnetz))
    return result
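# Example usage of extract_feature (a minimal sketch; 'sample.wav' is a
# hypothetical mono file; with librosa's defaults, mfcc + chroma + mel
# yields a 40 + 12 + 128 = 180-dimensional vector):
if __name__ == '__main__':
    features = extract_feature('sample.wav', mfcc=True, chroma=True, mel=True)
    print(features.shape)  # (180,)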
def __init__(self, filename, target_sr=None, int_values=False, offset=0,
             duration=0, trim=False, trim_db=60):
    """Create audio segment from samples.

    Samples are converted to float32 internally, with ints scaled to [-1, 1].
    Load a file supported by librosa and return it as an AudioSegment.

    :param filename: path of the file to load
    :param target_sr: the desired sample rate
    :param int_values: if true, load samples as 32-bit integers
    :param offset: offset in seconds when loading the audio
    :param duration: duration in seconds when loading the audio
    :return: numpy array of samples
    """
    with sf.SoundFile(filename, 'r') as f:
        dtype = 'int32' if int_values else 'float32'
        sample_rate = f.samplerate
        if offset > 0:
            f.seek(int(offset * sample_rate))
        if duration > 0:
            samples = f.read(int(duration * sample_rate), dtype=dtype)
        else:
            samples = f.read(dtype=dtype)
    samples = samples.transpose()  # (frames, channels) -> (channels, frames)
    samples = self._convert_samples_to_float32(samples)
    if target_sr is not None and target_sr != sample_rate:
        samples = librosa.resample(samples, orig_sr=sample_rate, target_sr=target_sr)
        sample_rate = target_sr
    if trim:
        samples, _ = librosa.effects.trim(samples, top_db=trim_db)
    self._samples = samples
    self._sample_rate = sample_rate
    if self._samples.ndim >= 2:
        # after the transpose the layout is (channels, frames), so downmix
        # to mono by averaging over axis 0, not axis 1
        self._samples = np.mean(self._samples, 0)
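# Example usage (a minimal sketch; 'utterance.wav' is a hypothetical path and
# AudioSegment is assumed to be the class this __init__ belongs to):
if __name__ == '__main__':
    seg = AudioSegment('utterance.wav', target_sr=16000, offset=0.5,
                       duration=3.0, trim=True)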
def audio_generator_from_file(input_filename, target_sr, int_values, chunk_duration):
    with soundfile.SoundFile(input_filename, 'rb') as sf:
        chunk_size = int(chunk_duration * sf.samplerate)
        start = True
        end = False
        while not end:
            audio_signal, end = get_audio_chunk_from_soundfile(
                sf, chunk_size, int_values)
            audio_segment = AudioSegment(audio_signal, sf.samplerate, target_sr)
            yield audio_segment.samples, target_sr, start, end
            start = False
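# Example usage of audio_generator_from_file (a minimal sketch;
# 'long_recording.wav' is a hypothetical file, and AudioSegment /
# get_audio_chunk_from_soundfile are the helpers this module already relies on):
if __name__ == '__main__':
    for samples, sr, start, end in audio_generator_from_file(
            'long_recording.wav', target_sr=16000, int_values=False,
            chunk_duration=0.5):
        print(len(samples), sr, start, end)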
def change_audio_rate(annotation_path):
    for annotation_text in os.listdir(annotation_path):
        print('Changing the sample rate of the audio in %s to 16000 Hz; this '
              'will take a long time, please wait ...' % annotation_text)
        annotation_text = os.path.join(annotation_path, annotation_text)
        with open(annotation_text, 'r', encoding='utf-8') as f:
            lines = f.readlines()
        for line in tqdm(lines):
            audio_path = line.split('\t')[0]
            with soundfile.SoundFile(audio_path) as sndfile:
                samplerate = sndfile.samplerate
            if samplerate != 16000:
                rf = wave.open(audio_path, "rb")
                str_data = rf.readframes(rf.getnframes())
                rf.close()
                wf = wave.open(audio_path, 'wb')
                wf.setnchannels(1)
                wf.setsampwidth(4)
                wf.setframerate(16000)
                # note: this rewrites the WAV header with the new rate; the
                # frames themselves are copied unchanged
                wf.writeframes(str_data)
                wf.close()
def wav_to_chroma(path_to_wav_file, offset=0.0, duration=None, dtype=np.float32):
    print("using soundfile")
    with sf.SoundFile(path_to_wav_file) as sf_desc:
        sr_native = sf_desc.samplerate
        if offset:
            # seek to the start of the target read
            sf_desc.seek(int(offset * sr_native))
        if duration is not None:
            frame_duration = int(duration * sr_native)
        else:
            frame_duration = -1
        # load the target number of frames, and transpose to match librosa form
        wav = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
    # create chroma (STFT --> spectrogram --> chromagram)
    stft = create_stft(wav)
    chroma = create_chroma(stft)
    return chroma
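# Example usage of wav_to_chroma (a minimal sketch; 'track.wav' is a
# hypothetical file, and create_stft / create_chroma are the helpers this
# module already uses):
if __name__ == '__main__':
    chroma = wav_to_chroma('track.wav', offset=30.0, duration=10.0)
    print(chroma.shape)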
def get_chunks(self, labels):
    chunks = {}
    chunk_size = self.configs["data"]["segment"]
    frame_shift = self.configs["data"]["segment"]
    for l in labels:
        sess = Path(l).stem.split("-")[-1]
        chunks[sess] = []
        # generate chunks for this file
        c_length = len(sf.SoundFile(l))  # length of the session file in samples
        for st, ed in _gen_frame_indices(c_length, chunk_size, frame_shift,
                                         use_last_samples=False):
            if st < 7000:
                # exclude the first minute, which contains enrollment for each speaker
                continue
            chunks[sess].append([st, ed])
    return chunks
def read_audio(
    path_or_fd: Union[Pathlike, FileObject],
    offset: Seconds = 0.0,
    duration: Optional[Seconds] = None
) -> Tuple[np.ndarray, int]:
    try:
        import soundfile as sf
        with sf.SoundFile(path_or_fd) as sf_desc:
            sampling_rate = sf_desc.samplerate
            if offset > 0:
                # seek to the start of the target read
                sf_desc.seek(compute_num_samples(offset, sampling_rate))
            if duration is not None:
                frame_duration = compute_num_samples(duration, sampling_rate)
            else:
                frame_duration = -1
            # load the target number of frames, and transpose to match librosa form
            return sf_desc.read(frames=frame_duration, dtype=np.float32,
                                always_2d=False).T, sampling_rate
    except Exception:
        # fall back to audioread for inputs soundfile cannot open
        return _audioread_load(path_or_fd, offset=offset, duration=duration)
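# Example usage of read_audio (a minimal sketch; 'clip.flac' is a
# hypothetical path; the audioread fallback only triggers when soundfile
# cannot open the input):
if __name__ == '__main__':
    samples, sampling_rate = read_audio('clip.flac', offset=1.0, duration=2.0)
    print(samples.shape, sampling_rate)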
def segment_from_file(cls, filename, target_sr=None, n_segments=0, trim=False):
    """Grabs n_segments samples from filename at a random offset,
    as opposed to at a specified offset.
    """
    with sf.SoundFile(filename, 'r') as f:
        sample_rate = f.samplerate
        if n_segments > 0 and len(f) > n_segments:
            max_audio_start = len(f) - n_segments
            audio_start = random.randint(0, max_audio_start)
            f.seek(audio_start)
            samples = f.read(n_segments, dtype='float32')
        else:
            samples = f.read(dtype='float32')
    samples = samples.transpose()
    return cls(samples, sample_rate, target_sr=target_sr, trim=trim)
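# Example usage of segment_from_file (a minimal sketch; it is assumed to be
# decorated as a @classmethod on a segment class, here called AudioSegment
# purely for illustration, and 'audio.wav' is a hypothetical file longer
# than 16000 frames):
if __name__ == '__main__':
    seg = AudioSegment.segment_from_file('audio.wav', target_sr=22050,
                                         n_segments=16000)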
def recorder(file_name):
    if args.samplerate is None:
        device_info = sd.query_devices(args.device, 'input')
        # soundfile expects an int, sounddevice provides a float:
        args.samplerate = int(device_info['default_samplerate'])
    # remove any stale file so the exclusive 'x' mode does not fail:
    try:
        os.remove(file_name)
    except OSError:
        pass
    with sf.SoundFile(file_name, mode='x', samplerate=args.samplerate,
                      channels=args.channels, subtype=args.subtype) as file:
        with sd.InputStream(samplerate=args.samplerate, device=args.device,
                            channels=args.channels, callback=callback):
            while True:
                file.write(q.get())
                if keyboard.is_pressed("left"):
                    break
def sound_buffer_save():
    global buffer_idx
    global data_idx
    global buffer
    a = np.array([])
    # start reading just past the write position, i.e. at the oldest sample
    idx = (buffer_idx + 1) % BUFFER_LENGTH
    for i in range(BUFFER_LENGTH - 1):
        a = np.append(a, buffer[idx])
        idx = (idx + 1) % BUFFER_LENGTH
    # a = np.reshape(a, (int(a.shape[0] / cp["audio_channels"]), cp["audio_channels"]))
    filename = datetime.now().strftime("%Y-%m-%d_%H_%M_%S") + ".wav"
    with sf.SoundFile(filename, mode='x', samplerate=8000,
                      channels=1, subtype="PCM_24") as sfile:
        sfile.write(a)
        time.sleep(1)
def trans(path, sound):
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = '/Users/hbk/data/speech_key.json'
    client = speech.SpeechClient()
    file_name = path + sound
    with io.open(file_name, 'rb') as audio_file:
        content = audio_file.read()
    audio = types.RecognitionAudio(content=content)
    with sf.SoundFile(file_name) as snd:
        sample_rate = snd.samplerate
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=sample_rate,
        language_code='ko-KR')
    response = client.recognize(config, audio)
    res = None  # stays None if recognition returned no results
    for result in response.results:
        # print('Transcript: {}'.format(result.alternatives[0].transcript))
        res = result.alternatives[0].transcript
    return res
def divisor_audio(file, paths):
    for file in os.listdir(UPLOAD_FOLDER):
        name_file = file.split('.')
        # look inside the upload folder for files with a ".wav" extension
        if name_file[1] == 'wav':
            time = 30000  # chunk length in milliseconds
            j = 1
            with contextlib.closing(wave.open(UPLOAD_FOLDER + "/" + file, 'r')) as f:
                frames = f.getnframes()
                rate = f.getframerate()
                duration = int((frames / float(rate)) * 1000)  # total duration of the audio in ms
            # step through the audio in the chunk size we want to split at (30 s)
            for i in range(0, duration, time):
                newAudio = AudioSegment.from_wav(UPLOAD_FOLDER + "/" + file)
                newAudio = newAudio[i:i + 30000]  # cut the audio every 30 seconds
                destino = paths + "/" + name_file[0] + '_' + str(j)
                newAudio.export(destino, format="wav")
                j = j + 1  # repeat until the full duration is covered