def preprocess_audio(self, x, audio_fps):
    """Convert a waveform to a mono, fixed-rate, minimum-length row vector.

    Parameters
    ----------
    x : object exposing ``.numpy()``
        Input waveform; it is converted with ``x.numpy()`` and handed to
        ``ap.to_mono``.
    audio_fps : number
        Sampling rate of ``x``.

    Returns
    -------
    np.ndarray
        Array of shape ``(1, n)`` with ``n >= self.audio_fps``.
    """
    signal = ap.to_mono(x.numpy())

    target_fps = self.audio_fps
    if audio_fps != target_fps:
        signal = ap.resample(signal, audio_fps, target_fps)

    # Right-pad (np.pad defaults to zeros) until the signal holds at least
    # ``target_fps`` samples -- presumably one second of audio.
    missing = target_fps - signal.shape[0]
    if missing > 0:
        signal = np.pad(signal, (0, missing))

    return signal.reshape((1, -1))
def generate_cqt(file_path, st_status):
    """Load an audio file and return the magnitude of its ``cqt`` output.

    Progress is reported through ``st_status.text(...)`` and ``print``.

    Parameters
    ----------
    file_path : str
        Path handed to ``auto_load``.
    st_status : object
        Status widget; only its ``text(message)`` method is used.

    Returns
    -------
    np.ndarray
        ``np.abs`` of the ``cqt`` result computed at ``TARGET_SAMPLE_RATE``.
    """
    st_status.text('Opening {}'.format(file_path))
    samples, native_rate = auto_load(file_path, sr=None)
    print('Sample Rate:', native_rate, 'shape:', samples.shape)

    is_two_dimensional = len(samples.shape) == 2
    if is_two_dimensional:
        print('Converting to mono channel...')
        samples = to_mono(samples)

    st_status.text('Resampling to {} Hz...'.format(TARGET_SAMPLE_RATE))
    resampled = resample(
        samples,
        orig_sr=native_rate,
        target_sr=TARGET_SAMPLE_RATE,
    )
    st_status.text('Downsampled to {} Hz, shape is now {}'.format(
        TARGET_SAMPLE_RATE, resampled.shape))

    st_status.text('Generating CQT...')
    transform = cqt(
        resampled,
        sr=TARGET_SAMPLE_RATE,
        hop_length=HOP_LENGTH,
        n_bins=TOTAL_BINS,
        bins_per_octave=BINS_PER_OCTAVE,
    )
    return np.abs(transform)
def generate_cqt(i, file_path, offset=0, duration=None):
    """Load (part of) an audio file and return the magnitude of its ``cqt``.

    Every stage logs to stdout with an ``[i]`` prefix and is wrapped in a
    ``Timer`` context manager.

    Parameters
    ----------
    i : object
        Label inserted into the ``[{}]`` log prefix.
    file_path : str
        Path handed to ``load``.
    offset : number, optional
        Forwarded to ``load`` as ``offset``.
    duration : number or None, optional
        Forwarded to ``load`` as ``duration``.

    Returns
    -------
    np.ndarray
        ``np.abs`` of the ``cqt`` result computed at ``TARGET_SAMPLE_RATE``.
    """
    print('[{}] Opening'.format(i), file_path)
    samples, native_rate = load(
        file_path, sr=None, offset=offset, duration=duration)
    print('[{}] Sample Rate:'.format(i), native_rate, 'shape:', samples.shape)

    if len(samples.shape) == 2:
        with Timer('[{}] Converted to mono'.format(i)):
            print('[{}] Converting to mono channel...'.format(i))
            samples = to_mono(samples)

    with Timer('[{}] Resampling'.format(i)):
        print('[{}] Resampling to'.format(i), TARGET_SAMPLE_RATE, 'Hz...')
        resampled = resample(
            samples,
            orig_sr=native_rate,
            target_sr=TARGET_SAMPLE_RATE,
        )
        print('[{}] Downsampled to'.format(i), TARGET_SAMPLE_RATE,
              'Hz shape is now', resampled.shape)

    with Timer('[{}] CQT'.format(i)):
        print('[{}] Generating CQT...'.format(i))
        transform = cqt(
            resampled,
            sr=TARGET_SAMPLE_RATE,
            hop_length=HOP_LENGTH,
            n_bins=TOTAL_BINS,
            bins_per_octave=BINS_PER_OCTAVE,
        )
        magnitude = np.abs(transform)

    return magnitude
def to_mono(self, x):
    """Return ``x`` unchanged unless it is 2-D, in which case down-mix it.

    A 2-D input is transposed and passed to ``lb.to_mono``; any other
    dimensionality is handed back as-is.
    """
    if len(x.shape) != 2:
        return x
    return lb.to_mono(numpy.transpose(x))
def downsample_mono(path, sr):
    """Read a WAV file, down-mix it to mono and resample it to ``sr``.

    Parameters
    ----------
    path : str
        File handed to ``wavio.read``.
    sr : int
        Target sampling rate.

    Returns
    -------
    tuple
        ``(sr, wav)`` where ``wav`` is the resampled signal cast to
        ``np.int16``.
    """
    obj = wavio.read(path)
    wav = obj.data.astype(np.float32, order="F")
    rate = obj.rate

    # The data is treated as (samples, channels).  Every multi-channel layout
    # is down-mixed -- previously only exactly two channels were, so e.g. a
    # 6-channel file reached ``resample`` still 2-D and was resampled along
    # the channel axis.
    if wav.ndim > 1 and wav.shape[1] > 1:
        wav = to_mono(wav.T)
    else:
        wav = to_mono(wav.reshape(-1))

    # Rates are passed by keyword: librosa >= 0.10 makes them keyword-only
    # (assumes ``resample`` is librosa's -- confirm against the file imports).
    wav = resample(wav, orig_sr=rate, target_sr=sr)
    wav = wav.astype(np.int16)
    return sr, wav
def downsample_mono(path, sr):
    """Read a WAV file, down-mix it to mono and resample it to ``sr``.

    Parameters
    ----------
    path : str
        File handed to ``wavfile.read``.
    sr : int
        Target sampling rate.

    Returns
    -------
    tuple
        ``(sr, wav)`` where ``wav`` is the resampled signal cast to
        ``np.int16``.
    """
    rate, wav = wavfile.read(path)
    wav = wav.astype(np.float32, order='F')

    # Test the dimensionality explicitly instead of probing ``shape[1]``
    # inside a bare ``except`` that also hid genuine ``to_mono`` failures.
    if wav.ndim > 1:
        # (samples, channels) -> (channels, samples) before averaging.
        wav = to_mono(wav.T)

    # Rates are passed by keyword: librosa >= 0.10 makes them keyword-only
    # (assumes ``resample`` is librosa's -- confirm against the file imports).
    wav = resample(wav, orig_sr=rate, target_sr=sr)
    wav = wav.astype(np.int16)
    return sr, wav
def downsample_mono(path, sr):
    """Read a WAV file, down-mix it to mono and resample it to ``sr``.

    Parameters
    ----------
    path : str
        File handed to ``wavio.read``.
    sr : int
        Target sampling rate.

    Returns
    -------
    tuple
        ``(sr, wav)`` where ``wav`` is the resampled signal cast to
        ``np.int16``.
    """
    obj = wavio.read(path)
    wav = obj.data.astype(np.float32, order='F')
    rate = obj.rate

    # Down-mix only when there is a channel axis.  The previous bare
    # ``except: pass`` around this call silently discarded any failure and
    # let un-mixed data continue into ``resample``.
    if wav.ndim > 1:
        # (samples, channels) -> (channels, samples) before averaging.
        wav = to_mono(wav.T)

    # Rates are passed by keyword: librosa >= 0.10 makes them keyword-only
    # (assumes ``resample`` is librosa's -- confirm against the file imports).
    wav = resample(wav, orig_sr=rate, target_sr=sr)
    wav = wav.astype(np.int16)
    return sr, wav
def downsample(path, down_sample):
    """Read a WAV file, down-mix it to mono and resample it.

    Parameters
    ----------
    path : str
        File handed to ``wavfile.read``.
    down_sample : int
        Target sampling rate.

    Returns
    -------
    tuple
        ``(wave, down_sample)`` where ``wave`` is the resampled signal cast
        to ``np.int16``.
    """
    sample_rate, wave = wavfile.read(path)
    wave = wave.astype(np.float32, order='F')

    # Test the dimensionality explicitly instead of probing ``shape[1]``
    # inside a bare ``except`` that also hid genuine ``to_mono`` failures.
    if wave.ndim > 1:
        # (samples, channels) -> (channels, samples) before averaging.
        wave = to_mono(wave.T)

    # Rates are passed by keyword: librosa >= 0.10 makes them keyword-only
    # (assumes ``resample`` is librosa's -- confirm against the file imports).
    wave = resample(wave, orig_sr=sample_rate, target_sr=down_sample)
    wave = wave.astype(np.int16)
    return wave, down_sample
def load_audio_file(file_path):
    """Load every WAV file in a directory as mono float32 audio at 16 kHz.

    Each decoded signal is appended to ``data_ap``, which is then returned.

    Parameters
    ----------
    file_path : str
        Directory whose entries are read with ``wavfile.read``.

    Returns
    -------
    list
        ``data_ap`` after the new signals have been appended.
    """
    # NOTE(review): ``data_ap`` is not created in this function; it is
    # presumably a module-level list that accumulates across calls -- confirm.
    target_rate = 16000

    for name in os.listdir(file_path):
        # ``os.path.join`` also works when ``file_path`` lacks a trailing
        # separator, which plain string concatenation did not.
        rate, data_in = wavfile.read(os.path.join(file_path, name))
        data_in = data_in.astype(np.float32, order='F')

        # Test the dimensionality explicitly instead of probing ``shape[1]``
        # inside a bare ``except`` that also hid genuine ``to_mono`` failures.
        if data_in.ndim > 1:
            # (samples, channels) -> (channels, samples) before averaging.
            data_in = to_mono(data_in.T)

        # Rates are passed by keyword: librosa >= 0.10 makes them
        # keyword-only (assumes ``resample`` is librosa's -- confirm).
        data_in = resample(data_in, orig_sr=rate, target_sr=target_rate)
        data_ap.append(data_in.astype(np.float32))

    return data_ap
def stream_to_np(bytes_io, sr=22050, mono=True, offset=0.0, duration=None, dtype=np.float32, res_type='kaiser_best'):
    """
    Decode an in-memory audio stream into a floating point time series.

    This is a rewrite of ``librosa.load`` whose file argument is a BytesIO
    object: ``audioread.audio_open`` is replaced by the custom
    ``RawAudioStream`` class, because the former needs a file path.

    :param bytes_io: in-memory audio data handed to ``RawAudioStream``
    :param sr: target sampling rate; ``None`` keeps the stream's native rate
    :param mono: if true, multi-channel audio is passed through ``to_mono``
    :param offset: start position, multiplied by the native rate (seconds)
    :param duration: amount of audio to keep, in the same unit as ``offset``;
        ``None`` reads to the end
    :param dtype: dtype used for the decoded frames and the returned array
    :param res_type: forwarded to ``resample`` as ``res_type``
    :return: tuple ``(y, sr)`` of the contiguous audio array and its rate
    """
    y = []
    with RawAudioStream(bytes_io) as input_file:
        sr_native = input_file.samplerate
        n_channels = input_file.channels
        # Positions are counted in interleaved samples, hence the
        # multiplication by the channel count.
        s_start = int(np.round(sr_native * offset)) * n_channels
        if duration is None:
            s_end = np.inf
        else:
            s_end = s_start + (int(np.round(sr_native * duration)) * n_channels)
        # Running count of samples consumed so far.
        n = 0
        for frame in input_file:
            frame = util.buf_to_float(frame, dtype=dtype)
            n_prev = n
            n = n + len(frame)
            if n < s_start:
                # offset is after the current frame
                # keep reading
                continue
            if s_end < n_prev:
                # we're off the end. stop reading
                break
            if s_end < n:
                # the end is in this frame. crop.
                frame = frame[:s_end - n_prev]
            if n_prev <= s_start <= n:
                # beginning is in this frame
                frame = frame[(s_start - n_prev):]
            # tack on the current frame
            y.append(frame)
    if y:
        y = np.concatenate(y)
        if n_channels > 1:
            # De-interleave into shape (n_channels, n_samples).
            y = y.reshape((-1, n_channels)).T
            if mono:
                y = to_mono(y)
        if sr is not None:
            y = resample(y, sr_native, sr, res_type=res_type)
        else:
            sr = sr_native
    # Final cleanup for dtype and contiguity
    y = np.ascontiguousarray(y, dtype=dtype)
    return y, sr
def load_yield_chunks(path, sr=22050, mono=True, offset=0.0, duration=None,
                      dtype=np.float32, res_type='kaiser_best',
                      choplenspls=0, hoplenspls=0):
    """Stream an audio file as fixed-size floating point chunks.

    This is MODIFIED from librosa's own load() function, to yield chunks
    one-by-one so they never all need to be loaded into memory.

    Parameters
    ----------
    path : string
        path to the input file; opened with ``audioread.audio_open``.
    sr : number > 0 [scalar]
        target sampling rate. ``None`` uses the native sampling rate.
    mono : bool
        convert multi-channel frames to mono.
    offset : float
        start reading after this time (in seconds).
    duration : float
        only load up to this much audio (in seconds).
    dtype : numeric type
        data type of the yielded chunks.
    res_type : str
        resample type, forwarded to ``resample``.
    choplenspls : int > 0
        number of samples in each chunk to be yielded.
    hoplenspls : int
        number of samples the window advances between chunks. Values that
        are zero, negative or larger than ``choplenspls`` fall back to
        ``choplenspls`` (non-overlapping chunks).

    Yields
    ------
    y : np.ndarray [shape=(choplenspls,)]
        next chunk of the audio time series.
    sr : number > 0 [scalar]
        sampling rate of ``y``.

    Raises
    ------
    ValueError
        if ``choplenspls`` is not positive. The previous behaviour for the
        default value 0 was an endless stream of empty chunks.

    Notes
    -----
    Mono conversion and resampling are applied to each decoded frame
    separately, not to the whole signal at once as ``librosa.load`` does, so
    the samples may differ from a whole-file resample.

    Trailing samples that do not fill a complete chunk are dropped and a
    warning is printed.

    Examples
    --------
    Illustrative usage only; the file name is a placeholder.

    >>> for chunk, chunk_sr in load_yield_chunks('audio.wav',
    ...                                          choplenspls=4096):
    ...     pass
    """
    if choplenspls <= 0:
        # With a non-positive chunk length the emit loop below never
        # terminates, because the buffer is never consumed.
        raise ValueError(
            "choplenspls must be a positive number of samples, got %r"
            % (choplenspls,)
        )

    if not hoplenspls or (hoplenspls <= 0 or hoplenspls > choplenspls):
        hoplenspls = choplenspls

    # Rolling buffer of samples that have not been emitted yet.
    # NOTE(review): the buffer is 1-D, so with mono=False and a multi-channel
    # file the 2-D frame cannot be concatenated onto it.
    y = np.array([], dtype=dtype)

    with audioread.audio_open(os.path.realpath(path)) as input_file:
        sr_native = input_file.samplerate
        n_channels = input_file.channels

        # Positions are counted in interleaved samples, hence the
        # multiplication by the channel count.
        s_start = int(np.round(sr_native * offset)) * n_channels

        if duration is None:
            s_end = np.inf
        else:
            s_end = s_start + (int(np.round(sr_native * duration))
                               * n_channels)

        n = 0

        for frame in input_file:
            frame = util.buf_to_float(frame, dtype=dtype)
            n_prev = n
            n = n + len(frame)

            if n < s_start:
                # offset is after the current frame
                # keep reading
                continue

            if s_end < n_prev:
                # we're off the end.  stop reading
                break

            if s_end < n:
                # the end is in this frame.  crop.
                frame = frame[:s_end - n_prev]

            if n_prev <= s_start <= n:
                # beginning is in this frame
                frame = frame[(s_start - n_prev):]

            # NB here we apply to one single frame, the postprocessing that
            # librosa applies to the whole file at the end
            if n_channels > 1:
                frame = frame.reshape((-1, n_channels)).T
                if mono:
                    frame = to_mono(frame)

            if sr is not None:
                # Rates are passed by keyword; librosa >= 0.10 makes them
                # keyword-only.
                frame = resample(frame, orig_sr=sr_native, target_sr=sr,
                                 res_type=res_type)
            else:
                sr = sr_native

            # Final cleanup for dtype and contiguity
            frame = np.ascontiguousarray(frame, dtype=dtype)

            y = np.concatenate((y, frame))
            # Emit every complete chunk currently buffered, advancing the
            # window by the hop length each time.
            while y.shape[0] >= choplenspls:
                yield (y[:choplenspls], sr)
                y = y[hoplenspls:]

    if y.shape[0] != 0:
        print(
            "WARNING: load_yield_chunks() dropped %i final samples"
            % (y.shape[0])
        )
    # TODO can the final incomplete chunk be handled elegantly within the
    # above loop?
def mono_detection(sig):
    """Down-mix a 2-D signal with ``to_mono``; return anything else as-is.

    A 2-D ``sig`` is transposed before being passed to ``to_mono``.
    """
    if len(sig.shape) != 2:
        return sig
    return to_mono(sig.T)