def __call__(self, src: str) -> tuple:
    # Read the source file, band-pass filter it and return the normalized result.
    speech, sr = sf.read(src)
    lowcut, highcut, order = self.calc_params()
    filtered_speech = butter_bandpass_filter(speech, lowcut, highcut, sr, order)
    filtered_normalized = normalize(filtered_speech)
    return filtered_normalized, sr
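# butter_bandpass_filter is not defined in this section; the following is a
# minimal sketch based on the classic SciPy band-pass recipe, assuming a
# Butterworth design applied as second-order sections:
import numpy as np
from scipy.signal import butter, sosfiltfilt

def butter_bandpass_filter(data: np.ndarray, lowcut: float, highcut: float,
                           fs: int, order: int = 5) -> np.ndarray:
    # Design the band-pass in SOS form for numerical stability, then apply it
    # forwards and backwards for zero phase distortion.
    sos = butter(order, [lowcut, highcut], btype='bandpass', fs=fs, output='sos')
    return sosfiltfilt(sos, data)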
def __call__(self, src: str) -> tuple:
    # Convolve the dry speech with an impulse response and mix the scaled
    # reverberant ("wet") signal back into the dry signal.
    impulse_response, sr_imp = self.load_impulse()
    speech, sr_speech = sf.read(src, always_2d=True)
    wet_speech = self.convolve_impulse_response(speech, impulse_response, sr_speech, sr_imp)
    wet_speech = wet_speech * self.cfg.artifact_scaling_factor
    mixed = np.add(speech, wet_speech)
    mixed = normalize(mixed)
    mono_mixed = stereo_to_mono(mixed)
    return mono_mixed, sr_speech
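# convolve_impulse_response and stereo_to_mono are likewise not shown; a sketch
# under the assumption that a mono impulse response is resampled to the speech
# rate and applied per channel via FFT convolution:
import numpy as np
from scipy.signal import fftconvolve, resample_poly

def convolve_impulse_response(speech: np.ndarray, impulse_response: np.ndarray,
                              sr_speech: int, sr_imp: int) -> np.ndarray:
    if sr_imp != sr_speech:
        # Bring the impulse response to the speech sample rate.
        impulse_response = resample_poly(impulse_response, sr_speech, sr_imp)
    # Convolve each channel and trim the tail back to the original length.
    channels = [fftconvolve(speech[:, c], impulse_response)[:speech.shape[0]]
                for c in range(speech.shape[1])]
    return np.stack(channels, axis=1)

def stereo_to_mono(audio: np.ndarray) -> np.ndarray:
    # Average across the channel axis of a (samples, channels) array.
    return np.mean(audio, axis=1)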
def mix_samples(sample_a: np.ndarray, sample_b: np.ndarray, trim_to: str = "a") -> np.ndarray:
    """
    @param sample_a: first audio sample
    @param sample_b: second audio sample
    @param trim_to: which sample ("a" or "b") determines the resulting length;
        the other sample is repeated and/or trimmed to match that length
    @return: sum of both samples, normalized to avoid clipping
    """
    if trim_to == "b":
        sample_a, sample_b = sample_b, sample_a
    sample_b = match_length(sample_b, sample_a.shape[0])
    add = np.add(sample_a, sample_b)
    return normalize(add)
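# match_length is used above but not defined here; a sketch that repeats a mono
# (1-D) sample until it covers the target length and then trims, matching the
# behaviour described in the docstring:
import numpy as np

def match_length(sample: np.ndarray, length: int) -> np.ndarray:
    if sample.shape[0] < length:
        repeats = -(-length // sample.shape[0])  # ceiling division
        sample = np.tile(sample, repeats)
    return sample[:length]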
def read_audio(filename, scipy_=True, normalization=True):
    if scipy_:
        # scipy.io.wavfile.read returns (sample_rate, data); the data is
        # integer PCM for most WAV files, hence the cast to float32.
        sample_rate, audio = wavread(filename)
        audio = audio.astype('float32')
        if audio.ndim >= 2:
            # Down-mix multi-channel audio to mono.
            audio = np.mean(audio, 1)
    else:
        with soundfile.SoundFile(filename) as sound_file:
            audio = sound_file.read(dtype='float32')
            sample_rate = sound_file.samplerate
        if audio.ndim >= 2:
            audio = np.mean(audio, 1)
    if normalization:
        audio = normalize(audio, sample_rate)
    return audio, sample_rate
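# Hypothetical usage; 'speech.wav' is a placeholder path. The soundfile branch
# also handles formats such as FLAC and OGG, while scipy.io.wavfile reads WAV only.
audio, sample_rate = read_audio('speech.wav', scipy_=False)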
def test_normalize(self):
    # Samples are drawn from [0, 2), so the raw signal exceeds the target range.
    sample = 2 * np.random.random_sample(128)
    res = normalize(sample)
    assert (res >= -1).all() and (res <= 1).all()
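# normalize itself is not shown in this section; a peak-normalization sketch
# consistent with the test above (output is guaranteed to lie in [-1, 1]).
# Note that the two-argument call normalize(audio, sample_rate) in read_audio
# suggests a variant that also takes the sample rate; this sketch covers only
# the single-argument form exercised by the test:
import numpy as np

def normalize(audio: np.ndarray) -> np.ndarray:
    peak = np.max(np.abs(audio))
    # Avoid dividing by zero on silent input.
    return audio if peak == 0 else audio / peak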