def run(audio_file: str, out_path: str, model_name: str, pretrained_path: str, lowpass_freq: int = 0):
    """Denoise a single audio file and write the result to *out_path*.

    Args:
        audio_file: path of the input wav file.
        out_path: path the denoised wav file is written to.
        model_name: identifier passed to the model loader.
        pretrained_path: checkpoint path for the pretrained weights.
        lowpass_freq: if non-zero, apply a lowpass filter at this cutoff
            frequency to the model output before writing.
    """
    # librosa resamples to the project sample rate; the native rate is unused.
    wav, _ = librosa.load(audio_file, sr=settings.SAMPLE_RATE)
    wav = preemphasis(wav)
    if wav.dtype != np.float32:
        wav = wav.astype(np.float32)

    # load model
    model = __load_model(model_name, pretrained_path)

    # make tensor wav with a leading batch dimension, on GPU
    wav = torch.FloatTensor(wav).unsqueeze(0).cuda()

    # inference
    print('Inference ...')
    with torch.no_grad():
        out_wav = model(wav)
        out_wav = out_wav[0].cpu().numpy()

    if lowpass_freq:
        out_wav = lowpass(out_wav, frequency=lowpass_freq)

    # save wav
    # NOTE(review): librosa.output.write_wav was removed in librosa 0.8 —
    # if the pinned librosa is >= 0.8 this call must move to soundfile.write.
    librosa.output.write_wav(out_path, inv_preemphasis(out_wav).clip(-1., 1.), settings.SAMPLE_RATE)
    print('Finish !')
def __getitem__(self, idx: int) -> List[Any]: res = super().__getitem__(idx) # augmentation with -1 if np.random.randint(2): res[0] = res[0] * -1 res[1] = res[1] * -1 # augmentation with audioset data / once on three times if np.random.randint(3): rand_amp = np.random.rand() * 0.5 + 0.5 res[0] = augment(res[1], amp=rand_amp) # do volume augmentation rand_vol = np.random.rand() + 0.5 # 0.5 ~ 1.5 res[0] = np.clip(res[0] * rand_vol, -1, 1) res[1] = np.clip(res[1] * rand_vol, -1, 1) # pre emphasis res[0] = preemphasis(res[0]) res[1] = preemphasis(res[1]) return res
def test_dir(in_dir: str, out_dir: str, model_name: str, pretrained_path: str):
    """Run the model over every wav file in *in_dir*, writing results to *out_dir*.

    For each input ``name.wav`` two files are written into *out_dir*:
    ``name_noise.wav`` (the original signal) and ``name_pred.wav``
    (the de-emphasized model prediction).
    """
    # listup files
    print('List up wave files in given directory ...')
    file_list = glob.glob(os.path.join(in_dir, '*.wav'))

    # load model
    model = __load_model(model_name, pretrained_path)

    # mkdir
    os.makedirs(out_dir, exist_ok=True)

    # loop all
    print('Process files ...')
    noise_all = []
    results = []
    # TODO: Convert it to batch mode
    for file_path in tqdm(file_list):
        # load wave, resampled to the project sample rate (native rate unused)
        origin_wav, _ = librosa.load(file_path, sr=settings.SAMPLE_RATE)
        # default, preemp
        wav = preemphasis(origin_wav)
        # wave to cuda tensor with a leading batch dimension
        wav = torch.FloatTensor(wav).unsqueeze(0).cuda()
        # inference
        with torch.no_grad():
            clean_hat = model(wav)
        noise_all.append(origin_wav)
        # move to CPU now so GPU memory is not held for the whole directory
        results.append(clean_hat.squeeze().cpu())

    # write all
    print('Write all result into {} ...'.format(out_dir))
    for file_path, clean_hat, noise in zip(file_list, results, noise_all):
        # splitext only strips the extension, so dotted names like
        # "a.b.wav" keep "a.b" (the old split('.')[0] truncated them)
        file_name = os.path.splitext(os.path.basename(file_path))[0]
        noise_out_path = os.path.join(out_dir, '{}_noise.wav'.format(file_name))
        clean_out_path = os.path.join(out_dir, '{}_pred.wav'.format(file_name))
        librosa.output.write_wav(noise_out_path, noise, settings.SAMPLE_RATE)
        librosa.output.write_wav(clean_out_path, inv_preemphasis(clean_hat.numpy()), settings.SAMPLE_RATE)
    print('Finish !')
def run(*audio_files: str, out_path: str, model_name: str, pretrained_path: str, lowpass_freq: int = 0, sample_rate: int = 22050):
    """Denoise one or more audio files, writing "<name>_out.wav" into *out_path*.

    Args:
        *audio_files: paths of the input wav files (annotated ``str`` per
            PEP 484 — a variadic parameter is annotated with its element type).
        out_path: directory the output files are written to.
        model_name: identifier passed to the model loader.
        pretrained_path: checkpoint path for the pretrained weights.
        lowpass_freq: if non-zero, lowpass-filter the output at this frequency.
        sample_rate: sample rate the inputs are resampled to and outputs saved at.
    """
    # load model once, hoisted out of the per-file loop
    model = __load_model(model_name, pretrained_path)

    for audio_file in audio_files:
        print('Loading audio file...')
        wav, _ = librosa.load(audio_file, sr=sample_rate)
        wav = preemphasis(wav)
        if wav.dtype != np.float32:
            wav = wav.astype(np.float32)

        # make tensor wav with a leading batch dimension (CPU inference)
        wav = torch.FloatTensor(wav).unsqueeze(0)

        # inference
        print('Inference ...')
        with torch.no_grad():
            out_wav = model(wav)
            out_wav = out_wav[0].cpu().numpy()

        if lowpass_freq:
            out_wav = lowpass(out_wav, frequency=lowpass_freq)

        # save wav as "<original base name>_out.wav" inside out_path
        librosa.output.write_wav(
            os.path.join(
                out_path,
                os.path.basename(audio_file).rsplit(".", 1)[0] + "_out.wav"),
            inv_preemphasis(out_wav).clip(-1., 1.), sample_rate)
    print('Finish !')
def __getitem__(self, idx):
    """Load the idx-th wav file, pre-emphasize it, and return [signal, length].

    Clips longer than ``max_len`` seconds are replaced by a single zero
    sample, so over-long files contribute a dummy one-sample signal.
    """
    signal, _ = librosa.load(self.wav_list[idx], sr=self.sample_rate)
    signal = signal.squeeze()
    limit = self.sample_rate * self.max_len
    if len(signal) > limit:
        # too long: substitute a one-sample silent placeholder
        signal = np.zeros(1)
    return [preemphasis(signal), np.array([len(signal)])]