def validation_for_A_dir(self):
    """Convert every wav in ``self.validation_A_dir`` from speaker A to
    speaker B and write the synthesized audio into ``self.output_A_dir``.

    Per-file pipeline: WORLD decomposition -> log-F0 statistics based
    pitch conversion -> MCEP normalization -> ``generator_A2B`` ->
    de-normalization -> WORLD re-synthesis.
    """
    num_mcep = 24
    sampling_rate = 16000
    frame_period = 5.0
    validation_A_dir = self.validation_A_dir
    output_A_dir = self.output_A_dir
    # Query device availability once instead of once per file.
    use_cuda = torch.cuda.is_available()
    print("Generating Test Data B from A...")
    for file in os.listdir(validation_A_dir):
        filePath = os.path.join(validation_A_dir, file)
        wav, _ = librosa.load(filePath, sr=sampling_rate, mono=True)
        # Pad so the frame count is a multiple of 4 — presumably required
        # by the generator's down/up-sampling; confirm against the model.
        wav = preprocess.wav_padding(wav=wav, sr=sampling_rate,
                                     frame_period=frame_period, multiple=4)
        f0, timeaxis, sp, ap = preprocess.world_decompose(
            wav=wav, fs=sampling_rate, frame_period=frame_period)
        # Pitch conversion using the log-F0 mean/std of source (A) and
        # target (B) speakers.
        f0_converted = preprocess.pitch_conversion(
            f0=f0,
            mean_log_src=self.log_f0s_mean_A,
            std_log_src=self.log_f0s_std_A,
            mean_log_target=self.log_f0s_mean_B,
            std_log_target=self.log_f0s_std_B)
        coded_sp = preprocess.world_encode_spectral_envelop(
            sp=sp, fs=sampling_rate, dim=num_mcep)
        coded_sp_transposed = coded_sp.T
        # Normalize with source-speaker statistics, add a batch dim.
        coded_sp_norm = (coded_sp_transposed - self.coded_sps_A_mean) / self.coded_sps_A_std
        coded_sp_norm = np.array([coded_sp_norm])
        coded_sp_norm = torch.from_numpy(coded_sp_norm).float()
        if use_cuda:
            coded_sp_norm = coded_sp_norm.cuda()
        # Inference only — no_grad avoids building the autograd graph.
        with torch.no_grad():
            coded_sp_converted_norm = self.generator_A2B(coded_sp_norm)
        coded_sp_converted_norm = coded_sp_converted_norm.cpu().detach().numpy()
        coded_sp_converted_norm = np.squeeze(coded_sp_converted_norm)
        # De-normalize with target-speaker statistics.
        coded_sp_converted = coded_sp_converted_norm * \
            self.coded_sps_B_std + self.coded_sps_B_mean
        coded_sp_converted = coded_sp_converted.T
        # NOTE(review): ascontiguousarray suggests the decoder (pyworld)
        # needs C-contiguous input — confirm.
        coded_sp_converted = np.ascontiguousarray(coded_sp_converted)
        decoded_sp_converted = preprocess.world_decode_spectral_envelop(
            coded_sp=coded_sp_converted, fs=sampling_rate)
        wav_transformed = preprocess.world_speech_synthesis(
            f0=f0_converted, decoded_sp=decoded_sp_converted, ap=ap,
            fs=sampling_rate, frame_period=frame_period)
        # NOTE(review): librosa.output.write_wav was removed in librosa 0.8;
        # this code requires librosa < 0.8 (soundfile.write is the modern
        # replacement).
        librosa.output.write_wav(path=os.path.join(output_A_dir,
                                                   os.path.basename(file)),
                                 y=wav_transformed, sr=sampling_rate)
    print("finish!")
def world_encode_data_toSave(num_mcep, hdf5_dir, wav_dir, sr, frame_period=5.0,
                             coded_dim24=24, coded_dim36=36):
    """WORLD-analyse every wav in *wav_dir*, cache all features per file as
    an HDF5 archive in *hdf5_dir*, and return the features in memory.

    Parameters
    ----------
    num_mcep : int
        Which coded spectral-envelope dimensionality to return (24, 32 or 36).
        All three are computed and cached regardless.
    hdf5_dir, wav_dir : str
        Output cache directory / input wav directory.
    sr : int
        Sampling rate passed to librosa and the WORLD analysis.
    frame_period : float
        WORLD frame period in milliseconds.

    Returns
    -------
    (f0s, timeaxes, sps, aps, coded_sps) — per-file lists; ``coded_sps``
    matches *num_mcep*.

    Raises
    ------
    ValueError
        If *num_mcep* is not one of 24, 32 or 36.
    """
    # Validate up front: the original trailing ``assert`` only fired after
    # the expensive per-file loop, and asserts are stripped under -O.
    if num_mcep not in (24, 32, 36):
        raise ValueError("spectral envelop dimension mismatch: %r" % (num_mcep,))
    f0s = list()
    timeaxes = list()
    sps = list()
    aps = list()
    coded_sps24 = list()
    coded_sps36 = list()
    coded_sps32 = list()
    for file in os.listdir(wav_dir):
        file_path = os.path.join(wav_dir, file)
        wav, _ = librosa.load(file_path, sr=sr, mono=True)
        f0, timeaxis, sp, ap = world_decompose(wav=wav, fs=sr,
                                               frame_period=frame_period)
        # All three codings are produced so the cache serves any consumer.
        coded_sp24 = world_encode_spectral_envelop(sp=sp, fs=sr, dim=coded_dim24)
        coded_sp36 = world_encode_spectral_envelop(sp=sp, fs=sr, dim=coded_dim36)
        coded_sp32 = world_encode_spectral_envelop(sp=sp, fs=sr, dim=32)
        f0s.append(f0)
        timeaxes.append(timeaxis)
        sps.append(sp)
        aps.append(ap)
        coded_sps24.append(coded_sp24)
        coded_sps36.append(coded_sp36)
        coded_sps32.append(coded_sp32)
        # Cache this file's features so later runs can skip WORLD analysis.
        item = {"f0": f0, "timeaxe": timeaxis, "ap": ap, "sp": sp,
                "coded24": coded_sp24, "coded36": coded_sp36,
                "coded32": coded_sp32}
        hdf5_file_name = os.path.join(hdf5_dir,
                                      os.path.splitext(file)[0] + ".h5")
        hdf5_write(hdf5_file_name, item)
    # Select the requested coding (validated above).
    coded_sps = {24: coded_sps24, 36: coded_sps36, 32: coded_sps32}[num_mcep]
    return f0s, timeaxes, sps, aps, coded_sps
def process_file(filePath, num_mcep=24, sampling_rate=16000, frame_period=5.0):
    """Load one wav file and return its WORLD features ``(coded_sp, f0)``.

    The analysis constants were previously hard-coded locals; they are now
    keyword arguments whose defaults reproduce the original values, so
    existing callers are unaffected.  The unused ``n_frames`` local was
    removed.

    Parameters
    ----------
    filePath : str
        Path of the wav file to analyse.
    num_mcep : int
        Dimensionality of the coded spectral envelope.
    sampling_rate : int
        Target sampling rate for loading and WORLD analysis.
    frame_period : float
        WORLD frame period in milliseconds.

    Returns
    -------
    tuple
        ``(coded_sp, f0)`` as produced by the ``preprocess`` helpers.
    """
    wav, _ = librosa.load(filePath, sr=sampling_rate, mono=True)
    # Pad so the frame count is a multiple of 4 — presumably a model
    # stride requirement; confirm against the consumer.
    wav = preprocess.wav_padding(wav=wav, sr=sampling_rate,
                                 frame_period=frame_period, multiple=4)
    f0, timeaxis, sp, ap = preprocess.world_decompose(
        wav=wav, fs=sampling_rate, frame_period=frame_period)
    coded_sp = preprocess.world_encode_spectral_envelop(
        sp=sp, fs=sampling_rate, dim=num_mcep)
    return coded_sp, f0
def test(filename):
    """Convert one utterance A -> B with the trained model and return the
    synthesized waveform shaped ``(1, samples, 1)``.

    The normalized MCEPs are segmented into fixed-length chunks by
    ``seg_and_pad``; each chunk is converted independently and the
    resulting waveforms are concatenated.
    """
    wav, _ = librosa.load(filename, sr=hp.rate)
    f0, timeaxis, sp, ap = world_decompose(wav, hp.rate)
    # Log-Gaussian pitch transformation using cached A/B statistics.
    f0_converted = pitch_conversion(f0, log_f0s_mean_A, log_f0s_std_A,
                                    log_f0s_mean_B, log_f0s_std_B)
    coded_sp = world_encode_spectral_envelop(sp, hp.rate, hp.num_mceps)
    coded_sp_transposed = coded_sp.T
    # Normalize with source-speaker statistics, then split into
    # hp.n_frames-long segments (the last one padded).
    coded_sp_norm = (coded_sp_transposed - coded_sps_A_mean) / coded_sps_A_std
    coded_sp_norm = seg_and_pad(coded_sp_norm, hp.n_frames)
    wav_forms = []
    for i, sp_norm in enumerate(coded_sp_norm):
        sp_norm = np.expand_dims(sp_norm, axis=-1)
        coded_sp_converted_norm = infer(sp_norm)
        # De-normalize with target-speaker statistics; pyworld decoding
        # needs float64, C-contiguous input.
        coded_sp_converted = coded_sp_converted_norm * coded_sps_B_std + coded_sps_B_mean
        coded_sp_converted = np.array(coded_sp_converted, dtype=np.float64).T
        coded_sp_converted = np.ascontiguousarray(coded_sp_converted)
        decode_sp_converted = world_decode_spectral_envelop(
            coded_sp_converted, hp.rate)
        if len(f0) < (i + 1) * hp.output_size:
            # Final, partially-filled segment: trim the converted envelope
            # and the f0/aperiodicity slices to the true remaining length,
            # synthesize, and stop.  Note this branch is only reached when
            # len(f0) is NOT an exact multiple of hp.output_size, so the
            # ``% hp.output_size`` remainder is non-zero here.
            decode_sp_converted = decode_sp_converted[:len(f0) % hp.output_size]
            f0_piece = f0_converted[i * hp.output_size:i * hp.output_size + len(f0) % hp.output_size]
            ap_piece = ap[i * hp.output_size:i * hp.output_size + len(f0) % hp.output_size]
            # NOTE(review): hp.duration is passed where a frame period is
            # expected by the other synthesis call sites — confirm the two
            # are the same quantity.
            wav_transformed = world_speech_synthesis(f0_piece,
                                                     decode_sp_converted,
                                                     ap_piece, hp.rate,
                                                     hp.duration)
            wav_forms.append(wav_transformed)
            break
        else:
            # Full segment: take a whole hp.output_size window.
            f0_piece = f0_converted[i * hp.output_size:(i + 1) * hp.output_size]
            ap_piece = ap[i * hp.output_size:(i + 1) * hp.output_size]
            wav_transformed = world_speech_synthesis(f0_piece,
                                                     decode_sp_converted,
                                                     ap_piece, hp.rate,
                                                     hp.duration)
            wav_forms.append(wav_transformed)
    # Stitch the per-segment waveforms and add (batch, ..., channel) dims.
    wav_forms = np.concatenate(wav_forms)
    wav_forms = np.expand_dims(wav_forms, axis=-1)
    wav_forms = np.expand_dims(wav_forms, axis=0)
    return wav_forms
def world_encode_data_toSave_spec(num_mcep, hdf5_dir, wav_dir, sr, frame_period=5.0,
                                  coded_dim24=24, coded_dim36=36):
    """Like ``world_encode_data_toSave`` but additionally computes and caches
    a per-frame STFT magnitude spectrogram for each file.

    Cached/returned features per file:
      1. coded spectral envelopes (24/32/36-dim MCEP, [dim x frame])
      2. the raw WORLD ("cheaptrick") spectral envelope
      3. a per-frame magnitude spectrogram (513 bins per frame)

    Returns ``(f0s, timeaxes, sps, aps, coded_sps, spectrograms)`` where
    ``coded_sps`` matches *num_mcep*.

    Raises
    ------
    ValueError
        If *num_mcep* is not one of 24, 32 or 36.
    """
    # Validate up front: the original trailing ``assert`` only fired after
    # all files were processed, and asserts are stripped under -O.
    if num_mcep not in (24, 32, 36):
        raise ValueError("spectral envelop dimension mismatch: %r" % (num_mcep,))
    f0s = list()
    timeaxes = list()
    sps = list()
    aps = list()
    coded_sps24 = list()
    coded_sps36 = list()
    coded_sps32 = list()
    spectrograms = list()

    def calc_spec_wav(wav, f0):
        """Per-frame magnitude spectrum: one 513-bin column per hop."""
        # NOTE(review): these constants shadow the outer ``sr`` parameter
        # and hard-code 44 kHz.  The original comment claimed
        # "44000 * 0.005 = 80", but 44000 * 0.005 = 220; 80 samples would
        # correspond to sr = 16000.  Confirm which rate the wavs actually
        # use before trusting the windowing below.
        sr = 44000
        duration = 0.005
        hop_size = int(sr * duration)
        spectrograms = list()
        # NOTE(review): the +1 means the last iteration may index f0 past
        # its end if f0 has fewer entries than hops — confirm f0 length.
        for i in range(wav.shape[0] // (hop_size) + 1):
            start = i * hop_size
            if f0[i] == 0:
                # Unvoiced frame: fixed 1024-sample analysis window.
                segment_wav = wav[start : start+1024]
            else:
                # Voiced frame: window spanning three pitch periods.
                segment_wav = wav[start : start+int(3 * 1 / f0[i] * 44000)]
            fft_size = 1024
            if segment_wav.shape[0] == 0:
                # Past end of signal: emit a silent frame.
                D = np.zeros((513))
            else:
                # hop_length larger than any segment => a single STFT
                # column; keep only that column.
                D = np.abs(librosa.stft(segment_wav, n_fft=fft_size,
                                        hop_length=2048))[:, 0]
            magnitude = D
            spectrograms.append(magnitude)
        return spectrograms

    for file in os.listdir(wav_dir):
        print("----")
        print(file)
        file_path = os.path.join(wav_dir, file)
        wav, _ = librosa.load(file_path, sr=sr, mono=True)
        f0, timeaxis, sp, ap = world_decompose(wav=wav, fs=sr,
                                               frame_period=frame_period)
        spectrogram = calc_spec_wav(wav, f0)
        # All three codings are produced so the cache serves any consumer.
        coded_sp24 = world_encode_spectral_envelop(sp=sp, fs=sr, dim=coded_dim24)
        coded_sp36 = world_encode_spectral_envelop(sp=sp, fs=sr, dim=coded_dim36)
        coded_sp32 = world_encode_spectral_envelop(sp=sp, fs=sr, dim=32)
        spectrogram = np.array(spectrogram)
        f0s.append(f0)
        timeaxes.append(timeaxis)
        sps.append(sp)
        aps.append(ap)
        coded_sps24.append(coded_sp24)
        coded_sps36.append(coded_sp36)
        coded_sps32.append(coded_sp32)
        spectrograms.append(spectrogram)
        # Cache this file's features so later runs can skip WORLD analysis.
        item = {"f0": f0, "timeaxe": timeaxis, "ap": ap, "sp": sp,
                "coded24": coded_sp24, "coded36": coded_sp36,
                "coded32": coded_sp32, "spectrogram": spectrogram}
        hdf5_file_name = os.path.join(hdf5_dir,
                                      os.path.splitext(file)[0] + ".h5")
        hdf5_write(hdf5_file_name, item)
    # Select the requested coding (validated above).
    coded_sps = {24: coded_sps24, 36: coded_sps36, 32: coded_sps32}[num_mcep]
    return f0s, timeaxes, sps, aps, coded_sps, spectrograms
start = i * hop_size if f0[i] == 0: segment_wav = wav[start : start+1024] else: segment_wav = wav[start : start+int(3 * 1 / f0[i] * 44000)] fft_size = 1024 # D = np.abs(librosa.stft(segment_wav, n_fft=fft_size, hop_length=segment_wav.shape[0]*2)) if segment_wav.shape[0] == 0: D = np.zeros((513)) else: D = np.abs(librosa.stft(segment_wav, n_fft=fft_size, hop_length=2048))[:, 0] magnitude = D#[:-1] spectrograms.append(magnitude) return spectrograms spectrogram = calc_spec_wav(wav, f0) coded_sp24 = world_encode_spectral_envelop(sp=sp, fs=sr, dim=24) coded_sp36 = world_encode_spectral_envelop(sp=sp, fs=sr, dim=36) coded_sp32 = world_encode_spectral_envelop(sp=sp, fs=sr, dim=32) spectrogram = np.array(spectrogram) # file write item = {"f0": f0, "timeaxe": timeaxis, "ap": ap, "sp": sp, "coded24": coded_sp24, "coded36": coded_sp36, "coded32": coded_sp32, "spectrogram": spectrogram} hdf5_file_name = hdf_file hdf5_write(hdf5_file_name, item)
# Script-level conversion driver: restore the latest model weights, load the
# cached normalization statistics for both speakers, and convert one fixed
# utterance.  NOTE(review): this chunk mirrors the body of ``test()`` above
# but appears truncated after the tail-segment trim — the synthesis /
# concatenation steps that follow in ``test()`` are not present here;
# confirm against the full file.
model.load_weights(latest)
print('Loading cached data...')
# Source-speaker (JSUT corpus) statistics: normalized MCEPs, MCEP mean/std,
# and log-F0 mean/std.
with open('./datasets/JSUT/jsut.p', 'rb') as f:
    coded_sps_A_norm, coded_sps_A_mean, coded_sps_A_std, log_f0s_mean_A, log_f0s_std_A = pickle.load(
        f)
# Target-speaker statistics, same layout.
with open('./datasets/target_voice/target_voice.p', 'rb') as f:
    coded_sps_B_norm, coded_sps_B_mean, coded_sps_B_std, log_f0s_mean_B, log_f0s_std_B = pickle.load(
        f)
wav, _ = librosa.load('./outputs/100002.wav', sr=hp.rate)
f0, timeaxis, sp, ap = world_decompose(wav, hp.rate)
# Log-Gaussian pitch transformation A -> B.
f0_converted = pitch_conversion(f0, log_f0s_mean_A, log_f0s_std_A,
                                log_f0s_mean_B, log_f0s_std_B)
coded_sp = world_encode_spectral_envelop(sp, hp.rate, hp.num_mceps)
coded_sp_transposed = coded_sp.T
# Normalize with source statistics and segment into hp.n_frames chunks.
coded_sp_norm = (coded_sp_transposed - coded_sps_A_mean) / coded_sps_A_std
coded_sp_norm = seg_and_pad(coded_sp_norm, hp.n_frames)
wav_forms = []
for i, sp_norm in enumerate(coded_sp_norm):
    sp_norm = np.expand_dims(sp_norm, axis=-1)
    # The model takes the segment twice and the converted MCEPs are the
    # second output's first element — presumably (A2B, B2A) style outputs;
    # confirm against the model definition.
    coded_sp_converted_norm = model([sp_norm, sp_norm])[1][0]
    # De-normalize with target statistics; pyworld needs float64,
    # C-contiguous input.
    coded_sp_converted = coded_sp_converted_norm * coded_sps_B_std + coded_sps_B_mean
    coded_sp_converted = np.array(coded_sp_converted, dtype=np.float64).T
    coded_sp_converted = np.ascontiguousarray(coded_sp_converted)
    decode_sp_converted = world_decode_spectral_envelop(
        coded_sp_converted, hp.rate)
    if len(f0) < (i + 1) * hp.output_size:
        # Final, partially-filled segment: trim to the true remainder.
        decode_sp_converted = decode_sp_converted[:len(f0) % hp.output_size]
def conversion(model_path, data_dir, output_dir, no_spec=False):
    """Convert every wav in *data_dir* with a trained ``EncDecGen`` model
    and write the re-synthesized audio into *output_dir*.

    Parameters
    ----------
    model_path : str
        Checkpoint to load into the model.
    data_dir, output_dir : str
        Input wav directory / output directory (created if missing).
    no_spec : bool
        If True, keep the *source* spectral envelope (energy-matched to the
        converted one) and only convert pitch; if False, use the converted
        spectral envelope as well.

    Per-file failures are caught and printed so one bad file does not abort
    the whole batch (deliberate best-effort behavior).
    """
    sampling_rate = 16000
    num_mcep = 23
    frame_period = 5.0
    # exist_ok avoids the check-then-create race of the original.
    os.makedirs(output_dir, exist_ok=True)
    # Use num_mcep rather than repeating the magic constant 23.
    model = EncDecGen(num_mfc_features=num_mcep, pre_train=None)
    model.load(filepath=model_path)
    for file in os.listdir(data_dir):
        try:
            wav = scwav.read(os.path.join(data_dir, file))
            wav = wav[1].astype(np.float64)
            wav = preproc.wav_padding(wav=wav, sr=sampling_rate,
                                      frame_period=frame_period, multiple=4)
            f0, sp, ap = preproc.world_decompose(wav=wav, fs=sampling_rate,
                                                 frame_period=frame_period)
            code_sp = preproc.world_encode_spectral_envelop(sp, sampling_rate,
                                                            dim=num_mcep)
            # Remember unvoiced frames (f0 ~ 0) so they can be restored
            # after interpolation/smoothing.
            z_idx = np.where(f0 < 10.0)[0]
            f0 = scisig.medfilt(f0, kernel_size=3)
            f0 = generate_interpolation(f0)
            f0 = smooth(f0, window_len=13)
            # Shape to (batch, features, time) as the model expects.
            f0 = np.reshape(f0, (1, -1, 1))
            code_sp = np.reshape(code_sp, (1, -1, num_mcep))
            code_sp = np.transpose(code_sp, axes=(0, 2, 1))
            f0 = np.transpose(f0, axes=(0, 2, 1))
            # Prediction
            _, f0_conv, code_sp_conv = model.test(input_mfc=code_sp,
                                                  input_pitch=f0)
            code_sp_conv = np.transpose(code_sp_conv, axes=(0, 2, 1))
            f0_conv = np.asarray(np.reshape(f0_conv, (-1,)), np.float64)
            code_sp_conv = np.asarray(np.squeeze(code_sp_conv), np.float64)
            # pyworld needs C-contiguous float64 input.
            code_sp_conv = np.copy(code_sp_conv, order='C')
            sp_conv = preproc.world_decode_spectral_envelop(code_sp_conv,
                                                            sampling_rate)
            # Restore silence in originally-unvoiced frames.
            f0_conv[z_idx] = 0.0
            if no_spec:
                # Keep the source envelope but scale its per-frame energy
                # to the converted envelope's energy contour.
                ec = np.reshape(np.sqrt(np.sum(np.square(sp), axis=1)),
                                (-1, 1))
                ec_conv = np.reshape(np.sqrt(np.sum(np.square(sp_conv),
                                                    axis=1)), (-1, 1))
                # Making sure silence remains silence
                sil_zone = np.where(ec < 1e-10)[0]
                ec_conv[sil_zone] = 1e-10
                # NOTE(review): frames with ec == 0 still divide by zero
                # here (only ec_conv is floored) — confirm whether ec can
                # be exactly zero in practice.
                sp = np.divide(np.multiply(sp, ec_conv), ec)
                sp = np.copy(sp, order='C')
                wav_transformed = preproc.world_speech_synthesis(
                    f0=f0_conv, decoded_sp=sp, ap=ap, fs=sampling_rate,
                    frame_period=frame_period)
            else:
                wav_transformed = preproc.world_speech_synthesis(
                    f0=f0_conv, decoded_sp=sp_conv, ap=ap, fs=sampling_rate,
                    frame_period=frame_period)
            # NOTE(review): librosa.output.write_wav requires librosa < 0.8.
            librosa.output.write_wav(
                os.path.join(output_dir, os.path.basename(file)),
                wav_transformed, sampling_rate)
            print("Reconstructed file " + os.path.basename(file))
        except Exception as ex:
            # Best-effort batch conversion: report and continue.
            print(ex)
def train(emo_pair, train_dir, model_dir, model_name, \
          random_seed, validation_dir, output_dir, \
          pre_train=None, lambda_encoder=1, lambda_decoder=1, \
          lambda_generator=1):
    """Train the encoder/decoder/generator model on pitch-momentum data for
    one emotion pair.

    Loads ``momenta_train.mat`` / ``momenta_valid.mat`` from *train_dir*,
    trains for ``num_epochs`` epochs with per-epoch resampling, logs train
    and validation losses, saves the model every epoch, and every 100
    epochs plots validation pitch curves and (if *validation_dir* is set)
    synthesizes converted audio for the validation wavs into *output_dir*.
    """
    np.random.seed(random_seed)
    # --- hyper-parameters -------------------------------------------------
    num_epochs = 1000
    mini_batch_size = 1
    encoder_learning_rate = 0.0001
    decoder_learning_rate = 0.0001
    generator_learning_rate = 0.0001
    sampling_rate = 16000
    num_mcep = 23
    frame_period = 5.0
    n_frames = 128
    # NOTE(review): the next three self-assignments are no-ops.
    lambda_encoder = lambda_encoder
    lambda_decoder = lambda_decoder
    lambda_generator = lambda_generator
    # Run tag used for log files, plot directories and validation output.
    le_ld_lg = "le_"+str(lambda_encoder)+"_ld_"+str(lambda_decoder) \
                +"_lg_"+str(lambda_generator)+'_'+emo_pair
    logger_file = './log/' + le_ld_lg + '.log'
    if not os.path.exists('./log'):
        os.mkdir('./log')
    if os.path.exists(logger_file):
        os.remove(logger_file)
    # NOTE(review): basicConfig writes to "logger_<tag>.log", not the
    # ``logger_file`` deleted above — the removal therefore never clears
    # the file actually written to; confirm which name is intended.
    logging.basicConfig(filename="./log/logger_"+le_ld_lg+".log", \
                        level=logging.DEBUG)
    logging.info("encoder_loss - L1")
    logging.info("decoder_loss - L1")
    logging.info("generator_loss - L1")
    logging.info("lambda_encoder - {}".format(lambda_encoder))
    logging.info("lambda_decoder - {}".format(lambda_decoder))
    logging.info("lambda_generator - {}".format(lambda_generator))
    if not os.path.isdir("./generated_pitch_spect/" + le_ld_lg):
        os.makedirs("./generated_pitch_spect/" + le_ld_lg)
    logging.info('Loading Data...')
    start_time = time.time()
    # --- data loading -----------------------------------------------------
    data_train = scio.loadmat(os.path.join(train_dir, 'momenta_train.mat'))
    data_valid = scio.loadmat(os.path.join(train_dir, 'momenta_valid.mat'))
    # Pitch and momentum features get a trailing channel dim; MFCs do not.
    pitch_A_train = np.expand_dims(data_train['src_f0_feat'], axis=-1)
    pitch_B_train = np.expand_dims(data_train['tar_f0_feat'], axis=-1)
    mfc_A_train = data_train['src_mfc_feat']
    mfc_B_train = data_train['tar_mfc_feat']
    momenta_A2B_train = np.expand_dims(data_train['momenta_f0'], axis=-1)
    pitch_A_valid = np.expand_dims(data_valid['src_f0_feat'], axis=-1)
    pitch_B_valid = np.expand_dims(data_valid['tar_f0_feat'], axis=-1)
    mfc_A_valid = data_valid['src_mfc_feat']
    mfc_B_valid = data_valid['tar_mfc_feat']
    momenta_A2B_valid = np.expand_dims(data_valid['momenta_f0'], axis=-1)
    # Validation set is sampled once, outside the epoch loop.
    mfc_A_valid, pitch_A_valid, mfc_B_valid, pitch_B_valid, momenta_A2B_valid \
        = preproc.sample_data(mfc_A=mfc_A_valid, pitch_A=pitch_A_valid, \
                              mfc_B=mfc_B_valid, pitch_B=pitch_B_valid, \
                              momenta_A2B=momenta_A2B_valid)
    if validation_dir is not None:
        validation_output_dir = os.path.join(output_dir, le_ld_lg)
        if not os.path.exists(validation_output_dir):
            os.makedirs(validation_output_dir)
    end_time = time.time()
    time_elapsed = end_time - start_time
    logging.info('Loading Done.')
    logging.info('Time Elapsed for Data Preprocessing: %02d:%02d:%02d' % (time_elapsed // 3600, \
                 (time_elapsed % 3600 // 60), \
                 (time_elapsed % 60 // 1)))
    model = EncDecGen(
        num_mfc_features=23,
        pre_train=pre_train)  # use pre_train arg to provide trained model
    # --- training loop ----------------------------------------------------
    for epoch in range(1, num_epochs + 1):
        logging.info('Epoch: %d' % epoch)
        start_time_epoch = time.time()
        # Re-sample (shuffle/align) the training data each epoch.
        mfc_A, pitch_A, mfc_B, \
            pitch_B, momenta_A2B = preproc.sample_data(mfc_A=mfc_A_train, \
                pitch_A=pitch_A_train, mfc_B=mfc_B_train, \
                pitch_B=pitch_B_train, momenta_A2B=momenta_A2B_train)
        n_samples = mfc_A.shape[0]
        batch_enc_loss = list()
        batch_dec_loss = list()
        batch_gen_loss = list()
        batch_tot_loss = list()
        for i in range(n_samples // mini_batch_size):
            start = i * mini_batch_size
            end = (i + 1) * mini_batch_size
            encoder_loss, decoder_loss, generator_loss, \
                gen_momenta, gen_pitch, gen_mfc \
                = model.train(input_mfc_A=mfc_A[start:end], \
                              input_mfc_B=mfc_B[start:end], \
                              input_pitch_A=pitch_A[start:end], \
                              input_pitch_B=pitch_B[start:end], \
                              input_momenta_A2B=momenta_A2B[start:end], \
                              lambda_encoder=lambda_encoder, \
                              lambda_decoder=lambda_decoder, \
                              lambda_generator=lambda_generator, \
                              encoder_learning_rate=encoder_learning_rate, \
                              decoder_learning_rate=decoder_learning_rate, \
                              generator_learning_rate = generator_learning_rate)
            batch_enc_loss.append(encoder_loss)
            batch_dec_loss.append(decoder_loss)
            batch_gen_loss.append(generator_loss)
            # Weighted sum of the three component losses.
            batch_tot_loss.append(lambda_encoder*encoder_loss \
                + lambda_decoder*decoder_loss + lambda_generator*generator_loss)
        # Checkpoint every epoch (overwrites the same file name).
        model.save(directory=model_dir, filename=model_name)
        logging.info("Train Encoder Loss- {}".format(np.mean(batch_enc_loss)))
        logging.info("Train Decoder Loss- {}".format(np.mean(batch_dec_loss)))
        logging.info("Train Generator Loss- {}".format(
            np.mean(batch_gen_loss)))
        logging.info("Train Total Loss- {}".format(np.mean(batch_tot_loss)))
        # Getting results on validation set
        valid_enc_loss = list()
        valid_dec_loss = list()
        valid_gen_loss = list()
        valid_tot_loss = list()
        for i in range(mfc_A_valid.shape[0]):
            gen_momenta, gen_pitch, gen_mfc, \
                enc_loss, dec_loss, gen_loss, \
                = model.compute_test_loss(input_mfc_A=mfc_A_valid[i:i+1], \
                                          input_pitch_A=pitch_A_valid[i:i+1], \
                                          input_momenta_A2B=momenta_A2B_valid[i:i+1], \
                                          input_mfc_B=mfc_B_valid[i:i+1], \
                                          input_pitch_B=pitch_B_valid[i:i+1])
            valid_enc_loss.append(enc_loss)
            valid_dec_loss.append(dec_loss)
            valid_gen_loss.append(gen_loss)
            valid_tot_loss.append(lambda_encoder*enc_loss \
                + lambda_decoder*dec_loss + lambda_generator*gen_loss)
            # Every 100 epochs, plot each validation sample's pitch and
            # momentum curves against the generated ones.
            if epoch % 100 == 0:
                pylab.figure(figsize=(12, 12))
                pylab.plot(pitch_A_valid[i].reshape(-1, ),
                           label="Input Neutral")
                pylab.plot(pitch_B_valid[i].reshape(-1, ),
                           label="Target Angry")
                pylab.plot(gen_pitch.reshape(-1, ),
                           label="Generated Angry")
                pylab.plot(momenta_A2B_valid[i].reshape(-1, ),
                           label="Target Momentum")
                pylab.plot(gen_momenta.reshape(-1, ),
                           label="Generated Momentum")
                pylab.legend(loc=1)
                pylab.title("Epoch " + str(epoch) + " example " + str(i + 1))
                pylab.savefig("./generated_pitch_spect/"+le_ld_lg+'/'+str(epoch)\
                              + "_"+str(i+1)+".png")
                pylab.close()
        logging.info("Valid Encoder Loss- {}".format(np.mean(valid_enc_loss)))
        logging.info("Valid Decoder Loss- {}".format(np.mean(valid_dec_loss)))
        logging.info("Valid Generator Loss- {}".format(
            np.mean(valid_gen_loss)))
        logging.info("Valid Total Loss- {}".format(np.mean(valid_tot_loss)))
        end_time_epoch = time.time()
        time_elapsed_epoch = end_time_epoch - start_time_epoch
        logging.info('Time Elapsed for This Epoch: %02d:%02d:%02d' % (time_elapsed_epoch // 3600, \
                     (time_elapsed_epoch % 3600 // 60),
                     (time_elapsed_epoch % 60 // 1)))
        # Every 100 epochs, synthesize converted audio for the validation
        # wavs with the current model (best-effort: failures are logged
        # and skipped).
        if validation_dir is not None:
            if epoch % 100 == 0:
                logging.info('Generating Validation Data B from A...')
                sys.stdout.flush()
                for file in sorted(os.listdir(validation_dir)):
                    try:
                        filepath = os.path.join(validation_dir, file)
                        wav = scwav.read(filepath)
                        wav = wav[1].astype(np.float64)
                        wav = preproc.wav_padding(wav=wav, sr=sampling_rate, \
                                                  frame_period=frame_period, multiple=4)
                        f0, sp, ap = preproc.world_decompose(wav=wav, \
                                        fs=sampling_rate, frame_period=frame_period)
                        code_sp = preproc.world_encode_spectral_envelop(sp, \
                                        sampling_rate, dim=num_mcep)
                        # Remember unvoiced frames so silence is restored
                        # after interpolation/smoothing.
                        z_idx = np.where(f0 < 10.0)[0]
                        f0 = scisig.medfilt(f0, kernel_size=3)
                        f0 = generate_interpolation(f0)
                        f0 = smooth(f0, window_len=13)
                        # Shape to (batch, features, time) for the model.
                        f0 = np.reshape(f0, (1, -1, 1))
                        code_sp = np.reshape(code_sp, (1, -1, num_mcep))
                        code_sp = np.transpose(code_sp, axes=(0, 2, 1))
                        f0 = np.transpose(f0, axes=(0, 2, 1))
                        # Prediction
                        _, f0_conv, code_sp_conv = model.test(input_mfc=code_sp, \
                                                              input_pitch=f0)
                        code_sp_conv = np.transpose(code_sp_conv, axes=(0, 2, 1))
                        f0_conv = np.asarray(np.reshape(f0_conv, (-1, )), np.float64)
                        code_sp_conv = np.asarray(np.squeeze(code_sp_conv), np.float64)
                        # pyworld needs C-contiguous float64 input.
                        code_sp_conv = np.copy(code_sp_conv, order='C')
                        sp_conv = preproc.world_decode_spectral_envelop(code_sp_conv, \
                                        sampling_rate)
                        f0_conv[z_idx] = 0.0
                        wav_transformed = preproc.world_speech_synthesis(f0=f0_conv, \
                                            decoded_sp=sp_conv, \
                                            ap=ap, fs=sampling_rate, \
                                            frame_period=frame_period)
                        # NOTE(review): requires librosa < 0.8
                        # (librosa.output was removed in 0.8).
                        librosa.output.write_wav(os.path.join(validation_output_dir, \
                                                              os.path.basename(file)), wav_transformed, sampling_rate)
                        logging.info("Reconstructed file " + os.path.basename(file))
                    except Exception as ex:
                        logging.info(ex)