def inference(path_model, params_path, name_task, input_file_path,
              output_filepath, dafx_wise=0, max_len_sec=50):
    """Run a trained deepAFx model on a single audio file.

    Loads the config dict saved by the training script, rebuilds the model,
    predicts frame-wise plugin parameters for the input audio, low-pass
    filters the parameter automation, renders the audio through the LV2
    plugin(s) and writes the result to ``output_filepath``.

    Args:
        path_model: path to the saved model weights.
        params_path: path to the ``.npy`` config dict from training.
        name_task: one of ``'distortion'``, ``'nonspeech'``, ``'mastering'``.
        input_file_path: audio file to process.
        output_filepath: path of the rendered ``.wav`` output.
        dafx_wise: number of plugins to use; 0 (default) means "derive from
            config": all plugins for an fx chain, otherwise 1.
        max_len_sec: unused here (a fixed 30 s cap is applied); kept for
            signature compatibility with ``evaluate``.
    """
    kTask = name_task

    # Verify the task
    assert name_task in ['distortion', 'nonspeech', 'mastering'], \
        'task should be "distortion", "nonspeech" or "mastering"'

    # Loads config dict produced by the training script
    k = np.load(params_path, allow_pickle=True).item()

    # Constants actually used for inference (training-only keys are skipped)
    kSR = k['sr']
    kNumSamples = k['num_samples']
    kBatchSize = k['batch_size']
    kHopSamples = k['hop_samples']
    kGradientMethod = k['gradient_method']
    kComputeSignalGradient = k['compute_signal_gradient']
    kMultiprocess = k['multiprocess']
    kParams = k['params']
    kPluginUri = k['plugin_uri']
    kParamMap = k['param_map']
    kOutputLength = k['output_length']
    kStereo = k['stereo']
    kSetNonTrainableParameters = k['set_nontrainable_parameters']
    kNewParameterRange = k['new_parameter_range']
    kFxChain = True
    kEncoder = k['encoder']

    # Load and resample the input file to the model sample rate.
    # BUGFIX: the resample target used to be hardcoded to 22050 while the
    # trim below used kSR; kSR is now used for both.
    # NOTE(review): confirm all shipped configs use kSR == 22050.
    data, samplerate = sf.read(input_file_path)
    xTestAudio = librosa.resample(data.T, samplerate, kSR)
    # Process at most the first 30 seconds of the file
    xTestAudio = xTestAudio[:min(kSR * 30, len(xTestAudio))]

    # Creates model
    print('Creating model...')
    model, encoder, dafx = models.deepAFx(
        kNumSamples, kSR, kHopSamples, kBatchSize, kParams, kPluginUri,
        kParamMap, kGradientMethod, kComputeSignalGradient, kMultiprocess,
        kEncoder,
        output_length=kOutputLength,
        stereo=kStereo,
        non_learnable_params_settings=kSetNonTrainableParameters,
        new_params_range=kNewParameterRange,
        fx_chain=kFxChain)

    # Loads model weights
    model.load_weights(path_model)

    # BUGFIX: dafx_wise used to be overwritten unconditionally, silently
    # ignoring the caller's argument. It is now only derived from the config
    # when left at its default (0).
    if dafx_wise == 0:
        dafx_wise = len(kPluginUri) if kFxChain else 1
    dafx.set_greedy_pretraining(dafx_wise)
    model.compile()

    print('Processing the test file...')
    xtest = xTestAudio

    # Gets parameter prediction: run the log-mel front end, then the encoder.
    layer_name = 'logMelgram'
    intermediate_layer_model = keras.Model(
        inputs=model.input, outputs=model.get_layer(layer_name).output)
    kBlockSize = 64  # Block size for inference audio plugins
    xtest_w = utils.slicing(xtest, kNumSamples, kOutputLength)
    steps = int(np.ceil(len(xtest_w) / kBatchSize))
    intermediate_output = intermediate_layer_model(xtest_w)
    parameters = encoder.predict(intermediate_output,
                                 batch_size=kBatchSize,
                                 steps=steps,
                                 verbose=1)
    xtest_w = utils.slicing(xtest, kOutputLength, kOutputLength)
    xtest_w_smooth = utils.slicing(xtest, kBlockSize, kBlockSize)

    # Creates dafx plugin(s) for inference and applies the fixed
    # (non-trainable) parameter settings from the config.
    if kFxChain:
        dafx_inference_smooth = lv2_plugin.LV2_Plugin_Chain(
            kPluginUri, kStereo, kSR, hop_samples=kBlockSize)
        dafx_inference_smooth.reset_plugin_state(kBlockSize * 100)
        for j, nontrainable_params in enumerate(kSetNonTrainableParameters):
            for i in nontrainable_params:
                dafx_inference_smooth.set_param(j, i, nontrainable_params[i])
    else:
        dafx_inference_smooth = lv2_plugin.LV2_Plugin(
            kPluginUri, kSR, hop_samples=kBlockSize)
        for i in kSetNonTrainableParameters:
            dafx_inference_smooth.set_param(i, kSetNonTrainableParameters[i])

    # Low pass filter the parameters, whether it is a fx_chain or a single
    # effect. Two stages: a mild filter on the frame-rate parameters, then a
    # very low cutoff on the sample-rate (time-expanded) automation.
    b1, a1 = scipy.signal.butter(4, 0.5, 'low')
    b2, a2 = scipy.signal.butter(4, 0.001, 'low')
    try:
        parameters_smooth_1 = []
        for i in range(np.sum(kParams)):
            parameters_smooth_1.append(
                scipy.signal.filtfilt(b1, a1, parameters[:, i]))
        parameters_smooth_1 = np.asarray(parameters_smooth_1).T
        p_smooth_1_time = np.repeat(parameters_smooth_1, kOutputLength,
                                    axis=0)[:xtest.shape[0], :]
        parameters_smooth_2 = []
        for i in range(np.sum(kParams)):
            parameters_smooth_2.append(
                scipy.signal.filtfilt(b2, a2, p_smooth_1_time[:, i]))
        p_smooth_2_time = np.asarray(parameters_smooth_2).T
        parameters_resampled = scipy.signal.resample(p_smooth_2_time,
                                                     xtest_w_smooth.shape[0])
        parameters_resampled = np.clip(parameters_resampled, 0, 1)
    except ValueError:
        # BUGFIX: was a bare `except:`. filtfilt raises ValueError when the
        # parameter sequence is shorter than its padding length (models with
        # large output frames), so only that case falls back to applying the
        # single sample-rate filter.
        p_original_time = np.repeat(parameters, kOutputLength,
                                    axis=0)[:xtest.shape[0], :]
        parameters_smooth_2 = []
        for i in range(np.sum(kParams)):
            parameters_smooth_2.append(
                scipy.signal.filtfilt(b2, a2, p_original_time[:, i]))
        p_smooth_2_time = np.asarray(parameters_smooth_2).T
        parameters_resampled = scipy.signal.resample(p_smooth_2_time,
                                                     xtest_w_smooth.shape[0])
        parameters_resampled = np.clip(parameters_resampled, 0, 1)

    # Render the audio block-by-block with the smoothed automation
    ztest_smooth = utils.processFramesDAFx(dafx_inference_smooth,
                                           kParamMap,
                                           xtest_w_smooth,
                                           parameters_resampled,
                                           new_param_range=kNewParameterRange,
                                           stereo=kStereo,
                                           greedy_pretraining=dafx_wise)
    ztest_smooth = ztest_smooth[:xtest.shape[0]]
    z_smooth = ztest_smooth.copy()
    librosa.output.write_wav(output_filepath, z_smooth, kSR, norm=False)

    # Release plugin and model resources
    dafx.reset_dafx_state(kSR * 1)
    dafx.shutdown()
    del model, encoder
def __init__(self, x_audio, y_audio, dafxs, batch_size, length_samples,
             steps_per_epoch=1, sr=22050, pad=0, crop=False, output_length=1000,
             large_frame_length_secs=20, snr_db=30, task=0, center_frames=False,
             augment=True, default=False):
    """Build a stateful audio data generator.

    Stores the x/y audio lists, pre-slices them into frames via
    ``self.load_frames()``, optionally instantiates an LV2 8-band EQ for
    augmentation, and precomputes the DAFx default parameter values
    (mapped into the encoder's range) replicated per batch item.

    NOTE(review): ``snr_db`` and ``task`` are accepted but never read in
    this constructor — presumably used elsewhere in the class; confirm.
    """
    self.batch_size = batch_size
    self.x_audio = x_audio
    self.y_audio = y_audio
    self.dafxs = dafxs
    self.sr = sr
    self.pad = pad
    self.steps_per_epoch = steps_per_epoch
    self.audio_time_len_samples = length_samples
    self.large_frame_length_secs = large_frame_length_secs
    # One index entry per (step, batch slot): each recording appears
    # steps_per_epoch * batch_size times, visited in shuffled order.
    self.indexes = np.repeat(np.arange(len(self.x_audio)),
                             steps_per_epoch * batch_size)
    self.crop = crop
    self.output_time_len_samples = output_length
    np.random.shuffle(self.indexes)
    self.center_frames = center_frames
    self.augment = augment
    # if eq augmentation, run first eq_presets.py
    if self.augment:
        self.eq_hop_samples = 2048
        self.eq = lv2_plugin.LV2_Plugin(
            'http://calf.sourceforge.net/plugins/Equalizer8Band',
            self.sr,
            hop_samples=self.eq_hop_samples)
        # Pre-generated EQ presets loaded from a hardcoded container path.
        self.eq_presets = np.load(
            '/home/code-base/runtime/deepafx/data/EQ_PRESETS.pkl.npy',
            allow_pickle=True)
        self.eq_stereo = True
    self.frame_idx = 0
    # Slice every recording into fixed-length frames up front; the
    # generator then iterates these statefully via frame_idx.
    self.x_frames, self.y_frames = self.load_frames()
    self.frame_total = self.x_frames.shape[1]
    self.default = default
    dafx = self.dafxs
    # NOTE: from here on, the local name `default` is rebound to a list of
    # per-plugin default parameter vectors; it no longer refers to the
    # boolean `default` argument (which was saved as self.default above).
    default = []
    if dafx.fx_chain:
        # One default-value vector per plugin in the chain, each mapped
        # from the plugin's native [min, max] range to encoder range.
        for i in range(len(dafx.mb.plugin_uri)):
            d_ = utils.getParamInEncoderRange(
                np.asarray(dafx.mb.default_values[i]),
                np.asarray(dafx.mb.param_min[i]),
                np.asarray(dafx.mb.param_max[i]))
            default.append(d_)
        # Flatten the per-plugin vectors into one parameter vector.
        self.default_values = [
            item for sublist in default for item in sublist
        ]
    else:
        d_ = utils.getParamInEncoderRange(
            np.asarray(dafx.mb.default_values),
            np.asarray(dafx.mb.param_min),
            np.asarray(dafx.mb.param_max))
        default.append(d_)
        self.default_values = default
    # Final shape (batch_size, num_params): defaults replicated per batch item.
    self.default_values = np.asarray(self.default_values)
    self.default_values = np.expand_dims(self.default_values, axis=0)
    self.default_values = np.repeat(self.default_values, self.batch_size, axis=0)
def getLatencyPlugin(plugin_uri, sr, stereo=False):
    """Return the latency reported by an LV2 plugin.

    Instantiates the plugin (non-verbose) at the given sample rate and
    queries its latency, optionally in stereo configuration.
    """
    plugin = lv2_plugin.LV2_Plugin(plugin_uri, sr, verbose=False)
    return plugin.get_latency_plugin(stereo=stereo)
sr, hop_samples=hop_samples, k_pipe=k_pipe, verbose=False) tic = time.time() output_batch = multi_lv2.run_batch(batch_audio) par_time = time.time() - tic print('Par Time elapsed:', par_time) multi_lv2.shutdown() del multi_lv2 # trigger the thread join if True: print('RUNNING SEQUENCE') # Create a plugin for each signal plugins = [] for item in items: lv2_dafx = lv2_plugin.LV2_Plugin(plugin_uri, sr, hop_samples=hop_samples, verbose=False) plugins.append(lv2_dafx) tic = time.time() for signal, lv2_dafx in zip(items, plugins): lv2_dafx.runs(signal.transpose()) seq_time = time.time() - tic print('Seq Time elapsed:', seq_time) print('Delta Improvement', seq_time / par_time)
def __init__(self, multiprocess, num_processes, plugin_uri, sr, param_map,
             epsilon, use_fd, compute_x_grad, stereo, hop_samples=64,
             new_params_range=None, non_learnable_params_settings=None,
             fx_chain=False, greedy_pretraining=0, verbose=False):
    """Set up per-process LV2 plugin instances for gradient computation.

    Creates, for each worker process, a main plugin (or plugin chain) plus
    two copies used for finite-difference (+epsilon / -epsilon) gradient
    estimation, applies the fixed non-trainable parameter settings, warms
    each instance up with a short noise burst, collects each parameter's
    [min, max] range and default value, and finally applies any requested
    parameter-range overrides.

    Args:
        multiprocess: if falsy, everything runs in a single process.
        num_processes: number of worker processes (forced to 1 when
            multiprocess is falsy).
        plugin_uri: LV2 plugin URI, or a list of URIs when fx_chain=True.
        sr: sample rate.
        param_map: mapping of encoder outputs to plugin parameter ids
            (one mapping per plugin when fx_chain=True).
        epsilon: finite-difference step size.
        use_fd: whether to use finite differences.
        compute_x_grad: whether to compute the gradient w.r.t. the signal.
        stereo: whether the plugins process stereo audio.
        hop_samples: plugin processing block size.
        new_params_range: optional parameter-range overrides
            (per-plugin list when fx_chain=True).
        non_learnable_params_settings: fixed parameter settings.
            BUGFIX: default was a shared mutable dict ({}); now None.
        fx_chain: whether plugin_uri describes a chain of plugins.
        greedy_pretraining: number of chain plugins active during warm-up.
        verbose: verbosity flag.
    """
    if non_learnable_params_settings is None:
        non_learnable_params_settings = {}
    self.multiprocess = multiprocess
    if not multiprocess:
        num_processes = 1
    self.plugin_uri = plugin_uri
    self.sr = sr
    self.hop_samples = hop_samples
    self.verbose = verbose
    self.num_processes = num_processes
    self.param_map = param_map
    self.epsilon = epsilon
    self.stereo = stereo
    self.fx_chain = fx_chain
    self.greedy_pretraining = greedy_pretraining
    self.non_learnable_params_settings = non_learnable_params_settings
    self.use_fd = use_fd
    self.compute_x_grad = compute_x_grad
    self.procs = {}
    # One plugin instance (plus +eps / -eps copies for finite differences)
    # per worker process. (Removed a duplicated `self.plugins = {}`.)
    self.plugins = {}
    self.plugins_e_plus = {}
    self.plugins_e_minus = {}
    for i in range(self.num_processes):
        if self.fx_chain:
            self.plugins[i] = lv2_plugin.LV2_Plugin_Chain(
                self.plugin_uri, self.stereo, self.sr,
                hop_samples=self.hop_samples)
            self.plugins_e_plus[i] = lv2_plugin.LV2_Plugin_Chain(
                self.plugin_uri, self.stereo, self.sr,
                hop_samples=self.hop_samples)
            self.plugins_e_minus[i] = lv2_plugin.LV2_Plugin_Chain(
                self.plugin_uri, self.stereo, self.sr,
                hop_samples=self.hop_samples)
            # Apply fixed (non-trainable) settings to all three copies
            if self.non_learnable_params_settings:
                for j, settings_plugin in enumerate(
                        self.non_learnable_params_settings):
                    for p in settings_plugin:
                        self.plugins[i].set_param(j, p, settings_plugin[p])
                        self.plugins_e_plus[i].set_param(
                            j, p, settings_plugin[p])
                        self.plugins_e_minus[i].set_param(
                            j, p, settings_plugin[p])
            # Warm each instance up with a short noise burst.
            # NOTE(review): the chain branch uses 0.01 amplitude while the
            # single-plugin branch uses 0.1 — confirm this is intentional.
            noise = 0.01 * np.random.normal(0, 1, size=self.hop_samples * 10)
            out = self.plugins[i].runs(
                np.expand_dims(noise, 0),
                greedy_pretraining=self.greedy_pretraining)
            out = self.plugins_e_plus[i].runs(
                np.expand_dims(noise, 0),
                greedy_pretraining=self.greedy_pretraining)
            out = self.plugins_e_minus[i].runs(
                np.expand_dims(noise, 0),
                greedy_pretraining=self.greedy_pretraining)
        else:
            self.plugins[i] = lv2_plugin.LV2_Plugin(
                self.plugin_uri, self.sr, hop_samples=self.hop_samples)
            self.plugins_e_plus[i] = lv2_plugin.LV2_Plugin(
                self.plugin_uri, self.sr, hop_samples=self.hop_samples)
            self.plugins_e_minus[i] = lv2_plugin.LV2_Plugin(
                self.plugin_uri, self.sr, hop_samples=self.hop_samples)
            noise = 0.1 * np.random.normal(0, 1, size=self.hop_samples * 10)
            if self.stereo:
                out = self.plugins[i].runs_stereo(np.expand_dims(noise, 0))
                out = self.plugins_e_plus[i].runs_stereo(
                    np.expand_dims(noise, 0))
                out = self.plugins_e_minus[i].runs_stereo(
                    np.expand_dims(noise, 0))
            else:
                out = self.plugins[i].runs(np.expand_dims(noise, 0))
                out = self.plugins_e_plus[i].runs(np.expand_dims(noise, 0))
                out = self.plugins_e_minus[i].runs(np.expand_dims(noise, 0))
    # Collect default values and [min, max] ranges for every mapped parameter
    self.param_min = []
    self.param_max = []
    self.default_values = []
    if self.fx_chain is False:
        self.param_range = {}
        for k in self.param_map:
            d, param_min_, param_max_ = self.get_param_range(
                self.param_map[k])
            self.default_values.append(float(str(d)))
            self.param_range[k] = [float(str(param_min_)),
                                   float(str(param_max_))]
    elif self.fx_chain:
        self.param_range = []
        for i, param_map_plugin in enumerate(self.param_map):
            param_range_dict = {}
            default_values = []
            for k in param_map_plugin:
                d, param_min_, param_max_ = self.get_param_range(
                    param_map_plugin[k], plugin_id=i)
                default_values.append(float(str(d)))
                param_range_dict[k] = [float(str(param_min_)),
                                       float(str(param_max_))]
            self.default_values.append(default_values)
            self.param_range.append(param_range_dict)
    self.update_param_min_max()
    # Apply optional parameter-range overrides.
    # Consistency fix: self.new_params_range is now set on both branches.
    self.new_params_range = new_params_range
    if self.fx_chain is False:
        if self.new_params_range:
            self.modify_parameter_range(self.new_params_range)
    elif self.fx_chain:
        # BUGFIX: guard against new_params_range=None (the default), which
        # previously raised TypeError in enumerate() when fx_chain=True.
        for i, new_params_range_plugin in enumerate(new_params_range or []):
            if new_params_range_plugin:
                self.modify_parameter_range(new_params_range_plugin,
                                            plugin_id=i)
def evaluate(path_model, params_path, name_task, output_dir, dafx_wise=0,
             max_len_sec=50):
    """Evaluate a model, given a params file, the task, and output directory.

    Loads the task's test partition (using the same random_state seeds as
    the training script where a split is made), computes the test loss,
    renders each test sample through the trained plugin chain with low-pass
    filtered parameter automation, writes input/target/output wav files and
    parameter automations to ``output_dir``, and saves an MFCC-cosine
    metric summary as JSON.

    Args:
        path_model: path to the saved model weights.
        params_path: path to the ``.npy`` config dict from training.
        name_task: one of ``'distortion'``, ``'nonspeech'``, ``'mastering'``.
        output_dir: directory where audio, automations and metrics go.
        dafx_wise: number of plugins to use; 0 (default) means "derive from
            config": all plugins for an fx chain, otherwise 1.
        max_len_sec: maximum number of seconds processed per test sample.
    """
    model_name = os.path.basename(path_model)
    kTask = name_task

    # Verify the task
    assert name_task in ['distortion', 'nonspeech', 'mastering'], \
        'task should be "distortion", "nonspeech" or "mastering"'

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
        print("creating folder : ", output_dir)

    # Loads config dict produced by the training script
    k = np.load(params_path, allow_pickle=True).item()

    # Constants used for data loading, model creation and rendering
    # (training-only keys such as epochs/patience are skipped)
    kPathAudio = k['path_audio']
    if kTask != 'mastering':
        kXRecording = k['x_recording']
        kYRecording = k['y_recording']
    kSR = k['sr']
    kNumSamples = k['num_samples']
    kBatchSize = k['batch_size']
    kStepsPerEpoch = k['steps_per_epoch']
    kHopSamples = k['hop_samples']
    kGradientMethod = k['gradient_method']
    kComputeSignalGradient = k['compute_signal_gradient']
    kMultiprocess = k['multiprocess']
    kParams = k['params']
    kPluginUri = k['plugin_uri']
    kParamMap = k['param_map']
    kOutputLength = k['output_length']
    kStereo = k['stereo']
    kSetNonTrainableParameters = k['set_nontrainable_parameters']
    kNewParameterRange = k['new_parameter_range']
    kFxChain = True
    kEncoder = k['encoder']

    # Load test dataset or makes partition; for split-based tasks the
    # random_state seed must match the training script.
    if kTask == 'nonspeech':
        print('Loading test partition...')
        # Speakers f9 and m9 are held out as the test set
        xPathFiles = utils.getFilesPath(os.path.join(kPathAudio, kXRecording),
                                        '*.wav')
        xTest = []
        for path in xPathFiles:
            if 'f9' in path or 'm9' in path:
                xTest.append(path)
        xTestAudio = []
        for path in xTest:
            audio, _ = sf.read(path)
            xTestAudio.append(audio)
        yPathFiles = utils.getFilesPath(os.path.join(kPathAudio, kYRecording),
                                        '*.wav')
        yTest = []
        for path in yPathFiles:
            if 'f9' in path or 'm9' in path:
                yTest.append(path)
        yTestAudio = []
        for path in yTest:
            audio, _ = sf.read(path)
            yTestAudio.append(audio)
        xTestAudio = utils.highpassFiltering(xTestAudio, 100, kSR)
        yTestAudio = utils.highpassFiltering(yTestAudio, 100, kSR)
    elif kTask == 'distortion':
        xPathFiles = utils.getFilesPath(os.path.join(kPathAudio, kXRecording),
                                        '*.wav')
        yPathFiles = utils.getFilesPath(os.path.join(kPathAudio, kYRecording),
                                        '*.wav')
        # 80/10/10 split; seeds must match the training script
        xTrain, xTest, yTrain, yTest = sklearn.model_selection.train_test_split(
            xPathFiles, yPathFiles, test_size=0.10, random_state=0)
        xTrain, xValid, yTrain, yValid = sklearn.model_selection.train_test_split(
            xTrain, yTrain, test_size=0.11111111111, random_state=0)
        xTestAudio = []
        for path in xTest:
            audio, _ = sf.read(path)
            xTestAudio.append(audio)
        yTestAudio = []
        for path in yTest:
            audio, _ = sf.read(path)
            yTestAudio.append(audio)
    elif kTask == 'mastering':
        kPathFiles = utils.getFilesPath(kPathAudio, '*.wav')
        xPathFiles = []
        yPathFiles = []
        # Recordings are paired by name: '...a' = input, '...b' = target.
        # BUGFIX: was `recording[-1] is 'a'` — identity comparison with a
        # string literal (works only by CPython interning accident).
        for path in kPathFiles:
            name = path.split('/')[-1]
            recording = name.split('-')[0]
            if recording[-1] == 'a':
                xPathFiles.append(path)
            elif recording[-1] == 'b':
                yPathFiles.append(path)
        xPathFiles.sort()
        yPathFiles.sort()
        xTrain, xTest, yTrain, yTest = sklearn.model_selection.train_test_split(
            xPathFiles, yPathFiles, test_size=0.10, random_state=0)
        xTrain, xValid, yTrain, yValid = sklearn.model_selection.train_test_split(
            xTrain, yTrain, test_size=0.11111111111, random_state=0)
        xTestAudio = []
        for path in xTest:
            audio, _ = sf.read(path)
            audio = librosa.core.to_mono(audio.T)
            audio = utils.lufs_normalize(audio, kSR, -25.0)
            xTestAudio.append(audio)
        yTestAudio = []
        for path in yTest:
            audio, _ = sf.read(path)
            audio = librosa.core.to_mono(audio.T)
            yTestAudio.append(audio)

    # Creates model
    print('Creating model...')
    model, encoder, dafx = models.deepAFx(
        kNumSamples, kSR, kHopSamples, kBatchSize, kParams, kPluginUri,
        kParamMap, kGradientMethod, kComputeSignalGradient, kMultiprocess,
        kEncoder,
        output_length=kOutputLength,
        stereo=kStereo,
        non_learnable_params_settings=kSetNonTrainableParameters,
        new_params_range=kNewParameterRange,
        fx_chain=kFxChain)

    # Creates generators
    if kTask == 'nonspeech':
        genTest = generators.Data_Generator_Stateful_Nonspeech(
            xTestAudio, yTestAudio, dafx, kBatchSize, kNumSamples,
            steps_per_epoch=kStepsPerEpoch, sr=kSR, pad=0, crop=True,
            output_length=kOutputLength, large_frame_length_secs=10,
            augment=True)
    elif kTask == 'distortion':
        genTest = generators.Data_Generator_Stateful_Distortion(
            xTestAudio, yTestAudio, dafx, kBatchSize, kNumSamples,
            steps_per_epoch=kStepsPerEpoch, sr=kSR, pad=0, crop=True,
            output_length=kOutputLength, center_frames=True)
    elif kTask == 'mastering':
        genTest = generators.Data_Generator_Stateful_Mastering(
            xTestAudio, yTestAudio, dafx, kBatchSize, kNumSamples,
            steps_per_epoch=kStepsPerEpoch, sr=kSR, pad=0, crop=True,
            output_length=kOutputLength, large_frame_length_secs=10,
            augment=False)

    # Loss_function
    spectral_loss = losses.multiScaleSpectralLoss(loss_type='L2',
                                                  mag_weight=1.,
                                                  logmag_weight=1.,
                                                  time_loss=True,
                                                  time_loss_type='L1',
                                                  time_loss_weight=10.0,
                                                  fft_sizes=(1024, ),
                                                  overlap=0.0,
                                                  time_shifting=True,
                                                  batch_size=kBatchSize)

    # Loads model weights
    model.load_weights(path_model)

    # BUGFIX: dafx_wise used to be overwritten unconditionally, silently
    # ignoring the caller's argument. It is now only derived from the config
    # when left at its default (0).
    if dafx_wise == 0:
        dafx_wise = len(kPluginUri) if kFxChain else 1
    dafx.set_greedy_pretraining(dafx_wise)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                  loss=spectral_loss,
                  metrics=['mae'])

    # Computes test loss
    print('Computing test loss...')
    metrics = {}
    K.set_learning_phase(0)
    score = model.evaluate(genTest,
                           steps=int(kStepsPerEpoch * 0.1),
                           batch_size=kBatchSize,
                           verbose=1,
                           return_dict=True)
    metrics['test_losses'] = score

    # Computes objective metric and saves audio files and parameter
    # automations. It will process first max_len_sec seconds of each test
    # sample or the full sample for the distortion task.
    secs = [0, max_len_sec]
    mfcc_cosine = []
    print('Processing the test dataset...')

    # Loop-invariant setup, hoisted out of the per-track loop:
    # log-mel front end sub-model, plugin block size and filter coefficients.
    layer_name = 'logMelgram'
    intermediate_layer_model = keras.Model(
        inputs=model.input, outputs=model.get_layer(layer_name).output)
    kBlockSize = 64  # Block size for inference audio plugins
    b1, a1 = scipy.signal.butter(4, 0.5, 'low')
    b2, a2 = scipy.signal.butter(4, 0.001, 'low')

    for idx_track in range(len(xTestAudio)):
        xtest = xTestAudio[idx_track][secs[0] * kSR:secs[1] * kSR]
        ytest = yTestAudio[idx_track][secs[0] * kSR:secs[1] * kSR]

        # Gets parameter prediction
        xtest_w = utils.slicing(xtest, kNumSamples, kOutputLength)
        steps = int(np.ceil(len(xtest_w) / kBatchSize))
        intermediate_output = intermediate_layer_model(xtest_w)
        parameters = encoder.predict(intermediate_output,
                                     batch_size=kBatchSize,
                                     steps=steps,
                                     verbose=1)
        xtest_w = utils.slicing(xtest, kOutputLength, kOutputLength)
        xtest_w_smooth = utils.slicing(xtest, kBlockSize, kBlockSize)

        # Creates dafx plugins for inference (fresh state per track).
        # dafx_inference_smooth runs smoothed parameters.
        if kFxChain:
            dafx_inference = lv2_plugin.LV2_Plugin_Chain(
                kPluginUri, kStereo, kSR, hop_samples=kOutputLength)
            dafx_inference_smooth = lv2_plugin.LV2_Plugin_Chain(
                kPluginUri, kStereo, kSR, hop_samples=kBlockSize)
            dafx_inference.reset_plugin_state(kOutputLength * 100)
            dafx_inference_smooth.reset_plugin_state(kBlockSize * 100)
            for j, nontrainable_params in enumerate(
                    kSetNonTrainableParameters):
                for i in nontrainable_params:
                    dafx_inference.set_param(j, i, nontrainable_params[i])
                    dafx_inference_smooth.set_param(
                        j, i, nontrainable_params[i])
        else:
            dafx_inference = lv2_plugin.LV2_Plugin(
                kPluginUri, kSR, hop_samples=kOutputLength)
            dafx_inference_smooth = lv2_plugin.LV2_Plugin(
                kPluginUri, kSR, hop_samples=kBlockSize)
            for i in kSetNonTrainableParameters:
                dafx_inference.set_param(i, kSetNonTrainableParameters[i])
                dafx_inference_smooth.set_param(i,
                                                kSetNonTrainableParameters[i])

        # Low pass filter the parameters, whether it is a fx_chain or a
        # single effect (two stages: frame rate, then time-expanded rate).
        try:
            parameters_smooth_1 = []
            for i in range(np.sum(kParams)):
                parameters_smooth_1.append(
                    scipy.signal.filtfilt(b1, a1, parameters[:, i]))
            parameters_smooth_1 = np.asarray(parameters_smooth_1).T
            p_smooth_1_time = np.repeat(parameters_smooth_1, kOutputLength,
                                        axis=0)[:xtest.shape[0], :]
            parameters_smooth_2 = []
            for i in range(np.sum(kParams)):
                parameters_smooth_2.append(
                    scipy.signal.filtfilt(b2, a2, p_smooth_1_time[:, i]))
            p_smooth_2_time = np.asarray(parameters_smooth_2).T
            parameters_resampled = scipy.signal.resample(
                p_smooth_2_time, xtest_w_smooth.shape[0])
            parameters_resampled = np.clip(parameters_resampled, 0, 1)
        except ValueError:
            # BUGFIX: was a bare `except:`. filtfilt raises ValueError when
            # the parameter sequence is shorter than its padding length
            # (models with large output frames); only that case falls back
            # to applying a single filter.
            p_original_time = np.repeat(parameters, kOutputLength,
                                        axis=0)[:xtest.shape[0], :]
            parameters_smooth_2 = []
            for i in range(np.sum(kParams)):
                parameters_smooth_2.append(
                    scipy.signal.filtfilt(b2, a2, p_original_time[:, i]))
            p_smooth_2_time = np.asarray(parameters_smooth_2).T
            parameters_resampled = scipy.signal.resample(
                p_smooth_2_time, xtest_w_smooth.shape[0])
            parameters_resampled = np.clip(parameters_resampled, 0, 1)

        # Processes frames using raw and smoothed parameters.
        # The raw-parameter render is kept only for the (commented-out)
        # non-smooth output export below.
        ztest = utils.processFramesDAFx(dafx_inference,
                                        kParamMap,
                                        xtest_w,
                                        parameters,
                                        new_param_range=kNewParameterRange,
                                        stereo=kStereo,
                                        greedy_pretraining=dafx_wise)
        ztest = ztest[:xtest.shape[0]]
        ztest_smooth = utils.processFramesDAFx(
            dafx_inference_smooth, kParamMap, xtest_w_smooth,
            parameters_resampled,
            new_param_range=kNewParameterRange,
            stereo=kStereo,
            greedy_pretraining=dafx_wise)
        ztest_smooth = ztest_smooth[:xtest.shape[0]]

        # Saves audio files and parameter automation
        x = xtest.copy()
        y = ytest.copy()
        z = ztest.copy()
        z_smooth = ztest_smooth.copy()
        librosa.output.write_wav(
            os.path.join(output_dir, f'{idx_track}_input.wav'),
            x, kSR, norm=False)
        librosa.output.write_wav(
            os.path.join(output_dir, f'{idx_track}_target.wav'),
            y, kSR, norm=False)
        librosa.output.write_wav(
            os.path.join(output_dir, f'{idx_track}_output.wav'),
            z_smooth, kSR, norm=False)
        np.save(os.path.join(output_dir, f'{idx_track}_parameters'),
                parameters_resampled)
        # Uncomment to save audio output and parameters without smoothing:
        # librosa.output.write_wav(kPathModels+'results/'+kFullModel+f'_{idx_track}_output_nonsmooth.wav',
        #                          z, kSR, norm=False)
        # np.save(kPathModels+'results/'+kFullModel+f'_{idx_track}_parameters', parameters)

        # Objective metric: cosine distance between MFCCs of target/output
        d = utils.getMSE_MFCC(y, z_smooth, kSR, mean_norm=False)
        mfcc_cosine.append(d['cosine'])
        dafx.reset_dafx_state(kSR * 1)

    metrics['mfcc_cosine'] = str(round(np.mean(mfcc_cosine), 5))
    print(metrics)
    print('audio samples saved at ' + output_dir)
    with open(os.path.join(output_dir, model_name + '_test_losses.json'),
              'w') as outfile:
        json.dump(metrics, outfile)
    dafx.shutdown()
    del model, encoder, genTest