def evaluate_mia(ref, est, track_name, source_names, eval_silence, conf):
    references = ref.copy()
    estimates = est.copy()

    # If evaluating silence, skip examples with a silent source.
    skip = False
    silence_frames = pd.DataFrame({
        'target': [],
        'PES': [],
        'EPS': [],
        'track': []
    })

    if eval_silence:
        PES, EPS, _, __ = eval_silent_frames(
            true_source=references,
            predicted_source=estimates,
            window_size=int(conf['win'] * conf['sample_rate']),
            hop_size=int(conf['hop'] * conf['sample_rate']))

        for i, target in enumerate(source_names):
            reference_energy = np.sum(references[i, :, :] ** 2)
            # estimate_energy = np.sum(estimates[i, :, :] ** 2)
            if reference_energy == 0:  # or estimate_energy == 0:
                skip = True
                sdr = isr = sir = sar = (np.ones((1,)) * (-np.inf),
                                         np.ones((1,)) * (-np.inf))
                print("skip {}, {} source is all zero".format(track_name, target))

        print("mean over evaluation frames, mean over channels")
        for i, target in enumerate(source_names):
            # DataFrame.append was removed in pandas 2.0; build the row and concatenate.
            silence_frames = pd.concat(
                [silence_frames,
                 pd.DataFrame([{
                     'target': target,
                     'PES': PES[i],
                     'EPS': EPS[i],
                     'track': track_name
                 }])],
                ignore_index=True)
            print(target + ' ==>',
                  silence_frames.loc[silence_frames['target'] == target].mean(
                      axis=0, skipna=True, numeric_only=True))

    # Compute the metrics for the track using the configured window and hop size.
    if not skip:
        sdr, isr, sir, sar = museval.evaluate(
            references,
            estimates,
            win=int(conf['win'] * conf['sample_rate']),
            hop=int(conf['hop'] * conf['sample_rate']))

    # Save the results for the track.
    track_store = museval.TrackStore(win=conf['win'],
                                     hop=conf['hop'],
                                     track_name=track_name)
    for index, target in enumerate(source_names):
        values = {
            "SDR": sdr[index].tolist(),
            "SIR": sir[index].tolist(),
            "ISR": isr[index].tolist(),
            "SAR": sar[index].tolist()
        }
        track_store.add_target(target_name=target, values=values)
    track_store.validate()
    return track_store, silence_frames
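
# A minimal usage sketch for evaluate_mia above (not part of the original code).
# The array shapes and the `conf` keys are assumptions inferred from the indexing
# references[i, :, :] and from conf['win'] / conf['hop'] / conf['sample_rate'];
# eval_silence is disabled so the sketch does not depend on eval_silent_frames().
def _example_evaluate_mia():
    import numpy as np  # also required at module level by evaluate_mia

    rng = np.random.default_rng(0)
    source_names = ["vocals", "accompaniment"]  # hypothetical target names
    conf = {"win": 1.0, "hop": 1.0, "sample_rate": 8000}  # small rate keeps it fast
    # references/estimates have shape (nb_sources, nb_timesteps, nb_channels)
    ref = rng.standard_normal((len(source_names), 4 * conf["sample_rate"], 2))
    est = ref + 0.1 * rng.standard_normal(ref.shape)  # noisy "estimates"
    track_store, silence_frames = evaluate_mia(ref,
                                               est,
                                               track_name="demo_track",
                                               source_names=source_names,
                                               eval_silence=False,
                                               conf=conf)
    print(track_store)  # inspect the stored scores
    print(silence_frames)  # empty here because eval_silence=False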
def evaluate(model,
             musdb_path,
             eval_folder,
             workers=2,
             device="cpu",
             rank=0,
             save=False,
             shifts=0,
             split=False,
             overlap=0.25,
             is_wav=False,
             world_size=1):
    """
    Evaluate model using museval.
    Run the model on a single GPU, the bottleneck being the call to museval.
    """
    output_dir = eval_folder / "results"
    output_dir.mkdir(exist_ok=True, parents=True)
    json_folder = eval_folder / "results/test"
    json_folder.mkdir(exist_ok=True, parents=True)

    # We load tracks from the original musdb test set.
    test_set = musdb.DB(musdb_path, subsets=["test"], is_wav=is_wav)
    src_rate = 44100  # hardcoded for now...

    for p in model.parameters():
        p.requires_grad = False
        p.grad = None

    pendings = []
    with futures.ProcessPoolExecutor(workers or 1) as pool:
        for index in tqdm.tqdm(range(rank, len(test_set), world_size), file=sys.stdout):
            track = test_set.tracks[index]
            out = json_folder / f"{track.name}.json.gz"
            if out.exists():
                continue

            mix = th.from_numpy(track.audio).t().float()
            ref = mix.mean(dim=0)  # mono mixture
            mix = (mix - ref.mean()) / ref.std()
            mix = convert_audio(mix, src_rate, model.samplerate, model.audio_channels)
            estimates = apply_model(model, mix.to(device),
                                    shifts=shifts, split=split, overlap=overlap)
            estimates = estimates * ref.std() + ref.mean()
            estimates = estimates.transpose(1, 2)

            references = th.stack([
                th.from_numpy(track.targets[name].audio).t()
                for name in model.sources
            ])
            references = convert_audio(references, src_rate,
                                       model.samplerate, model.audio_channels)
            references = references.transpose(1, 2).numpy()
            estimates = estimates.cpu().numpy()

            win = int(1. * model.samplerate)
            hop = int(1. * model.samplerate)

            if save:
                folder = eval_folder / "wav/test" / track.name
                folder.mkdir(exist_ok=True, parents=True)
                for name, estimate in zip(model.sources, estimates):
                    wavfile.write(str(folder / (name + ".wav")), 44100, estimate)

            if workers:
                pendings.append((track.name,
                                 pool.submit(museval.evaluate, references, estimates,
                                             win=win, hop=hop)))
            else:
                pendings.append((track.name,
                                 museval.evaluate(references, estimates, win=win, hop=hop)))
            del references, mix, estimates, track

        for track_name, pending in tqdm.tqdm(pendings, file=sys.stdout):
            print(track_name)
            if workers:
                pending = pending.result()
                print('pending')
            sdr, isr, sir, sar = pending
            print('track_store')
            track_store = museval.TrackStore(win=44100, hop=44100, track_name=track_name)
            for idx, target in enumerate(model.sources):
                print(target)
                values = {
                    "SDR": sdr[idx].tolist(),
                    "SIR": sir[idx].tolist(),
                    "ISR": isr[idx].tolist(),
                    "SAR": sar[idx].tolist()
                }
                track_store.add_target(target_name=target, values=values)
            json_path = json_folder / f"{track_name}.json.gz"
            with gzip.open(json_path, "w") as f:
                f.write(track_store.json.encode('utf-8'))

    if world_size > 1:
        distributed.barrier()
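
# A minimal sketch (not part of the original module) of one way to summarise the
# per-track <track>.json.gz files written by evaluate() above. It assumes the
# standard museval TrackStore JSON layout (a "targets" list whose entries carry
# per-frame "metrics"); the function name and the aggregation choice (median SDR
# over frames) are illustrative only.
def summarize_eval_jsons(json_folder):
    """Return one row per (track, target) with the median SDR over evaluation frames."""
    import glob
    import gzip
    import json
    import os

    import numpy as np
    import pandas as pd

    rows = []
    for path in sorted(glob.glob(os.path.join(str(json_folder), "*.json.gz"))):
        with gzip.open(path, "rt", encoding="utf-8") as f:
            data = json.load(f)
        track = os.path.basename(path)[:-len(".json.gz")]
        for target in data["targets"]:
            # Drop frames whose score is missing or non-finite (e.g. silent frames).
            sdrs = [frame["metrics"]["SDR"] for frame in target["frames"]]
            sdrs = [s for s in sdrs if s is not None and np.isfinite(s)]
            rows.append({
                "track": track,
                "target": target["name"],
                "median_SDR": float(np.median(sdrs)) if sdrs else float("nan"),
            })
    return pd.DataFrame(rows)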
def evaluate(references,
             estimates,
             output_dir,
             track_name,
             sample_rate,
             win=1.0,
             hop=1.0,
             mode='v4'):
    """
    Compute the BSS_eval metrics as well as PES and EPS. It follows the
    design concept of museval.eval_mus_track.

    :param references: dict of reference sources {target_name: signal},
        signal has shape (nb_timesteps, nb_channels)
    :param estimates: dict of user estimates {target_name: signal},
        signal has shape (nb_timesteps, nb_channels)
    :param output_dir: path to the output directory used to save evaluation results
    :param track_name: name assigned to the TrackStore object for the evaluated track
    :param sample_rate: sample rate of the test tracks (should be the same rate
        the model has been trained on)
    :param win: evaluation window length in seconds, default 1
    :param hop: evaluation window hop length in seconds, default 1
    :param mode: BSSEval version, defaults to `v4`

    :return:
        bss_eval_data: museval.TrackStore object containing bss_eval evaluation scores
        silent_frames_data: pandas DataFrame containing EPS and PES scores
    """
    eval_targets = list(estimates.keys())

    estimates_list = []
    references_list = []
    for target in eval_targets:
        estimates_list.append(estimates[target])
        references_list.append(references[target])

    # Evaluate bss_eval and the EPS/PES metrics; save bss_eval scores in a TrackStore object.
    bss_eval_data = museval.TrackStore(win=win, hop=hop, track_name=track_name)

    # Skip examples with a silent source because BSSEval metrics are not defined in this case.
    skip = False
    for target in eval_targets:
        reference_energy = np.sum(references[target] ** 2)
        estimate_energy = np.sum(estimates[target] ** 2)
        if reference_energy == 0 or estimate_energy == 0:
            skip = True
            SDR = ISR = SIR = SAR = (np.ones((1,)) * (-np.inf),
                                     np.ones((1,)) * (-np.inf))
            print("skip {}, {} source is all zero".format(track_name, target))

    if not skip:
        SDR, ISR, SIR, SAR = museval.evaluate(references_list,
                                              estimates_list,
                                              win=int(win * sample_rate),
                                              hop=int(hop * sample_rate),
                                              mode=mode,
                                              padding=True)

    # Add the evaluation of EPS and PES.
    PES, EPS, _, __ = silent_frames_evaluation.eval_silent_frames(
        true_source=np.array(references_list),
        predicted_source=np.array(estimates_list),
        window_size=int(win * sample_rate),
        hop_size=int(hop * sample_rate))

    # Iterate over all targets.
    for i, target in enumerate(eval_targets):
        values = {
            "SDR": SDR[i].tolist(),
            "SIR": SIR[i].tolist(),
            "ISR": ISR[i].tolist(),
            "SAR": SAR[i].tolist(),
        }
        bss_eval_data.add_target(target_name=target, values=values)

    silent_frames_data = pd.DataFrame({
        'target': [],
        'PES': [],
        'EPS': [],
        'track': []
    })
    for i, target in enumerate(eval_targets):
        silent_frames_data = pd.concat(
            [silent_frames_data,
             pd.DataFrame([{
                 'target': target,
                 'PES': PES[i],
                 'EPS': EPS[i],
                 'track': track_name
             }])],
            ignore_index=True)

    # Save evaluation results if an output directory is given.
    if output_dir:
        # Validate against the schema.
        bss_eval_data.validate()
        try:
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
            with open(
                    os.path.join(output_dir, track_name.replace('/', '_')) + '.json',
                    'w+') as f:
                f.write(bss_eval_data.json)
        except IOError:
            pass

    return bss_eval_data, silent_frames_data
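
# A minimal usage sketch for the dict-based evaluate() above (not part of the
# original code). It assumes the project's silent_frames_evaluation module is
# importable, since evaluate() always calls eval_silent_frames(); the target
# names and the (nb_timesteps, nb_channels) shapes are illustrative only.
def _example_evaluate():
    import numpy as np  # also required at module level by evaluate()

    rng = np.random.default_rng(0)
    sample_rate = 8000  # small rate so the sketch runs quickly
    nb_timesteps = 4 * sample_rate
    references = {name: rng.standard_normal((nb_timesteps, 2))
                  for name in ("vocals", "accompaniment")}
    estimates = {name: sig + 0.1 * rng.standard_normal(sig.shape)
                 for name, sig in references.items()}
    # output_dir=None skips writing the per-track JSON file.
    bss_eval_data, silent_frames_data = evaluate(references,
                                                 estimates,
                                                 output_dir=None,
                                                 track_name="demo_track",
                                                 sample_rate=sample_rate)
    print(bss_eval_data)
    print(silent_frames_data)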