def evaluate(args):
    """Evaluate a trained Conv-TasNet model on a dataset.

    Loads the model from ``args.model_path``, runs it over
    ``args.data_dir`` without gradient tracking, and prints the per-utterance
    and average SI-SNR improvement (and SDR improvement when ``args.cal_sdr``
    is set).

    Args:
        args: namespace with model_path, data_dir, batch_size, sample_rate,
            use_cuda and cal_sdr attributes.
    """
    total_SISNRi = 0
    total_SDRi = 0
    total_cnt = 0
    # Load model
    model = ConvTasNet.load_model(args.model_path)
    print(model)
    model.eval()
    # Bug fix: honor the use_cuda flag again instead of the hard-coded
    # `if True:` that forced CUDA even on CPU-only machines.
    if args.use_cuda:
        model.cuda()
    # Load data; segment=-1 presumably disables segmentation so each
    # utterance is evaluated whole — TODO confirm against AudioDataset.
    dataset = AudioDataset(args.data_dir, args.batch_size,
                           sample_rate=args.sample_rate, segment=-1)
    data_loader = AudioDataLoader(dataset, batch_size=1, num_workers=2)
    with torch.no_grad():
        for i, data in enumerate(data_loader):
            # Get batch data
            padded_mixture, mixture_lengths, padded_source = data
            if args.use_cuda:
                padded_mixture = padded_mixture.cuda()
                mixture_lengths = mixture_lengths.cuda()
                padded_source = padded_source.cuda()
            # Forward
            estimate_source = model(padded_mixture)  # [B, C, T]
            loss, max_snr, estimate_source, reorder_estimate_source = \
                cal_loss(padded_source, estimate_source, mixture_lengths)
            # Remove padding and flatten to per-utterance lists
            mixture = remove_pad(padded_mixture, mixture_lengths)
            source = remove_pad(padded_source, mixture_lengths)
            # NOTE: use the reordered estimate so sources align with references
            estimate_source = remove_pad(reorder_estimate_source,
                                         mixture_lengths)
            # Score each utterance
            for mix, src_ref, src_est in zip(mixture, source, estimate_source):
                total_cnt += 1
                print("Utt", total_cnt)
                # Compute SDRi (optional: it is much slower than SI-SNRi)
                if args.cal_sdr:
                    avg_SDRi = cal_SDRi(src_ref, src_est, mix)
                    total_SDRi += avg_SDRi
                    print("\tSDRi={0:.2f}".format(avg_SDRi))
                # Compute SI-SNRi
                avg_SISNRi = cal_SISNRi(src_ref, src_est, mix)
                print("\tSI-SNRi={0:.2f}".format(avg_SISNRi))
                total_SISNRi += avg_SISNRi
    # Guard against an empty dataset to avoid ZeroDivisionError.
    if total_cnt == 0:
        print("No utterances evaluated.")
        return
    if args.cal_sdr:
        print("Average SDR improvement: {0:.2f}".format(total_SDRi / total_cnt))
    print("Average SISNR improvement: {0:.2f}".format(total_SISNRi / total_cnt))
def separate(args):
    """Separate mixture wav files with a trained Conv-TasNet model.

    Loads the model from ``args.model_path``, runs it over the mixtures
    described by ``args.mix_dir`` / ``args.mix_json``, and writes the input
    mixture plus one wav per estimated source into ``args.out_dir``.

    Args:
        args: namespace with model_path, mix_dir, mix_json, out_dir,
            batch_size, sample_rate and use_cuda attributes.
    """
    if args.mix_dir is None and args.mix_json is None:
        print("Must provide mix_dir or mix_json! When providing mix_dir, "
              "mix_json is ignored.")
        # Bug fix: previously fell through and crashed later in EvalDataset.
        return
    # Load model
    model = ConvTasNet.load_model(args.model_path)
    print(model)
    model.eval()
    if args.use_cuda:
        model.cuda()
    # Load data
    eval_dataset = EvalDataset(args.mix_dir, args.mix_json,
                               batch_size=args.batch_size,
                               sample_rate=args.sample_rate)
    eval_loader = EvalDataLoader(eval_dataset, batch_size=1)
    os.makedirs(args.out_dir, exist_ok=True)

    def write(inputs, filename, sr=args.sample_rate):
        # Normalize to half full scale to leave headroom; guard against a
        # silent signal, where max(|x|) == 0 would divide by zero.
        peak = np.max(np.abs(inputs))
        if peak > 0:
            inputs = inputs / (2 * peak)
        sf.write(filename, inputs, sr)

    with torch.no_grad():
        for i, data in enumerate(eval_loader):
            # Get batch data
            mixture, mix_lengths, filenames = data
            if args.use_cuda:
                mixture, mix_lengths = mixture.cuda(), mix_lengths.cuda()
            # Forward
            estimate_source = model(mixture)  # [B, C, T]
            # Remove padding and flatten to per-utterance lists
            flat_estimate = remove_pad(estimate_source, mix_lengths)
            mixture = remove_pad(mixture, mix_lengths)
            # Write results; use `j` so the outer enumerate index `i` is not
            # shadowed (it was in the original).
            for j, filename in enumerate(filenames):
                # Bug fix: str.strip('.wav') removes any of the characters
                # '.', 'w', 'a', 'v' from BOTH ends (e.g. 'wav.wav' -> ''),
                # not the extension; use splitext to drop it correctly.
                base = os.path.splitext(os.path.basename(filename))[0]
                filename = os.path.join(args.out_dir, base)
                write(mixture[j], filename + '.wav')
                C = flat_estimate[j].shape[0]
                for c in range(C):
                    write(flat_estimate[j][c],
                          filename + '_s{}.wav'.format(c + 1))