def enrollment_dic_kwsTrials(self, listfilename, uttpath, utt2label, save_path, print_interval=10, num_eval=10,
                             eval_frames=None, save_dic=False):
    """Extract one embedding per enrollment utterance named in a trial list.

    The first three whitespace-separated fields of every non-blank line of
    ``listfilename`` are collected as utterance ids. Each distinct id is
    loaded with ``loadWAV(uttpath + uttid, 0, True, 10)``, moved to CUDA and
    passed through ``self.__S__``.

    Parameters
    ----------
    listfilename : str
        Path of the trial list to read.
    uttpath : str
        Prefix that is string-concatenated with each utterance id.
    utt2label
        Not used by this method; kept for interface compatibility.
    save_path : str
        Destination handed to ``numpy.save`` when ``save_dic`` is truthy.
    print_interval : int
        Progress is written every ``print_interval`` utterances.
    num_eval, eval_frames
        Not used by this method; ``loadWAV`` is called with fixed arguments.
    save_dic : bool
        When truthy, the resulting dictionary is written with ``numpy.save``.

    Returns
    -------
    dict
        Maps each utterance id to its embedding as a numpy array.
    """
    self.eval()
    tstart = time.time()

    ## Read all lines (blank lines are skipped instead of raising IndexError)
    enroll_utt = set()
    with open(listfilename) as listfile:
        for line in listfile:
            data = line.split()
            if not data:
                continue
            enroll_utt.update((data[0], data[1], data[2]))
    set_enroll_utt = sorted(enroll_utt)

    ## extract enrollment data embeddings
    feats_np = {}
    with torch.no_grad():
        for idx, uttid in enumerate(set_enroll_utt):
            inp = torch.FloatTensor(loadWAV(uttpath + uttid, 0, True, 10)).cuda()
            embd = self.__S__.forward(inp).cpu()
            feats_np[uttid] = embd.numpy()
            if idx % print_interval == 0:
                telapsed = time.time() - tstart
                sys.stdout.write(
                    "\rReading %d of %d: %.2f Hz, embedding size %d" %
                    (idx, len(set_enroll_utt), idx / telapsed, embd.size()[1]))

    if save_dic:
        # The dict is stored as a pickled object array by numpy.save.
        numpy.save(save_path, feats_np)

    end = time.time() - tstart
    print("\n total time %.2f" % (end))
    return feats_np
def get_embeddings(model, test_list, test_path, max_frames=1000):
    """Compute one flattened embedding per utterance in a test list.

    Parameters
    ----------
    model : torch.nn.Module
        The loaded model.
    test_list : str
        Passed to ``parse_test_list`` to obtain the relative wav paths.
    test_path : str
        The wav file location; each relative path is joined onto it.
    max_frames : int
        The max number of frames to process per utterance (forwarded to
        ``loadWAV``).

    Returns
    -------
    embeddings : torch.Tensor or None
        Stacked embeddings, one row per utterance (each model output is
        flattened with ``view(-1)``). ``None`` when the list is empty.
    labels : list
        List of str. The first ``/``-separated component of each wav path,
        used as the speaker ID.
    """
    # TODO(alexbooth): what does num_eval do?
    num_eval = 10

    wav_paths = parse_test_list(test_list)
    rows = []
    labels = []
    for wav_path in wav_paths:
        input_ = loadWAV(os.path.join(test_path, wav_path), max_frames, evalmode=True,
                         num_eval=num_eval).to(device)
        output = model.forward(input_).detach().cpu()
        rows.append(output.view(-1).unsqueeze(0))
        labels.append(wav_path.split('/')[0])

    # Concatenate once at the end rather than growing a tensor inside the
    # loop; this also avoids comparing a tensor against None with ``==``.
    embeddings = torch.cat(rows) if rows else None
    return embeddings, labels
def loadAudio(file):
    """Load ``file`` via ``loadWAV`` in eval mode and return it as a FloatTensor.

    The frame count comes from the module-level ``args.eval_frames``.
    """
    return torch.FloatTensor(loadWAV(file, args.eval_frames, evalmode=True))
def evaluateFromList(self, listfilename, print_interval=100, test_path='', num_eval=10, eval_frames=None):
    """Score every trial of a list file with the current model.

    Each non-blank line is ``[label] file1 file2``. When a line has exactly
    two fields a random 0/1 placeholder label is prepended (drawn once per
    trial). Every distinct file is embedded once with ``self.__S__`` and the
    trial score is the negated mean of ``F.pairwise_distance`` between the
    two embeddings.

    Parameters
    ----------
    listfilename : str
        Path of the trial list.
    print_interval : int
        Progress is written every ``print_interval`` items.
    test_path : str
        Directory joined in front of every file name.
    num_eval, eval_frames
        Forwarded to ``loadWAV``.

    Returns
    -------
    tuple
        ``(all_scores, all_labels, all_trials)``: three parallel lists, where
        each entry of ``all_trials`` is ``"file1 file2"``.
    """
    self.eval()
    feats = {}
    tstart = time.time()

    ## Read all lines (blank lines are skipped instead of raising IndexError)
    trials = []
    files = set()
    with open(listfilename) as listfile:
        for line in listfile:
            data = line.split()
            if not data:
                continue
            ## Append random label if missing
            if len(data) == 2:
                data = [random.randint(0, 1)] + data
            files.update((data[1], data[2]))
            trials.append((data[0], data[1], data[2]))
    setfiles = sorted(files)

    ## Compute the embedding of every distinct file once
    for idx, file in enumerate(setfiles):
        inp1 = loadWAV(os.path.join(test_path, file), eval_frames, evalmode=True,
                       num_eval=num_eval).to(device)
        ref_feat = self.__S__.forward(inp1).detach().cpu()
        feats[file] = ref_feat
        if idx % print_interval == 0:
            telapsed = time.time() - tstart
            sys.stdout.write(
                "\rReading %d of %d: %.2f Hz, embedding size %d" %
                (idx, len(setfiles), idx / telapsed, ref_feat.size()[1]))
    print('')

    all_scores = []
    all_labels = []
    all_trials = []
    tstart = time.time()

    ## Compute all scores from the cached embeddings
    for idx, (label, ref_file, com_file) in enumerate(trials):
        ref_feat = feats[ref_file].to(device)
        com_feat = feats[com_file].to(device)
        if self.__L__.test_normalize:
            ref_feat = F.normalize(ref_feat, p=2, dim=1)
            com_feat = F.normalize(com_feat, p=2, dim=1)
        dist = F.pairwise_distance(
            ref_feat.unsqueeze(-1),
            com_feat.unsqueeze(-1).transpose(0, 2)).detach().cpu().numpy()
        score = -1 * numpy.mean(dist)
        all_scores.append(score)
        all_labels.append(int(label))
        all_trials.append(ref_file + " " + com_file)
        if idx % print_interval == 0:
            # The first trial can finish within the clock resolution, so
            # guard the rate against a zero elapsed time.
            telapsed = max(time.time() - tstart, 1e-9)
            sys.stdout.write("\rComputing %d of %d: %.2f Hz" % (idx, len(trials), idx / telapsed))
            sys.stdout.flush()
    print('\n')
    return (all_scores, all_labels, all_trials)
def evaluateFromListSave(self, listfilename, print_interval=5000, feat_dir='', test_path='', num_eval=10):
    """Score every trial of a list file, optionally caching embeddings on disk.

    Each non-blank line must be ``label file1 file2``. Every distinct file is
    embedded once with ``self.__S__``. With ``feat_dir == ''`` embeddings stay
    in memory; otherwise each one is written to ``feat_dir`` with
    ``torch.save`` and re-read with ``torch.load`` for every trial.

    NOTE: when ``feat_dir`` is given, the whole directory is removed with
    ``shutil.rmtree`` at the end, including anything it held beforehand.

    Parameters
    ----------
    listfilename : str
        Path of the trial list.
    print_interval : int
        Progress is written every ``print_interval`` items.
    feat_dir : str
        Directory for temporary embedding files; ``''`` keeps them in memory.
    test_path : str
        Directory joined in front of every file name.
    num_eval : int
        Forwarded to ``loadWAV`` and used as the expansion size when scoring.

    Returns
    -------
    tuple
        ``(all_scores, all_labels)``: two parallel lists.
    """
    self.eval()
    filedict = {}
    feats = {}
    tstart = time.time()
    use_disk = feat_dir != ''

    if use_disk:
        print('Saving temporary files to %s' % feat_dir)
        os.makedirs(feat_dir, exist_ok=True)

    ## Read all lines (blank lines are skipped instead of raising IndexError)
    trials = []
    files = set()
    with open(listfilename) as listfile:
        for line in listfile:
            data = line.split()
            if not data:
                continue
            files.update((data[1], data[2]))
            trials.append((data[0], data[1], data[2]))
    setfiles = sorted(files)

    ## Save all features to file (or keep them in memory)
    for idx, file in enumerate(setfiles):
        inp1 = loadWAV(os.path.join(test_path, file), self.__max_frames__, evalmode=True,
                       num_eval=num_eval).cuda()
        ref_feat = self.__S__.forward(inp1).detach().cpu()
        if use_disk:
            filename = '%06d.wav' % idx
            filedict[file] = filename
            torch.save(ref_feat, os.path.join(feat_dir, filename))
        else:
            feats[file] = ref_feat
        if idx % print_interval == 0:
            telapsed = time.time() - tstart
            sys.stdout.write("\rReading %d: %.2f Hz, embed size %d" % (idx, idx / telapsed, ref_feat.size()[1]))
    print('')

    all_scores = []
    all_labels = []
    tstart = time.time()

    ## Read files and compute all scores
    for idx, (label, ref_file, com_file) in enumerate(trials):
        if use_disk:
            ref_feat = torch.load(os.path.join(feat_dir, filedict[ref_file])).cuda()
            com_feat = torch.load(os.path.join(feat_dir, filedict[com_file])).cuda()
        else:
            ref_feat = feats[ref_file].cuda()
            com_feat = feats[com_file].cuda()
        if self.__test_normalize__:
            ref_feat = F.normalize(ref_feat, p=2, dim=1)
            com_feat = F.normalize(com_feat, p=2, dim=1)
        dist = F.pairwise_distance(
            ref_feat.unsqueeze(-1).expand(-1, -1, num_eval),
            com_feat.unsqueeze(-1).expand(-1, -1, num_eval).transpose(0, 2)).detach().cpu().numpy()
        score = -1 * numpy.mean(dist)
        all_scores.append(score)
        all_labels.append(int(label))
        if idx % print_interval == 0:
            # The first trial can finish within the clock resolution, so
            # guard the rate against a zero elapsed time.
            telapsed = max(time.time() - tstart, 1e-9)
            sys.stdout.write("\rComputing %d: %.2f Hz" % (idx, idx / telapsed))
            sys.stdout.flush()

    if use_disk:
        print(' Deleting temporary files.')
        shutil.rmtree(feat_dir)
    print('\n')
    return (all_scores, all_labels)
# Decision threshold read from the tuned parameters.
threshold = parameter_dic['eer_threshold']
if len(lines[0].strip().split()) == 5:  # on dev set
    print('Deal with dev set')
    tsatrt = time.time()
    for line in tqdm.tqdm(lines):
        data = line.strip().split()
        final_labels.append(data[4])

        # Utterances labelled 'non-trigger' are rejected without running the model.
        if utt2label[data[3]] == 'non-trigger':
            output_score.append('negative')
            continue

        with torch.no_grad():
            uttid = data[3] + '.wav'
            # Reuse the cached test embedding when this utterance was already seen.
            if uttid in eval_dic:
                eval_embd = eval_dic[uttid]
            else:
                inp = torch.FloatTensor(
                    loadWAV(args.uttpath + uttid, 0, True, 10)).cuda()
                eval_embd = s.__S__.forward(inp).cpu().numpy()
                eval_dic[uttid] = eval_embd
            eval_embd = numpy.squeeze(eval_embd)

            # Mean of the three enrollment embeddings named in columns 0-2.
            enroll_embd = numpy.squeeze(
                (enroll_dic[data[0]] + enroll_dic[data[1]] + enroll_dic[data[2]]) / 3)

            # Cosine similarity between the test and the averaged enrollment embedding.
            result = 1 - spatial.distance.cosine(eval_embd, enroll_embd)
            output_score.append('negative' if result < threshold else 'positive')
    tend = time.time() - tsatrt
    print('total time: %.2f' % (tend))
# Score each trial line: columns 0-2 name the enrollment utterances, column 3
# the test utterance and column 4 the reference label.
tsatrt = time.time()  # start time ("tsatrt" is the name used throughout this fragment)
# Only use 40000 lines of trial file, because the back contains stitched audio
# NOTE(review): no slicing of `lines` is visible here -- confirm it is trimmed upstream.
for line in tqdm.tqdm(lines):
    data = line.strip().split()
    final_labels.append(data[4])
    if utt2label[data[3]] == 'negative':
        # Test utterances labelled 'negative' are rejected without running the model.
        output_score.append('negative')
        continue
    # `&` is the bitwise operator, so both comparisons (and both dict lookups)
    # are always evaluated.
    elif (utt2label[data[3]] == 'trigger') & (u2l_template[data[3]] == 'positive'):
        with torch.no_grad():
            uttid = data[3] + '.wav'
            # Compute the test embedding once per utterance and cache it in eval_dic.
            if uttid not in eval_dic:
                inp = torch.FloatTensor(
                    loadWAV(args.uttpath + uttid, 0, True, 10)).cuda()
                eval_embd = s.__S__.forward(inp).cpu().numpy()
                eval_dic[uttid] = eval_embd
            else:
                eval_embd = eval_dic[uttid]
            eval_embd = numpy.squeeze(eval_embd)
            # Mean of the three enrollment embeddings.
            enroll_embd = (enroll_dic[data[0]] + enroll_dic[data[1]] + enroll_dic[data[2]]) / 3
            enroll_embd = numpy.squeeze(enroll_embd)
            # Cosine similarity between test and averaged enrollment embedding.
            result = 1 - spatial.distance.cosine(eval_embd, enroll_embd)
            scores.append(result)
            # Binary target: 0 when column 4 is 'negative', 1 otherwise.
            if data[4] == 'negative':
                labels.append(0)
            else:
                labels.append(1)
            # Placeholder; no decision is made for this trial in the visible code.
            output_score.append('tbd')
def evaluateFromList(self, wav1, wav2, print_interval=100, test_path='', num_eval=10, eval_frames=None):
    """Score a single trial made of two wav files.

    Both files are loaded through ``loadWAV`` (joined onto ``test_path``),
    embedded with ``self.__S__`` on CUDA, optionally L2-normalised when
    ``self.__L__.test_normalize`` is set, and compared with
    ``F.pairwise_distance``.

    Returns
    -------
    tuple
        ``(score, trial)`` where ``score`` is the negated mean distance and
        ``trial`` is ``"wav1 wav2"``.
    """
    self.eval()
    started = time.time()

    pair = [wav1, wav2]
    unique_wavs = sorted(set(pair))
    n_unique = len(unique_wavs)

    ## Embed every distinct file once
    embeddings = {}
    for pos, wav in enumerate(unique_wavs):
        audio = loadWAV(os.path.join(test_path, wav), eval_frames, evalmode=True, num_eval=num_eval)
        batch = torch.FloatTensor(audio).cuda()
        emb = self.__S__.forward(batch).detach().cpu()
        embeddings[wav] = emb
        elapsed = time.time() - started
        if pos % print_interval == 0:
            progress = "\rReading %d of %d: %.2f Hz, embedding size %d" % (
                pos, n_unique, pos / elapsed, emb.size()[1])
            sys.stdout.write(progress)
    print('')

    started = time.time()

    ## Compute the score of the single trial
    pos = 0
    ref_feat = embeddings[wav1].cuda()
    com_feat = embeddings[wav2].cuda()
    if self.__L__.test_normalize:
        ref_feat = F.normalize(ref_feat, p=2, dim=1)
        com_feat = F.normalize(com_feat, p=2, dim=1)

    ref_view = ref_feat.unsqueeze(-1)
    com_view = com_feat.unsqueeze(-1).transpose(0, 2)
    dist = F.pairwise_distance(ref_view, com_view).detach().cpu().numpy()
    score = -1 * numpy.mean(dist)
    trial = wav1 + " " + wav2

    if pos % print_interval == 0:
        elapsed = time.time() - started
        sys.stdout.write("\rComputing %d of %d: %.2f Hz" % (pos, len(pair), pos / elapsed))
        sys.stdout.flush()
    print('\n')
    return (score, trial)
def evaluateFromList(self, listfilename, print_interval=100, test_path='', num_eval=0, eval_frames=None, step=0.2,
                     save_path="./"):
    """Embed every utterance of a list file and save the embeddings as ``.npy``.

    Each non-blank line of ``listfilename`` is one utterance id (without the
    ``.wav`` extension). The segments returned by ``loadWAV`` are pushed
    through ``self.__S__`` in groups of 20 (via ``chunks``), the outputs are
    concatenated and written to ``{save_path}/{id}.npy``.

    NOTE: the trial-scoring code that used to follow the bare ``return`` was
    unreachable and has been dropped; the observable behaviour is unchanged.

    Parameters
    ----------
    listfilename : str
        Path of the utterance list.
    print_interval : int
        Progress is written every ``print_interval`` utterances.
    test_path : str
        Directory joined in front of ``id + '.wav'``.
    num_eval, eval_frames, step
        Forwarded to ``loadWAV``.
    save_path : str
        Directory prefix of the written ``.npy`` files.

    Returns
    -------
    None
    """
    self.eval()
    tstart = time.time()

    ## Read all lines (blank lines are skipped so no bogus '.wav' path is built)
    files = set()
    with open(listfilename) as listfile:
        for line in listfile:
            data = line.strip()
            if data:
                files.add(data)
    setfiles = sorted(files)

    ## Save all features to file
    for idx, file in enumerate(setfiles):
        wavs = loadWAV(os.path.join(test_path, file + '.wav'), eval_frames, evalmode=True,
                       num_eval=num_eval, step=step)
        print('wavs size', len(wavs))

        res = []
        for c in chunks(wavs, 20):
            # numpy.float was only an alias of the builtin float and no
            # longer exists in current NumPy releases.
            c = numpy.stack(c, axis=0).astype(float)
            inp1 = torch.FloatTensor(c).to(device)
            res.append(self.__S__.forward(inp1).detach().cpu())
        res = torch.cat(res)

        with open(f'{save_path}/{file}.npy', 'wb') as f:
            numpy.save(f, res.numpy())

        if idx % print_interval == 0:
            telapsed = time.time() - tstart
            sys.stdout.write(
                "\rReading %d of %d: %.2f Hz, embedding size %d" %
                (idx, len(setfiles), idx / telapsed, res.size()[1]))
    return