import os
import random
import sys

from itertools import product

import numpy as np

# `toolkits` comes from the vgg-speaker-recognition repo; the hard-coded path
# matches the one used in `evaluate_pairs` below. `utils` (providing
# `load_data`, used in `main`) is assumed to be importable from the same project.
sys.path.insert(0, "/home/doneata/src/vgg-speaker-recognition/tool")
import toolkits
import utils as ut


def evaluate(feats, labels, num_pairs=10_000):
    # Sample random (i, j) pairs and score each by dot-product similarity;
    # a pair is positive when both utterances share the same speaker label.
    indices = range(len(labels))
    pairs = [(i, j) for i, j in product(indices, indices) if i != j]
    # Guard against asking for more pairs than exist.
    pairs = random.sample(pairs, min(num_pairs, len(pairs)))
    sim = lambda u, v: u @ v
    x = np.array([sim(feats[i], feats[j]) for i, j in pairs])
    y = np.array([int(labels[i] == labels[j]) for i, j in pairs])
    eer, thresh = toolkits.calculate_eer(y, x)
    eer = eer * 100
    print("τ: {:.3f}".format(thresh))
    print("EER: {:.3f}%".format(eer))
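
# A minimal, hypothetical usage sketch for `evaluate`: synthetic L2-normalised
# embeddings and random speaker ids, just to show the expected shapes. The
# sizes below are illustrative assumptions, not values from any experiment.
def _demo_evaluate():
    rng = np.random.default_rng(0)
    feats = rng.standard_normal((1000, 512))   # one embedding per utterance
    feats = feats / np.linalg.norm(feats, axis=1, keepdims=True)
    labels = rng.integers(0, 40, size=1000)    # speaker ids
    evaluate(feats, labels, num_pairs=10_000)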
def evaluate_pairs(feats, labels, pairs):
    # Cross-set variant of `evaluate`: scores a fixed list of (i, j) trials
    # between a reference set (r) and a test set (s).
    sys.path.insert(0, "/home/doneata/src/vgg-speaker-recognition/tool")
    import toolkits
    feats_r, feats_s = feats
    labels_r, labels_s = labels
    sim = lambda u, v: u @ v
    x = np.array([sim(feats_r[i], feats_s[j]) for i, j in pairs])
    y = np.array([int(labels_r[i] == labels_s[j]) for i, j in pairs])
    eer, thresh = toolkits.calculate_eer(y, x)
    eer = eer * 100
    print(f"+: {sum(y)} / {len(y)}")
    print("τ: {:.3f}".format(thresh))
    print("EER: {:.3f}%".format(eer))
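
# Hypothetical usage of `evaluate_pairs`: two feature/label sets (e.g. an
# enrollment set and a test set) plus an explicit trial list. All names and
# sizes here are illustrative assumptions.
def _demo_evaluate_pairs():
    rng = np.random.default_rng(0)
    feats_r = rng.standard_normal((200, 512))
    feats_s = rng.standard_normal((300, 512))
    labels_r = rng.integers(0, 20, size=200)
    labels_s = rng.integers(0, 20, size=300)
    pairs = list(zip(rng.integers(0, 200, size=1000),
                     rng.integers(0, 300, size=1000)))
    evaluate_pairs((feats_r, feats_s), (labels_r, labels_s), pairs)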
def main():
    # gpu configuration
    toolkits.initialize_GPU(args)

    import model

    # ==================================
    #       Get Train/Val.
    # ==================================
    print('==> calculating test({}) data lists...'.format(args.test_type))

    if args.test_type == 'normal':
        verify_list = np.loadtxt('../meta/voxceleb1_veri_test.txt', str)
    elif args.test_type == 'hard':
        verify_list = np.loadtxt('../meta/voxceleb1_veri_test_hard.txt', str)
    elif args.test_type == 'extend':
        verify_list = np.loadtxt('../meta/voxceleb1_veri_test_extended.txt', str)
    else:
        raise IOError('==> unknown test type.')

    verify_lb = np.array([int(i[0]) for i in verify_list])
    list1 = np.array([os.path.join(args.data_path, i[1]) for i in verify_list])
    list2 = np.array([os.path.join(args.data_path, i[2]) for i in verify_list])

    total_list = np.concatenate((list1, list2))
    unique_list = np.unique(total_list)

    # ==================================
    #       Get Model
    # ==================================
    # construct the data generator.
    params = {
        'dim': (257, 100, 1),
        'nfft': 512,
        'spec_len': 100,
        'win_length': 400,
        'hop_length': 160,
        'n_classes': 5994,
        'sampling_rate': 16000,
        'normalize': True,
    }

    network_eval = model.vggvox_resnet2d_icassp(input_dim=params['dim'],
                                                num_class=params['n_classes'],
                                                mode='eval', args=args)

    # ==> load the pre-trained model.
    if args.resume:
        if os.path.isfile(args.resume):
            network_eval.load_weights(args.resume, by_name=True)
            result_path = set_result_path(args)
            print('==> successfully loaded model {}.'.format(args.resume))
        else:
            raise IOError("==> no checkpoint found at '{}'".format(args.resume))
    else:
        raise IOError('==> please type in the model to load')

    print('==> start testing.')

    # The feature extraction has to be done sample-by-sample,
    # because the samples have different lengths.
    total_length = len(unique_list)
    feats, scores, labels = [], [], []
    for c, ID in enumerate(unique_list):
        if c % 50 == 0:
            print('Finished extracting features for {}/{} wavs.'.format(
                c, total_length))
        specs = ut.load_data(ID, win_length=params['win_length'],
                             sr=params['sampling_rate'],
                             hop_length=params['hop_length'],
                             n_fft=params['nfft'],
                             spec_len=params['spec_len'], mode='eval')
        specs = np.expand_dims(np.expand_dims(specs, 0), -1)
        v = network_eval.predict(specs)
        feats += [v]

    feats = np.array(feats)

    # ==> compute the pair-wise similarity.
    for c, (p1, p2) in enumerate(zip(list1, list2)):
        ind1 = np.where(unique_list == p1)[0][0]
        ind2 = np.where(unique_list == p2)[0][0]

        v1 = feats[ind1, 0]
        v2 = feats[ind2, 0]

        scores += [np.sum(v1 * v2)]
        labels += [verify_lb[c]]
        print('scores : {}, gt : {}'.format(scores[-1], verify_lb[c]))

    scores = np.array(scores)
    labels = np.array(labels)

    np.save(os.path.join(result_path, 'prediction_scores.npy'), scores)
    np.save(os.path.join(result_path, 'groundtruth_labels.npy'), labels)

    eer, thresh = toolkits.calculate_eer(labels, scores)
    print('==> model: {}, EER: {}'.format(args.resume, eer))
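
# `main` reads a module-level `args`; below is a hedged sketch of the argument
# parser it implies. Only --test_type, --data_path and --resume are used
# directly above; --gpu is an assumption (toolkits.initialize_GPU presumably
# reads it), and the model constructor may expect further flags not shown here.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', default='', type=str)
    parser.add_argument('--resume', default='', type=str)
    parser.add_argument('--data_path', default='', type=str)
    parser.add_argument('--test_type', default='normal',
                        choices=['normal', 'hard', 'extend'], type=str)
    args = parser.parse_args()
    main()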