# (module-level imports assumed throughout these snippets: os, yaml, random,
#  numpy as np, scipy.spatial.distance.pdist, sklearn, matplotlib, and the
#  project's helpers such as SequenceEmbedding and generate_test)

def tune(dataset, medium_template, config_yml, weights_dir, output_dir):

    # load configuration file
    with open(config_yml, 'r') as fp:
        config = yaml.safe_load(fp)

    X, y_true = generate_test(dataset, medium_template, config)

    # this is where model architecture was saved
    # (one directory level above the weights directory)
    architecture_yml = os.path.dirname(weights_dir) + '/architecture.yml'

    output_dir = output_dir + '/' + dataset
    os.makedirs(output_dir, exist_ok=True)

    nb_epoch = config['training']['nb_epoch']

    WEIGHTS_H5 = weights_dir + '/{epoch:04d}.h5'
    LINE = '{epoch:04d} {eer:.6f}\n'
    EER_TXT = output_dir + '/eer.txt'
    PLOT = output_dir + '/plot.{epoch:04d}'

    with open(EER_TXT, 'w') as fp:

        for epoch in range(nb_epoch):

            # skip epochs whose weights were not saved
            weights_h5 = WEIGHTS_H5.format(epoch=epoch)
            if not os.path.isfile(weights_h5):
                continue

            # load model for this epoch
            sequence_embedding = SequenceEmbedding.from_disk(
                architecture_yml, weights_h5)

            # pairwise euclidean distances between embeddings
            batch_size = config['testing']['batch_size']
            x = sequence_embedding.transform(X, batch_size=batch_size,
                                             verbose=0)
            distances = pdist(x, metric='euclidean')

            # plot DET curve and log equal error rate for this epoch
            eer = plot_det_curve(y_true, -distances,
                                 PLOT.format(epoch=epoch))
            msg = 'Epoch #{epoch:04d} | EER = {eer:.2f}%'
            print(msg.format(epoch=epoch, eer=100 * eer))
            fp.write(LINE.format(epoch=epoch, eer=eer))
            fp.flush()

            # save distance distribution plots for this epoch
            space = config['network']['space']
            xlim = (0, 2 if space == 'sphere' else np.sqrt(2.))
            plot_distributions(y_true, distances, PLOT.format(epoch=epoch),
                               xlim=xlim, ymax=3, nbins=100)
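# Hypothetical invocation of tune() -- all paths below are placeholders; the
# function only assumes that weights_dir contains one {epoch:04d}.h5 file per
# epoch and that architecture.yml lives one level above weights_dir:
if __name__ == '__main__':
    tune(dataset='dev', medium_template='{uri}.wav',
         config_yml='config.yml', weights_dir='train/weights',
         output_dir='tune')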
def on_epoch_end(self, epoch, logs=None):

    # keep track of current time
    now = datetime.datetime.now().isoformat()

    # embed validation sequences with the current model
    embedding = self.extract_embedding(self.model)
    fX = embedding.predict(self.X_)
    distance = pdist(fX, metric=self.distance)

    prefix = self.log_dir + '/{subset}.plot.{epoch:04d}'.format(
        subset=self.subset, epoch=epoch)

    # plot distance distribution every 20 epochs (and 10 first epochs)
    xlim = get_range(metric=self.distance)
    if (epoch < 10) or (epoch % 20 == 0):
        plot_distributions(self.y_, distance, prefix,
                           xlim=xlim, ymax=3, nbins=100, dpi=75)

    # plot DET curve every 20 epochs (and 10 first epochs);
    # otherwise only compute the equal error rate
    if (epoch < 10) or (epoch % 20 == 0):
        eer = plot_det_curve(self.y_, distance, prefix,
                             distances=True, dpi=75)
    else:
        _, _, _, eer = det_curve(self.y_, distance, distances=True)

    # store equal error rate in file
    # (overwrite at epoch 0, append afterwards)
    mode = 'a' if epoch else 'w'
    path = self.log_dir + '/{subset}.eer.txt'.format(subset=self.subset)
    with open(path, mode=mode) as fp:
        fp.write(self.EER_TEMPLATE_.format(epoch=epoch, eer=eer, now=now))
        fp.flush()

    # plot eer = f(epoch)
    self.eer_.append(eer)
    best_epoch = np.argmin(self.eer_)
    best_value = np.min(self.eer_)

    fig = plt.figure()
    plt.plot(self.eer_, 'b')
    plt.plot([best_epoch], [best_value], 'bo')
    plt.plot([0, epoch], [best_value, best_value], 'k--')
    plt.grid(True)
    plt.xlabel('epoch')
    plt.ylabel('EER on {subset}'.format(subset=self.subset))
    TITLE = 'EER = {best_value:.5g} on {subset} @ epoch #{best_epoch:d}'
    title = TITLE.format(best_value=best_value,
                         best_epoch=best_epoch,
                         subset=self.subset)
    plt.title(title)
    plt.tight_layout()
    path = self.log_dir + '/{subset}.eer.png'.format(subset=self.subset)
    plt.savefig(path, dpi=75)
    plt.close(fig)
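# Sketch of how a Keras callback exposing the on_epoch_end() hook above is
# typically registered; class name, constructor arguments and training data
# here are assumptions for illustration, not the original code:
#
#     from keras.callbacks import Callback
#
#     class EmbeddingValidation(Callback):
#         ...  # defines on_epoch_end() as above
#
#     model.fit(X_train, y_train, epochs=1000,
#               callbacks=[EmbeddingValidation(...)])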
def test(dataset, medium_template, config_yml, weights_h5, output_dir):

    # load configuration file
    with open(config_yml, 'r') as fp:
        config = yaml.safe_load(fp)

    X, y_true = generate_test(dataset, medium_template, config)

    # this is where model architecture was saved
    # (two directory levels above the weights file)
    architecture_yml = os.path.dirname(
        os.path.dirname(weights_h5)) + '/architecture.yml'

    sequence_embedding = SequenceEmbedding.from_disk(
        architecture_yml, weights_h5)

    # pairwise euclidean distances between embeddings
    batch_size = config['testing']['batch_size']
    x = sequence_embedding.transform(X, batch_size=batch_size, verbose=0)
    distances = pdist(x, metric='euclidean')

    # -- distances distributions
    space = config['network']['space']
    xlim = (0, 2 if space == 'sphere' else np.sqrt(2.))
    plot_distributions(y_true, distances, output_dir + '/plot',
                       xlim=xlim, ymax=3, nbins=100)

    # -- precision / recall curve
    auc = plot_precision_recall_curve(y_true, -distances,
                                      output_dir + '/plot')
    msg = 'AUC = {auc:.2f}%'
    print(msg.format(auc=100 * auc))

    # -- det curve
    eer = plot_det_curve(y_true, -distances, output_dir + '/plot')
    msg = 'EER = {eer:.2f}%'
    print(msg.format(eer=100 * eer))
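# Hypothetical invocation of test() -- placeholder paths; architecture.yml is
# expected two levels above the chosen weights file:
if __name__ == '__main__':
    test(dataset='test', medium_template='{uri}.wav',
         config_yml='config.yml',
         weights_h5='train/weights/0050.h5',
         output_dir='test')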
def test(protocol, tune_dir, test_dir, subset, beta=1.0):

    batch_size = 32

    os.makedirs(test_dir, exist_ok=True)

    train_dir = os.path.dirname(os.path.dirname(tune_dir))

    # -- DURATIONS --
    duration, min_duration, step, heterogeneous = \
        path_to_duration(os.path.basename(train_dir))

    config_dir = os.path.dirname(os.path.dirname(os.path.dirname(train_dir)))
    config_yml = config_dir + '/config.yml'
    with open(config_yml, 'r') as fp:
        config = yaml.safe_load(fp)

    # -- PREPROCESSORS --
    for key, preprocessor in config.get('preprocessors', {}).items():
        preprocessor_name = preprocessor['name']
        preprocessor_params = preprocessor.get('params', {})
        preprocessors = __import__('pyannote.audio.preprocessors',
                                   fromlist=[preprocessor_name])
        Preprocessor = getattr(preprocessors, preprocessor_name)
        protocol.preprocessors[key] = Preprocessor(**preprocessor_params)

    # -- FEATURE EXTRACTION --
    feature_extraction_name = config['feature_extraction']['name']
    features = __import__('pyannote.audio.features',
                          fromlist=[feature_extraction_name])
    FeatureExtraction = getattr(features, feature_extraction_name)
    feature_extraction = FeatureExtraction(
        **config['feature_extraction'].get('params', {}))

    distance = config['glue'].get('params', {}).get('distance', 'sqeuclidean')

    # -- HYPER-PARAMETERS --
    tune_yml = tune_dir + '/tune.yml'
    with open(tune_yml, 'r') as fp:
        tune = yaml.safe_load(fp)

    architecture_yml = train_dir + '/architecture.yml'
    WEIGHTS_H5 = train_dir + '/weights/{epoch:04d}.h5'
    weights_h5 = WEIGHTS_H5.format(epoch=tune['epoch'])

    sequence_embedding = SequenceEmbedding.from_disk(
        architecture_yml, weights_h5)

    X, y = generate_test(protocol, subset, feature_extraction,
                         duration, min_duration=min_duration, step=step)
    fX = sequence_embedding.transform(X, batch_size=batch_size)
    y_distance = pdist(fX, metric=distance)
    y_true = pdist(y, metric='chebyshev') < 1

    fpr, tpr, thresholds = sklearn.metrics.roc_curve(
        y_true, -y_distance, pos_label=True, drop_intermediate=True)

    frr = 1. - tpr
    far = fpr
    thresholds = -thresholds

    # equal error rate, estimated by averaging the two operating points
    # around the FAR/FRR crossover
    eer_index = np.where(far > frr)[0][0]
    eer = .25 * (far[eer_index - 1] + far[eer_index] +
                 frr[eer_index - 1] + frr[eer_index])

    fscore = 1. - f_measure(1. - frr, 1. - far, beta=beta)

    # operating point that maximizes the f-score
    opt_i = np.nanargmin(fscore)
    opt_alpha = float(thresholds[opt_i])
    opt_far = far[opt_i]
    opt_frr = frr[opt_i]
    opt_fscore = fscore[opt_i]

    # operating point at the threshold selected during tuning
    alpha = tune['alpha']
    actual_i = np.searchsorted(thresholds, alpha)
    actual_far = far[actual_i]
    actual_frr = frr[actual_i]
    actual_fscore = fscore[actual_i]

    save_to = test_dir + '/' + subset
    plot_distributions(y_true, y_distance, save_to)
    eer = plot_det_curve(y_true, -y_distance, save_to)
    plot_precision_recall_curve(y_true, -y_distance, save_to)

    with open(save_to + '.txt', 'w') as fp:
        fp.write('# cond. thresh far frr fscore eer\n')
        TEMPLATE = ('{condition} {alpha:.5f} {far:.5f} '
                    '{frr:.5f} {fscore:.5f} {eer:.5f}\n')
        fp.write(TEMPLATE.format(condition='optimal',
                                 alpha=opt_alpha, far=opt_far,
                                 frr=opt_frr, fscore=opt_fscore, eer=eer))
        fp.write(TEMPLATE.format(condition='actual ',
                                 alpha=alpha, far=actual_far,
                                 frr=actual_frr, fscore=actual_fscore,
                                 eer=eer))
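# Hypothetical invocation: the protocol object would come from
# pyannote.database; the protocol name and directory names below are
# placeholders matching the layout the function expects (tune_dir sits two
# directory levels below the training directory):
if __name__ == '__main__':
    from pyannote.database import get_protocol
    protocol = get_protocol('Etape.SpeakerDiarization.TV')
    test(protocol, tune_dir='train/3.0/tune/development',
         test_dir='test', subset='test', beta=1.0)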
def compare(dataset, medium_template, config_yml, output_dir):

    import itertools
    from pyannote.algorithms.stats.gaussian import Gaussian

    # load configuration file
    with open(config_yml, 'r') as fp:
        config = yaml.safe_load(fp)

    X, y_true = generate_test(dataset, medium_template, config)

    n_sequences = X.shape[0]

    # fit one diagonal gaussian per sequence
    gaussians = []
    for x in X:
        g = Gaussian(covariance_type='diag').fit(x)
        gaussians.append(g)

    # -- BIC --
    bic = np.zeros((n_sequences, n_sequences), dtype=float)
    for i, j in itertools.combinations(range(n_sequences), 2):
        bic[i, j], _ = gaussians[i].bic(gaussians[j], penalty_coef=0.)
    distances = squareform(bic, checks=False)

    # -- distances distributions
    plot_distributions(y_true, distances, output_dir + '/plot.bic',
                       xlim=(0, 20), ymax=0.5, nbins=100)

    # -- precision / recall curve
    auc = plot_precision_recall_curve(y_true, -distances,
                                      output_dir + '/plot.bic')
    msg = 'BIC | AUC = {auc:.2f}%'
    print(msg.format(auc=100 * auc))

    # -- det curve
    eer = plot_det_curve(y_true, -distances, output_dir + '/plot.bic')
    msg = 'BIC | EER = {eer:.2f}%'
    print(msg.format(eer=100 * eer))

    # -- GAUSSIAN DIVERGENCE --
    divergence = np.zeros((n_sequences, n_sequences), dtype=float)
    for i, j in itertools.combinations(range(n_sequences), 2):
        divergence[i, j] = gaussians[i].divergence(gaussians[j])
    distances = squareform(divergence, checks=False)

    # -- distances distributions
    plot_distributions(y_true, distances, output_dir + '/plot.divergence',
                       xlim=(0, 20), ymax=0.5, nbins=100)

    # -- precision / recall curve
    auc = plot_precision_recall_curve(y_true, -distances,
                                      output_dir + '/plot.divergence')
    msg = 'Divergence | AUC = {auc:.2f}%'
    print(msg.format(auc=100 * auc))

    # -- det curve
    eer = plot_det_curve(y_true, -distances,
                         output_dir + '/plot.divergence')
    msg = 'Divergence | EER = {eer:.2f}%'
    print(msg.format(eer=100 * eer))
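# Hypothetical invocation of compare() -- placeholder arguments mirroring the
# tune()/test() calls above:
if __name__ == '__main__':
    compare(dataset='dev', medium_template='{uri}.wav',
            config_yml='config.yml', output_dir='compare')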
import itertools

# fill symmetric BIC and divergence matrices for every pair of sequences
# (`full` and `diag` hold one full-covariance and one diagonal-covariance
# gaussian per sequence)
n_sequences = len(full)
bic = np.zeros((n_sequences, n_sequences))
div = np.zeros((n_sequences, n_sequences))
for i, j in itertools.combinations(range(n_sequences), 2):

    gi, gj = full[i], full[j]
    bic[i, j] = gi.bic(gj, penalty_coef=0)[0]
    bic[j, i] = bic[i, j]

    gi, gj = diag[i], diag[j]
    div[i, j] = gi.divergence(gj)
    div[j, i] = div[i, j]

# condense square matrices into pairwise distance vectors
from scipy.spatial.distance import squareform
bic = squareform(bic, checks=False)
div = squareform(div, checks=False)

# compute same/different groundtruth
from scipy.spatial.distance import pdist
y_true = pdist(y, metric='chebyshev') < 1

# plot positive/negative scores distribution
# plot DET curve and return equal error rate
from pyannote.metrics.plot.binary_classification import \
    plot_det_curve, plot_distributions

bic_prefix = LOG_DIR + '/plot.bic'
plot_distributions(y_true, bic, bic_prefix, xlim=(0, 2), ymax=3, nbins=100)
eer = plot_det_curve(y_true, -bic, bic_prefix)
print('BIC EER = {eer:.2f}%'.format(eer=100 * eer))

div_prefix = LOG_DIR + '/plot.div'
plot_distributions(y_true, div, div_prefix, xlim=(0, 2), ymax=3, nbins=100)
eer = plot_det_curve(y_true, -div, div_prefix)
print('DIV EER = {eer:.2f}%'.format(eer=100 * eer))
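# How the `full` and `diag` gaussian lists used above might have been built,
# a sketch assuming the same Gaussian API as compare() earlier in this file
# (X being the stack of test sequences):
#
#     from pyannote.algorithms.stats.gaussian import Gaussian
#     full = [Gaussian(covariance_type='full').fit(x) for x in X]
#     diag = [Gaussian(covariance_type='diag').fit(x) for x in X]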
def same_different_experiment(ark_file, utt_2_spk, half_index=-1,
                              normalize=False, fileset='',
                              use_metric='cosine', max_spks=-1,
                              random_seed=42):

    results_file = 'samedifferent_results.csv'

    from pyannote.metrics.plot.binary_classification import \
        plot_det_curve, plot_distributions

    print('Loading feats now:')
    feats, uttids = kaldi_io.readArk(ark_file.replace('%set', fileset))

    print('loaded: ' + str(len(feats)) + ' feats')
    print('feat[0] shape: ', feats[0].shape)

    # average frame-level features into one vector per utterance
    print('Generating mean vector.')
    feats = np.vstack([feat.mean(0) for feat in feats])

    if half_index != -1:
        print('Cutting vectors at', half_index,
              'and normalizing to unit length' if normalize else '')
        feats = np.vstack([
            feat[:half_index] /
            (np.linalg.norm(feat[:half_index]) if normalize else 1.0)
            for feat in feats])
    else:
        if normalize:
            print('Normalizing to unit length.')
            feats = np.vstack([feat / np.linalg.norm(feat)
                               for feat in feats])

    if utt_2_spk is not None and utt_2_spk.lower() != 'none' \
            and utt_2_spk.strip() != '':

        utt_2_spk = utils.loadUtt2Spk(utt_2_spk.replace('%set', fileset))

        if max_spks != -1:
            # sample a reproducible subset of speakers
            spk_set = list(set(utt_2_spk.values()))
            spk_set.sort()
            random.seed(random_seed)
            selected_spks = random.sample(spk_set,
                                          min(len(spk_set), max_spks))

            print('Selecting random subset of speakers with random seed',
                  random_seed, ':', len(selected_spks), 'speakers')
            print(selected_spks)

            # restrict the utt2spk mapping to the selected speakers
            utt_2_spk_new = dict([(key, utt_2_spk[key]) for key in utt_2_spk
                                  if utt_2_spk[key] in selected_spks])

            # filter feats and uttids accordingly
            feats = [feat for feat, uttid in zip(feats, uttids)
                     if uttid in utt_2_spk_new]
            uttids = [uttid for uttid in uttids if uttid in utt_2_spk_new]

            print('Reduced feats to: ' + str(len(feats)) + ' feats')
            print('Reduced uttids to: ' + str(len(uttids)) + ' uttids')

            utt_2_spk = utt_2_spk_new
        else:
            print('Using all speakers:', len(set(utt_2_spk.values())))

    ground_truth_utt_2_spk = [utt_2_spk[utt_id] for utt_id in uttids]

    le = preprocessing.LabelEncoder()
    le.fit(ground_truth_utt_2_spk)
    ground_truth_utt_2_spk_int = le.transform(ground_truth_utt_2_spk)

    print('Ground truth speaker classes available:')
    print(ground_truth_utt_2_spk_int)

    print('Calculating', use_metric, 'distance matrix...')
    distances = pdist(feats, metric=use_metric)

    print('Calculating ground truth distance matrix...')
    y_true = pdist(np.asarray(ground_truth_utt_2_spk_int)[:, np.newaxis],
                   metric='chebyshev') < 1

    result_key = ark_file.split('/')[-3] \
        + ('.' + fileset if fileset != '' else '') + '.' + use_metric
    prefix = 'plots/plot.' + ark_file.split('/')[-3] + '.' + use_metric \
        + ('.' + fileset if fileset != '' else '') \
        + '.seed_' + str(random_seed)

    plot_distributions(y_true, distances, prefix,
                       xlim=(0, 2), ymax=3, nbins=100)
    eer = plot_det_curve(y_true, -distances, prefix)
    print('EER = {eer:.2f}%'.format(eer=100 * eer))

    with open(results_file, 'a') as outfile:
        outfile.write(result_key + ' ' +
                      '{eer:.2f}%'.format(eer=100 * eer) + '\n')
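# Hypothetical invocation -- the ark and utt2spk paths are placeholders;
# '%set' is substituted with the fileset argument inside the function:
if __name__ == '__main__':
    same_different_experiment('exp/embeddings/feats.%set.ark',
                              'data/%set/utt2spk',
                              half_index=-1, normalize=True,
                              fileset='dev', use_metric='cosine',
                              max_spks=100, random_seed=42)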
# keep at most 100 random sequences per speaker
# (`counts` is assumed to map each speaker label to its number of
#  sequences, e.g. collections.Counter(y))
indices = []
for speaker in np.unique(y):
    i = np.random.choice(np.where(y == speaker)[0],
                         size=min(100, counts[speaker]),
                         replace=False)
    indices.append(i)
indices = np.hstack(indices)

X, y = X[indices], y[indices, np.newaxis]

# load pre-trained embedding
architecture_yml = LOG_DIR + '/architecture.yml'
weights_h5 = LOG_DIR + '/weights/{epoch:04d}.h5'.format(epoch=nb_epoch - 1)
embedding = SequenceEmbedding.from_disk(architecture_yml, weights_h5)

# embed all sequences
fX = embedding.transform(X, batch_size=batch_size, verbose=0)

# compute euclidean distance between every pair of sequences
from scipy.spatial.distance import pdist
distances = pdist(fX, metric='euclidean')

# compute same/different groundtruth
y_true = pdist(y, metric='chebyshev') < 1

# plot positive/negative scores distribution
# plot DET curve and return equal error rate
from pyannote.metrics.plot.binary_classification import \
    plot_det_curve, plot_distributions

prefix = LOG_DIR + '/plot.{epoch:04d}'.format(epoch=nb_epoch - 1)
plot_distributions(y_true, distances, prefix,
                   xlim=(0, 2), ymax=3, nbins=100)
eer = plot_det_curve(y_true, -distances, prefix)
print('EER = {eer:.2f}%'.format(eer=100 * eer))