Ejemplo n.º 1
0
def tune(dataset, medium_template, config_yml, weights_dir, output_dir):
    """Score every saved epoch of a sequence embedding and log its EER.

    For each epoch in ``range(nb_epoch)`` whose weights file
    ``<weights_dir>/<epoch:04d>.h5`` exists, the embedding is loaded, applied
    to the test sequences returned by ``generate_test`` and scored from the
    pairwise euclidean distances between embeddings. One line per evaluated
    epoch is written to ``<output_dir>/<dataset>/eer.txt`` and the DET and
    distribution plots are saved with the prefix
    ``<output_dir>/<dataset>/plot.<epoch:04d>``.

    Parameters
    ----------
    dataset : str
        Forwarded to ``generate_test``; also the name of the output
        sub-directory.
    medium_template
        Forwarded unchanged to ``generate_test``.
    config_yml : str
        Path to the YAML configuration file. The keys read here are
        ``training.nb_epoch``, ``testing.batch_size`` and ``network.space``.
    weights_dir : str
        Directory containing the per-epoch ``.h5`` weight files. The model
        architecture is read from ``architecture.yml`` in its parent
        directory.
    output_dir : str
        Root directory for the results.

    Raises
    ------
    OSError
        If the output directory cannot be created.
    """

    # load configuration file
    # NOTE(review): yaml.load without an explicit Loader can build arbitrary
    # Python objects and PyYAML >= 6 refuses the call; switch to
    # yaml.safe_load if the configuration is plain data.
    with open(config_yml, 'r') as fp:
        config = yaml.load(fp)

    X, y_true = generate_test(dataset, medium_template, config)

    # this is where model architecture was saved
    architecture_yml = os.path.dirname(weights_dir) + '/architecture.yml'

    output_dir = output_dir + '/' + dataset

    try:
        os.makedirs(output_dir)
    except OSError:
        # an already existing directory is fine, any other failure is not
        if not os.path.isdir(output_dir):
            raise

    # settings that do not change from one epoch to the next
    nb_epoch = config['training']['nb_epoch']
    batch_size = config['testing']['batch_size']
    space = config['network']['space']
    xlim = (0, 2 if space == 'sphere' else np.sqrt(2.))

    LINE = '{epoch:04d} {eer:.6f}\n'

    with open(output_dir + '/eer.txt', 'w') as fp:

        for epoch in range(nb_epoch):

            # load model for this epoch (epochs without saved weights are
            # skipped). Only the literal template is formatted so that braces
            # in the directory name cannot confuse str.format.
            weights_h5 = weights_dir + '/{epoch:04d}.h5'.format(epoch=epoch)
            if not os.path.isfile(weights_h5):
                continue

            sequence_embedding = SequenceEmbedding.from_disk(
                architecture_yml, weights_h5)

            # pairwise euclidean distances between embeddings
            x = sequence_embedding.transform(X,
                                             batch_size=batch_size,
                                             verbose=0)
            distances = pdist(x, metric='euclidean')

            plot_prefix = output_dir + '/plot.{epoch:04d}'.format(epoch=epoch)

            # distances are negated before being handed to plot_det_curve
            eer = plot_det_curve(y_true, -distances, plot_prefix)

            msg = 'Epoch #{epoch:04d} | EER = {eer:.2f}%'
            print(msg.format(epoch=epoch, eer=100 * eer))

            # flush after every epoch so partial results survive a crash
            fp.write(LINE.format(epoch=epoch, eer=eer))
            fp.flush()

            # save distribution plots after each epoch
            plot_distributions(y_true,
                               distances,
                               plot_prefix,
                               xlim=xlim,
                               ymax=3,
                               nbins=100)
Ejemplo n.º 2
0
    def on_epoch_end(self, epoch, logs=None):
        """Evaluate the current embedding and update the EER logs and plots.

        Embeds ``self.X_`` with the model returned by
        ``self.extract_embedding(self.model)``, computes pairwise distances
        with the ``self.distance`` metric and derives the equal error rate
        against ``self.y_``. The EER is appended to
        ``<log_dir>/<subset>.eer.txt`` and the curve of EER versus epoch is
        redrawn in ``<log_dir>/<subset>.eer.png``.

        Parameters
        ----------
        epoch : int
            Index of the epoch that just ended.
        logs : dict, optional
            Not used by this method; presumably kept for compatibility with
            the callback interface that calls it (TODO confirm).
        """

        # keep track of current time
        now = datetime.datetime.now().isoformat()

        embedding = self.extract_embedding(self.model)
        fX = embedding.predict(self.X_)
        distance = pdist(fX, metric=self.distance)
        prefix = self.log_dir + '/{subset}.plot.{epoch:04d}'.format(
            subset=self.subset, epoch=epoch)

        xlim = get_range(metric=self.distance)

        # plots are only produced for the 10 first epochs and then once every
        # 20 epochs; other epochs just compute the equal error rate
        if (epoch < 10) or (epoch % 20 == 0):
            plot_distributions(self.y_,
                               distance,
                               prefix,
                               xlim=xlim,
                               ymax=3,
                               nbins=100,
                               dpi=75)
            eer = plot_det_curve(self.y_,
                                 distance,
                                 prefix,
                                 distances=True,
                                 dpi=75)
        else:
            _, _, _, eer = det_curve(self.y_, distance, distances=True)

        # store equal error rate in file
        # (epoch 0 truncates the file, later epochs append to it)
        mode = 'a' if epoch else 'w'
        path = self.log_dir + '/{subset}.eer.txt'.format(subset=self.subset)
        with open(path, mode=mode) as fp:
            fp.write(self.EER_TEMPLATE_.format(epoch=epoch, eer=eer, now=now))
            fp.flush()

        # plot eer = f(epoch)
        self.eer_.append(eer)
        # best_epoch is a position in self.eer_
        best_epoch = np.argmin(self.eer_)
        best_value = np.min(self.eer_)
        fig = plt.figure()
        plt.plot(self.eer_, 'b')
        plt.plot([best_epoch], [best_value], 'bo')
        plt.plot([0, epoch], [best_value, best_value], 'k--')
        plt.grid(True)
        plt.xlabel('epoch')
        plt.ylabel('EER on {subset}'.format(subset=self.subset))
        TITLE = 'EER = {best_value:.5g} on {subset} @ epoch #{best_epoch:d}'
        title = TITLE.format(best_value=best_value,
                             best_epoch=best_epoch,
                             subset=self.subset)
        plt.title(title)
        plt.tight_layout()
        path = self.log_dir + '/{subset}.eer.png'.format(subset=self.subset)
        plt.savefig(path, dpi=75)
        # close the figure explicitly so figures do not pile up over epochs
        plt.close(fig)
Ejemplo n.º 3
0
def test(dataset, medium_template, config_yml, weights_h5, output_dir):
    """Evaluate one set of weights and report its AUC and EER.

    The test sequences returned by ``generate_test`` are embedded with the
    model stored in ``weights_h5``; pairwise euclidean distances between the
    embeddings are then used to draw the distance distributions, the
    precision/recall curve and the DET curve, all saved with the prefix
    ``<output_dir>/plot``. AUC and EER are printed as percentages.

    Parameters
    ----------
    dataset, medium_template
        Forwarded unchanged to ``generate_test``.
    config_yml : str
        Path to the YAML configuration file; ``testing.batch_size`` and
        ``network.space`` are read from it.
    weights_h5 : str
        Path to the weights file. ``architecture.yml`` is looked up two
        directory levels above it.
    output_dir : str
        Directory prefix for the generated plots.
    """

    # NOTE(review): yaml.load is called without an explicit Loader; confirm
    # the configuration file is trusted.
    with open(config_yml, 'r') as config_file:
        settings = yaml.load(config_file)

    X, y_true = generate_test(dataset, medium_template, settings)

    # the architecture file sits in the grand-parent directory of the weights
    weights_parent = os.path.dirname(weights_h5)
    architecture_yml = os.path.dirname(weights_parent) + '/architecture.yml'

    model = SequenceEmbedding.from_disk(architecture_yml, weights_h5)

    # embed the sequences, then compare every pair of embeddings
    batch_size = settings['testing']['batch_size']
    embedded = model.transform(X, batch_size=batch_size, verbose=0)
    distances = pdist(embedded, metric='euclidean')

    # upper bound of the histogram axis depends on the embedding space
    if settings['network']['space'] == 'sphere':
        upper_bound = 2
    else:
        upper_bound = np.sqrt(2.)

    plot_prefix = output_dir + '/plot'

    # distribution of the pairwise distances
    plot_distributions(y_true, distances, plot_prefix,
                       xlim=(0, upper_bound), ymax=3, nbins=100)

    # precision / recall curve (distances are negated before scoring)
    auc = plot_precision_recall_curve(y_true, -distances, plot_prefix)
    print('AUC = {auc:.2f}%'.format(auc=100 * auc))

    # detection error trade-off curve
    eer = plot_det_curve(y_true, -distances, plot_prefix)
    print('EER = {eer:.2f}%'.format(eer=100 * eer))
Ejemplo n.º 4
0
def test(protocol, tune_dir, test_dir, subset, beta=1.0):
    """Apply a tuned embedding to ``subset`` and report its error rates.

    The directory layout is derived from ``tune_dir``: the training directory
    is two levels above it and ``config.yml`` three levels above the training
    directory. The epoch and threshold chosen during tuning are read from
    ``<tune_dir>/tune.yml`` (keys ``epoch`` and ``alpha``).

    Plots are saved with the prefix ``<test_dir>/<subset>`` and a two-line
    report is written to ``<test_dir>/<subset>.txt``: one line for the
    threshold that minimises ``1 - f_measure`` on this subset ("optimal") and
    one for the tuned threshold ``alpha`` ("actual").

    Parameters
    ----------
    protocol
        Protocol object; its ``preprocessors`` mapping is filled in from the
        configuration before being passed to ``generate_test``.
    tune_dir : str
        Directory containing ``tune.yml``.
    test_dir : str
        Output directory (created when missing).
    subset : str
        Name of the subset to evaluate; also used in the output file names.
    beta : float, optional
        Forwarded to ``f_measure``. Defaults to 1.0.

    Raises
    ------
    OSError
        If ``test_dir`` cannot be created.
    """

    batch_size = 32

    try:
        os.makedirs(test_dir)
    except OSError:
        # an already existing directory is fine, any other failure is not
        if not os.path.isdir(test_dir):
            raise

    train_dir = os.path.dirname(os.path.dirname(tune_dir))

    # -- DURATIONS --
    # the fourth value returned by path_to_duration is not needed here
    duration, min_duration, step, _ = \
        path_to_duration(os.path.basename(train_dir))

    config_dir = os.path.dirname(os.path.dirname(os.path.dirname(train_dir)))
    config_yml = config_dir + '/config.yml'
    # NOTE(review): yaml.load is called without an explicit Loader (here and
    # for tune.yml below); confirm these files are trusted.
    with open(config_yml, 'r') as fp:
        config = yaml.load(fp)

    # -- PREPROCESSORS --
    # each preprocessor class is looked up by name in
    # pyannote.audio.preprocessors
    for key, preprocessor in config.get('preprocessors', {}).items():
        preprocessor_name = preprocessor['name']
        preprocessor_params = preprocessor.get('params', {})
        preprocessors = __import__('pyannote.audio.preprocessors',
                                   fromlist=[preprocessor_name])
        Preprocessor = getattr(preprocessors, preprocessor_name)
        protocol.preprocessors[key] = Preprocessor(**preprocessor_params)

    # -- FEATURE EXTRACTION --
    feature_extraction_name = config['feature_extraction']['name']
    features = __import__('pyannote.audio.features',
                          fromlist=[feature_extraction_name])
    FeatureExtraction = getattr(features, feature_extraction_name)
    feature_extraction = FeatureExtraction(
        **config['feature_extraction'].get('params', {}))

    distance = config['glue'].get('params', {}).get('distance', 'sqeuclidean')

    # -- HYPER-PARAMETERS --
    tune_yml = tune_dir + '/tune.yml'
    with open(tune_yml, 'r') as fp:
        tune = yaml.load(fp)

    architecture_yml = train_dir + '/architecture.yml'
    WEIGHTS_H5 = train_dir + '/weights/{epoch:04d}.h5'
    weights_h5 = WEIGHTS_H5.format(epoch=tune['epoch'])

    sequence_embedding = SequenceEmbedding.from_disk(
        architecture_yml, weights_h5)

    X, y = generate_test(protocol, subset, feature_extraction,
                         duration, min_duration=min_duration, step=step)
    fX = sequence_embedding.transform(X, batch_size=batch_size)
    y_distance = pdist(fX, metric=distance)
    # a pair is positive when its two label rows differ by less than 1 in
    # every coordinate
    y_true = pdist(y, metric='chebyshev') < 1

    # roc_curve expects scores where higher means "more positive", hence the
    # negated distances
    fpr, tpr, thresholds = sklearn.metrics.roc_curve(
        y_true, -y_distance, pos_label=True, drop_intermediate=True)

    frr = 1. - tpr
    far = fpr
    # back to distance thresholds; roc_curve returns decreasing score
    # thresholds, so the negated array is increasing
    thresholds = -thresholds

    # (the equal error rate used in the report is the one returned by
    # plot_det_curve below; the former hand-made estimate was overwritten
    # before use and could fail with IndexError, so it has been removed)

    fscore = 1. - f_measure(1. - frr, 1. - far, beta=beta)

    # operating point that minimises 1 - f_measure on this subset
    opt_i = np.nanargmin(fscore)
    opt_alpha = float(thresholds[opt_i])
    opt_far = far[opt_i]
    opt_frr = frr[opt_i]
    opt_fscore = fscore[opt_i]

    # operating point corresponding to the tuned threshold. searchsorted
    # returns len(thresholds) when alpha is larger than every threshold;
    # clamp to the last operating point instead of indexing out of range.
    alpha = tune['alpha']
    actual_i = min(np.searchsorted(thresholds, alpha), len(thresholds) - 1)
    actual_far = far[actual_i]
    actual_frr = frr[actual_i]
    actual_fscore = fscore[actual_i]

    save_to = test_dir + '/' + subset
    plot_distributions(y_true, y_distance, save_to)
    eer = plot_det_curve(y_true, -y_distance, save_to)
    plot_precision_recall_curve(y_true, -y_distance, save_to)

    with open(save_to + '.txt', 'w') as fp:
        fp.write('# cond. thresh  far     frr     fscore  eer\n')
        TEMPLATE = '{condition} {alpha:.5f} {far:.5f} {frr:.5f} {fscore:.5f} {eer:.5f}\n'
        fp.write(TEMPLATE.format(condition='optimal',
                                 alpha=opt_alpha,
                                 far=opt_far,
                                 frr=opt_frr,
                                 fscore=opt_fscore,
                                 eer=eer))
        fp.write(TEMPLATE.format(condition='actual ',
                                 alpha=alpha,
                                 far=actual_far,
                                 frr=actual_frr,
                                 fscore=actual_fscore,
                                 eer=eer))
Ejemplo n.º 5
0
def _plot_and_report(y_true, distances, prefix, label):
    """Save distribution, precision/recall and DET plots; print AUC and EER."""
    # -- distances distributions
    plot_distributions(y_true,
                       distances,
                       prefix,
                       xlim=(0, 20),
                       ymax=0.5,
                       nbins=100)

    # -- precision / recall curve
    auc = plot_precision_recall_curve(y_true, -distances, prefix)
    print('{label} | AUC = {auc:.2f}%'.format(label=label, auc=100 * auc))

    # -- det curve
    eer = plot_det_curve(y_true, -distances, prefix)
    print('{label} | EER = {eer:.2f}%'.format(label=label, eer=100 * eer))


def compare(dataset, medium_template, config_yml, output_dir):
    """Score the test pairs with two Gaussian-based baselines.

    One diagonal-covariance Gaussian is fitted to each test sequence. Every
    pair of sequences is then scored twice, first with ``Gaussian.bic``
    (``penalty_coef=0.``) and then with ``Gaussian.divergence``. For each
    score the plots are saved with the prefix ``<output_dir>/plot.bic`` or
    ``<output_dir>/plot.divergence`` and AUC / EER are printed as
    percentages.

    Parameters
    ----------
    dataset, medium_template
        Forwarded unchanged to ``generate_test``.
    config_yml : str
        Path to the YAML configuration file.
    output_dir : str
        Directory prefix for the generated plots.
    """

    import itertools
    from pyannote.algorithms.stats.gaussian import Gaussian

    # load configuration file
    # NOTE(review): yaml.load is called without an explicit Loader; confirm
    # the configuration file is trusted.
    with open(config_yml, 'r') as fp:
        config = yaml.load(fp)

    X, y_true = generate_test(dataset, medium_template, config)

    n_sequences = X.shape[0]

    # one gaussian per sequence
    gaussians = [Gaussian(covariance_type='diag').fit(x) for x in X]

    # only the upper triangle (i < j) is filled in; squareform turns the
    # square matrix into the condensed pairwise vector.
    # dtype=float replaces the np.float alias that NumPy no longer provides.
    bic = np.zeros((n_sequences, n_sequences), dtype=float)
    for i, j in itertools.combinations(range(n_sequences), 2):
        bic[i, j], _ = gaussians[i].bic(gaussians[j], penalty_coef=0.)

    _plot_and_report(y_true,
                     squareform(bic, checks=False),
                     output_dir + '/plot.bic',
                     'BIC')

    divergence = np.zeros((n_sequences, n_sequences), dtype=float)
    for i, j in itertools.combinations(range(n_sequences), 2):
        divergence[i, j] = gaussians[i].divergence(gaussians[j])

    _plot_and_report(y_true,
                     squareform(divergence, checks=False),
                     output_dir + '/plot.divergence',
                     'Divergence')
    gi, gj = full[i], full[j]
    bic[i, j] = gi.bic(gj, penalty_coef=0)[0]
    bic[j, i] = bic[i, j]

    gi, gj = diag[i], diag[j]
    div[i, j] = gi.divergence(gj)
    div[j, i] = div[i, j]

# Turn the square (n x n) score matrices filled in above into condensed
# pairwise vectors; checks=False disables squareform's symmetry and
# zero-diagonal checks.
from scipy.spatial.distance import squareform
bic = squareform(bic, checks=False)
div = squareform(div, checks=False)

# compute same/different groundtruth: a pair is True when its two rows of `y`
# differ by less than 1 in every coordinate
# (presumably `y` is a column of integer labels -- TODO confirm)
from scipy.spatial.distance import pdist
y_true = pdist(y, metric='chebyshev') < 1

# plot positive/negative scores distribution
# plot DET curve and return equal error rate
from pyannote.metrics.plot.binary_classification import \
    plot_det_curve, plot_distributions

# BIC scores: the raw scores go to the distribution plot, the negated scores
# to the DET curve
bic_prefix = LOG_DIR + '/plot.bic'
plot_distributions(y_true, bic, bic_prefix, xlim=(0, 2), ymax=3, nbins=100)
eer = plot_det_curve(y_true, -bic, bic_prefix)
print('BIC EER = {eer:.2f}%'.format(eer=100 * eer))

# divergence scores: same treatment, `eer` is overwritten
div_prefix = LOG_DIR + '/plot.div'
plot_distributions(y_true, div, div_prefix, xlim=(0, 2), ymax=3, nbins=100)
eer = plot_det_curve(y_true, -div, div_prefix)
print('DIV EER = {eer:.2f}%'.format(eer=100 * eer))
Ejemplo n.º 7
0
def same_different_experiment(ark_file,
                              utt_2_spk,
                              half_index=-1,
                              normalize=False,
                              fileset='',
                              use_metric='cosine',
                              max_spks=-1,
                              random_seed=42):
    """Run a same/different speaker experiment on utterance-level features.

    Every utterance read from ``ark_file`` is reduced to the mean of its
    feature matrix along axis 0, optionally truncated and normalized to unit
    length. All pairs of utterances are then compared with ``use_metric`` and
    scored against the speaker labels. The distribution plot and DET curve
    are saved with a ``plots/plot.<...>`` prefix, the EER is printed and one
    line is appended to ``samedifferent_results.csv``.

    Parameters
    ----------
    ark_file : str
        Path to the feature archive; ``%set`` is replaced by ``fileset``. The
        third path component from the end (split on ``/``) names the result.
    utt_2_spk : str
        Path to the utterance-to-speaker mapping; ``%set`` is replaced by
        ``fileset``. Required: None, ``'none'`` (any case) and blank strings
        are rejected.
    half_index : int, optional
        When different from -1, only the first ``half_index`` coefficients of
        each mean vector are kept.
    normalize : bool, optional
        Scale every vector to unit euclidean norm.
    fileset : str, optional
        Substituted for ``%set`` in both paths and added to the result names.
    use_metric : str, optional
        Metric name passed to ``pdist``.
    max_spks : int, optional
        When different from -1, evaluate a random subset of at most this many
        speakers.
    random_seed : int, optional
        Seed for the speaker selection; also part of the plot prefix.

    Raises
    ------
    ValueError
        If no speaker mapping is given (the ground truth cannot be built).
    """

    # Without a speaker mapping the ground truth cannot be computed. Fail
    # before loading any feature, instead of hitting an unbound variable
    # once all the work has been done.
    has_labels = (utt_2_spk is not None
                  and utt_2_spk.lower() != 'none'
                  and utt_2_spk.strip() != '')
    if not has_labels:
        raise ValueError('utt_2_spk is required to compute the same/different '
                         'ground truth')

    results_file = 'samedifferent_results.csv'

    from pyannote.metrics.plot.binary_classification import plot_det_curve, plot_distributions

    print('Loading feats now:')

    feats, uttids = kaldi_io.readArk(ark_file.replace('%set', fileset))

    print('loaded: ' + str(len(feats)) + ' feats')
    print('feat[0] shape: ', feats[0].shape)

    print('Generating mean vector.')

    # one vector per utterance: mean over axis 0 of its feature matrix
    feats = np.vstack([feat.mean(0) for feat in feats])

    if half_index != -1:
        print('Cutting vectors at ', half_index,
              'and normalize to unit length' if normalize else '')
        feats = np.vstack([
            feat[:half_index] /
            (np.linalg.norm(feat[:half_index]) if normalize else 1.0)
            for feat in feats
        ])
    elif normalize:
        print('Normalize to unit length.')
        feats = np.vstack([feat / np.linalg.norm(feat) for feat in feats])

    utt_2_spk = utils.loadUtt2Spk(utt_2_spk.replace('%set', fileset))

    if max_spks != -1:

        # sample subset of speakers; sorting first makes the selection
        # reproducible for a given seed
        spk_set = sorted(set(utt_2_spk.values()))
        random.seed(random_seed)

        selected_spks = random.sample(spk_set, min(len(spk_set), max_spks))

        print('Selecting random subset of speakers with random seed',
              random_seed, ':', len(selected_spks), 'speakers')
        print(selected_spks)

        # make new utt2spk dictionary on subset (set for O(1) membership)
        selected = set(selected_spks)
        utt_2_spk_new = {utt: spk for utt, spk in utt_2_spk.items()
                         if spk in selected}

        # filter feats and uttids, keeping them aligned
        feats = [
            feat for feat, uttid in zip(feats, uttids)
            if uttid in utt_2_spk_new
        ]
        uttids = [uttid for uttid in uttids if uttid in utt_2_spk_new]

        print('Reduced feats to: ' + str(len(feats)) + ' feats')
        print('Reduced uttids to: ' + str(len(uttids)) + ' uttids')

        utt_2_spk = utt_2_spk_new
    else:
        print('Using all speakers:', len(set(utt_2_spk.values())))

    ground_truth_utt_2_spk = [utt_2_spk[utt_id] for utt_id in uttids]

    # map speaker names to integer classes
    le = preprocessing.LabelEncoder()
    le.fit(ground_truth_utt_2_spk)

    ground_truth_utt_2_spk_int = le.transform(ground_truth_utt_2_spk)

    print("Ground truth speaker classes available:")

    print(ground_truth_utt_2_spk_int)

    print('Calculating', use_metric, 'distance matrix...')
    distances = pdist(feats, metric=use_metric)

    # a pair is "same" when both integer labels are equal: their chebyshev
    # distance is then 0, and at least 1 otherwise
    print('Calculating ground thruth distance matrix...')
    y_true = pdist(np.asarray(ground_truth_utt_2_spk_int)[:, np.newaxis],
                   metric='chebyshev') < 1

    experiment = ark_file.split('/')[-3]
    fileset_suffix = '.' + fileset if fileset != '' else ''
    result_key = experiment + fileset_suffix + '.' + use_metric
    prefix = ('plots/plot.' + experiment + '.' + use_metric + fileset_suffix +
              '.seed_' + str(random_seed))

    plot_distributions(y_true,
                       distances,
                       prefix,
                       xlim=(0, 2),
                       ymax=3,
                       nbins=100)

    eer = plot_det_curve(y_true, -distances, prefix)

    print('EER = {eer:.2f}%'.format(eer=100 * eer))

    with open(results_file, 'a') as outfile:
        outfile.write(result_key + ' ' + '{eer:.2f}%'.format(eer=100 * eer) +
                      '\n')
    i = np.random.choice(np.where(y == speaker)[0],
                         size=min(100, counts[speaker]),
                         replace=False)
    indices.append(i)
# Merge the per-speaker index arrays collected above and keep only the
# selected sequences. `y` is indexed with np.newaxis so that it gains a
# trailing axis and can be passed to pdist below.
indices = np.hstack(indices)
X, y = X[indices], y[indices, np.newaxis]

# load pre-trained embedding
# (weights of epoch nb_epoch - 1, presumably the last training epoch)
architecture_yml = LOG_DIR + '/architecture.yml'
weights_h5 = LOG_DIR + '/weights/{epoch:04d}.h5'.format(epoch=nb_epoch - 1)
embedding = SequenceEmbedding.from_disk(architecture_yml, weights_h5)

# embed all sequences
fX = embedding.transform(X, batch_size=batch_size, verbose=0)

# compute euclidean distance between every pair of sequences
from scipy.spatial.distance import pdist
distances = pdist(fX, metric='euclidean')

# compute same/different groundtruth: a pair is True when its two rows of `y`
# differ by less than 1
y_true = pdist(y, metric='chebyshev') < 1

# plot positive/negative scores distribution
# plot DET curve and return equal error rate
# (the DET curve is computed from the negated distances)
from pyannote.metrics.plot.binary_classification import \
    plot_det_curve, plot_distributions
prefix = LOG_DIR + '/plot.{epoch:04d}'.format(epoch=nb_epoch - 1)
plot_distributions(y_true, distances, prefix, xlim=(0, 2), ymax=3, nbins=100)
eer = plot_det_curve(y_true, -distances, prefix)
print('EER = {eer:.2f}%'.format(eer=100 * eer))