Esempio n. 1
0
def run_metrics(references_f,
                hypothesis_f,
                metrics,
                visualization=False,
                class_to_keep=None):
    """Run every metric on each (reference, hypothesis) RTTM file pair.

    Args:
        references_f: Sequence of reference RTTM file paths.
        hypothesis_f: Sequence of hypothesis RTTM file paths, paired by
            position with ``references_f``.
        metrics: Mapping whose values are callables invoked as
            ``m(ref, hyp)`` for every pair.
        visualization: When true, a PNG comparing reference and hypothesis
            is saved for each pair in a ``visualization`` directory located
            next to the first hypothesis file.
        class_to_keep: Forwarded unchanged to ``rttm_to_annotation``.

    Returns:
        The ``metrics`` mapping that was passed in.

    Raises:
        ValueError: If the two file lists do not have the same length.
    """
    if len(references_f) != len(hypothesis_f):
        raise ValueError(
            "The number of reference files and hypothesis files must match ! (%d != %d)"
            % (len(references_f), len(hypothesis_f)))

    visualization_dir = None
    # Only derive the output directory when there is at least one file;
    # indexing hypothesis_f[0] on an empty list would raise IndexError.
    if visualization and len(hypothesis_f) > 0:
        visualization_dir = os.path.join(os.path.dirname(hypothesis_f[0]),
                                         "visualization")
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(visualization_dir, exist_ok=True)

    for ref_f, hyp_f in zip(references_f, hypothesis_f):
        ref = rttm_to_annotation(ref_f, class_to_keep=class_to_keep)
        hyp = rttm_to_annotation(hyp_f, class_to_keep=class_to_keep)

        # Set the uri as the basename for both reference and hypothesis
        basename = os.path.basename(ref_f)
        ref.uri, hyp.uri = basename, basename

        # Let's accumulate the score for each metric. The return value is
        # not needed here; presumably each metric keeps its own running
        # total when called (verify against the metric implementation).
        for m in metrics.values():
            m(ref, hyp)

        # Let's generate a visualization of the results
        if visualization:
            moment = find_1mn_highest_volubility(ref)
            if moment is not None:
                # Set figure size, and crop the annotation
                # for the highest volubile moment
                start, end = moment[0], moment[1]
                notebook.width = end / 4
                plt.rcParams['figure.figsize'] = (notebook.width, 10)
                notebook.crop = Segment(start, end)

                ref_name = os.path.basename(ref_f)
                hyp_name = os.path.basename(hyp_f)

                # Plot reference
                plt.subplot(211)
                notebook.plot_annotation(ref, legend=True, time=False)
                plt.gca().set_title(
                    'reference ' + ref_name.replace('.rttm', ''),
                    fontdict={'fontsize': 18})

                # Plot hypothesis
                plt.subplot(212)
                notebook.plot_annotation(hyp, legend=True, time=True)
                plt.gca().set_title(
                    'hypothesis ' + hyp_name.replace('.rttm', ''),
                    fontdict={'fontsize': 18})

                plt.savefig(
                    os.path.join(visualization_dir,
                                 hyp_name.replace('.rttm', '.png')))
                plt.close()
    return metrics
def main(args):
    """Cluster the embeddings of every conversation and report the DER.

    For each ``*.hdf5`` file found in ``hp.callhome_emb_dir`` the ``embs``
    dataset is loaded, clustered with ``perform_clustering`` and turned into
    reference / predicted annotations via ``get_annotations``. The
    per-conversation rate returned by ``get_der`` is collected, the mean is
    printed, and all ``[rec_name, der_rate]`` rows are written to
    ``der_rates_v1.csv`` inside ``hp.logdir``.

    Args:
        args: Unused by this function; kept for the caller's interface.

    Raises:
        ValueError: If an embedding array contains NaN or infinite values.
    """

    emb_dir = hp.callhome_emb_dir
    label_dir = hp.callhome_label_dir
    pred_dir = hp.callhome_pred_dir

    # Clustering per conversation.
    cluster_method = 'kmeans'
    save_dir = hp.logdir
    save_file = 'der_rates_v1.csv'

    der_rates = []
    aver_der_rate = 0

    do_plot = False  # Whether to check the annotation plot for target.
    target = '4822'  # Visualize the plots for this target

    # Perform diarization for each conversation.
    for conv in glob(path.join(emb_dir, "*.hdf5")):

        rec_name = path.splitext(path.basename(conv))[0]

        with h5py.File(conv, 'r') as f:
            embs = f['embs'][:]  # Gives a numpy array
        if np.isnan(embs).any() or np.isinf(embs).any():
            raise ValueError('Embeddings contain invalid value.')

        pred_labels = perform_clustering(embs, method=cluster_method)

        true_annotation, pred_annotation = get_annotations(
            rec_name, pred_labels, label_dir, pred_dir)

        if do_plot:

            if rec_name != target:  # Loop to find the target without calculating DER.
                continue
            notebook.width = 40
            plt.rcParams['figure.figsize'] = (notebook.width, 5)
            # plot reference
            plt.subplot(211)
            notebook.plot_annotation(true_annotation, legend=True, time=False)
            plt.ylabel('Reference', fontsize=16)
            # plot hypothesis
            plt.subplot(212)
            notebook.plot_annotation(pred_annotation, legend=True, time=True)
            plt.ylabel('Hypothesis', fontsize=16)
            plt.show()

            # Plot mode only inspects the target conversation; no DER
            # statistics are printed or written.
            return

        der_rate = get_der(true_annotation, pred_annotation)
        der_rates.append([rec_name, der_rate])
        aver_der_rate += der_rate

    if not der_rates:
        # No conversation was scored (no matching file, or plot mode never
        # found the target): dividing by len(der_rates) would raise
        # ZeroDivisionError, so report and stop before touching the CSV.
        print('No DER computed for any conversation in %s' % emb_dir)
        return

    print(aver_der_rate / float(len(der_rates)))
    with open(path.join(save_dir, save_file), 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerows(der_rates)
Esempio n. 3
0
    def get_Plot(self, title=None, text=None, width=10, get_plt=False):
        """Draw the reference above the hypothesis in one two-row figure.

        Args:
            title: Optional name appended to the figure's super-title as
                'Diarization Performance on <title>'.
            text: Optional extra text placed on a new line under the
                'Hypothesis' label.
            width: Value assigned to ``notebook.width`` and used as the
                figure width; the figure height is fixed at 10.
            get_plt: When true, return the ``plt`` module instead of
                calling ``plt.show()``.

        Returns:
            ``plt`` when ``get_plt`` is true, otherwise ``None`` after the
            figure has been shown.
        """
        notebook.width = width
        plt.figure(figsize=(notebook.width, 10))

        if title is None:
            heading = 'Diarization Performance'
        else:
            heading = 'Diarization Performance on %s' % title
        plt.suptitle(heading)

        # Upper row: reference annotation with an opaque white label.
        plt.subplot(211)
        notebook.plot_annotation(self.reference, legend=True, time=True)
        plt.gca().text(
            4, 0.15, 'Reference',
            fontsize=10,
            bbox={'edgecolor': 'None', 'facecolor': 'white', 'alpha': 1})

        # Lower row: hypothesis annotation, optionally with extra text
        # on a second line of the label.
        extra = '' if text is None else '\n' + text
        plt.subplot(212)
        notebook.plot_annotation(self.hypothesis, legend=True, time=True)
        plt.gca().text(
            4, 0.1, 'Hypothesis' + extra,
            fontsize=10,
            bbox={'edgecolor': 'None', 'facecolor': 'white', 'alpha': 1})

        plt.subplots_adjust(hspace=0.4)

        if get_plt:
            return plt
        plt.show()
Esempio n. 4
0
    spdr_metrics1.get_DiarizationCoveragePurityFScore(detailed=False))
# Score summary for spdr_metrics2: DER, purity, coverage and purity/coverage
# F-score, each formatted to four decimals. The leading newline lets the
# summary be appended below a plot label (as done with text1 further down).
text2 = "\nDER: %.4f | Purity: %.4f | Coverage: %.4f | PC-F-Score: %.4f" % (
    spdr_metrics2.get_DiarizationErrorRate(detailed=False),
    spdr_metrics2.get_DiarizationPurity(detailed=False),
    spdr_metrics2.get_DiarizationCoverage(detailed=False),
    spdr_metrics2.get_DiarizationCoveragePurityFScore(detailed=False))
# Same summary for spdr_metrics3.
text3 = "\nDER: %.4f | Purity: %.4f | Coverage: %.4f | PC-F-Score: %.4f" % (
    spdr_metrics3.get_DiarizationErrorRate(detailed=False),
    spdr_metrics3.get_DiarizationPurity(detailed=False),
    spdr_metrics3.get_DiarizationCoverage(detailed=False),
    spdr_metrics3.get_DiarizationCoveragePurityFScore(detailed=False))

# Plot the reference alone in a single-axes figure and save it to disk.
# The figure size is set through rcParams before the first subplot call.
plt.rcParams['figure.figsize'] = (10, 3)
plt.subplot(111)
notebook.plot_annotation(reference, legend=True, time=True)
plt.gca().text(0.6, 0.15, 'Reference', fontsize=12)
plt.xlabel('')  # drop the x-axis label
plt.savefig("./data/plots/thesis_metric/reference.png")
plt.close()  # close the figure so the next plot starts from a fresh one

# Plot hypothesis 1: reference on the top row, hypothesis1 on the bottom row
# with the text1 score summary appended to its label.
# NOTE(review): no savefig/show for this figure is visible in this excerpt.
plt.rcParams['figure.figsize'] = (10, 7)
plt.subplot(211)
notebook.plot_annotation(reference, legend=True, time=True)
plt.gca().text(0.6, 0.15, 'Reference', fontsize=12)
plt.xlabel('')
plt.subplot(212)
notebook.plot_annotation(hypothesis1, legend=True, time=True)
plt.gca().text(0.6, 0.15, 'Hypothesis perfect' + text1, fontsize=12)
plt.xlabel('')
Esempio n. 5
0
precisionrecall = Annotation()

# Timeline holding a single segment from 0 to 20, handed to SPDR_Metrics
# together with the reference and the first hypothesis.
uem = Timeline([Segment(0.0, 20.0)])
spdr_metrics = SPDR_Metrics(reference, hypothesis1, uem)

# One-line score summary (four decimals each); the leading newline places it
# under whatever label it is appended to.
text1 = "\nDER: %.4f | Purity: %.4f | Coverage: %.4f | PC-F-Score: %.4f" % (
    spdr_metrics.get_DiarizationErrorRate(detailed=False),
    spdr_metrics.get_DiarizationPurity(detailed=False),
    spdr_metrics.get_DiarizationCoverage(detailed=False),
    spdr_metrics.get_DiarizationCoveragePurityFScore(detailed=False),
)

# First of three rows: the reference annotation, labelled on an opaque
# white box, with one x tick per unit from 0 to 20 and a grid.
plt.rcParams['figure.figsize'] = (10, 9)
plt.subplot(311)
notebook.plot_annotation(reference, legend=True, time=True)
plt.gca().text(
    0.6, 0.15, 'Reference',
    fontsize=12,
    bbox={'edgecolor': 'None', 'facecolor': 'white', 'alpha': 1})
plt.xticks(np.linspace(0, 20, 21))
plt.xlabel('')
plt.grid()
# plot hypothesis
plt.subplot(312)
notebook.plot_annotation(hypothesis1, legend=True, time=True)
plt.gca().text(0.6,
               0.15,
               'Hypothesis' + text1,
            ax1.plot(
                [0, duration - 1.4],
                [hyper_parameters["onset"], hyper_parameters["onset"]],
                "k--",
            )
            ax1.text(0.1, hyper_parameters["onset"] + 0.04, "onset")
            ax1.plot(
                [1.4, len(scores)],
                [hyper_parameters["offset"], hyper_parameters["offset"]],
                "k--",
            )
            ax1.text(
                min(duration, 60) - 1.1, hyper_parameters["offset"] + 0.04,
                "offset")
            ax1.set_ylim(-0.1, 1.1)
            notebook.plot_annotation(output, ax=ax2, time=True, legend=True)

        else:
            fig, ax = plt.subplots(nrows=1, ncols=1)
            fig.set_figwidth(12)
            fig.set_figheight(2.0)
            notebook.plot_annotation(output, ax=ax, time=True, legend=True)

        plt.tight_layout()
        st.pyplot(fig=fig, clear_figure=True)
        plt.close(fig)

    with io.StringIO() as fp:
        output.write_rttm(fp)
        content = fp.getvalue()