Example #1 — compute_mma_results
def compute_mma_results(jam_file, annotators, trim, bins=250, gt=False):
    """Compute the Mean Measure Agreement for all the algorithms of the given
    file jam_file.

    Parameters
    ----------
    jam_file: str
        Jam file containing all the annotations for a given track.
    annotators: dict
        Dictionary containing the names and e-mail addresses of the 5
        different annotators.
    trim: boolean
        Whether to trim the first and last boundary or not.
    bins: int
        The number of bins to compute for the Information Gain.
    gt: boolean
        Whether to include the first annotator key (the ground truth) in
        the pairwise comparisons.

    Returns
    -------
    results_mma: np.array
        All the results for all the different comparisons between algorithms.
        In order to obtain the average, simply take the mean across axis=0.
    """
    context = "large_scale"
    results_mma = []
    # list() makes the slice below work on both Python 2 lists and
    # Python 3 dict views.
    keys = list(annotators.keys())
    if not gt:
        # Skip the first key: it is assumed to be the ground truth.
        keys = keys[1:]
    for names in itertools.combinations(keys, 2):
        # Read estimated times from both annotators of this pair
        if names[0] == "GT":
            # The ground truth is stored under a dataset-specific context.
            ds_name = os.path.basename(jam_file).split("_")[0]
            ann_context = MSAF.prefix_dict[ds_name]
            est_inters1, est_labels1 = jams2.converters.load_jams_range(
                jam_file, "sections", annotator=0, context=ann_context)
        else:
            est_inters1, est_labels1 = jams2.converters.load_jams_range(
                jam_file, "sections", annotator_name=names[0], context=context)
        est_inters2, est_labels2 = jams2.converters.load_jams_range(
            jam_file, "sections", annotator_name=names[1], context=context)
        # Skip pairs with a missing annotation.  len() avoids the ambiguous
        # truth value `== []` would raise if arrays were returned, and is
        # consistent with the checks in compute_mgp.
        if len(est_inters1) == 0 or len(est_inters2) == 0:
            continue

        # Compute results for this pair of annotations
        results = EV.compute_results(est_inters1, est_inters2, trim, bins,
                                     jam_file)
        results_mma.append(results)

    return np.asarray(results_mma)
def compute_mma_results(jam_file, annotators, trim, bins=250, gt=False):
    """Compute the Mean Measure Agreement for all the algorithms of the given
    file jam_file.

    Parameters
    ----------
    jam_file: str
        Jam file containing all the annotations for a given track.
    annotators: dict
        Dictionary containing the names and e-mail addresses of the 5
        different annotators.
    trim: boolean
        Whether to trim the first and last boundary or not.
    bins: int
        The number of bins to compute for the Information Gain.
    gt: boolean
        Whether to include the first annotator key (the ground truth) in
        the pairwise comparisons.

    Returns
    -------
    results_mma: np.array
        All the results for all the different comparisons between algorithms.
        In order to obtain the average, simply take the mean across axis=0.
    """
    context = "large_scale"
    results_mma = []
    # list() makes the slice below work on both Python 2 lists and
    # Python 3 dict views.
    keys = list(annotators.keys())
    if not gt:
        # Skip the first key: it is assumed to be the ground truth.
        keys = keys[1:]
    for names in itertools.combinations(keys, 2):
        # Read estimated times from both annotators of this pair
        if names[0] == "GT":
            # The ground truth is stored under a dataset-specific context.
            ds_name = os.path.basename(jam_file).split("_")[0]
            ann_context = MSAF.prefix_dict[ds_name]
            est_inters1, est_labels1 = jams2.converters.load_jams_range(
                jam_file, "sections", annotator=0, context=ann_context)
        else:
            est_inters1, est_labels1 = jams2.converters.load_jams_range(
                jam_file, "sections", annotator_name=names[0], context=context)
        est_inters2, est_labels2 = jams2.converters.load_jams_range(
            jam_file, "sections", annotator_name=names[1], context=context)
        # Skip pairs with a missing annotation.  len() avoids the ambiguous
        # truth value `== []` would raise if arrays were returned, and is
        # consistent with the checks in compute_mgp.
        if len(est_inters1) == 0 or len(est_inters2) == 0:
            continue

        # Compute results for this pair of annotations
        results = EV.compute_results(est_inters1, est_inters2, trim, bins,
                                     jam_file)
        results_mma.append(results)

    return np.asarray(results_mma)
Example #3 — compute_mgp
def compute_mgp(jams_files, annotators, trim):
    """Computes the Mean Ground-truth Performance of the experiment
        results.

    For all the jams files of the experiment, it compares the results with
    the ground truth (also contained in the jam file), and computes the
    Mean Ground-truth Performance across them.

    It also reads the MGP of the machines, and plots them along with the
    humans results.

    Parameters
    ----------
    jams_files: list
        List containing all the file paths to the experiment files.
    annotators: dict
        Dictionary containing the names and e-mail addresses of the 5
        different annotators.
    trim: boolean
        Whether to trim the first and last boundaries.

    Returns
    -------
    mgp_results: np.array
        Array containing the results as in the eval module of MSAF for the
        humans performance.
    """
    mgp_results = np.empty((0, 9))
    est_context = "large_scale"
    bins = 250
    # Hoist the key list: it is indexed repeatedly below, and list() keeps
    # this working on both Python 2 and Python 3 dict views.  The first
    # key is assumed to be the ground truth annotator.
    names = list(annotators.keys())
    for i in range(1, len(names)):
        FPR = np.empty((0, 9))
        for jam_file in jams_files:
            # The ground truth is stored under a dataset-specific context.
            ds_name = os.path.basename(jam_file).split("_")[0]
            ann_context = MSAF.prefix_dict[ds_name]
            ann_times, ann_labels = jams2.converters.load_jams_range(
                jam_file, "sections", annotator_name=names[0],
                context=ann_context)
            try:
                est_times, est_labels = jams2.converters.load_jams_range(
                    jam_file, "sections", annotator_name=names[i],
                    context=est_context)
            except Exception:
                # Narrowed from a bare except: skip unreadable annotations
                # without swallowing SystemExit/KeyboardInterrupt.
                logging.warning("Couldn't read annotator %d in JAMS %s",
                                i, jam_file)
                continue
            if len(ann_times) == 0:
                logging.warning("No GT annotations in file %s", jam_file)
                continue
            if len(est_times) == 0:
                logging.warning("No annotation in file %s for annotator %s.",
                                jam_file, names[i])
                continue

            ann_times = np.asarray(ann_times)
            est_times = np.asarray(est_times)
            results = EV.compute_results(ann_times, est_times, trim, bins,
                                         jam_file)
            FPR = np.vstack((FPR, tuple(results)))

        if i == 1:
            # First annotator: seed the accumulated results.
            mgp_results = np.vstack((mgp_results, FPR))
        else:
            # A running mean is only valid if both stacks have equal shapes.
            if np.asarray([mgp_results, FPR]).ndim != 3:
                logging.warning("Ndim is not valid %d" %
                                np.asarray([mgp_results, FPR]).ndim)
                # Log (instead of Py2-only print statements) the mismatched
                # lengths to aid debugging.
                logging.warning("Lengths: %d vs %d", len(mgp_results),
                                len(FPR))
                continue
            mgp_results = np.mean([mgp_results, FPR], axis=0)

        FPR = np.mean(FPR, axis=0)
        logging.info("Results for %s:\n\tF3: %.4f, P3: %.4f, R3: %.4f\n"
                     "\tF05: %.4f, P05: %.4f, R05: %.4f" % (
                         names[i], FPR[2], FPR[0], FPR[1], FPR[5],
                         FPR[3], FPR[4]))
    return mgp_results
def compute_mgp(jams_files, annotators, trim):
    """Computes the Mean Ground-truth Performance of the experiment
        results.

    For all the jams files of the experiment, it compares the results with
    the ground truth (also contained in the jam file), and computes the
    Mean Ground-truth Performance across them.

    It also reads the MGP of the machines, and plots them along with the
    humans results.

    Parameters
    ----------
    jams_files: list
        List containing all the file paths to the experiment files.
    annotators: dict
        Dictionary containing the names and e-mail addresses of the 5
        different annotators.
    trim: boolean
        Whether to trim the first and last boundaries.

    Returns
    -------
    mgp_results: np.array
        Array containing the results as in the eval module of MSAF for the
        humans performance.
    """
    mgp_results = np.empty((0, 9))
    est_context = "large_scale"
    bins = 250
    # Hoist the key list: it is indexed repeatedly below, and list() keeps
    # this working on both Python 2 and Python 3 dict views.  The first
    # key is assumed to be the ground truth annotator.
    names = list(annotators.keys())
    for i in range(1, len(names)):
        FPR = np.empty((0, 9))
        for jam_file in jams_files:
            # The ground truth is stored under a dataset-specific context.
            ds_name = os.path.basename(jam_file).split("_")[0]
            ann_context = MSAF.prefix_dict[ds_name]
            ann_times, ann_labels = jams2.converters.load_jams_range(
                jam_file, "sections", annotator_name=names[0],
                context=ann_context)
            try:
                est_times, est_labels = jams2.converters.load_jams_range(
                    jam_file, "sections", annotator_name=names[i],
                    context=est_context)
            except Exception:
                # Narrowed from a bare except: skip unreadable annotations
                # without swallowing SystemExit/KeyboardInterrupt.
                logging.warning("Couldn't read annotator %d in JAMS %s",
                                i, jam_file)
                continue
            if len(ann_times) == 0:
                logging.warning("No GT annotations in file %s", jam_file)
                continue
            if len(est_times) == 0:
                logging.warning("No annotation in file %s for annotator %s.",
                                jam_file, names[i])
                continue

            ann_times = np.asarray(ann_times)
            est_times = np.asarray(est_times)
            results = EV.compute_results(ann_times, est_times, trim, bins,
                                         jam_file)
            FPR = np.vstack((FPR, tuple(results)))

        if i == 1:
            # First annotator: seed the accumulated results.
            mgp_results = np.vstack((mgp_results, FPR))
        else:
            # A running mean is only valid if both stacks have equal shapes.
            if np.asarray([mgp_results, FPR]).ndim != 3:
                logging.warning("Ndim is not valid %d" %
                                np.asarray([mgp_results, FPR]).ndim)
                # Log (instead of Py2-only print statements) the mismatched
                # lengths to aid debugging.
                logging.warning("Lengths: %d vs %d", len(mgp_results),
                                len(FPR))
                continue
            mgp_results = np.mean([mgp_results, FPR], axis=0)

        FPR = np.mean(FPR, axis=0)
        logging.info("Results for %s:\n\tF3: %.4f, P3: %.4f, R3: %.4f\n"
                     "\tF05: %.4f, P05: %.4f, R05: %.4f" % (
                         names[i], FPR[2], FPR[0], FPR[1], FPR[5],
                         FPR[3], FPR[4]))
    return mgp_results