def compute_information_gain(ann_inter, est_inter, est_file, bins):
    """Computes the information gain of the est_file from the annotated
    intervals and the estimated intervals.

    Parameters
    ----------
    ann_inter
        Annotated intervals, converted to times with
        `utils.intervals_to_times`.
    est_inter
        Estimated intervals, converted to times with
        `utils.intervals_to_times`.
    est_file
        Identifier of the estimation file; only used in the warning message.
    bins
        Forwarded as the `bins` keyword of `mir_eval.beat.information_gain`.

    Returns
    -------
    D
        The value returned by `mir_eval.beat.information_gain`, or 0 if that
        call raised an exception.
    """
    ann_times = utils.intervals_to_times(ann_inter)
    est_times = utils.intervals_to_times(est_inter)
    try:
        D = mir_eval.beat.information_gain(ann_times, est_times, bins=bins)
    except Exception:
        # Best effort: a failing metric must not abort the evaluation, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        logging.warning(
            "Couldn't compute the Information Gain for file %s", est_file)
        D = 0
    return D
def get_all_est_boundaries(est_file, annot_beats, algo_ids=None,
                           annotator_id=0):
    """Gets the reference boundaries followed by the estimated boundaries of
    all the algorithms.

    Parameters
    ----------
    est_file: str
        Path to the estimated file (JSON file)
    annot_beats: bool
        Whether to use the annotated beats or not.
    algo_ids : list
        List of algorithm ids to to read boundaries from.
        If None, all algorithm ids are read.
    annotator_id : int
        Forwarded as the `annotator` argument when loading the reference.

    Returns
    -------
    all_boundaries: list
        Boundary times: the reference first, then one entry per algorithm
        that produced estimations (algorithms without estimations are
        skipped with a warning).
    """
    est_name = os.path.basename(est_file)

    # The reference shares the estimation's base name; only the extension is
    # swapped so that a "json" substring elsewhere in the name is untouched.
    root, ext = os.path.splitext(est_name)
    ref_name = root + ".jams" if ext == ".json" else est_name

    # os.path.join keeps the path relative when est_file has no directory
    # part (plain concatenation would yield "/../references/...").
    jam_file = os.path.join(os.path.dirname(est_file), "..", "references",
                            ref_name)
    ds_prefix = est_name.split("_")[0]

    # Get GT boundaries
    ann_inter, _ = jams2.converters.load_jams_range(
        jam_file, "sections", context=msaf.prefix_dict[ds_prefix],
        annotator=annotator_id)
    all_boundaries = [utils.intervals_to_times(ann_inter)]

    # Estimations
    if algo_ids is None:
        algo_ids = get_algo_ids(est_file)
    for algo_id in algo_ids:
        est_inters = read_estimations(est_file, algo_id, annot_beats,
                                      feature=msaf.feat_dict[algo_id])
        if len(est_inters) == 0:
            logging.warning("no estimations for algorithm: %s", algo_id)
            continue
        all_boundaries.append(utils.intervals_to_times(est_inters))
    return all_boundaries
def read_references(audio_path, annotator_id=0):
    """Reads the boundary times and the labels.

    Parameters
    ----------
    audio_path : str
        Path to the audio file
    annotator_id : int
        Index into the annotations matching the 'segment_.*' namespace.

    Returns
    -------
    ref_times : list
        List of boundary times
    ref_labels : list
        List of labels

    Raises
    ------
    IOError: presumably raised by `jams.load` when the reference file derived
        from `audio_path` cannot be read -- TODO confirm.
    """
    # Dataset path: two directory levels above the audio file
    ds_path = os.path.dirname(os.path.dirname(audio_path))

    # Strip the audio extension whatever its length (".wav", ".flac", ...)
    # instead of assuming it is exactly four characters long.
    track_name = os.path.splitext(os.path.basename(audio_path))[0]

    # Read references
    jam_path = os.path.join(ds_path, ds_config.references_dir,
                            track_name + ds_config.references_ext)
    jam = jams.load(jam_path, validate=False)
    ann = jam.search(namespace='segment_.*')[annotator_id]
    ref_inters, ref_labels = ann.data.to_interval_values()

    # Intervals to times
    ref_times = utils.intervals_to_times(ref_inters)

    return ref_times, ref_labels
def read_references(audio_path, annotator_id=0):
    """Reads the boundary times and the labels.

    Parameters
    ----------
    audio_path : str
        Path to the audio file
    annotator_id : int
        Index into the annotations matching the 'segment_.*' namespace.

    Returns
    -------
    ref_times : list
        List of boundary times
    ref_labels : list
        List of labels

    Raises
    ------
    IOError: presumably raised by `jams.load` when the reference file derived
        from `audio_path` cannot be read -- TODO confirm.
    """
    # Dataset path: two directory levels above the audio file
    ds_path = os.path.dirname(os.path.dirname(audio_path))

    # Strip the audio extension whatever its length (".wav", ".flac", ...)
    # instead of assuming it is exactly four characters long.
    track_name = os.path.splitext(os.path.basename(audio_path))[0]

    # Read references
    jam_path = os.path.join(ds_path, ds_config.references_dir,
                            track_name + ds_config.references_ext)
    jam = jams.load(jam_path, validate=False)
    ann = jam.search(namespace='segment_.*')[annotator_id]
    ref_inters, ref_labels = ann.to_interval_values()

    # Intervals to times
    ref_times = utils.intervals_to_times(ref_inters)

    return ref_times, ref_labels
def read_references(audio_path):
    """Reads the boundary times and the labels.

    Parameters
    ----------
    audio_path : str
        Path to the audio file

    Returns
    -------
    ref_times : list
        List of boundary times
    ref_labels : list
        List of labels

    Notes
    -----
    When the reference cannot be loaded a warning is logged and a single
    empty list is returned instead of the `(ref_times, ref_labels)` pair;
    callers have to check for that before unpacking.
    """
    # Dataset path: two directory levels above the audio file
    ds_path = os.path.dirname(os.path.dirname(audio_path))
    audio_name = os.path.basename(audio_path)

    # Strip the audio extension whatever its length (".wav", ".flac", ...)
    # instead of assuming it is exactly four characters long.
    track_name = os.path.splitext(audio_name)[0]

    # Read references
    jam_path = os.path.join(ds_path, msaf.Dataset.references_dir,
                            track_name + msaf.Dataset.references_ext)
    ds_prefix = audio_name.split("_")[0]
    try:
        ref_inters, ref_labels = jams2.converters.load_jams_range(
            jam_path, "sections", context=msaf.prefix_dict[ds_prefix])
    except Exception:
        # Best effort, but KeyboardInterrupt / SystemExit now propagate.
        logging.warning("Reference not found in %s", jam_path)
        return []

    # Intervals to times
    ref_times = utils.intervals_to_times(ref_inters)

    return ref_times, ref_labels
def get_all_est_boundaries(est_file, annot_beats, algo_ids=None,
                           annotator_id=0):
    """Collects the reference boundaries plus the boundaries estimated by
    each algorithm.

    Parameters
    ----------
    est_file: str
        Path to the estimated JAMS file.
    annot_beats: bool
        Whether to use the annotated beats or not.
    algo_ids : list
        Algorithm ids to read boundaries from. If None, the ids are obtained
        from `get_algo_ids(est_file)`.
    annotator_id : int
        Index into the reference annotations matching 'segment_.*'.

    Returns
    -------
    all_boundaries: list
        Boundary times: the reference first, then one entry per algorithm
        that has estimations.
    """
    # The reference file has the same name as the estimation file and lives
    # in the references folder next to the estimations folder.
    reference_path = os.path.join(os.path.dirname(est_file), "..",
                                  ds_config.references_dir,
                                  os.path.basename(est_file))
    reference = jams.load(reference_path, validate=False)
    gt_annotation = reference.search(namespace='segment_.*')[annotator_id]
    gt_inters, _ = gt_annotation.data.to_interval_values()
    collected = [utils.intervals_to_times(gt_inters)]

    ids_to_read = get_algo_ids(est_file) if algo_ids is None else algo_ids
    for algo_id in ids_to_read:
        inters, _ = read_estimations(
            est_file, algo_id, annot_beats, feature=msaf.feat_dict[algo_id])
        if len(inters) != 0:
            collected.append(utils.intervals_to_times(inters))
        else:
            logging.warning("no estimations for algorithm: %s" % algo_id)
    return collected
def plot_one_track(plot_name, file_struct, est_times, est_labels,
                   boundaries_id, labels_id, ds_prefix, title=None):
    """Plots the results of one track, with ground truth if it exists.

    Parameters
    ----------
    plot_name
        Forwarded as the last argument of `_plot_formatting`.
    file_struct
        Object exposing `ref_file` (reference annotations) and `audio_file`.
    est_times
        Estimated boundary times.
    est_labels
        Estimated labels.
    boundaries_id : str
        Identifier of the boundaries algorithm, used in the legend.
    labels_id : str or None
        Identifier of the labels algorithm. If None, no label spans are
        drawn.
    ds_prefix : str
        Key into `msaf.prefix_dict` selecting the annotation context;
        "function" is used when the key is unknown.
    title
        Forwarded as the first argument of `_plot_formatting`.
    """
    # Get context
    if ds_prefix in msaf.prefix_dict:
        context = msaf.prefix_dict[ds_prefix]
    else:
        context = "function"

    # Set up the boundaries id
    bid_lid = boundaries_id
    if labels_id is not None:
        bid_lid += " + " + labels_id

    try:
        # Read file
        ref_inter, ref_labels = jams2.converters.load_jams_range(
            file_struct.ref_file, "sections", annotator=0, context=context)

        # To times
        ref_times = utils.intervals_to_times(ref_inter)
    except Exception:
        # Best effort: plot the estimation alone when the reference cannot
        # be read. KeyboardInterrupt / SystemExit are not swallowed.
        logging.warning("No references found in %s. Not plotting groundtruth",
                        file_struct.ref_file)
        all_boundaries = [est_times]
        all_labels = [est_labels]
        algo_ids = [bid_lid]
    else:
        all_boundaries = [ref_times, est_times]
        all_labels = [ref_labels, est_labels]
        algo_ids = ["GT", bid_lid]

    N = len(all_boundaries)

    # Index the labels to normalize them
    all_labels = [mir_eval.util.index_labels(labels)[0]
                  for labels in all_labels]

    # Get color map
    cm = plt.get_cmap('gist_rainbow')
    # When every segment shares one label the largest index is 0; fall back
    # to 1 so the color normalization below does not divide by zero.
    max_label = max(max(labels) for labels in all_labels) or 1

    figsize = (8, 4)
    plt.figure(1, figsize=figsize, dpi=120, facecolor='w', edgecolor='k')
    for i, boundaries in enumerate(all_boundaries):
        # Each boundary set gets its own horizontal band of the figure
        ymin = i / float(N)
        ymax = (i + 1) / float(N)
        color = "g" if i == 0 else "b"
        for b in boundaries:
            plt.axvline(b, ymin, ymax, color=color)
        if labels_id is not None:
            labels = all_labels[i]
            inters = utils.times_to_intervals(boundaries)
            for label, inter in zip(labels, inters):
                plt.axvspan(inter[0], inter[1], ymin=ymin, ymax=ymax,
                            alpha=0.6, color=cm(label / float(max_label)))
        plt.axhline(ymin, color="k", linewidth=1)

    # Format plot
    _plot_formatting(title, os.path.basename(file_struct.audio_file),
                     algo_ids, all_boundaries[0][-1], N, plot_name)
def get_all_est_boundaries(est_file, annot_beats, algo_ids=None,
                           annotator_id=0):
    """Collects the reference boundaries plus the boundaries estimated by
    each algorithm.

    Parameters
    ----------
    est_file: str
        Path to the estimated JAMS file.
    annot_beats: bool
        Whether to use the annotated beats or not.
    algo_ids : list
        Algorithm ids to read boundaries from. If None, the ids are obtained
        from `get_algo_ids(est_file)`.
    annotator_id : int
        Index into the reference annotations matching 'segment_.*'.

    Returns
    -------
    all_boundaries: list
        Boundary times: the reference first, then one entry per algorithm
        that has estimations.
    """
    # The reference file has the same name as the estimation file and lives
    # in the references folder next to the estimations folder.
    reference_path = os.path.join(os.path.dirname(est_file), "..",
                                  msaf.Dataset.references_dir,
                                  os.path.basename(est_file))
    reference = jams.load(reference_path)
    gt_annotation = reference.search(namespace='segment_.*')[annotator_id]
    gt_inters, _ = gt_annotation.data.to_interval_values()
    collected = [utils.intervals_to_times(gt_inters)]

    ids_to_read = get_algo_ids(est_file) if algo_ids is None else algo_ids
    for algo_id in ids_to_read:
        inters, _ = read_estimations(
            est_file, algo_id, annot_beats, feature=msaf.feat_dict[algo_id])
        if len(inters) != 0:
            collected.append(utils.intervals_to_times(inters))
        else:
            logging.warning("no estimations for algorithm: %s" % algo_id)
    return collected
def plot_one_track(file_struct, est_times, est_labels, boundaries_id,
                   labels_id, title=None):
    """Plots the results of one track, with ground truth if it exists.

    Parameters
    ----------
    file_struct
        Object exposing `ref_file` (reference JAMS file) and `audio_file`.
    est_times
        Estimated boundary times.
    est_labels
        Estimated labels.
    boundaries_id : str
        Identifier of the boundaries algorithm, used in the legend.
    labels_id : str or None
        Identifier of the labels algorithm. If None, no label spans are
        drawn.
    title
        Forwarded as the first argument of `_plot_formatting`.
    """
    import matplotlib.pyplot as plt

    # Set up the boundaries id
    bid_lid = boundaries_id
    if labels_id is not None:
        bid_lid += " + " + labels_id

    try:
        # Read file
        jam = jams.load(file_struct.ref_file)
        ann = jam.search(namespace='segment_.*')[0]
        ref_inters, ref_labels = ann.to_interval_values()

        # To times
        ref_times = utils.intervals_to_times(ref_inters)
    except Exception:
        # Best effort: plot the estimation alone when the reference cannot
        # be read. KeyboardInterrupt / SystemExit are not swallowed.
        logging.warning("No references found in %s. Not plotting groundtruth",
                        file_struct.ref_file)
        all_boundaries = [est_times]
        all_labels = [est_labels]
        algo_ids = [bid_lid]
    else:
        all_boundaries = [ref_times, est_times]
        all_labels = [ref_labels, est_labels]
        algo_ids = ["GT", bid_lid]

    N = len(all_boundaries)

    # Index the labels to normalize them
    all_labels = [mir_eval.util.index_labels(labels)[0]
                  for labels in all_labels]

    # Get color map
    cm = plt.get_cmap('gist_rainbow')
    # When every segment shares one label the largest index is 0; fall back
    # to 1 so the color normalization below does not divide by zero.
    max_label = max(max(labels) for labels in all_labels) or 1

    figsize = (8, 4)
    plt.figure(1, figsize=figsize, dpi=120, facecolor='w', edgecolor='k')
    for i, boundaries in enumerate(all_boundaries):
        # Each boundary set gets its own horizontal band of the figure
        ymin = i / float(N)
        ymax = (i + 1) / float(N)
        color = "g" if i == 0 else "b"
        for b in boundaries:
            plt.axvline(b, ymin, ymax, color=color)
        if labels_id is not None:
            labels = all_labels[i]
            inters = utils.times_to_intervals(boundaries)
            for label, inter in zip(labels, inters):
                plt.axvspan(inter[0], inter[1], ymin=ymin, ymax=ymax,
                            alpha=0.6, color=cm(label / float(max_label)))
        plt.axhline(ymin, color="k", linewidth=1)

    # Format plot
    _plot_formatting(title, os.path.basename(file_struct.audio_file),
                     algo_ids, all_boundaries[0][-1], N, None)
def plot_one_track(file_struct, est_times, est_labels, boundaries_id,
                   labels_id, title=None, output_file=None):
    """Plots the results of one track, with ground truth if it exists.

    Parameters
    ----------
    file_struct
        Object exposing `ref_file` (reference JAMS file) and `audio_file`.
    est_times
        Estimated boundary times.
    est_labels
        Estimated labels.
    boundaries_id : str
        Identifier of the boundaries algorithm, used in the legend.
    labels_id : str or None
        Identifier of the labels algorithm. If None, no label spans are
        drawn.
    title
        Forwarded as the first argument of `_plot_formatting`.
    output_file
        Forwarded as the last argument of `_plot_formatting`.
    """
    import matplotlib.pyplot as plt

    # Set up the boundaries id
    bid_lid = boundaries_id
    if labels_id is not None:
        bid_lid += " + " + labels_id

    try:
        # Read file
        jam = jams.load(file_struct.ref_file)
        ann = jam.search(namespace='segment_.*')[0]
        ref_inters, ref_labels = ann.to_interval_values()

        # To times
        ref_times = utils.intervals_to_times(ref_inters)
    except Exception:
        # Best effort: plot the estimation alone when the reference cannot
        # be read. KeyboardInterrupt / SystemExit are not swallowed.
        logging.warning("No references found in %s. Not plotting groundtruth",
                        file_struct.ref_file)
        all_boundaries = [est_times]
        all_labels = [est_labels]
        algo_ids = [bid_lid]
    else:
        all_boundaries = [ref_times, est_times]
        all_labels = [ref_labels, est_labels]
        algo_ids = ["GT", bid_lid]

    N = len(all_boundaries)

    # Index the labels to normalize them
    all_labels = [mir_eval.util.index_labels(labels)[0]
                  for labels in all_labels]

    # Get color map
    cm = plt.get_cmap('gist_rainbow')
    # When every segment shares one label the largest index is 0; fall back
    # to 1 so the color normalization below does not divide by zero.
    max_label = max(max(labels) for labels in all_labels) or 1

    figsize = (8, 4)
    plt.figure(1, figsize=figsize, dpi=120, facecolor='w', edgecolor='k')
    for i, boundaries in enumerate(all_boundaries):
        # Each boundary set gets its own horizontal band of the figure
        ymin = i / float(N)
        ymax = (i + 1) / float(N)
        color = "g" if i == 0 else "b"
        for b in boundaries:
            plt.axvline(b, ymin, ymax, color=color)
        if labels_id is not None:
            labels = all_labels[i]
            inters = utils.times_to_intervals(boundaries)
            for label, inter in zip(labels, inters):
                plt.axvspan(inter[0], inter[1], ymin=ymin, ymax=ymax,
                            alpha=0.6, color=cm(label / float(max_label)))
        plt.axhline(ymin, color="k", linewidth=1)

    # Format plot
    _plot_formatting(title, os.path.basename(file_struct.audio_file),
                     algo_ids, all_boundaries[0][-1], N, output_file)
def get_all_est_labels(est_file, annot_beats, algo_ids=None, annotator_id=0):
    """Gets the reference labels followed by the estimated labels of all the
    algorithms.

    Parameters
    ----------
    est_file: str
        Path to the estimated file (JSON file)
    annot_beats: bool
        Whether to use the annotated beats or not.
    algo_ids : list
        List of algorithm ids to to read boundaries from.
        If None, all algorithm ids are read.
    annotator_id : int
        Identifier of the annotator.

    Returns
    -------
    gt_times: np.array
        Ground Truth boundaries in times.
    all_labels: list
        Labels: the reference first, then one entry per algorithm that
        produced estimations (algorithms without estimations are skipped
        with a warning).
    """
    est_name = os.path.basename(est_file)

    # The reference shares the estimation's base name; only the extension is
    # swapped so that a "json" substring elsewhere in the name is untouched.
    root, ext = os.path.splitext(est_name)
    ref_name = root + ".jams" if ext == ".json" else est_name

    # os.path.join keeps the path relative when est_file has no directory
    # part (plain concatenation would yield a path starting with "/../").
    jam_file = os.path.join(os.path.dirname(est_file), "..",
                            msaf.Dataset.references_dir, ref_name)
    ds_prefix = est_name.split("_")[0]

    # Get GT boundaries and labels
    ann_inter, ann_labels = jams2.converters.load_jams_range(
        jam_file, "sections", context=msaf.prefix_dict[ds_prefix],
        annotator=annotator_id)
    gt_times = utils.intervals_to_times(ann_inter)
    all_labels = [ann_labels]

    # Estimations
    if algo_ids is None:
        algo_ids = get_algo_ids(est_file)
    for algo_id in algo_ids:
        est_labels = read_estimations(est_file, algo_id, annot_beats,
                                      annot_bounds=True, bounds=False,
                                      feature=msaf.feat_dict[algo_id])
        if len(est_labels) == 0:
            logging.warning("no estimations for algorithm: %s", algo_id)
            continue
        all_labels.append(est_labels)
    return gt_times, all_labels
def get_all_est_labels(est_file, annot_beats, algo_ids=None, annotator_id=0):
    """Collects the reference labels plus the labels estimated by each
    algorithm.

    Parameters
    ----------
    est_file: str
        Path to the estimated file.
    annot_beats: bool
        Whether to use the annotated beats or not.
    algo_ids : list
        Algorithm ids to read labels from. If None, the ids are obtained
        from `get_algo_ids(est_file)`.
    annotator_id : int
        Identifier of the annotator.

    Returns
    -------
    gt_times: np.array
        Ground Truth boundaries in times.
    all_labels: list
        Labels: the reference first, then one entry per algorithm that has
        estimations.
    """
    # The reference file has the same name as the estimation file and lives
    # in the references folder next to the estimations folder.
    reference_path = os.path.join(os.path.dirname(est_file), "..",
                                  msaf.Dataset.references_dir,
                                  os.path.basename(est_file))
    reference = jams.load(reference_path, validate=False)
    gt_annotation = reference.search(namespace='segment_.*')[annotator_id]
    gt_inters, gt_labels = gt_annotation.data.to_interval_values()
    gt_times = utils.intervals_to_times(gt_inters)
    collected = [gt_labels]

    ids_to_read = get_algo_ids(est_file) if algo_ids is None else algo_ids
    for algo_id in ids_to_read:
        # Labels are estimated on top of the annotated boundaries
        _, labels = read_estimations(
            est_file, algo_id, annot_beats, annot_bounds=True, bounds=False,
            feature=msaf.feat_dict[algo_id])
        if len(labels) != 0:
            collected.append(labels)
        else:
            logging.warning("no estimations for algorithm: %s" % algo_id)
    return gt_times, collected
def read_hier_references(jams_file, annotation_id=0, exclude_levels=[]):
    """Reads hierarchical references from a jams file.

    Parameters
    ----------
    jams_file : str
        Path to the jams file.
    annotation_id : int
        Index of the annotation to read within each namespace.
    exclude_levels: list
        Namespaces to leave out. Empty list to include all levels.

    Returns
    -------
    hier_bounds : list
        List of the segment boundary times for each level.
    hier_labels : list
        List of the segment labels for each level.
    hier_levels : list
        List of strings for the level identifiers.
    """
    hier_bounds, hier_labels, hier_levels = [], [], []
    jam = jams.load(jams_file)

    # Candidate namespaces, in the order the levels are reported
    known_levels = ("segment_salami_upper", "segment_salami_function",
                    "segment_open", "segment_tut", "segment_salami_lower")
    excluded = list(exclude_levels)
    wanted_levels = [ns for ns in known_levels if ns not in excluded]

    # Build hierarchy references, skipping namespaces absent from the file
    for level in wanted_levels:
        matches = jam.search(namespace=level)
        if matches:
            inters, labels = matches[annotation_id].data.to_interval_values()
            hier_bounds.append(utils.intervals_to_times(inters))
            hier_labels.append(labels)
            hier_levels.append(level)

    return hier_bounds, hier_labels, hier_levels
def read_hier_references(jams_file, annotation_id=0, exclude_levels=[]):
    """Loads the segment annotations of every hierarchy level of a jams file.

    Parameters
    ----------
    jams_file : str
        Path to the jams file.
    annotation_id : int
        Index of the annotation to read within each namespace.
    exclude_levels: list
        Namespaces to leave out. Empty list to include all levels.

    Returns
    -------
    hier_bounds : list
        List of the segment boundary times for each level.
    hier_labels : list
        List of the segment labels for each level.
    hier_levels : list
        List of strings for the level identifiers.
    """
    bounds_per_level = []
    labels_per_level = []
    level_names = []

    jam = jams.load(jams_file)

    # Keep only the namespaces that were not explicitly excluded
    skip = list(exclude_levels)
    levels = [
        ns for ns in [
            "segment_salami_upper",
            "segment_salami_function",
            "segment_open",
            "segment_tut",
            "segment_salami_lower",
        ]
        if ns not in skip
    ]

    for ns in levels:
        found = jam.search(namespace=ns)
        # Namespaces missing from this file contribute no level
        if not found:
            continue
        level_inters, level_labels = \
            found[annotation_id].data.to_interval_values()
        bounds_per_level.append(utils.intervals_to_times(level_inters))
        labels_per_level.append(level_labels)
        level_names.append(ns)

    return bounds_per_level, labels_per_level, level_names
def get_segments_in_level(level):
    """Gets the segments of a specific level.

    Reads `jams_file` and `annotation_id` from the enclosing scope.

    Parameters
    ----------
    level : str
        Identifier of the level within the jams file.

    Returns
    -------
    times : np.array
        Boundary times for the given level.
    labels : np.array
        Labels for the given level.
    """
    level_inters, level_labels = jams2.converters.load_jams_range(
        jams_file, "sections", context=level, annotator=annotation_id)
    level_times = utils.intervals_to_times(level_inters)
    return np.array(level_times), np.array(level_labels)
def get_segments_in_level(level):
    """Loads the boundary times and labels of one level.

    Reads `jams_file` and `annotation_id` from the enclosing scope.

    Parameters
    ----------
    level : str
        Identifier of the level within the jams file.

    Returns
    -------
    times : np.array
        Boundary times for the given level.
    labels : np.array
        Labels for the given level.
    """
    loaded = jams2.converters.load_jams_range(
        jams_file, "sections", annotator=annotation_id, context=level)
    segment_inters, segment_labels = loaded
    return (np.array(utils.intervals_to_times(segment_inters)),
            np.array(segment_labels))
def read_references(audio_path, annotator_id=0):
    """Reads the boundary times and the labels.

    Parameters
    ----------
    audio_path : str
        Path to the audio file
    annotator_id : int
        Forwarded as the `annotator` argument when loading the reference.

    Returns
    -------
    ref_times : list
        List of boundary times
    ref_labels : list
        List of labels

    Notes
    -----
    When the reference cannot be loaded a warning is logged and a single
    empty list is returned instead of the `(ref_times, ref_labels)` pair;
    callers have to check for that before unpacking.
    """
    # Dataset path: two directory levels above the audio file
    ds_path = os.path.dirname(os.path.dirname(audio_path))
    audio_name = os.path.basename(audio_path)

    # Strip the audio extension whatever its length (".wav", ".flac", ...)
    # instead of assuming it is exactly four characters long.
    track_name = os.path.splitext(audio_name)[0]

    # Read references
    jam_path = os.path.join(ds_path, msaf.Dataset.references_dir,
                            track_name + msaf.Dataset.references_ext)
    ds_prefix = audio_name.split("_")[0]

    # Get context, defaulting to "function" for unknown dataset prefixes
    if ds_prefix in msaf.prefix_dict:
        context = msaf.prefix_dict[ds_prefix]
    else:
        context = "function"

    try:
        ref_inters, ref_labels = jams2.converters.load_jams_range(
            jam_path, "sections", context=context, annotator=annotator_id)
    except Exception:
        # Best effort, but KeyboardInterrupt / SystemExit now propagate.
        logging.warning("Reference not found in %s", jam_path)
        return []

    # Intervals to times
    ref_times = utils.intervals_to_times(ref_inters)

    return ref_times, ref_labels
def read_references(audio_path, annotator_id=0):
    """Reads the boundary times and the labels.

    Parameters
    ----------
    audio_path : str
        Path to the audio file
    annotator_id : int
        Index into the annotations matching the "segment_.*" namespace.

    Returns
    -------
    ref_times : list
        List of boundary times
    ref_labels : list
        List of labels

    Notes
    -----
    When the reference cannot be loaded a warning is logged and a single
    empty list is returned instead of the `(ref_times, ref_labels)` pair;
    callers have to check for that before unpacking.
    """
    # Dataset path: two directory levels above the audio file
    ds_path = os.path.dirname(os.path.dirname(audio_path))

    # Strip the audio extension whatever its length (".wav", ".flac", ...)
    # instead of assuming it is exactly four characters long.
    track_name = os.path.splitext(os.path.basename(audio_path))[0]

    # Read references
    jam_path = os.path.join(
        ds_path, msaf.Dataset.references_dir,
        track_name + msaf.Dataset.references_ext
    )

    try:
        jam = jams.load(jam_path, validate=False)
        ann = jam.search(namespace="segment_.*")[annotator_id]
        ref_inters, ref_labels = ann.data.to_interval_values()
    except Exception:
        # Best effort, but KeyboardInterrupt / SystemExit now propagate.
        # TODO: better exception handling
        logging.warning("Reference not found in %s", jam_path)
        return []

    # Intervals to times
    ref_times = utils.intervals_to_times(ref_inters)

    return ref_times, ref_labels
def read_references(audio_path, annotator_id=0):
    """Reads the boundary times and the labels.

    Parameters
    ----------
    audio_path : str
        Path to the audio file
    annotator_id : int
        Index into the annotations matching the 'segment_.*' namespace.

    Returns
    -------
    ref_times : list
        List of boundary times
    ref_labels : list
        List of labels

    Notes
    -----
    When the reference cannot be loaded a warning is logged and a single
    empty list is returned instead of the `(ref_times, ref_labels)` pair;
    callers have to check for that before unpacking.
    """
    # Dataset path: two directory levels above the audio file
    ds_path = os.path.dirname(os.path.dirname(audio_path))

    # Strip the audio extension whatever its length (".wav", ".flac", ...)
    # instead of assuming it is exactly four characters long.
    track_name = os.path.splitext(os.path.basename(audio_path))[0]

    # Read references
    jam_path = os.path.join(ds_path, ds_config.references_dir,
                            track_name + ds_config.references_ext)

    try:
        jam = jams.load(jam_path, validate=False)
        ann = jam.search(namespace='segment_.*')[annotator_id]
        ref_inters, ref_labels = ann.data.to_interval_values()
    except Exception:
        # Best effort, but KeyboardInterrupt / SystemExit now propagate.
        # TODO: better exception handling
        logging.warning("Reference not found in %s", jam_path)
        return []

    # Intervals to times
    ref_times = utils.intervals_to_times(ref_inters)

    return ref_times, ref_labels
def compute_information_gain(ann_inter, est_inter, est_file, bins):
    """Returns the information gain between annotated and estimated
    intervals.

    Both interval sets are converted to times with
    `utils.intervals_to_times` and handed to
    `mir_eval.beat.information_gain` together with `bins`. `est_file` is
    accepted but not used.
    """
    reference_times = utils.intervals_to_times(ann_inter)
    estimated_times = utils.intervals_to_times(est_inter)
    gain = mir_eval.beat.information_gain(
        reference_times, estimated_times, bins=bins)
    return gain