def use_in_bounds(audio_file, in_bound_times, beats, feats, config):
    """Seed the initial W and H matrices from the annotated boundaries.

    Parameters
    ----------
    audio_file: str
        Not used by this function; kept so the signature stays unchanged
        for existing callers.
    in_bound_times: array-like
        Annotated boundary times, aligned against `beats` with
        `io.align_times`.
    beats: array-like
        Reference times handed to `io.align_times`.
    feats: np.array
        Feature matrix. It is sliced along its first axis with the boundary
        indices, and its second axis becomes the first axis of `initW`.
    config: dict
        Parameter dictionary, updated in place with the keys "win", "rank",
        "initW" and "initH".

    Returns
    -------
    config: dict
        The same dictionary that was passed in, after the update.
    bound_idxs: array-like
        Boundary indices actually used to build the matrices.
    """
    bound_idxs = io.align_times(in_bound_times, beats)

    # Remove first and last boundaries (silent labels)
    if len(bound_idxs) >= 4:
        bound_idxs = bound_idxs[1:-1]

    n_segments = len(bound_idxs) - 1
    max_beats_segment = np.max(np.diff(bound_idxs))

    # Initialize the W and H matrices using the previously found bounds
    initW = np.zeros((feats.shape[1], n_segments, max_beats_segment))
    initH = np.zeros((n_segments, feats.shape[0]))

    # Walk over consecutive (start, end) boundary pairs; `range`/`zip` are
    # used instead of `xrange` so this also runs on Python 3.
    segments = zip(bound_idxs[:-1], bound_idxs[1:])
    for i, (start, end) in enumerate(segments):
        # Segments shorter than the longest one leave trailing zeros.
        initW[:, i, :end - start] = feats[start:end].T
        # Mark the frame at which segment i starts.
        initH[i, start] = 1

    # Update parameters
    config["win"] = max_beats_segment
    config["rank"] = n_segments
    config["initW"] = initW
    config["initH"] = initH

    return config, bound_idxs
def run_flat(file_struct, bounds_module, labels_module, frame_times, config,
             annotator_id):
    """Runs the flat algorithms with the specified identifiers on the
    audio_file. See run_algorithm for more information.

    Parameters
    ----------
    file_struct: object
        Passed as-is to the `Segmenter` constructors; its `audio_file`
        attribute is used to read the references.
    bounds_module: module or None
        Boundaries algorithm (must expose `Segmenter`). None means that the
        annotated (ground-truth) boundaries are read instead.
    labels_module: module or None
        Labels algorithm (must expose `Segmenter`). None means no labeling.
    frame_times: array-like
        Times of the analysis frames.
    config: dict
        Algorithm parameters; `config["features"]` must expose `.features`
        and `.dur`.
    annotator_id: int
        Annotator identifier used when reading the references.

    Returns
    -------
    tuple
        * `(est_times, est_labels)` when `bounds_module` is given.
        * `(est_times, est_labels, wfmcs)` when ground-truth boundaries are
          used. `wfmcs` is the third value returned by the labels
          algorithm, or None when no labeling was run.
        * `([], [])` when the references cannot be read.
    """
    # NOTE(review): the number of returned values depends on the path taken
    # (see Returns). It is kept as found because callers may rely on it --
    # confirm and unify if possible.

    # Get features to make code nicer
    features = config["features"].features

    # Only the labeling step below produces this value. Defining it up front
    # avoids an UnboundLocalError when ground-truth boundaries are used
    # without a labels algorithm.
    wfmcs = None

    # Segment using the specified boundaries and labels
    same_algorithm = (bounds_module is not None and
                      labels_module is not None and
                      bounds_module.__name__ == labels_module.__name__)
    if same_algorithm:
        # Case when boundaries and labels algorithms are the same
        S = bounds_module.Segmenter(file_struct, **config)
        est_idxs, est_labels = S.processFlat()
    else:
        # Different boundary and label algorithms
        # Identify segment boundaries
        if bounds_module is not None:
            S = bounds_module.Segmenter(file_struct, in_labels=[], **config)
            est_idxs, est_labels = S.processFlat()
        else:
            try:
                # Ground-truth boundaries
                est_times, est_labels = io.read_references(
                    file_struct.audio_file, annotator_id=annotator_id)
                est_idxs = io.align_times(est_times, frame_times)
                if est_idxs[0] != 0:
                    est_idxs = np.concatenate(([0], est_idxs))
            except IOError:
                logging.warning("No references found for file: %s" %
                                file_struct.audio_file)
                return [], []

        # Label segments
        if labels_module is not None:
            if len(est_idxs) == 2:
                # Two boundaries only - no need of clustering
                est_labels = np.array([0])
                # Dummy value so that the output keeps its usual form
                wfmcs = np.arange(2 * len(est_labels)).reshape(
                    len(est_labels), 2)
            else:
                S = labels_module.Segmenter(file_struct,
                                            in_bound_idxs=est_idxs,
                                            **config)
                _, est_labels, wfmcs = S.processFlat()

    if bounds_module:
        # Make sure the first and last boundaries are included
        est_times, est_labels = utils.process_segmentation_level(
            est_idxs, est_labels, features.shape[0], frame_times,
            config["features"].dur)
        return est_times, est_labels

    # Ground-truth boundaries are assumed to already include the first and
    # last boundaries, so the annotated times are returned unprocessed.
    est_times = np.array(est_times)
    return est_times, est_labels, wfmcs
def _preprocess(self, valid_features=["hpcp", "tonnetz", "mfcc"],
                normalize=True):
    """This method obtains the actual features, their frame times,
    and the boundary indices in these features if needed.

    Parameters
    ----------
    valid_features: list
        Names of the features accepted by the calling algorithm. The list
        is only read, never modified.
    normalize: bool
        Whether to pass the selected features through
        `U.lognormalize_chroma` before returning them.

    Returns
    -------
    F: object
        The features selected by `self.feature_str`.
    frame_times: object
        The beats, or the output of `U.get_time_frames` when
        `self.framesync` is set.
    dur: object
        Duration, as read from the features.
    bound_idxs: np.array or None
        Unique indices of `self.in_bound_times` aligned to `frame_times`,
        or None when no input boundaries were given.

    Raises
    ------
    RuntimeError
        If `self.feature_str` is not in `valid_features`, or if it does not
        name a feature stored on this object.
    """
    # Read features
    if self.features is None:
        # Features stored in a json file
        self.hpcp, self.mfcc, self.tonnetz, beats, dur, anal = \
            io.get_features(self.audio_file, annot_beats=self.annot_beats,
                            framesync=self.framesync)
    else:
        # Features passed as parameters; the beat-synchronous ones are
        # stored under a "bs_" prefix.
        feat_prefix = "" if self.framesync else "bs_"
        self.hpcp = self.features["%shpcp" % feat_prefix]
        self.mfcc = self.features["%smfcc" % feat_prefix]
        self.tonnetz = self.features["%stonnetz" % feat_prefix]
        beats = self.features["beats"]
        dur = self.features["anal"]["dur"]
        anal = self.features["anal"]

    # Store analysis parameters
    self.anal = anal

    # Use correct frames to find times
    frame_times = beats
    if self.framesync:
        frame_times = U.get_time_frames(dur, anal)

    # Read input bounds if necessary
    bound_idxs = None
    if self.in_bound_times is not None:
        bound_idxs = io.align_times(self.in_bound_times, frame_times)
        bound_idxs = np.unique(bound_idxs)

    # Use specific feature
    if self.feature_str not in valid_features:
        raise RuntimeError("Feature %s in not valid for algorithm: %s "
                           "(valid features are %s)." %
                           (self.feature_str, __name__, valid_features))

    # Plain attribute lookup instead of eval(): same result for attribute
    # names, and no arbitrary code is evaluated. TypeError covers a
    # non-string feature name.
    try:
        F = getattr(self, self.feature_str)
    except (AttributeError, TypeError):
        raise RuntimeError("Feature %s in not supported by MSAF" %
                           (self.feature_str))

    # Normalize if needed
    if normalize:
        F = U.lognormalize_chroma(F)

    return F, frame_times, dur, bound_idxs
def run_flat(audio_file, bounds_module, labels_module, frame_times, config,
             annotator_id):
    """Runs the flat algorithms with the specified identifiers on the
    audio_file. See run_algorithm for more information.

    Parameters
    ----------
    audio_file: str
        Path to the audio file to segment.
    bounds_module: module or None
        Boundaries algorithm (must expose `Segmenter`). None means that the
        annotated (ground-truth) boundaries are read instead.
    labels_module: module or None
        Labels algorithm (must expose `Segmenter`). None means no labeling.
    frame_times: array-like
        Times of the analysis frames.
    config: dict
        Algorithm parameters; `config["features"]` must contain the "hpcp"
        features and the "anal" dictionary with its "dur" entry.
    annotator_id: int
        Annotator identifier used when reading the references.

    Returns
    -------
    est_times: np.array or list
        Estimated boundary times (empty list if the references could not
        be read).
    est_labels: np.array or list
        Labels of the segments (empty list if the references could not be
        read).
    """
    # Get features to make code nicer
    features = config["features"]

    # Segment using the specified boundaries and labels
    same_algorithm = (bounds_module is not None and
                      labels_module is not None and
                      bounds_module.__name__ == labels_module.__name__)
    if same_algorithm:
        # Case when boundaries and labels algorithms are the same
        S = bounds_module.Segmenter(audio_file, **config)
        est_idxs, est_labels = S.processFlat()
    else:
        # Different boundary and label algorithms
        # Identify segment boundaries
        if bounds_module is not None:
            S = bounds_module.Segmenter(audio_file, in_labels=[], **config)
            est_idxs, est_labels = S.processFlat()
        else:
            try:
                # Ground-truth boundaries, aligned to the analysis frames
                est_times, est_labels = io.read_references(
                    audio_file, annotator_id=annotator_id)
                est_idxs = io.align_times(est_times, frame_times[:-1])
                # Force the first and last frames to be boundaries
                if est_idxs[0] != 0:
                    est_idxs = np.concatenate(([0], est_idxs))
                last_frame = features["hpcp"].shape[0] - 1
                if est_idxs[-1] != last_frame:
                    est_idxs = np.concatenate((est_idxs, [last_frame]))
            except Exception:
                # Best effort: any failure while loading the references
                # yields an empty estimation. `Exception` (instead of a
                # bare except) lets KeyboardInterrupt and SystemExit
                # propagate.
                logging.warning("No references found for file: %s" %
                                audio_file)
                return [], []

        # Label segments
        if labels_module is not None:
            if len(est_idxs) == 2:
                # Only two boundaries: a single segment, nothing to cluster
                est_labels = np.array([0])
            else:
                S = labels_module.Segmenter(audio_file,
                                            in_bound_idxs=est_idxs,
                                            **config)
                est_labels = S.processFlat()[1]

    # Make sure the first and last boundaries are included
    est_times, est_labels = utils.process_segmentation_level(
        est_idxs, est_labels, features["hpcp"].shape[0], frame_times,
        features["anal"]["dur"])

    return est_times, est_labels
def run_algorithms(audio_file, boundaries_id, labels_id, config,
                   annotator_id=0):
    """Runs the algorithms with the specified identifiers on the audio_file.

    Parameters
    ----------
    audio_file: str
        Path to the audio file to segment.
    boundaries_id: str
        Identifier of the boundaries algorithm to use ("gt" for ground truth).
    labels_id: str
        Identifier of the labels algorithm to use (None for not labeling).
    config: dict
        Dictionary containing the custom parameters of the algorithms to use.
    annotator_id: int
        Annotator identificator in the ground truth.

    Returns
    -------
    est_times: np.array or list
        List of estimated times for the segment boundaries.
        If `list`, it will be a list of np.arrays, sorted by segmentation
        layer. An empty list is returned when ground-truth boundaries were
        requested but could not be loaded.
    est_labels: np.array or list
        List of all the labels associated segments.
        If `list`, it will be a list of np.arrays, sorted by segmentation
        layer. An empty list is returned when ground-truth boundaries were
        requested but could not be loaded.

    Raises
    ------
    RuntimeError
        In hierarchical mode, if no boundary algorithm is given or if the
        labels algorithm differs from the boundaries algorithm.
    """
    # At this point, features should have already been computed
    hpcp, mfcc, tonnetz, cqt, gmt, beats, dur, anal = \
        io.get_features(audio_file, config["annot_beats"],
                        config["framesync"],
                        pre_features=config["features"])
    n_frames = hpcp.shape[0]

    # Check that there are enough audio frames
    if n_frames <= msaf.minimum__frames:
        logging.warning("Audio file too short, or too many few beats "
                        "estimated. Returning empty estimations.")
        # A single segment spanning the whole file
        return np.asarray([0, dur]), np.asarray([0], dtype=int)

    # Get the corresponding modules
    bounds_module = get_boundaries_module(boundaries_id)
    labels_module = get_labels_module(labels_id)

    # Get the correct frame times
    frame_times = beats
    if config["framesync"]:
        frame_times = utils.get_time_frames(dur, anal)

    # Segment audio based on type of segmentation
    if config["hier"]:
        # Hierarchical segmentation
        if bounds_module is None:
            raise RuntimeError("A boundary algorithm is needed when using "
                               "hierarchical segmentation.")
        if labels_module is not None and \
                bounds_module.__name__ != labels_module.__name__:
            raise RuntimeError("The same algorithm for boundaries and labels is "
                               "needed when using hierarchical segmentation.")
        S = bounds_module.Segmenter(audio_file, **config)
        est_idxs, est_labels = S.processHierarchical()

        # Make sure the first and last boundaries are included for each
        # level in the hierarchy
        est_times = []
        cleaned_est_labels = []
        for level, level_idxs in enumerate(est_idxs):
            est_level_times, est_level_labels = \
                utils.process_segmentation_level(level_idxs,
                                                 est_labels[level],
                                                 n_frames,
                                                 frame_times,
                                                 dur)
            est_times.append(est_level_times)
            cleaned_est_labels.append(est_level_labels)
        est_labels = cleaned_est_labels
    else:
        # Flat segmentation
        # Segment using the specified boundaries and labels
        same_algorithm = (bounds_module is not None and
                          labels_module is not None and
                          bounds_module.__name__ == labels_module.__name__)
        if same_algorithm:
            # Case when boundaries and labels algorithms are the same
            S = bounds_module.Segmenter(audio_file, **config)
            est_idxs, est_labels = S.processFlat()
        else:
            # Different boundary and label algorithms
            # Identify segment boundaries
            if bounds_module is not None:
                S = bounds_module.Segmenter(audio_file, in_labels=[],
                                            **config)
                est_idxs, est_labels = S.processFlat()
            else:
                try:
                    # Ground-truth boundaries, aligned to the frames
                    est_times, est_labels = io.read_references(
                        audio_file, annotator_id=annotator_id)
                    est_idxs = io.align_times(est_times, frame_times[:-1])
                    # Force the first and last frames to be boundaries
                    if est_idxs[0] != 0:
                        est_idxs = np.concatenate(([0], est_idxs))
                    if est_idxs[-1] != n_frames - 1:
                        est_idxs = np.concatenate((est_idxs,
                                                   [n_frames - 1]))
                except Exception:
                    # Best effort: any failure while loading the references
                    # yields an empty estimation. `Exception` (instead of a
                    # bare except) lets KeyboardInterrupt and SystemExit
                    # propagate.
                    logging.warning("No references found for file: %s" %
                                    audio_file)
                    return [], []

            # Label segments
            if labels_module is not None:
                if len(est_idxs) == 2:
                    # Only two boundaries: one segment, nothing to cluster
                    est_labels = np.array([0])
                else:
                    S = labels_module.Segmenter(audio_file,
                                                in_bound_idxs=est_idxs,
                                                **config)
                    est_labels = S.processFlat()[1]

        # Make sure the first and last boundaries are included
        est_times, est_labels = utils.process_segmentation_level(
            est_idxs, est_labels, n_frames, frame_times, dur)

    return est_times, est_labels