def _preprocess(self, valid_features=["hpcp", "tonnetz", "mfcc", "cqt", "tempogram"], normalize=True): """This method obtains the actual features.""" # Read features if self.features is None: self.features = io.get_features(self.audio_file, annot_beats=self.annot_beats, framesync=self.framesync) # Use specific feature if self.feature_str not in valid_features: raise RuntimeError("Feature %s in not valid for algorithm: %s " "(valid features are %s)." % (self.feature_str, __name__, valid_features)) else: try: F = self.features[self.feature_str] except KeyError: raise RuntimeError("Feature %s in not supported by MSAF" % (self.feature_str)) # Normalize if needed if normalize: F = U.lognormalize_chroma(F) return F
def _preprocess(self, valid_features=["hpcp", "tonnetz", "mfcc", "cqt"], normalize=True): """This method obtains the actual features.""" # Read features if self.features is None: self.features = io.get_features(self.audio_file, annot_beats=self.annot_beats, framesync=self.framesync) # Use specific feature if self.feature_str not in valid_features: raise RuntimeError("Feature %s in not valid for algorithm: %s " "(valid features are %s)." % (self.feature_str, __name__, valid_features)) else: try: F = self.features[self.feature_str] except KeyError: raise RuntimeError("Feature %s in not supported by MSAF" % (self.feature_str)) # Normalize if needed if normalize: F = U.lognormalize_chroma(F) return F
def _preprocess(self, valid_features=["hpcp", "tonnetz", "mfcc", "cqt", "gmt"], normalize=True): """This method obtains the actual features.""" # Read features self.hpcp, self.mfcc, self.tonnetz, self.cqt, self.gmt, beats, dur, self.anal = \ io.get_features(self.audio_file, annot_beats=self.annot_beats, framesync=self.framesync, pre_features=self.features) # Use specific feature if self.feature_str not in valid_features: raise RuntimeError("Feature %s in not valid for algorithm: %s " "(valid features are %s)." % (self.feature_str, __name__, valid_features)) else: try: F = eval("self." + self.feature_str) except: raise RuntimeError("Feature %s in not supported by MSAF" % (self.feature_str)) # Normalize if needed if normalize: F = U.lognormalize_chroma(F) return F
def _preprocess(self, valid_features=["hpcp", "tonnetz", "mfcc", "cqt"], normalize=True): """This method obtains the actual features.""" # Read features self.hpcp, self.mfcc, self.tonnetz, self.cqt, beats, dur, self.anal = \ io.get_features(self.audio_file, annot_beats=self.annot_beats, framesync=self.framesync, pre_features=self.features) # Use specific feature if self.feature_str not in valid_features: raise RuntimeError("Feature %s in not valid for algorithm: %s " "(valid features are %s)." % (self.feature_str, __name__, valid_features)) else: try: F = eval("self." + self.feature_str) except: raise RuntimeError("Feature %s in not supported by MSAF" % (self.feature_str)) # Normalize if needed if normalize: F = U.lognormalize_chroma(F) return F
def run_algorithms(audio_file, boundaries_id, labels_id, config,
                   annotator_id=0):
    """Runs the algorithms with the specified identifiers on the audio_file.

    Parameters
    ----------
    audio_file: str
        Path to the audio file to segment.
    boundaries_id: str
        Identifier of the boundaries algorithm to use ("gt" for ground
        truth).
    labels_id: str
        Identifier of the labels algorithm to use (None for not labeling).
    config: dict
        Dictionary containing the custom parameters of the algorithms to use.
    annotator_id: int
        Annotator identifier in the ground truth.

    Returns
    -------
    est_times: np.array or list
        List of estimated times for the segment boundaries.
        If `list`, it will be a list of np.arrays, sorted by segmentation
        layer.
    est_labels: np.array or list
        List of all the labels associated with the estimated segments.
        If `list`, it will be a list of np.arrays, sorted by segmentation
        layer.
    """
    # Features should have already been computed, let's read them
    features = io.get_features(audio_file, config["annot_beats"],
                               config["framesync"])
    config["features"] = features

    # Check that there are enough audio frames
    if features["hpcp"].shape[0] <= msaf.minimum_frames:
        logging.warning("Audio file too short, or too few beats "
                        "estimated. Returning empty estimations.")
        return np.asarray([0, features["anal"]["dur"]]), \
            np.asarray([0], dtype=int)

    # Get the corresponding modules
    bounds_module = get_boundaries_module(boundaries_id)
    labels_module = get_labels_module(labels_id)

    # Get the correct frame times
    frame_times = features["beats"]
    if config["framesync"]:
        frame_times = utils.get_time_frames(features["anal"]["dur"],
                                            features["anal"])

    # Segment audio based on type of segmentation
    run_fun = run_hierarchical if config["hier"] else run_flat
    est_times, est_labels = run_fun(audio_file, bounds_module, labels_module,
                                    frame_times, config, annotator_id)

    return est_times, est_labels
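A hedged usage sketch for the dict-based run_algorithms above. The config keys are the ones the function actually reads ("annot_beats", "framesync", "hier"); run_algorithms inserts the "features" entry itself. The audio path is a placeholder, boundaries_id="gt" takes boundaries from the ground-truth references, and labels_id=None skips the labeling stage, as described in the docstring.

# Minimal configuration containing only the keys run_algorithms reads;
# the function adds the "features" entry itself after io.get_features.
config = {
    "annot_beats": False,  # use estimated rather than annotated beats
    "framesync": False,    # beat-synchronous features
    "hier": False,         # flat (non-hierarchical) segmentation
}

audio_file = "path/to/track.mp3"  # placeholder path

# "gt" reads the boundaries from the ground-truth references and
# labels_id=None skips the labeling stage (see the docstring above).
est_times, est_labels = run_algorithms(audio_file, boundaries_id="gt",
                                       labels_id=None, config=config)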
def _preprocess(self, valid_features=["hpcp", "tonnetz", "mfcc"], normalize=True): """This method obtains the actual features, their frame times, and the boundary indeces in these features if needed.""" # Read features if self.features is None: # Features stored in a json file self.hpcp, self.mfcc, self.tonnetz, beats, dur, anal = \ io.get_features(self.audio_file, annot_beats=self.annot_beats, framesync=self.framesync) else: # Features passed as parameters feat_prefix = "" if not self.framesync: feat_prefix = "bs_" self.hpcp = self.features["%shpcp" % feat_prefix] self.mfcc = self.features["%smfcc" % feat_prefix] self.tonnetz = self.features["%stonnetz" % feat_prefix] beats = self.features["beats"] dur = self.features["anal"]["dur"] anal = self.features["anal"] # Store analysis parameters self.anal = anal # Use correct frames to find times frame_times = beats if self.framesync: frame_times = U.get_time_frames(dur, anal) # Read input bounds if necessary bound_idxs = None if self.in_bound_times is not None: bound_idxs = io.align_times(self.in_bound_times, frame_times) bound_idxs = np.unique(bound_idxs) # Use specific feature if self.feature_str not in valid_features: raise RuntimeError("Feature %s in not valid for algorithm: %s " "(valid features are %s)." % (self.feature_str, __name__, valid_features)) else: try: F = eval("self." + self.feature_str) except: raise RuntimeError("Feature %s in not supported by MSAF" % (self.feature_str)) # Normalize if needed if normalize: F = U.lognormalize_chroma(F) return F, frame_times, dur, bound_idxs
def run_algorithms(audio_file, boundaries_id, labels_id, config,
                   annotator_id=0):
    """Runs the algorithms with the specified identifiers on the audio_file.

    Parameters
    ----------
    audio_file: str
        Path to the audio file to segment.
    boundaries_id: str
        Identifier of the boundaries algorithm to use ("gt" for ground
        truth).
    labels_id: str
        Identifier of the labels algorithm to use (None for not labeling).
    config: dict
        Dictionary containing the custom parameters of the algorithms to use.
    annotator_id: int
        Annotator identifier in the ground truth.

    Returns
    -------
    est_times: np.array or list
        List of estimated times for the segment boundaries.
        If `list`, it will be a list of np.arrays, sorted by segmentation
        layer.
    est_labels: np.array or list
        List of all the labels associated with the estimated segments.
        If `list`, it will be a list of np.arrays, sorted by segmentation
        layer.
    """
    # At this point, features should have already been computed
    hpcp, mfcc, tonnetz, cqt, gmt, beats, dur, anal = \
        io.get_features(audio_file, config["annot_beats"],
                        config["framesync"],
                        pre_features=config["features"])

    # Check that there are enough audio frames
    if hpcp.shape[0] <= msaf.minimum_frames:
        logging.warning("Audio file too short, or too few beats "
                        "estimated. Returning empty estimations.")
        return np.asarray([0, dur]), np.asarray([0], dtype=int)

    # Get the corresponding modules
    bounds_module = get_boundaries_module(boundaries_id)
    labels_module = get_labels_module(labels_id)

    # Get the correct frame times
    frame_times = beats
    if config["framesync"]:
        frame_times = utils.get_time_frames(dur, anal)

    # Segment audio based on type of segmentation
    if config["hier"]:
        # Hierarchical segmentation
        if bounds_module is None:
            raise RuntimeError("A boundary algorithm is needed when using "
                               "hierarchical segmentation.")
        if labels_module is not None and \
                bounds_module.__name__ != labels_module.__name__:
            raise RuntimeError("The same algorithm for boundaries and labels "
                               "is needed when using hierarchical "
                               "segmentation.")
        S = bounds_module.Segmenter(audio_file, **config)
        est_idxs, est_labels = S.processHierarchical()

        # Make sure the first and last boundaries are included for each
        # level in the hierarchy
        est_times = []
        cleaned_est_labels = []
        for level in range(len(est_idxs)):
            est_level_times, est_level_labels = \
                utils.process_segmentation_level(est_idxs[level],
                                                 est_labels[level],
                                                 hpcp.shape[0],
                                                 frame_times, dur)
            est_times.append(est_level_times)
            cleaned_est_labels.append(est_level_labels)
        est_labels = cleaned_est_labels
    else:
        # Flat segmentation
        # Segment using the specified boundaries and labels
        # Case when boundaries and labels algorithms are the same
        if bounds_module is not None and labels_module is not None and \
                bounds_module.__name__ == labels_module.__name__:
            S = bounds_module.Segmenter(audio_file, **config)
            est_idxs, est_labels = S.processFlat()
        # Different boundary and label algorithms
        else:
            # Identify segment boundaries
            if bounds_module is not None:
                S = bounds_module.Segmenter(audio_file, in_labels=[],
                                            **config)
                est_idxs, est_labels = S.processFlat()
            else:
                try:
                    est_times, est_labels = io.read_references(
                        audio_file, annotator_id=annotator_id)
                    est_idxs = io.align_times(est_times, frame_times[:-1])
                    if est_idxs[0] != 0:
                        est_idxs = np.concatenate(([0], est_idxs))
                    if est_idxs[-1] != hpcp.shape[0] - 1:
                        est_idxs = np.concatenate((est_idxs,
                                                   [hpcp.shape[0] - 1]))
                except Exception:
                    logging.warning("No references found for file: %s" %
                                    audio_file)
                    return [], []

            # Label segments
            if labels_module is not None:
                if len(est_idxs) == 2:
                    est_labels = np.array([0])
                else:
                    S = labels_module.Segmenter(audio_file,
                                                in_bound_idxs=est_idxs,
                                                **config)
                    est_labels = S.processFlat()[1]

        # Make sure the first and last boundaries are included
        est_times, est_labels = utils.process_segmentation_level(
            est_idxs, est_labels, hpcp.shape[0], frame_times, dur)

    return est_times, est_labels
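run_algorithms only interacts with the boundary and label algorithms through a Segmenter class constructed from (audio_file, **config), with processFlat() returning (est_idxs, est_labels) and, for hierarchies, processHierarchical(). The toy segmenter below satisfies that flat calling convention so the control flow above can be followed end to end; the evenly spaced boundaries and the n_frames/n_segments parameters are made up for the example and are not part of MSAF.

import numpy as np

class Segmenter(object):
    """Toy flat segmenter honoring the interface run_algorithms relies on:
    construction from (audio_file, **config) and a processFlat() that
    returns (est_idxs, est_labels). The boundary placement is illustrative
    only."""

    def __init__(self, audio_file, in_labels=None, in_bound_idxs=None,
                 **config):
        self.audio_file = audio_file
        self.config = config
        self.in_bound_idxs = in_bound_idxs

    def processFlat(self, n_frames=200, n_segments=4):
        # Place boundaries at evenly spaced frame indices, keeping the
        # first and last frames as the caller expects.
        est_idxs = np.linspace(0, n_frames - 1, n_segments + 1).astype(int)
        # One integer label per segment (len(est_idxs) - 1 labels)
        est_labels = np.arange(n_segments)
        return est_idxs, est_labels

S = Segmenter("path/to/track.mp3", framesync=False)
est_idxs, est_labels = S.processFlat()
print(est_idxs, est_labels)  # [  0  49  99 149 199] [0 1 2 3]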