Example #1
0
def use_in_bounds(audio_file, in_bound_times, beats, feats, config):
    """Seed the initial W and H matrices from the annotated boundaries.

    The boundary times are aligned to beat indices with ``io.align_times``.
    For every resulting segment, the corresponding slice of ``feats`` is
    copied (transposed) into ``initW`` and a single ``1`` is placed in
    ``initH`` at the index where the segment starts.

    Parameters
    ----------
    audio_file : str
        Not used by this function; kept so the signature stays compatible
        with existing callers.
    in_bound_times : array-like
        Annotated boundary times, passed to ``io.align_times``.
    beats : array-like
        Beat times the boundaries are aligned to.
    feats : np.ndarray
        Two-dimensional feature matrix, sliced along its first axis with
        the aligned boundary indices.
    config : dict
        Algorithm parameters. Modified in place: the keys ``"win"``,
        ``"rank"``, ``"initW"`` and ``"initH"`` are overwritten.

    Returns
    -------
    config : dict
        The same dictionary that was passed in, after being updated.
    bound_idxs : sequence of int
        Boundary indices actually used (first and last ones dropped when at
        least four boundaries were available).
    """
    bound_idxs = io.align_times(in_bound_times, beats)

    # Remove first and last boundaries (silent labels)
    if len(bound_idxs) >= 4:
        bound_idxs = bound_idxs[1:-1]

    n_segments = len(bound_idxs) - 1
    # The longest segment fixes the size of the last axis of initW; shorter
    # segments are left zero-padded at the end.
    max_beats_segment = np.max(np.diff(bound_idxs))

    # Initialize the W and H matrices using the previously found bounds
    initW = np.zeros((feats.shape[1], n_segments, max_beats_segment))
    initH = np.zeros((n_segments, feats.shape[0]))
    # `range` (instead of the Python 2-only `xrange`) works on both Python 2
    # and Python 3.
    for i in range(n_segments):
        start, end = bound_idxs[i], bound_idxs[i + 1]
        initW[:, i, :end - start] = feats[start:end].T
        initH[i, start] = 1

    # Update parameters
    config["win"] = max_beats_segment
    config["rank"] = n_segments
    config["initW"] = initW
    config["initH"] = initH

    return config, bound_idxs
Example #2
0
File: run.py Project: georgid/msaf
def run_flat(file_struct, bounds_module, labels_module, frame_times, config,
             annotator_id):
    """Runs the flat algorithms with the specified identifiers on the
    audio_file. See run_algorithm for more information.

    Parameters
    ----------
    file_struct : object
        Handed to the ``Segmenter`` constructors; its ``audio_file``
        attribute is used to read the reference annotations.
    bounds_module : module or None
        Boundaries algorithm. When None, the boundaries are read from the
        reference annotations instead.
    labels_module : module or None
        Labels algorithm. When None, no extra labeling step is run.
    frame_times : array-like
        Times of the feature frames, used to align the reference boundaries
        and to convert the estimated indices back to times.
    config : dict
        Algorithm parameters. ``config["features"]`` must expose the
        attributes ``features`` and ``dur``.
    annotator_id : int
        Annotator to read the references from.

    Returns
    -------
    est_times : np.array
        Estimated boundary times.
    est_labels : np.array
        Labels of the estimated segments.
    wfmcs : object or None
        Third output of the labels algorithm's ``processFlat()`` (or a dummy
        array when there is a single segment). None when the labeling step
        is not executed, i.e. when boundaries and labels come from the same
        algorithm or when ``labels_module`` is None.

    Notes
    -----
    When the references cannot be read (``IOError``), a pair of empty lists
    ``([], [])`` is returned instead of the usual three values.
    """
    # Get features to make code nicer
    features = config["features"].features

    # Only the labeling step below assigns this; default it so that the final
    # return never references an unbound local.
    wfmcs = None

    same_algorithm = (bounds_module is not None and
                      labels_module is not None and
                      bounds_module.__name__ == labels_module.__name__)

    # Segment using the specified boundaries and labels
    if same_algorithm:
        # Case when boundaries and labels algorithms are the same
        S = bounds_module.Segmenter(file_struct, **config)
        est_idxs, est_labels = S.processFlat()
    else:
        # Different boundary and label algorithms
        # Identify segment boundaries
        if bounds_module is not None:
            S = bounds_module.Segmenter(file_struct, in_labels=[], **config)
            est_idxs, est_labels = S.processFlat()
        else:
            try:
                # Ground-truth boundaries
                est_times, est_labels = io.read_references(
                    file_struct.audio_file, annotator_id=annotator_id)
                est_idxs = io.align_times(est_times, frame_times)
                if est_idxs[0] != 0:
                    est_idxs = np.concatenate(([0], est_idxs))
            except IOError:
                logging.warning("No references found for file: %s" %
                                file_struct.audio_file)
                # NOTE(review): two values here vs. three on the normal path;
                # kept as-is because callers are not visible -- confirm.
                return [], []

        # Label segments
        if labels_module is not None:
            if len(est_idxs) == 2:
                # Two boundaries only (one segment): no need for clustering
                est_labels = np.array([0])
                # Dummy value so that the output has the usual three items
                wfmcs = np.arange(2 * len(est_labels)).reshape(
                    len(est_labels), 2)
            else:
                S = labels_module.Segmenter(file_struct,
                                            in_bound_idxs=est_idxs,
                                            **config)
                _, est_labels, wfmcs = S.processFlat()

    # Make sure the first and last boundaries are included. For ground-truth
    # boundaries they are assumed to be already present.
    if bounds_module:
        est_times, est_labels = utils.process_segmentation_level(
            est_idxs, est_labels, features.shape[0], frame_times,
            config["features"].dur)
    else:
        est_times = np.array(est_times)

    return est_times, est_labels, wfmcs
Example #3
0
    def _preprocess(self,
                    valid_features=None,
                    normalize=True):
        """This method obtains the actual features, their frame times,
        and the boundary indices in these features if needed.

        Parameters
        ----------
        valid_features : list of str or None
            Names of the features this algorithm accepts. None (the default)
            means ``["hpcp", "tonnetz", "mfcc"]``.
        normalize : bool
            Whether to pass the selected features through
            ``U.lognormalize_chroma`` before returning them.

        Returns
        -------
        F : object
            Features selected by ``self.feature_str`` (normalized if
            requested).
        frame_times : object
            Beat times, or the times given by ``U.get_time_frames`` when
            ``self.framesync`` is set.
        dur : object
            Duration entry read along with the features.
        bound_idxs : np.array or None
            Unique indices of ``self.in_bound_times`` aligned to
            ``frame_times``; None when no input boundaries were given.

        Raises
        ------
        RuntimeError
            If ``self.feature_str`` is not in ``valid_features`` or the
            feature is not available on this object.
        """
        # Fresh list per call rather than a shared mutable default argument
        if valid_features is None:
            valid_features = ["hpcp", "tonnetz", "mfcc"]

        # Read features
        if self.features is None:
            # Features stored in a json file
            self.hpcp, self.mfcc, self.tonnetz, beats, dur, anal = \
                io.get_features(self.audio_file, annot_beats=self.annot_beats,
                                framesync=self.framesync)
        else:
            # Features passed as parameters; beat-synchronous ones carry the
            # "bs_" prefix in their keys
            feat_prefix = "" if self.framesync else "bs_"
            self.hpcp = self.features["%shpcp" % feat_prefix]
            self.mfcc = self.features["%smfcc" % feat_prefix]
            self.tonnetz = self.features["%stonnetz" % feat_prefix]
            beats = self.features["beats"]
            dur = self.features["anal"]["dur"]
            anal = self.features["anal"]

        # Store analysis parameters
        self.anal = anal

        # Use correct frames to find times
        frame_times = beats
        if self.framesync:
            frame_times = U.get_time_frames(dur, anal)

        # Read input bounds if necessary
        bound_idxs = None
        if self.in_bound_times is not None:
            bound_idxs = io.align_times(self.in_bound_times, frame_times)
            bound_idxs = np.unique(bound_idxs)

        # Use specific feature
        if self.feature_str not in valid_features:
            raise RuntimeError("Feature %s in not valid for algorithm: %s "
                               "(valid features are %s)." %
                               (self.feature_str, __name__, valid_features))
        try:
            # Plain attribute lookup instead of eval(): same result for a
            # feature name, without executing an arbitrary expression.
            F = getattr(self, self.feature_str)
        except (AttributeError, TypeError):
            raise RuntimeError("Feature %s in not supported by MSAF" %
                               (self.feature_str))

        # Normalize if needed
        if normalize:
            F = U.lognormalize_chroma(F)

        return F, frame_times, dur, bound_idxs
Example #4
0
File: run.py Project: kacrouse/msaf
def run_flat(audio_file, bounds_module, labels_module, frame_times, config,
             annotator_id):
    """Runs the flat algorithms with the specified identifiers on the
    audio_file. See run_algorithm for more information.

    Parameters
    ----------
    audio_file : str
        Path to the audio file to segment.
    bounds_module : module or None
        Boundaries algorithm. When None, the boundaries are read from the
        reference annotations instead.
    labels_module : module or None
        Labels algorithm. When None, no extra labeling step is run.
    frame_times : array-like
        Times of the feature frames.
    config : dict
        Algorithm parameters. ``config["features"]`` must contain the keys
        ``"hpcp"`` and ``"anal"`` (the latter with a ``"dur"`` entry).
    annotator_id : int
        Annotator to read the references from.

    Returns
    -------
    est_times : np.array or list
        Estimated boundary times.
    est_labels : np.array or list
        Labels of the estimated segments.
        Both are empty lists when the references could not be loaded.
    """
    # Get features to make code nicer
    features = config["features"]

    same_algorithm = (bounds_module is not None and
                      labels_module is not None and
                      bounds_module.__name__ == labels_module.__name__)

    # Segment using the specified boundaries and labels
    if same_algorithm:
        # Case when boundaries and labels algorithms are the same
        S = bounds_module.Segmenter(audio_file, **config)
        est_idxs, est_labels = S.processFlat()
    else:
        # Different boundary and label algorithms
        # Identify segment boundaries
        if bounds_module is not None:
            S = bounds_module.Segmenter(audio_file, in_labels=[], **config)
            est_idxs, est_labels = S.processFlat()
        else:
            try:
                est_times, est_labels = io.read_references(
                    audio_file, annotator_id=annotator_id)
                est_idxs = io.align_times(est_times, frame_times[:-1])
                # Force the first and last frames to be boundaries
                last_frame = features["hpcp"].shape[0] - 1
                if est_idxs[0] != 0:
                    est_idxs = np.concatenate(([0], est_idxs))
                if est_idxs[-1] != last_frame:
                    est_idxs = np.concatenate((est_idxs, [last_frame]))
            except Exception:
                # Best effort: any failure while loading the references ends
                # in an empty estimation. `Exception` (rather than a bare
                # except) lets KeyboardInterrupt/SystemExit propagate.
                logging.warning("No references found for file: %s",
                                audio_file)
                return [], []

        # Label segments
        if labels_module is not None:
            if len(est_idxs) == 2:
                # Single segment: nothing to cluster
                est_labels = np.array([0])
            else:
                S = labels_module.Segmenter(audio_file,
                                            in_bound_idxs=est_idxs,
                                            **config)
                est_labels = S.processFlat()[1]

    # Make sure the first and last boundaries are included
    est_times, est_labels = utils.process_segmentation_level(
        est_idxs, est_labels, features["hpcp"].shape[0], frame_times,
        features["anal"]["dur"])

    return est_times, est_labels
Example #5
0
def run_algorithms(audio_file, boundaries_id, labels_id, config,
                   annotator_id=0):
    """Runs the algorithms with the specified identifiers on the audio_file.

    Parameters
    ----------
    audio_file: str
        Path to the audio file to segment.
    boundaries_id: str
        Identifier of the boundaries algorithm to use ("gt" for ground truth).
    labels_id: str
        Identifier of the labels algorithm to use (None for not labeling).
    config: dict
        Dictionary containing the custom parameters of the algorithms to use.
        The keys "annot_beats", "framesync", "features" and "hier" are read
        here.
    annotator_id: int
        Annotator identifier in the ground truth.

    Returns
    -------
    est_times: np.array or list
        List of estimated times for the segment boundaries.
        If `list`, it will be a list of np.arrays, sorted by segmentation layer.
    est_labels: np.array or list
        List of all the labels associated segments.
        If `list`, it will be a list of np.arrays, sorted by segmentation layer.
        Both outputs are empty lists when ground-truth boundaries were
        requested but the references could not be loaded.

    Raises
    ------
    RuntimeError
        In hierarchical mode, when no boundaries algorithm is given or when
        the labels algorithm differs from the boundaries algorithm.
    """
    # At this point, features should have already been computed
    hpcp, mfcc, tonnetz, cqt, gmt, beats, dur, anal = \
        io.get_features(audio_file, config["annot_beats"],
                        config["framesync"],
                        pre_features=config["features"])
    n_frames = hpcp.shape[0]

    # Check that there are enough audio frames
    if n_frames <= msaf.minimum__frames:
        logging.warning("Audio file too short, or too many few beats "
                        "estimated. Returning empty estimations.")
        return np.asarray([0, dur]), np.asarray([0], dtype=int)

    # Get the corresponding modules
    bounds_module = get_boundaries_module(boundaries_id)
    labels_module = get_labels_module(labels_id)

    # Get the correct frame times
    frame_times = beats
    if config["framesync"]:
        frame_times = utils.get_time_frames(dur, anal)

    # Segment audio based on type of segmentation
    if config["hier"]:
        # Hierarchical segmentation
        if bounds_module is None:
            raise RuntimeError("A boundary algorithm is needed when using "
                               "hierarchical segmentation.")
        if labels_module is not None and \
                bounds_module.__name__ != labels_module.__name__:
            raise RuntimeError("The same algorithm for boundaries and labels is "
                               "needed when using hierarchical segmentation.")
        S = bounds_module.Segmenter(audio_file, **config)
        est_idxs, est_labels = S.processHierarchical()

        # Make sure the first and last boundaries are included for each
        # level in the hierarchy
        est_times = []
        cleaned_est_labels = []
        for level, level_idxs in enumerate(est_idxs):
            est_level_times, est_level_labels = \
                utils.process_segmentation_level(level_idxs,
                                                 est_labels[level],
                                                 n_frames,
                                                 frame_times,
                                                 dur)
            est_times.append(est_level_times)
            cleaned_est_labels.append(est_level_labels)
        est_labels = cleaned_est_labels
    else:
        # Flat segmentation
        # Segment using the specified boundaries and labels
        # Case when boundaries and labels algorithms are the same
        if bounds_module is not None and labels_module is not None and \
                bounds_module.__name__ == labels_module.__name__:
            S = bounds_module.Segmenter(audio_file, **config)
            est_idxs, est_labels = S.processFlat()
        # Different boundary and label algorithms
        else:
            # Identify segment boundaries
            if bounds_module is not None:
                S = bounds_module.Segmenter(audio_file, in_labels=[], **config)
                est_idxs, est_labels = S.processFlat()
            else:
                try:
                    est_times, est_labels = io.read_references(
                        audio_file, annotator_id=annotator_id)
                    est_idxs = io.align_times(est_times, frame_times[:-1])
                    # Force the first and last frames to be boundaries
                    if est_idxs[0] != 0:
                        est_idxs = np.concatenate(([0], est_idxs))
                    if est_idxs[-1] != n_frames - 1:
                        est_idxs = np.concatenate((est_idxs, [n_frames - 1]))
                except Exception:
                    # Best effort: any failure while loading the references
                    # ends in an empty estimation. `Exception` (rather than a
                    # bare except) lets KeyboardInterrupt/SystemExit through.
                    logging.warning("No references found for file: %s",
                                    audio_file)
                    return [], []

            # Label segments
            if labels_module is not None:
                if len(est_idxs) == 2:
                    # Single segment: nothing to cluster
                    est_labels = np.array([0])
                else:
                    S = labels_module.Segmenter(audio_file,
                                                in_bound_idxs=est_idxs,
                                                **config)
                    est_labels = S.processFlat()[1]

        # Make sure the first and last boundaries are included
        est_times, est_labels = utils.process_segmentation_level(
            est_idxs, est_labels, n_frames, frame_times, dur)

    return est_times, est_labels