Example #1
File: run.py Project: kacrouse/msaf
def run_algorithms(audio_file, boundaries_id, labels_id, config,
                   annotator_id=0):
    """Runs the algorithms with the specified identifiers on the audio_file.

    Parameters
    ----------
    audio_file: str
        Path to the audio file to segment.
    boundaries_id: str
        Identifier of the boundaries algorithm to use ("gt" for ground truth).
    labels_id: str
        Identifier of the labels algorithm to use (None for not labeling).
    config: dict
        Dictionary containing the custom parameters of the algorithms to use.
    annotator_id: int
        Identifier of the annotator in the ground truth.

    Returns
    -------
    est_times: np.array or list
        List of estimated times for the segment boundaries.
        If `list`, it will be a list of np.arrays, sorted by segmentation
        layer.
    est_labels: np.array or list
        List of all the labels associated with the estimated segments.
        If `list`, it will be a list of np.arrays, sorted by segmentation
        layer.
    """
    # Features should have already been computed, let's read them
    features = io.get_features(audio_file, config["annot_beats"],
                               config["framesync"])
    config["features"] = features

    # Check that there are enough audio frames
    if features["hpcp"].shape[0] <= msaf.minimum__frames:
        logging.warning("Audio file too short, or too many few beats "
                        "estimated. Returning empty estimations.")
        return np.asarray([0, features["anal"]["dur"]]), \
            np.asarray([0], dtype=int)

    # Get the corresponding modules
    bounds_module = get_boundaries_module(boundaries_id)
    labels_module = get_labels_module(labels_id)

    # Get the correct frame times
    frame_times = features["beats"]
    if config["framesync"]:
        frame_times = utils.get_time_frames(features["anal"]["dur"],
                                            features["anal"])

    # Segment audio based on type of segmentation
    run_fun = run_hierarchical if config["hier"] else run_flat
    est_times, est_labels = run_fun(audio_file, bounds_module, labels_module,
                                    frame_times, config, annotator_id)

    return est_times, est_labels
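
A minimal invocation sketch for this variant, assuming only the config keys the function actually reads above; the audio path and the "foote" boundaries identifier are illustrative assumptions, not taken from this snippet:

# Hypothetical usage of run_algorithms; config carries only the keys
# referenced in the snippet above ("features" is filled in internally).
config = {
    "annot_beats": False,   # use estimated rather than annotated beats
    "framesync": False,     # beat-synchronous features
    "hier": False,          # flat segmentation -> run_flat is dispatched
}
est_times, est_labels = run_algorithms(
    "dataset/audio/track01.mp3",   # hypothetical path
    boundaries_id="foote",         # assumed boundary-algorithm identifier
    labels_id=None,                # skip the labeling stage
    config=config)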
Example #2
    def _preprocess(self,
                    valid_features=["hpcp", "tonnetz", "mfcc"],
                    normalize=True):
        """This method obtains the actual features, their frame times,
        and the boundary indices in these features if needed."""
        # Read features
        if self.features is None:
            # Features stored in a json file
            self.hpcp, self.mfcc, self.tonnetz, beats, dur, anal = \
                io.get_features(self.audio_file, annot_beats=self.annot_beats,
                                framesync=self.framesync)
        else:
            # Features passed as parameters
            feat_prefix = ""
            if not self.framesync:
                feat_prefix = "bs_"
            self.hpcp = self.features["%shpcp" % feat_prefix]
            self.mfcc = self.features["%smfcc" % feat_prefix]
            self.tonnetz = self.features["%stonnetz" % feat_prefix]
            beats = self.features["beats"]
            dur = self.features["anal"]["dur"]
            anal = self.features["anal"]

        # Store analysis parameters
        self.anal = anal

        # Use correct frames to find times
        frame_times = beats
        if self.framesync:
            frame_times = U.get_time_frames(dur, anal)

        # Read input bounds if necessary
        bound_idxs = None
        if self.in_bound_times is not None:
            bound_idxs = io.align_times(self.in_bound_times, frame_times)
            bound_idxs = np.unique(bound_idxs)

        # Use specific feature
        if self.feature_str not in valid_features:
            raise RuntimeError("Feature %s is not valid for algorithm: %s "
                               "(valid features are %s)." %
                               (self.feature_str, __name__, valid_features))
        try:
            # Fetch the feature matrix by attribute name (safer than eval)
            F = getattr(self, self.feature_str)
        except AttributeError:
            raise RuntimeError("Feature %s is not supported by MSAF" %
                               self.feature_str)

        # Normalize if needed
        if normalize:
            F = U.lognormalize_chroma(F)

        return F, frame_times, dur, bound_idxs
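
The feature selection above works by looking an attribute up by name; here is a self-contained toy illustration of that dispatch pattern (this is not MSAF code, and every name in it is made up):

import numpy as np

class FeatureHolder:
    """Toy stand-in for a Segmenter that stores features as attributes."""
    def __init__(self):
        self.hpcp = np.zeros((100, 12))   # chromagram placeholder
        self.mfcc = np.zeros((100, 13))   # MFCC placeholder

    def select(self, feature_str, valid_features=("hpcp", "mfcc")):
        # Same idea as _preprocess: validate the name, then fetch the
        # attribute with getattr() instead of eval().
        if feature_str not in valid_features:
            raise RuntimeError("Feature %s is not valid" % feature_str)
        return getattr(self, feature_str)

F = FeatureHolder().select("hpcp")   # -> the (100, 12) chroma matrix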
Example #3
def run_algorithms(audio_file, boundaries_id, labels_id, config,
				   annotator_id=0):
	"""Runs the algorithms with the specified identifiers on the audio_file.

	Parameters
	----------
	audio_file: str
		Path to the audio file to segment.
	boundaries_id: str
		Identifier of the boundaries algorithm to use ("gt" for ground truth).
	labels_id: str
		Identifier of the labels algorithm to use (None for not labeling).
	config: dict
		Dictionary containing the custom parameters of the algorithms to use.
	annotator_id: int
		Identifier of the annotator in the ground truth.

	Returns
	-------
	est_times: np.array or list
		List of estimated times for the segment boundaries.
		If `list`, it will be a list of np.arrays, sorted by segmentation layer.
	est_labels: np.array or list
		List of all the labels associated with the estimated segments.
		If `list`, it will be a list of np.arrays, sorted by segmentation layer.
	"""

	# At this point, features should have already been computed
	hpcp, mfcc, tonnetz, cqt, gmt, beats, dur, anal =  \
			io.get_features(audio_file, config["annot_beats"],
							config["framesync"],
							pre_features=config["features"])

	# Check that there are enough audio frames
	if hpcp.shape[0] <= msaf.minimum_frames:
		logging.warning("Audio file too short, or too many few beats "
						"estimated. Returning empty estimations.")
		return np.asarray([0, dur]), np.asarray([0], dtype=int)

	# Get the corresponding modules
	bounds_module = get_boundaries_module(boundaries_id)
	labels_module = get_labels_module(labels_id)

	# Get the correct frame times
	frame_times = beats
	if config["framesync"]:
		frame_times = utils.get_time_frames(dur, anal)

	# Segment audio based on type of segmentation
	if config["hier"]:
		# Hierarchical segmentation
		if bounds_module is None:
			raise RuntimeError("A boundary algorithm is needed when using "
							   "hierarchical segmentation.")
		if labels_module is not None and \
				bounds_module.__name__ != labels_module.__name__:
			raise RuntimeError("The same algorithm for boundaries and labels is "
							   "needed when using hierarchical segmentation.")
		S = bounds_module.Segmenter(audio_file, **config)
		est_idxs, est_labels = S.processHierarchical()

		# Make sure the first and last boundaries are included for each
		# level in the hierarchy
		est_times = []
		cleaned_est_labels = []
		for level in range(len(est_idxs)):
			est_level_times, est_level_labels = \
				utils.process_segmentation_level(est_idxs[level],
												 est_labels[level],
												 hpcp.shape[0],
												 frame_times,
												 dur)
			est_times.append(est_level_times)
			cleaned_est_labels.append(est_level_labels)
		est_labels = cleaned_est_labels
	else:
		# Flat segmentation
		# Segment using the specified boundaries and labels
		# Case when boundaries and labels algorithms are the same
		if bounds_module is not None and labels_module is not None and \
				bounds_module.__name__ == labels_module.__name__:
			S = bounds_module.Segmenter(audio_file, **config)
			est_idxs, est_labels = S.processFlat()
		# Different boundary and label algorithms
		else:
			# Identify segment boundaries
			if bounds_module is not None:
				S = bounds_module.Segmenter(audio_file, in_labels=[], **config)
				est_idxs, est_labels = S.processFlat()
			else:
				try:
					est_times, est_labels = io.read_references(
						audio_file, annotator_id=annotator_id)
					est_idxs = io.align_times(est_times, frame_times[:-1])
					if est_idxs[0] != 0:
						est_idxs = np.concatenate(([0], est_idxs))
					if est_idxs[-1] != hpcp.shape[0] - 1:
						est_idxs = np.concatenate((est_idxs, [hpcp.shape[0] - 1]))
				except Exception:
					logging.warning("No references found for file: %s" %
									audio_file)
					return [], []

			# Label segments
			if labels_module is not None:
				if len(est_idxs) == 2:
					est_labels = np.array([0])
				else:
					S = labels_module.Segmenter(audio_file,
												in_bound_idxs=est_idxs,
												**config)
					est_labels = S.processFlat()[1]

		# Make sure the first and last boundaries are included
		est_times, est_labels = utils.process_segmentation_level(
			est_idxs, est_labels, hpcp.shape[0], frame_times, dur)

	return est_times, est_labels
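
As the hierarchical branch above enforces, hier=True requires one algorithm to supply both boundaries and labels; a hedged invocation sketch follows (the path and the "scluster" identifier are assumptions for illustration):

# Hypothetical hierarchical run against this variant of run_algorithms.
config = {
    "annot_beats": False,
    "framesync": True,
    "features": None,    # make io.get_features read them from disk
    "hier": True,        # triggers the processHierarchical() branch
}
est_times, est_labels = run_algorithms(
    "dataset/audio/track01.mp3",   # hypothetical path
    boundaries_id="scluster",
    labels_id="scluster",          # must match boundaries_id when hier=True
    config=config)
# est_times / est_labels are now lists of np.arrays, one per layer.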
Example #4
def get_features(audio_path, annot_beats=False, framesync=False):
    """
    Gets the features of an audio file given the audio_path.

    Parameters
    ----------
    audio_path: str
        Path to the audio file.
    annot_beats: bool
        Whether to use annotated beats or not.
    framesync: bool
        Whether to use framesync features or not.

    Returns
    -------
    features : dict
        A dictionary with the following keys:
            "hpcp": np.array((N, 12)), Chromagram
            "mfcc": np.array((N, 13)), MFCC
            "tonnetz": np.array((N, 6)), Tonnetz
            "cqt": np.array((N, msaf.Anal.cqt_bins)), Constant-Q transform
            "beats": np.array(T), Beats in seconds
            "anal" : dict, Parameters of analysis of track (e.g. sampling rate)
    """
    # Dataset path
    ds_path = os.path.dirname(os.path.dirname(audio_path))

    # Read Estimations
    features_path = os.path.join(
        ds_path, msaf.Dataset.features_dir, os.path.basename(audio_path)[:-4] + msaf.Dataset.features_ext
    )
    with open(features_path, "r") as f:
        feats = json.load(f)

    # Beat Synchronous Feats
    if framesync:
        feat_str = "framesync"
        beats = None
    else:
        if annot_beats:
            # Read references
            try:
                annotation_path = os.path.join(
                    ds_path,
                    msaf.Dataset.references_dir,
                    os.path.basename(audio_path)[:-4] + msaf.Dataset.references_ext,
                )
                # TODO: Better exception handling
                jam = jams.load(annotation_path, validate=False)
            except Exception:
                raise RuntimeError("No references found in file %s" % annotation_path)

            feat_str = "ann_beatsync"
            beats = []
            beat_data = jam.beats[0].data
            if not beat_data:
                raise ValueError("No beat data in file %s" % annotation_path)
            for data in beat_data:
                beats.append(data.time.value)
            beats = np.unique(beats)
        else:
            feat_str = "est_beatsync"
            beats = np.asarray(feats["beats"]["times"])

    # Build actual features dictionary
    features = {}
    features["hpcp"] = np.asarray(feats[feat_str]["hpcp"])
    features["mfcc"] = np.asarray(feats[feat_str]["mfcc"])
    features["tonnetz"] = np.asarray(feats[feat_str]["tonnetz"])
    features["cqt"] = np.asarray(feats[feat_str]["cqt"])
    features["beats"] = np.asarray(feats["beats"]["times"])
    features["anal"] = feats["analysis"]

    # Frame times might be shorter than the actual number of features.
    if framesync:
        frame_times = utils.get_time_frames(features["anal"]["dur"], features["anal"])
        features["hpcp"] = features["hpcp"][: len(frame_times)]
        features["mfcc"] = features["mfcc"][: len(frame_times)]
        features["tonnetz"] = features["tonnetz"][: len(frame_times)]
        features["cqt"] = features["cqt"][: len(frame_times)]

    return features
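
A short sketch of consuming the returned dictionary (the path is hypothetical; the shapes simply echo the docstring above):

# Hypothetical consumption of the features dict built above.
features = get_features("dataset/audio/track01.mp3", framesync=True)
print(features["hpcp"].shape)     # (N, 12) chromagram
print(features["mfcc"].shape)     # (N, 13) MFCCs over the same frames
print(features["anal"]["dur"])    # track duration in seconds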
Example #5
def get_features(audio_path,
                 annot_beats=False,
                 framesync=False,
                 pre_features=None):
    """
    Gets the features of an audio file given the audio_path.

    Parameters
    ----------
    audio_path: str
        Path to the audio file.
    annot_beats: bool
        Whether to use annotated beats or not.
    framesync: bool
        Whether to use framesync features or not.
    pre_features: dict
        Pre-computed features as a dictionary.
        `None` for reading them from the json file.

    Returns
    -------
    C: np.array((N, 12))
        (Beat-sync) Chromagram
    M: np.array((N, 13))
        (Beat-sync) MFCC
    T: np.array((N, 6))
        (Beat-sync) Tonnetz
    cqt: np.array((N, msaf.Anal.cqt_bins))
        (Beat-sync) Constant-Q transform
    G: np.array
        (Beat-sync) Gammatone features
    beats: np.array(T)
        Beats in seconds
    dur: float
        Song duration
    analysis : dict
        Parameters of analysis of track (e.g. sampling rate)
    """
    if pre_features is None:
        # Dataset path
        ds_path = os.path.dirname(os.path.dirname(audio_path))

        # Read Estimations
        features_path = os.path.join(
            ds_path, msaf.Dataset.features_dir,
            os.path.basename(audio_path)[:-4] + msaf.Dataset.features_ext)
        with open(features_path, "r") as f:
            feats = json.load(f)

        # Beat Synchronous Feats
        if framesync:
            feat_str = "framesync"
            beats = None
        else:
            if annot_beats:
                # Read references
                try:
                    annotation_path = os.path.join(
                        ds_path, msaf.Dataset.references_dir,
                        os.path.basename(audio_path)[:-4] +
                        msaf.Dataset.references_ext)
                    jam = jams2.load(annotation_path)
                except Exception:
                    raise RuntimeError("No references found in file %s" %
                                       annotation_path)

                feat_str = "ann_beatsync"
                beats = []
                beat_data = jam.beats[0].data
                if not beat_data:
                    raise ValueError("No beat data in file %s" %
                                     annotation_path)
                for data in beat_data:
                    beats.append(data.time.value)
                beats = np.unique(beats)
            else:
                feat_str = "est_beatsync"
                beats = np.asarray(feats["beats"]["times"])
        C = np.asarray(feats[feat_str]["hpcp"])
        M = np.asarray(feats[feat_str]["mfcc"])
        T = np.asarray(feats[feat_str]["tonnetz"])
        cqt = np.asarray(feats[feat_str]["cqt"])
        # Mi: added the Gammatone features
        G = np.asarray(feats[feat_str]["gmt"])

        analysis = feats["analysis"]
        dur = analysis["dur"]

        # Frame times might be shorter than the actual number of features.
        if framesync:
            frame_times = utils.get_time_frames(dur, analysis)
            C = C[:len(frame_times)]
            M = M[:len(frame_times)]
            T = T[:len(frame_times)]
            cqt = cqt[:len(frame_times)]
            G = G[:len(frame_times)]

    else:
        feat_prefix = ""
        if not framesync:
            feat_prefix = "bs_"
        C = pre_features["%shpcp" % feat_prefix]
        M = pre_features["%smfcc" % feat_prefix]
        T = pre_features["%stonnetz" % feat_prefix]
        cqt = pre_features["%scqt" % feat_prefix]
        G = pre_features["%sgmt" % feat_prefix]
        beats = pre_features["beats"]
        dur = pre_features["anal"]["dur"]
        analysis = pre_features["anal"]

    return C, M, T, cqt, G, beats, dur, analysis
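
This variant returns eight values, adding the Gammatone matrix G to the tuple documented above; a hedged unpacking sketch (the path is made up, the shapes follow the docstring):

# Hypothetical call against this eight-value variant of get_features.
C, M, T, cqt, G, beats, dur, analysis = get_features(
    "dataset/audio/track01.mp3", annot_beats=False, framesync=False)
assert C.shape[1] == 12    # chroma bins
assert M.shape[1] == 13    # MFCC coefficients
assert T.shape[1] == 6     # Tonnetz dimensions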
Example #6
def get_features(audio_path, annot_beats=False, framesync=False,
				 pre_features=None):
	"""
	Gets the features of an audio file given the audio_path.

	Parameters
	----------
	audio_path: str
		Path to the audio file.
	annot_beats: bool
		Whether to use annotated beats or not.
	framesync: bool
		Whether to use framesync features or not.
	pre_features: dict
		Pre-computed features as a dictionary.
		`None` for reading them from the json file.

	Returns
	-------
	C: np.array((N, 12))
		(Beat-sync) Chromagram
	M: np.array((N, 13))
		(Beat-sync) MFCC
	T: np.array((N, 6))
		(Beat-sync) Tonnetz
	cqt: np.array((N, msaf.Anal.cqt_bins))
		(Beat-sync) Constant-Q transform
	G: np.array
		(Beat-sync) Gammatone features
	beats: np.array(T)
		Beats in seconds
	dur: float
		Song duration
	analysis : dict
		Parameters of analysis of track (e.g. sampling rate)
	"""
	if pre_features is None:
		# Dataset path
		ds_path = os.path.dirname(os.path.dirname(audio_path))

		# Read Estimations
		features_path = os.path.join(ds_path, msaf.Dataset.features_dir,
			os.path.basename(audio_path)[:-4] + msaf.Dataset.features_ext)
		with open(features_path, "r") as f:
			feats = json.load(f)

		# Beat Synchronous Feats
		if framesync:
			feat_str = "framesync"
			beats = None
		else:
			if annot_beats:
				# Read references
				try:
					annotation_path = os.path.join(
						ds_path, msaf.Dataset.references_dir,
						os.path.basename(audio_path)[:-4] +
						msaf.Dataset.references_ext)
					jam = jams2.load(annotation_path)
				except Exception:
					raise RuntimeError("No references found in file %s" %
									annotation_path)

				feat_str = "ann_beatsync"
				beats = []
				beat_data = jam.beats[0].data
				if not beat_data:
					raise ValueError("No beat data in file %s" %
									 annotation_path)
				for data in beat_data:
					beats.append(data.time.value)
				beats = np.unique(beats)
			else:
				feat_str = "est_beatsync"
				beats = np.asarray(feats["beats"]["times"])
		C = np.asarray(feats[feat_str]["hpcp"])
		M = np.asarray(feats[feat_str]["mfcc"])
		T = np.asarray(feats[feat_str]["tonnetz"])
		cqt = np.asarray(feats[feat_str]["cqt"])

		# Mi: added the Gammatone features
		G = np.asarray(feats[feat_str]["gmt"])

		analysis = feats["analysis"]
		dur = analysis["dur"]

		# Frame times might be shorter than the actual number of features.
		if framesync:
			frame_times = utils.get_time_frames(dur, analysis)
			C = C[:len(frame_times)]
			M = M[:len(frame_times)]
			T = T[:len(frame_times)]
			cqt = cqt[:len(frame_times)]
			G = G[:len(frame_times)]

	else:
		feat_prefix = ""
		if not framesync:
			feat_prefix = "bs_"
		C = pre_features["%shpcp" % feat_prefix]
		M = pre_features["%smfcc" % feat_prefix]
		T = pre_features["%stonnetz" % feat_prefix]
		cqt = pre_features["%scqt" % feat_prefix]
		G = pre_features["%sgmt" % feat_prefix]
		beats = pre_features["beats"]
		dur = pre_features["anal"]["dur"]
		analysis = pre_features["anal"]

	return C, M, T, cqt, G, beats, dur, analysis
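
When pre_features is supplied, the else-branch above looks features up under a "bs_" prefix for beat-synchronous runs; a sketch of that dictionary's expected layout (all array sizes are placeholders, not values from the source):

import numpy as np

# Hypothetical pre-computed features for the framesync=False path; the
# key names mirror the lookups in the else-branch above.
pre_features = {
    "bs_hpcp": np.zeros((200, 12)),
    "bs_mfcc": np.zeros((200, 13)),
    "bs_tonnetz": np.zeros((200, 6)),
    "bs_cqt": np.zeros((200, 84)),      # bin count is a placeholder
    "bs_gmt": np.zeros((200, 64)),      # Gammatone band count is a placeholder
    "beats": np.linspace(0, 180, 201),  # beat times in seconds
    "anal": {"dur": 180.0},
}
C, M, T, cqt, G, beats, dur, analysis = get_features(
    "unused.mp3", framesync=False, pre_features=pre_features)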