def process(in_path, sonify_beats=False, n_jobs=1, overwrite=False,
            out_file="out.json", out_beats="out_beats.wav", ds_name="*"):
    """Main process to compute features.

    Parameters
    ----------
    in_path: str
        Path to the file or dataset to compute the features.
    sonify_beats: bool
        Whether to sonify the beats on top of the audio file
        (single file mode only).
    n_jobs: int
        Number of threads (collection mode only).
    overwrite: bool
        Whether to overwrite the previously computed features.
    out_file: str
        Path to the output json file (single file mode only).
    out_beats: str
        Path to the new file containing the sonified beats.
    ds_name: str
        Name of the prefix of the dataset (e.g., Beatles)
    """
    # If in_path is a file, we only compute one file
    if os.path.isfile(in_path):
        file_struct = FileStruct(in_path)
        file_struct.features_file = out_file
        compute_all_features(file_struct, sonify_beats, overwrite, out_beats)
    elif os.path.isdir(in_path):
        # Check that in_path exists
        utils.ensure_dir(in_path)

        # Get files
        file_structs = io.get_dataset_files(in_path, ds_name=ds_name)

        # Compute features using joblib
        Parallel(n_jobs=n_jobs)(delayed(compute_all_features)(
            file_struct, sonify_beats, overwrite, out_beats)
            for file_struct in file_structs)

def process(in_path, audio_beats=False, n_jobs=1, overwrite=False):
    """Main process."""
    # If in_path is a file, we only compute one file
    if os.path.isfile(in_path):
        compute_all_features(in_path, audio_beats, overwrite)
    elif os.path.isdir(in_path):
        # Check that in_path exists
        utils.ensure_dir(in_path)

        # Get files
        file_structs = io.get_dataset_files(in_path)

        # Compute features using joblib
        Parallel(n_jobs=n_jobs)(delayed(compute_all_features)(
            file_struct, audio_beats, overwrite)
            for file_struct in file_structs)

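# A minimal usage sketch for the feature-computation variants of `process`
# above, assuming the surrounding module provides the names they rely on
# (os, utils, io, FileStruct, and joblib's Parallel/delayed). The paths and
# the dataset prefix below are placeholders, not values from the source.

# Single file mode: compute features for one track and sonify its beats on
# top of the audio (first variant above).
process("track01.mp3", sonify_beats=True, out_file="track01_features.json",
        out_beats="track01_beats.wav")

# Collection mode: recompute the features of a whole dataset in parallel.
process("datasets/BeatlesTUT/", n_jobs=4, overwrite=True, ds_name="Beatles")
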
def process(
    in_path,
    annot_beats=False,
    feature="hpcp",
    ds_name="*",
    framesync=False,
    boundaries_id=msaf.DEFAULT_BOUND_ID,
    labels_id=msaf.DEFAULT_LABEL_ID,
    hier=False,
    sonify_bounds=False,
    plot=False,
    n_jobs=4,
    annotator_id=0,
    config=None,
    out_bounds="out_bounds.wav",
    out_sr=22050,
):
    """Main process to segment a file or a collection of files.

    Parameters
    ----------
    in_path: str
        Input path. If a directory, MSAF will function in collection mode.
        If an audio file, MSAF will be in single file mode.
    annot_beats: bool
        Whether to use annotated beats or not. Only available in collection
        mode.
    feature: str
        String representing the feature to be used (e.g. hpcp, mfcc, tonnetz)
    ds_name: str
        Prefix of the dataset to be used (e.g. SALAMI, Isophonics)
    framesync: bool
        Whether to use framesync features or not (default: False -> beatsync)
    boundaries_id: str
        Identifier of the boundaries algorithm (use "gt" for ground truth)
    labels_id: str
        Identifier of the labels algorithm (use None to not compute labels)
    hier : bool
        Whether to compute a hierarchical or flat segmentation.
    sonify_bounds: bool
        Whether to write an output audio file with the annotated boundaries
        or not (only available in Single File Mode).
    plot: bool
        Whether to plot the boundaries and labels against the ground truth.
    n_jobs: int
        Number of processes to run in parallel. Only available in collection
        mode.
    annotator_id: int
        Annotator identifier in the ground truth.
    config: dict
        Dictionary containing custom configuration parameters for the
        algorithms. If None, the default parameters are used.
    out_bounds: str
        Path to the output for the sonified boundaries (only in single file
        mode, when sonify_bounds is True).
    out_sr : int
        Sampling rate for the sonified bounds.

    Returns
    -------
    results : list
        List containing tuples of (est_times, est_labels) of estimated
        boundary times and estimated labels. If labels_id is None,
        est_labels will be a list of -1.
    """
    # Seed random to reproduce results
    np.random.seed(123)

    # Make sure that the features used are correct
    assert feature in msaf.AVAILABLE_FEATS

    # Set up configuration based on algorithms parameters
    if config is None:
        config = io.get_configuration(feature, annot_beats, framesync,
                                      boundaries_id, labels_id)
        config["features"] = None

    # Save multi-segment (hierarchical) configuration
    config["hier"] = hier

    if os.path.isfile(in_path):
        # Single file mode
        # Get (if they exist) or compute features
        file_struct = msaf.io.FileStruct(in_path)
        if not os.path.exists(file_struct.features_file):
            # Compute and save features
            all_features = featextract.compute_features_for_audio_file(
                in_path)
            msaf.utils.ensure_dir(os.path.dirname(file_struct.features_file))
            msaf.featextract.save_features(file_struct.features_file,
                                           all_features)

        # Get correct features
        config["features"] = msaf.io.get_features(
            in_path, annot_beats=annot_beats, framesync=framesync)

        # And run the algorithms
        est_times, est_labels = run_algorithms(in_path, boundaries_id,
                                               labels_id, config,
                                               annotator_id=annotator_id)

        if sonify_bounds:
            logging.info("Sonifying boundaries in %s..." % out_bounds)
            audio_hq, sr = librosa.load(in_path, sr=out_sr)
            utils.sonify_clicks(audio_hq, est_times, out_bounds, out_sr)

        if plot:
            plotting.plot_one_track(file_struct, est_times, est_labels,
                                    boundaries_id, labels_id, ds_name)

        # Save estimations
        msaf.utils.ensure_dir(os.path.dirname(file_struct.est_file))
        io.save_estimations(file_struct, est_times, est_labels,
                            boundaries_id, labels_id, **config)
        return est_times, est_labels
    else:
        # Collection mode
        file_structs = io.get_dataset_files(in_path, ds_name)

        # Call in parallel
        return Parallel(n_jobs=n_jobs)(
            delayed(process_track)(file_struct, boundaries_id, labels_id,
                                   config, annotator_id=annotator_id)
            for file_struct in file_structs)

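# Usage sketch for the segmentation `process` above in single file mode.
# The audio path is a placeholder and "foote" is just one example of a
# boundaries identifier; any algorithm id registered in msaf would work.
est_times, est_labels = process(
    "audio/track01.mp3",
    feature="hpcp",
    boundaries_id="foote",  # example boundary algorithm id (assumption)
    labels_id=None,         # skip labels: est_labels becomes a list of -1
    sonify_bounds=True,
    out_bounds="track01_bounds.wav",
    out_sr=44100,
)
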
def process(in_path, boundaries_id=msaf.config.default_bound_id,
            labels_id=msaf.config.default_label_id, annot_beats=False,
            framesync=False, feature="pcp", hier=False, save=False,
            n_jobs=4, annotator_id=0, config=None):
    """Main process to evaluate algorithms' results.

    Parameters
    ----------
    in_path : str
        Path to the dataset root folder.
    boundaries_id : str
        Boundaries algorithm identifier (e.g. siplca, cnmf)
    labels_id : str
        Labels algorithm identifier (e.g. siplca, cnmf)
    annot_beats : boolean
        Whether to use the annotated beats or not.
    framesync: bool
        Whether to use framesync features or not
        (default: False -> beatsync)
    feature: str
        String representing the feature to be used (e.g. pcp, mfcc, tonnetz)
    hier : bool
        Whether to compute a hierarchical or flat segmentation.
    save: boolean
        Whether to save the results into the SQLite database.
    n_jobs: int
        Number of processes to run in parallel. Only available in collection
        mode.
    annotator_id : int
        Number identifying the annotator.
    config: dict
        Dictionary containing custom configuration parameters for the
        algorithms. If None, the default parameters are used.

    Returns
    -------
    results : pd.DataFrame
        DataFrame containing the evaluations for each file.
    """
    # Set up configuration based on algorithms parameters
    if config is None:
        config = io.get_configuration(feature, annot_beats, framesync,
                                      boundaries_id, labels_id)

    # Hierarchical segmentation
    config["hier"] = hier

    # Remove actual features
    config.pop("features", None)

    # Get out file in case we want to save results
    out_file = get_results_file_name(boundaries_id, labels_id, config,
                                     annotator_id)

    # All evaluations
    results = pd.DataFrame()

    if os.path.isfile(in_path):
        # Single File mode
        evals = [process_track(in_path, boundaries_id, labels_id, config,
                               annotator_id=annotator_id)]
    else:
        # Collection mode
        # If out_file already exists, do not compute new results
        if os.path.exists(out_file):
            logging.info("Results already exist, reading from file %s" %
                         out_file)
            results = pd.read_csv(out_file)
            print_results(results)
            return results

        # Get files
        file_structs = io.get_dataset_files(in_path)

        logging.info("Evaluating %d tracks..." % len(file_structs))

        # Evaluate in parallel
        evals = Parallel(n_jobs=n_jobs)(delayed(process_track)(
            file_struct, boundaries_id, labels_id, config,
            annotator_id=annotator_id) for file_struct in file_structs)

    # Aggregate evaluations in pandas format
    for e in evals:
        if e != []:
            results = results.append(e, ignore_index=True)
    logging.info("%d tracks analyzed" % len(results))

    # Print results
    print_results(results)

    # Save all results
    if save:
        logging.info("Writing results in %s" % out_file)
        results.to_csv(out_file)

    return results

def process(
    in_path,
    boundaries_id=msaf.DEFAULT_BOUND_ID,
    labels_id=msaf.DEFAULT_LABEL_ID,
    ds_name="*",
    annot_beats=False,
    framesync=False,
    feature="hpcp",
    hier=False,
    save=False,
    n_jobs=4,
    annotator_id=0,
    config=None,
):
    """Main process to evaluate algorithms' results.

    Parameters
    ----------
    in_path : str
        Path to the dataset root folder.
    boundaries_id : str
        Boundaries algorithm identifier (e.g. siplca, cnmf)
    labels_id : str
        Labels algorithm identifier (e.g. siplca, cnmf)
    ds_name : str
        Name of the dataset to be evaluated (e.g. SALAMI). * stands for all.
    annot_beats : boolean
        Whether to use the annotated beats or not.
    framesync: bool
        Whether to use framesync features or not
        (default: False -> beatsync)
    feature: str
        String representing the feature to be used (e.g. hpcp, mfcc, tonnetz)
    hier : bool
        Whether to compute a hierarchical or flat segmentation.
    save: boolean
        Whether to save the results into the SQLite database.
    n_jobs: int
        Number of processes to run in parallel. Only available in collection
        mode.
    annotator_id : int
        Number identifying the annotator.
    config: dict
        Dictionary containing custom configuration parameters for the
        algorithms. If None, the default parameters are used.

    Returns
    -------
    results : pd.DataFrame
        DataFrame containing the evaluations for each file.
    """
    # Set up configuration based on algorithms parameters
    if config is None:
        config = io.get_configuration(feature, annot_beats, framesync,
                                      boundaries_id, labels_id)

    # Hierarchical segmentation
    config["hier"] = hier

    # Remove actual features
    config.pop("features", None)

    # Sanity check for hierarchical evaluation
    if hier:
        try:
            from mir_eval import hierarchy
        except ImportError:
            logging.error(
                "An experimental mir_eval version is needed to "
                "evaluate hierarchical segments. Please, download it"
                " from: https://github.com/bmcfee/mir_eval"
                " and check out the tmeasures branch."
            )
            return []

    # Get out file in case we want to save results
    out_file = get_results_file_name(boundaries_id, labels_id, config,
                                     ds_name, annotator_id)

    # All evaluations
    results = pd.DataFrame()

    if os.path.isfile(in_path):
        # Single File mode
        evals = [process_track(in_path, boundaries_id, labels_id, config,
                               annotator_id=annotator_id)]
    else:
        # Collection mode
        # If out_file already exists, do not compute new results
        if os.path.exists(out_file):
            logging.info("Results already exist, reading from file %s" %
                         out_file)
            results = pd.read_csv(out_file)
            print_results(results)
            return results

        # Get files
        file_structs = io.get_dataset_files(in_path, ds_name)

        logging.info("Evaluating %d tracks..." % len(file_structs))

        # Evaluate in parallel
        evals = Parallel(n_jobs=n_jobs)(delayed(process_track)(
            file_struct, boundaries_id, labels_id, config,
            annotator_id=annotator_id) for file_struct in file_structs)

    # Aggregate evaluations in pandas format
    for e in evals:
        if e != []:
            results = results.append(e, ignore_index=True)
    logging.info("%d tracks analyzed" % len(results))

    # Print results
    print_results(results)

    # Save all results
    if save:
        logging.info("Writing results in %s" % out_file)
        results.to_csv(out_file)

    return results

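# Usage sketch for the evaluation `process` above in collection mode. The
# dataset path, prefix, and algorithm ids are illustrative assumptions; the
# returned DataFrame aggregates one row of metrics per evaluated track.
results = process(
    "datasets/",
    boundaries_id="foote",  # example boundary algorithm id (assumption)
    labels_id="fmc2d",      # example label algorithm id (assumption)
    ds_name="SALAMI",
    save=True,              # also write the aggregated results to csv
    n_jobs=8,
)
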
def process(in_path, annot_beats=False, feature="mfcc", ds_name="*",
            framesync=False, boundaries_id="gt", labels_id=None, hier=False,
            sonify_bounds=False, plot=False, n_jobs=4, annotator_id=0,
            config=None, out_bounds="out_bounds.wav"):
    """Main process to segment a file or a collection of files.

    Parameters
    ----------
    in_path: str
        Input path. If a directory, MSAF will function in collection mode.
        If an audio file, MSAF will be in single file mode.
    annot_beats: bool
        Whether to use annotated beats or not. Only available in collection
        mode.
    feature: str
        String representing the feature to be used (e.g. hpcp, mfcc, tonnetz)
    ds_name: str
        Prefix of the dataset to be used (e.g. SALAMI, Isophonics)
    framesync: bool
        Whether to use framesync features or not (default: False -> beatsync)
    boundaries_id: str
        Identifier of the boundaries algorithm (use "gt" for ground truth)
    labels_id: str
        Identifier of the labels algorithm (use None to not compute labels)
    hier : bool
        Whether to compute a hierarchical or flat segmentation.
    sonify_bounds: bool
        Whether to write an output audio file with the annotated boundaries
        or not (only available in Single File Mode).
    plot: bool
        Whether to plot the boundaries and labels against the ground truth.
    n_jobs: int
        Number of processes to run in parallel. Only available in collection
        mode.
    annotator_id: int
        Annotator identifier in the ground truth.
    config: dict
        Dictionary containing custom configuration parameters for the
        algorithms. If None, the default parameters are used.
    out_bounds: str
        Path to the output for the sonified boundaries (only in single file
        mode, when sonify_bounds is True).

    Returns
    -------
    results : list
        List containing tuples of (est_times, est_labels) of estimated
        boundary times and estimated labels. If labels_id is None,
        est_labels will be a list of -1.
    """
    # Seed random to reproduce results
    np.random.seed(123)

    # Set up configuration based on algorithms parameters
    if config is None:
        config = io.get_configuration(feature, annot_beats, framesync,
                                      boundaries_id, labels_id)
        config["features"] = None
    config["hier"] = hier

    if os.path.isfile(in_path):
        # Single file mode
        # Get (if they exist) or compute features
        # TODO: Modularize!
        file_struct = msaf.io.FileStruct(in_path)
        if os.path.exists(file_struct.features_file):
            feat_prefix = ""
            if not framesync:
                feat_prefix = "bs_"
            features = {}
            # Mi: added the Gammatone feature set
            features["%shpcp" % feat_prefix], \
                features["%smfcc" % feat_prefix], \
                features["%stonnetz" % feat_prefix], \
                features["%scqt" % feat_prefix], \
                features["%sgmt" % feat_prefix], \
                features["beats"], dur, features["anal"] = \
                msaf.io.get_features(in_path, annot_beats=annot_beats,
                                     framesync=framesync, pre_features=None)
        else:
            # Compute and save features
            features = featextract.compute_features_for_audio_file(in_path)
            msaf.utils.ensure_dir(os.path.dirname(file_struct.features_file))
            msaf.featextract.save_features(file_struct.features_file,
                                           features)
        config["features"] = features

        # And run the algorithms
        est_times, est_labels = run_algorithms(in_path, boundaries_id,
                                               labels_id, config,
                                               annotator_id=annotator_id)

        if sonify_bounds:
            logging.info("Sonifying boundaries in %s..." % out_bounds)
            fs = 44100
            audio_hq, sr = librosa.load(in_path, sr=fs)
            utils.sonify_clicks(audio_hq, est_times, out_bounds, fs)

        if plot:
            plotting.plot_one_track(file_struct, est_times, est_labels,
                                    boundaries_id, labels_id, ds_name)

        # Save estimations
        msaf.utils.ensure_dir(os.path.dirname(file_struct.est_file))
        config["features"] = None
        io.save_estimations(file_struct.est_file, est_times, est_labels,
                            boundaries_id, labels_id, **config)
        return est_times, est_labels
    else:
        # Collection mode
        file_structs = io.get_dataset_files(in_path, ds_name)

        # Call in parallel
        return Parallel(n_jobs=n_jobs)(delayed(process_track)(
            file_struct, boundaries_id, labels_id, config,
            annotator_id=annotator_id, plot=plot)
            for file_struct in file_structs)

def process(in_path, annot_beats=False, feature="pcp", framesync=False,
            boundaries_id=msaf.config.default_bound_id,
            labels_id=msaf.config.default_label_id, hier=False,
            sonify_bounds=False, plot=False, n_jobs=4, annotator_id=0,
            config=None, out_bounds="out_bounds.wav", out_sr=22050,
            output_file=None):
    """Main process to segment a file or a collection of files.

    Parameters
    ----------
    in_path: str
        Input path. If a directory, MSAF will function in collection mode.
        If an audio file, MSAF will be in single file mode.
    annot_beats: bool
        Whether to use annotated beats or not.
    feature: str
        String representing the feature to be used (e.g. pcp, mfcc, tonnetz)
    framesync: bool
        Whether to use framesync features or not (default: False -> beatsync)
    boundaries_id: str
        Identifier of the boundaries algorithm (use "gt" for ground truth)
    labels_id: str
        Identifier of the labels algorithm (use None to not compute labels)
    hier : bool
        Whether to compute a hierarchical or flat segmentation.
    sonify_bounds: bool
        Whether to write an output audio file with the annotated boundaries
        or not (only available in Single File Mode).
    plot: bool
        Whether to plot the boundaries and labels against the ground truth.
    n_jobs: int
        Number of processes to run in parallel. Only available in collection
        mode.
    annotator_id: int
        Annotator identifier in the ground truth.
    config: dict
        Dictionary containing custom configuration parameters for the
        algorithms. If None, the default parameters are used.
    out_bounds: str
        Path to the output for the sonified boundaries (only in single file
        mode, when sonify_bounds is True).
    out_sr : int
        Sampling rate for the sonified bounds.
    output_file: str
        Path where to store the plot (only used when plot is True).

    Returns
    -------
    results : list
        List containing tuples of (est_times, est_labels) of estimated
        boundary times and estimated labels. If labels_id is None,
        est_labels will be a list of -1.
    """
    # Seed random to reproduce results
    np.random.seed(123)

    # Set up configuration based on algorithms parameters
    if config is None:
        config = io.get_configuration(feature, annot_beats, framesync,
                                      boundaries_id, labels_id)
        config["features"] = None

    # Save multi-segment (hierarchical) configuration
    config["hier"] = hier

    if not os.path.exists(in_path):
        raise NoAudioFileError("File or directory does not exist: %s" %
                               in_path)

    if os.path.isfile(in_path):
        # Single file mode
        # Get (if they exist) or compute features
        file_struct = msaf.io.FileStruct(in_path)

        # Use temporary file in single mode
        file_struct.features_file = msaf.config.features_tmp_file

        # Get features
        config["features"] = Features.select_features(
            feature, file_struct, annot_beats, framesync)

        # And run the algorithms
        est_times, est_labels = run_algorithms(file_struct, boundaries_id,
                                               labels_id, config,
                                               annotator_id=annotator_id)

        if sonify_bounds:
            logging.info("Sonifying boundaries in %s..." % out_bounds)
            audio_hq, sr = librosa.load(in_path, sr=out_sr)
            utils.sonify_clicks(audio_hq, est_times, out_bounds, out_sr)

        if plot:
            custom_plotting.plot_one_track(file_struct, est_times,
                                           est_labels, boundaries_id,
                                           labels_id,
                                           output_file=output_file)

        # TODO: Only save if needed
        # Save estimations
        msaf.utils.ensure_dir(os.path.dirname(file_struct.est_file))
        io.save_estimations(file_struct, est_times, est_labels,
                            boundaries_id, labels_id, **config)
        return est_times, est_labels
    else:
        # Collection mode
        file_structs = io.get_dataset_files(in_path)
        return Parallel(n_jobs=n_jobs)(
            delayed(process_track)(file_struct, boundaries_id, labels_id,
                                   config, annotator_id=annotator_id)
            for file_struct in file_structs)

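# Usage sketch: supplying a custom configuration to the `process` variant
# above instead of letting it build one. The config is seeded from
# io.get_configuration (as the function itself does) and could then be
# edited in place; the "sf" identifier and the path are illustrative
# assumptions.
custom_config = io.get_configuration("pcp", False, False, "sf", None)
custom_config["features"] = None  # features are filled in by process()
est_times, est_labels = process("audio/track01.mp3", feature="pcp",
                                boundaries_id="sf", config=custom_config)
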
def process(in_path, annot_beats=False, feature="mfcc", ds_name="*",
            framesync=False, boundaries_id="gt", labels_id=None,
            out_audio=False, plot=False, n_jobs=4, config=None):
    """Main process to segment a file or a collection of files.

    Parameters
    ----------
    in_path: str
        Input path. If a directory, MSAF will function in collection mode.
        If an audio file, MSAF will be in single file mode.
    annot_beats: bool
        Whether to use annotated beats or not. Only available in collection
        mode.
    feature: str
        String representing the feature to be used (e.g. hpcp, mfcc, tonnetz)
    ds_name: str
        Prefix of the dataset to be used (e.g. SALAMI, Isophonics)
    framesync: bool
        Whether to use framesync features or not (default: False -> beatsync)
    boundaries_id: str
        Identifier of the boundaries algorithm (use "gt" for ground truth)
    labels_id: str
        Identifier of the labels algorithm (use None to not compute labels)
    out_audio: bool
        Whether to write an output audio file with the annotated boundaries
        or not (only available in Single File Mode).
    plot: bool
        Whether to plot the boundaries and labels against the ground truth.
    n_jobs: int
        Number of processes to run in parallel. Only available in collection
        mode.
    config: dict
        Dictionary containing custom configuration parameters for the
        algorithms. If None, the default parameters are used.

    Returns
    -------
    results : list
        List containing tuples of (est_times, est_labels) of estimated
        boundary times and estimated labels. If labels_id is None,
        est_labels will be a list of -1.
    """
    # Seed random to reproduce results
    np.random.seed(123)

    # Set up configuration based on algorithms parameters
    if config is None:
        config = io.get_configuration(feature, annot_beats, framesync,
                                      boundaries_id, labels_id, algorithms)

    if os.path.isfile(in_path):
        # Single file mode
        audio, features = featextract.compute_features_for_audio_file(
            in_path)
        config["features"] = features

        # And run the algorithms
        est_times, est_labels = run_algorithms(in_path, boundaries_id,
                                               labels_id, config)

        if out_audio:
            # TODO: Set a nicer output file name?
            # out_file = in_path[:-4] + msaf.out_boundaries_ext
            out_file = "out_boundaries.wav"
            logging.info("Sonifying boundaries in %s..." % out_file)
            fs = 44100
            audio_hq = featextract.read_audio(in_path, fs)
            utils.write_audio_boundaries(
                audio_hq, np.delete(est_times, [1, len(est_times) - 2]),
                out_file, fs)

        if plot:
            plotting.plot_one_track(in_path, est_times, est_labels,
                                    boundaries_id, labels_id)

        return est_times, est_labels
    else:
        # Collection mode
        file_structs = io.get_dataset_files(in_path, ds_name)

        # Call in parallel
        return Parallel(n_jobs=n_jobs)(delayed(process_track)(
            file_struct, boundaries_id, labels_id, config)
            for file_struct in file_structs)

def process(in_path, boundaries_id, labels_id=None, ds_name="*",
            annot_beats=False, framesync=False, feature="hpcp", save=False,
            n_jobs=4, config=None):
    """Main process to evaluate algorithms' results.

    Parameters
    ----------
    in_path : str
        Path to the dataset root folder.
    boundaries_id : str
        Boundaries algorithm identifier (e.g. siplca, cnmf)
    labels_id : str
        Labels algorithm identifier (e.g. siplca, cnmf)
    ds_name : str
        Name of the dataset to be evaluated (e.g. SALAMI). * stands for all.
    annot_beats : boolean
        Whether to use the annotated beats or not.
    framesync: bool
        Whether to use framesync features or not
        (default: False -> beatsync)
    feature: str
        String representing the feature to be used (e.g. hpcp, mfcc, tonnetz)
    save: boolean
        Whether to save the results into the SQLite database.
    n_jobs: int
        Number of processes to run in parallel.
    config: dict
        Dictionary containing custom configuration parameters for the
        algorithms. If None, the default parameters are used.

    Returns
    -------
    results : pd.DataFrame
        DataFrame containing the evaluations for each file.
    """
    # Set up configuration based on algorithms parameters
    if config is None:
        config = io.get_configuration(feature, annot_beats, framesync,
                                      boundaries_id, labels_id, algorithms)

    # Get out file in case we want to save results
    out_file = get_results_file_name(boundaries_id, labels_id, config,
                                     ds_name)

    # If out_file already exists, do not compute new results
    if os.path.exists(out_file):
        logging.info("Results already exist, reading from file %s" %
                     out_file)
        results = pd.read_csv(out_file)
        print_results(results)
        return results

    # Get files
    file_structs = io.get_dataset_files(in_path, ds_name)

    logging.info("Evaluating %d tracks..." % len(file_structs))

    # All evaluations
    results = pd.DataFrame()

    # Evaluate in parallel
    evals = Parallel(n_jobs=n_jobs)(delayed(process_track)(
        file_struct, boundaries_id, labels_id, config)
        for file_struct in file_structs)

    # Aggregate evaluations in pandas format
    for e in evals:
        if e != []:
            results = results.append(e, ignore_index=True)
    logging.info("%d tracks analyzed" % len(results))

    # Print results
    print_results(results)

    # Save all results
    if save:
        logging.info("Writing average results in %s" % out_file)
        results.to_csv(out_file)

    return results

def process(in_path, boundaries_id=msaf.config.default_bound_id,
            labels_id=msaf.config.default_label_id, annot_beats=False,
            framesync=False, feature="pcp", hier=False, save=False,
            out_file=None, n_jobs=4, annotator_id=0, config=None):
    """Main process to evaluate algorithms' results.

    Parameters
    ----------
    in_path : str
        Path to the dataset root folder.
    boundaries_id : str
        Boundaries algorithm identifier (e.g. siplca, cnmf)
    labels_id : str
        Labels algorithm identifier (e.g. siplca, cnmf)
    annot_beats : boolean
        Whether to use the annotated beats or not.
    framesync: bool
        Whether to use framesync features or not
        (default: False -> beatsync)
    feature: str
        String representing the feature to be used (e.g. pcp, mfcc, tonnetz)
    hier : bool
        Whether to compute a hierarchical or flat segmentation.
    save: boolean
        Whether to save the results into the `out_file` csv file.
    out_file: str
        Path to the csv file to save the results (if `None` and
        `save = True` it will save the results in the default file name
        obtained by calling `get_results_file_name`).
    n_jobs: int
        Number of processes to run in parallel. Only available in collection
        mode.
    annotator_id : int
        Number identifying the annotator.
    config: dict
        Dictionary containing custom configuration parameters for the
        algorithms. If None, the default parameters are used.

    Returns
    -------
    results : pd.DataFrame
        DataFrame containing the evaluations for each file.
    """
    # Set up configuration based on algorithms parameters
    if config is None:
        config = io.get_configuration(feature, annot_beats, framesync,
                                      boundaries_id, labels_id)

    # Hierarchical segmentation
    config["hier"] = hier

    # Remove actual features
    config.pop("features", None)

    # Get out file in case we want to save results
    if out_file is None:
        out_file = get_results_file_name(boundaries_id, labels_id, config,
                                         annotator_id)

    # If out_file already exists, read and return the stored results
    if os.path.exists(out_file):
        logging.warning("Results already exist, reading from file %s" %
                        out_file)
        results = pd.read_csv(out_file)
        print_results(results)
        return results

    # Perform actual evaluations
    if os.path.isfile(in_path):
        # Single File mode
        evals = [process_track(in_path, boundaries_id, labels_id, config,
                               annotator_id=annotator_id)]
    else:
        # Collection mode
        # Get files
        file_structs = io.get_dataset_files(in_path)

        # Evaluate in parallel
        logging.info("Evaluating %d tracks..." % len(file_structs))
        evals = Parallel(n_jobs=n_jobs)(delayed(process_track)(
            file_struct, boundaries_id, labels_id, config,
            annotator_id=annotator_id) for file_struct in file_structs)

    # Aggregate evaluations in pandas format
    results = pd.DataFrame()
    for e in evals:
        if e != []:
            results = results.append(e, ignore_index=True)
    logging.info("%d tracks analyzed" % len(results))

    # Print results
    print_results(results)

    # Save all results
    if save:
        logging.info("Writing results in %s" % out_file)
        results.to_csv(out_file)

    return results

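# Usage sketch for the evaluation variant above: directing the results to a
# custom csv file instead of the default name derived from
# get_results_file_name. The paths and the algorithm id are placeholders.
results = process("datasets/SALAMI/", boundaries_id="foote", save=True,
                  out_file="foote_salami_results.csv")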