예제 #1
0
def padded_tens_with_zeros(tens, longest_bar_len):
    """
    DEPRECATED: always raises, and is only kept so that legacy callers fail loudly.

    This function used to pad a single-bar tensor slice with zeros along its 2nd dimension,
    for it to be of the same size as the longest bar.
    This behavior is outdated: the function raises before doing any work, so no padding is ever computed.

    Parameters
    ----------
    tens : numpy array
        Unused.
        Formerly the matrix containing the spectrogram of a single bar,
        interpreted as a tensor (3rd dimension of size 1).
    longest_bar_len : integer
        Unused.
        Formerly the size of the longest bar, ie the desired size of the 2nd dimension.

    Raises
    ------
    err.OutdatedBehaviorException
        Always, whatever the arguments are.

    """
    raise err.OutdatedBehaviorException(
        "Padding the tensor with zeroes correspond to an outdated behvaior, shouldn't be invoked."
    )
예제 #2
0
def load_RWC_dataset(music_folder_path,
                     annotations_type="MIREX10",
                     desired_format=None,
                     downbeats=None):
    """
    Load the data on the RWC dataset, ie the file names of the songs and of their annotations.
    The annotations can be either AIST or MIREX 10.

    Only the entries of the folder ending with ".wav" are considered,
    and their name (without the extension) has to be an integer (the song number).

    Parameters
    ----------
    music_folder_path : String
        Path of the folder to parse.
    annotations_type : "AIST" [1] or "MIREX10" [2]
        The type of annotations to load (both have a specific behavior and formatting).
        It is forwarded as is to dm.get_annotation_name_from_song.
        The default is "MIREX10".
    desired_format : DEPRECATED
        Has to be left to None.
    downbeats : DEPRECATED
        Has to be left to None.

    Raises
    ------
    err.OutdatedBehaviorException
        If desired_format or downbeats is set (deprecated behavior).
    ValueError
        If the name of a ".wav" file is not an integer.

    Returns
    -------
    numpy array
        One row per song, of the form [song file name, annotation],
        where the annotation is the value returned by dm.get_annotation_name_from_song.
        The array is empty if the folder contains no ".wav" file.

    References
    ----------
    [1] Goto, M. (2006, October). AIST Annotation for the RWC Music Database. In ISMIR (pp. 359-360).

    [2] Bimbot, F., Sargent, G., Deruty, E., Guichaoua, C., & Vincent, E. (2014, January).
    Semiotic description of music structure: An introduction to the Quaero/Metiss structural annotations.

    """
    # Identity tests: "!= None" is evaluated element-wise on array-like arguments,
    # which makes the truth value of the condition ambiguous.
    if downbeats is not None or desired_format is not None:
        raise err.OutdatedBehaviorException(
            "This version of loading is deprecated.")

    paths = []
    for file_name in os.listdir(music_folder_path):
        if file_name.endswith(".wav"):
            # The song number is padded with zeros to 3 digits before the annotation lookup.
            song_number = "{:03d}".format(int(file_name[:-4]))
            annotation = dm.get_annotation_name_from_song(
                song_number, annotations_type)
            paths.append([file_name, annotation])
    return np.array(paths)
예제 #3
0
def compute_score_from_frontiers_in_bar(reference,
                                        frontiers_in_bar,
                                        bars,
                                        window_length=0.5):
    """
    Computes precision, recall and f measure from estimated frontiers (in bar indexes) and the reference (in seconds).

    The frontiers are first converted in time with frontiers_from_bar_to_time, then in segments
    with frontiers_to_segments, and the scores are finally delegated to compute_score_of_segmentation.

    Parameters
    ----------
    reference : list of tuples
        The reference annotations, as a list of tuples (start, end), in seconds.
    frontiers_in_bar : list of integers
        The frontiers between segments, in bar indexes.
    bars : list of tuples
        The bars of the song.
        It has to be convertible to a numpy array with (at least) 2 dimensions,
        a flat list of downbeats being rejected.
    window_length : float, optional
        The window size for the score (tolerance for the frontier to be validated).
        The default is 0.5.

    Raises
    ------
    err.OutdatedBehaviorException
        If bars is not a 2-dimensional structure (old format, list of downbeats).

    Returns
    -------
    The value returned by compute_score_of_segmentation, expected to be:

    precision: float in [0,1]
        Precision of these frontiers,
        ie the proportion of accurately found frontiers among all found frontiers.
    recall: float in [0,1]
        Recall of these frontiers,
        ie the proportion of accurately found frontiers among all accurate frontiers.
    f_measure: float in [0,1]
        F measure of these frontiers,
        ie the harmonic mean of both precedent scores.

    """
    try:
        # Only probes the second dimension: a flat list of downbeats has none (IndexError),
        # and ragged bars can't be converted by recent versions of numpy (ValueError).
        np.array(bars).shape[1]
    except (IndexError, ValueError):
        raise err.OutdatedBehaviorException(
            "Bars is still a list of downbeats, which is an old beavior, and shouldn't happen anymore. To track and to fix."
        )

    frontiers_in_time = frontiers_from_bar_to_time(frontiers_in_bar, bars)
    estimated_segments = frontiers_to_segments(frontiers_in_time)
    return compute_score_of_segmentation(reference,
                                         estimated_segments,
                                         window_length=window_length)
예제 #4
0
def load_bars(persisted_path, song_name):
    """
    DEPRECATED: always raises, load_or_save_bars(persisted_path, song_path) should be used instead.

    This function used to load the bars of a song, persisted after a first computation.
    It couldn't handle the case where the bars weren't computed yet, hence its deprecation:
    it now raises before reading anything.

    Parameters
    ----------
    persisted_path : string
        Unused.
        Formerly the path where the bars should be found.
    song_name : string
        Unused.
        Formerly the name of the song (identifier of the bars to load).

    Raises
    ------
    err.OutdatedBehaviorException
        Always, whatever the arguments are.

    """
    raise err.OutdatedBehaviorException(
        "You should use load_or_save_bars(persisted_path, song_path) instead, as it handle the fact that bars weren't computed yet."
    )
예제 #5
0
def load_spectrogram_and_bars(persisted_path,
                              song_name,
                              feature,
                              hop_length,
                              fmin=98):
    """
    DEPRECATED: always raises, load_or_save_spectrogram_and_bars should be used instead.

    This function used to load the spectrogram and the bars of a song, persisted after a first computation.
    It couldn't handle the case where they weren't computed yet, hence its deprecation:
    it now raises before reading anything.

    Parameters
    ----------
    persisted_path : string
        Unused.
        Formerly the path where the bars and the spectrogram should be found.
    song_name : string
        Unused.
        Formerly the name of the song (identifier of the bars to load).
    feature : string
        Unused.
        Formerly the feature of the spectrogram, part of the identifier of the spectrogram.
    hop_length : integer
        Unused.
        Formerly the hop_length of the spectrogram, part of the identifier of the spectrogram.
    fmin : integer
        Unused.
        Formerly the minimal frequence for the spectrogram, part of the identifier of the spectrogram.
        The default is 98.

    Raises
    ------
    err.OutdatedBehaviorException
        Always, whatever the arguments are.

    """
    raise err.OutdatedBehaviorException(
        "You should use load_or_save_spectrogram_and_bars(persisted_path, song_path, feature, hop_length, fmin) instead, as it handle the fact that bars weren't computed yet."
    )
예제 #6
0
def _bar_frame_indexes(bar, subdivision):
    """
    Returns the indexes of `subdivision` frames, evenly spread from the start of the bar (included)
    to its end (excluded), each index being rounded to the nearest integer.
    """
    start = bar[0]
    end = bar[1]
    return [
        int(round(start + k * (end - start) / subdivision))
        for k in range(subdivision)
    ]


def tensorize_barwise(spectrogram,
                      bars,
                      hop_length_seconds,
                      subdivision,
                      midi=False):
    """
    Returns a tensor-spectrogram from a spectrogram and bars starts and ends.
    Each bar of the tensor_spectrogram will contain the same number of frames, define by the "subdivision" parameter.
    These frames are selected from an over-sampled spectrogram, to adapt to the specific size of each bar.

    The first bar of `bars` is discarded (only bars[1:] are used).
    The construction stops at the first bar (the first kept one excepted, which is never checked)
    whose last selected frame falls outside of the spectrogram: this bar and all the following ones are dropped.

    Parameters
    ----------
    spectrogram : numpy array
        The spectrogram to return as a tensor-spectrogram.
        Its first dimension is the frequency one, and its second dimension the frames one.
    bars : list of tuples
        List of the bars (start, end), in seconds, to cut the spectrogram at bar delimitation.
    hop_length_seconds : float
        The hop_length, in seconds.
    subdivision : integer
        The number of subdivision of the bar to be contained in each slice of the tensor.
    midi : boolean, optional
        A boolean to know if the spectrogram is in midi.
        This mode is bugged and not supported anymore: setting it to True raises an exception.
        The default is False.

    Raises
    ------
    err.OutdatedBehaviorException
        If midi is True.
    IndexError
        If no bar is left once the first one is discarded,
        or if the first kept bar selects frames outside of the spectrogram.

    Returns
    -------
    tensorly tensor
        The tensor-spectrogram as a tensorly tensor (in float32),
        of shape (frequency dimension, subdivision, number of kept bars).

    """
    freq_len = spectrogram.shape[0]

    # Sanity check only (nothing is modified): the hop length is expressed in samples,
    # for a hard-coded sampling rate of 44100Hz.
    hop = int(hop_length_seconds * 44100)
    if hop not in (32, 64):
        print("hop_length a 44100Hz = " + str(hop) + ", normal ?")

    bars_idx = dm.segments_from_time_to_frame_idx(bars[1:], hop_length_seconds)
    first_bar_frames = _bar_frame_indexes(bars_idx[0], subdivision)

    if midi:
        raise err.OutdatedBehaviorException("Should'nt be used, still bugged")

    # The slices are accumulated in a list and concatenated once at the end:
    # calling np.append at each bar copies the whole tensor every time (quadratic cost).
    bar_slices = [
        np.array(spectrogram[:, first_bar_frames]).reshape(
            freq_len, subdivision, 1)
    ]
    for bar in bars_idx[1:]:
        frames = _bar_frame_indexes(bar, subdivision)
        if frames[-1] >= spectrogram.shape[1]:
            # This bar ends after the spectrogram: it is dropped, along with the following ones.
            break
        bar_slices.append(spectrogram[:, frames].reshape(
            freq_len, subdivision, 1))

    tens = np.concatenate(bar_slices, axis=2)
    return tl.tensor(tens, dtype=tl.float32)