def penalty_cost_from_arg(penalty_func, segment_length):
    """
    Returns a penalty cost, function of the size of the segment.
    The penalty function has to be specified, and is bound to evolve in the near future,
    so this docstring won't explain it.
    Instead, you'll have to read the code, sorry! It is pretty straightforward though.

    Parameters
    ----------
    penalty_func : string
        Identifier of the penalty function.
    segment_length : integer
        Size of the segment.

    Returns
    -------
    float
        The penalty cost.

    """
    if penalty_func == "modulo4":
        # A previous (commented-out) version:
        # if segment_length % 4 != 0:
        #     return 1/(min(segment_length % 4, -segment_length % 4))
        # else:
        #     return 0
        if segment_length % 4 == 0:
            return 0
        elif segment_length % 2 == 0:
            return 1 / 2
        else:
            return 1
    elif penalty_func == "modulo8":
        if segment_length == 8:
            return 0
        elif segment_length % 4 == 0:
            return 1 / 4
        elif segment_length % 2 == 0:
            return 1 / 2
        else:
            return 1
    elif penalty_func == "moduloSmall8and4":
        if segment_length > 12:
            return 100
        elif segment_length == 8:
            return 0
        elif segment_length == 4:
            return 1 / 4
        elif segment_length % 2 == 0:
            return 1 / 2
        else:
            return 1
    elif penalty_func == "sargentdemi":
        return abs(segment_length - 8)**(1 / 2)
    elif penalty_func == "sargentun":
        return abs(segment_length - 8)
    elif penalty_func == "sargentdeux":
        return abs(segment_length - 8)**2
    else:
        raise err.InvalidArgumentValueException(
            "Penalty function not understood.")
def get_segmentation_from_txt(path, annotations_type):
    """
    Reads the segmentation annotations, and returns it in a list of tuples (start, end, index as a number)
    This function has been developped for AIST and MIREX10 annotations, adapted for these types of annotations.
    It will not work with another set of annotation.

    Parameters
    ----------
    path : String
        The path to the annotation.
    annotations_type : "AIST" [1] or "MIREX10" [2]
        The type of annotations to load (both have a specific behavior and formatting)
        
    Raises
    ------
    NotImplementedError
        If the type of annotations is neither AIST or MIREX10

    Returns
    -------
    segments : list of tuples (float, float, integer)
        The segmentation, formatted in a list of tuples, and with labels as numbers (easier to interpret computationnally).

    References
    ----------
    [1] Goto, M. (2006, October). AIST Annotation for the RWC Music Database. In ISMIR (pp. 359-360).
    
    [2] Bimbot, F., Sargent, G., Deruty, E., Guichaoua, C., & Vincent, E. (2014, January). 
    Semiotic description of music structure: An introduction to the Quaero/Metiss structural annotations.

    """
    segments = []
    labels = []
    with open(path) as file_seg:  # Context manager, so the file is always closed
        for part in file_seg.readlines():
            tupl = part.split("\t")
            if tupl[2] not in labels:  # If this label wasn't already found in this annotation
                idx = len(labels)
                labels.append(tupl[2])
            else:  # If this label was already found for another segment
                idx = labels.index(tupl[2])
            if annotations_type == "AIST":
                # AIST stores times as integers, in hundredths of a second
                segments.append(((int(tupl[0]) / 100), (int(tupl[1]) / 100), idx))
            elif annotations_type == "MIREX10":
                # MIREX10 stores times as floats, in seconds
                segments.append((round(float(tupl[0]),
                                       3), round(float(tupl[1]), 3), idx))
            else:
                raise err.InvalidArgumentValueException(
                    "Annotations type not understood")
    return segments
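# Illustration (added; the file format below is inferred from the parsing above):
# annotations are tab-separated lines "start\tend\tlabel", with times in seconds
# for MIREX10 and in hundredths of a second (integers) for AIST.
def _example_read_mirex10(tmp_path="/tmp/example_mirex10.lab"):
    """
    Writes a two-segment MIREX10-like annotation file (hypothetical path) and parses it back.
    Should return [(0.0, 15.324, 0), (15.324, 30.648, 1)].
    """
    with open(tmp_path, "w") as file:
        file.write("0.000\t15.324\tA\n15.324\t30.648\tB\n")
    return get_segmentation_from_txt(tmp_path, "MIREX10")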
def compute_full_kernels(max_size, convolution_type="full"):
    """
    Precomputes kernels whose diagonal entries are equal to 1 (in contrast with
    compute_all_kernels(), where the main diagonal is zero). Shouldn't be used.
    """
    kernels = [[0]]
    for p in range(1, max_size + 1):
        if p < 4:
            kern = np.ones((p, p))
        else:
            if convolution_type == "full":
                # Full kernel
                kern = np.ones((p, p))
            elif convolution_type == "eight_bands":
                # Band matrix where only the eight subdiagonals surrounding the main diagonal are one
                k = np.array([
                    np.ones(p - 4),
                    np.ones(p - 3),
                    np.ones(p - 2),
                    np.ones(p - 1),
                    np.ones(p),
                    np.ones(p - 1),
                    np.ones(p - 2),
                    np.ones(p - 3),
                    np.ones(p - 4)
                ])
                offset = [-4, -3, -2, -1, 0, 1, 2, 3, 4]
                kern = diags(k, offset).toarray()
            elif convolution_type == "mixed":
                # Sum of both previous kernels (the band around the main
                # diagonal, including the diagonal itself, counts twice)
                k = np.array([
                    np.ones(p - 4),
                    np.ones(p - 3),
                    np.ones(p - 2),
                    np.ones(p - 1),
                    np.ones(p),
                    np.ones(p - 1),
                    np.ones(p - 2),
                    np.ones(p - 3),
                    np.ones(p - 4)
                ])
                offset = [-4, -3, -2, -1, 0, 1, 2, 3, 4]
                kern = np.ones((p, p)) + diags(k, offset).toarray()
            else:
                raise err.InvalidArgumentValueException(
                    "Convolution type not understood.")
        kernels.append(kern)
    return kernels
def get_annotation_name_from_song(song_number, annotations_type):
    """
    Returns the name of the annotation file of this song, according to the desired annotation type.

    Specifically designed for the RWC Pop dataset; shouldn't be used otherwise.
    The available annotation types are:
        - AIST annotations [1]
        - MIREX 10 annotations [2]
    
    Parameters
    ----------
    song_number : integer or string
        The number of the song (which is its name).
    annotations_type : string
        The desired type of annotation.

    Raises
    ------
    InvalidArgumentValueException
        If the annotation type is not implemented.

    Returns
    -------
    string
        The name of the file containing the annotation.
        
    References
    ----------
    [1] Goto, M. (2006, October). AIST Annotation for the RWC Music Database. In ISMIR (pp. 359-360).
    
    [2] Bimbot, F., Sargent, G., Deruty, E., Guichaoua, C., & Vincent, E. (2014, January). 
    Semiotic description of music structure: An introduction to the Quaero/Metiss structural annotations.

    """
    if annotations_type == "MIREX10":
        return "RM-P{:03d}.BLOCKS.lab".format(int(song_number))
    elif annotations_type == "AIST":
        return "RM-P{:03d}.CHORUS.TXT".format(int(song_number))
    else:
        raise err.InvalidArgumentValueException(
            "Annotations type not understood")
def get_spectrogram(signal,
                    sr,
                    feature,
                    hop_length,
                    n_fft=2048,
                    fmin=98,
                    n_mfcc=20):
    """
    Returns a spectrogram, computed from the signal.
    Different types of spectrograms can be computed, as specified by the argument "feature".

    All these spectrograms are computed using the toolbox librosa [1].
    
    Parameters
    ----------
    signal : numpy array
        Signal of the song.
    sr : float
        Sampling rate of the signal, generally 44100Hz.
    feature : String
        The type of spectrogram to compute. Available types:
            - stft : computes the Short-Time Fourier Transform of the signal.
            - pcp : computes a chromagram.
            NB: the parameters of this chromagram were specifically tuned internally,
            and are technical choices rather than standard values.
            - pcp_stft : computes a chromagram from the stft of the song.
            - cqt : computes a Constant-Q transform of the song.
            - log_cqt : computes a log-scaled Constant-Q transform of the song.
            - tonnetz : computes the tonnetz representation of the song.
            - pcp_tonnetz : computes the tonnetz representation of the song, starting from the chromas.
                This gives better control over the parameters of the tonnetz computation,
                and can reduce computation when chromas are already computed (for scripts loading already computed spectrograms).
            - hcqt : computes a harmonic CQT of the song.
            - mfcc : computes the Mel-Frequency Cepstral Coefficients of the song.
            - mel_grill, log_mel_grill, nn_log_mel_grill : compute mel-spectrogram variants,
            using the parameters of Grill and Schlüter (ISMIR 2015; see the comment in the code).
            Note: "mel" and "log_mel" are rejected here; use the "_grill" variants instead.

    hop_length : integer
        The desired hop_length, which is the step between two frames (i.e. the time "discretization" step).
        It is expressed in number of samples, as defined by the sampling rate.
    n_fft : integer, optional
        Size (in samples) of the FFT window for stft features.
        The default is 2048.
    fmin : integer, optional
        The minimal frequency to consider, used for denoising.
        The default is 98.
    n_mfcc : integer, optional
        Number of mfcc features.
        The default is 20 (as in librosa).

    Raises
    ------
    InvalidArgumentValueException
        If the "feature" argument is not presented above.

    Returns
    -------
    numpy array
        Spectrogram of the signal.
        
    References
    ----------
    [1] McFee, B., Raffel, C., Liang, D., Ellis, D. P., McVicar, M., Battenberg, E., & Nieto, O. (2015, July).
    librosa: Audio and music signal analysis in python. 
    In Proceedings of the 14th python in science conference (Vol. 8).
    
    [2] Nieto, O., & Bello, J. P. (2015).
    MSAF: Music Structure Analysis Framework.
    In Proceedings of the 16th International Society for Music Information Retrieval Conference (ISMIR 2015).

    """
    if feature.lower() == "stft":
        if len(signal.shape) == 1:
            stft = librosa.core.stft(np.asfortranarray(signal),
                                     n_fft=n_fft,
                                     hop_length=hop_length)
            power_spectrogram = np.abs(stft)**2
            return power_spectrogram

        power_spectrogram = np.abs(
            librosa.core.stft(np.asfortranarray(signal[:, 0]),
                              n_fft=n_fft,
                              hop_length=hop_length))**2
        for i in range(1, signal.shape[1]):
            power_spectrogram += np.abs(
                librosa.core.stft(np.asfortranarray(signal[:, i]),
                                  n_fft=n_fft,
                                  hop_length=hop_length))**2
        return power_spectrogram

    elif feature.lower() == "pcp_stft":
        if len(signal.shape) == 1:
            audio_harmonic, _ = librosa.effects.hpss(
                y=np.asfortranarray(signal))
            chroma_stft = librosa.feature.chroma_stft(y=audio_harmonic,
                                                      sr=sr,
                                                      n_fft=n_fft,
                                                      hop_length=hop_length)
            return chroma_stft
        audio_harmonic, _ = librosa.effects.hpss(
            y=np.asfortranarray(signal[:, 0]))
        chroma_stft = librosa.feature.chroma_stft(y=audio_harmonic,
                                                  sr=sr,
                                                  n_fft=n_fft,
                                                  hop_length=hop_length)
        for i in range(1, signal.shape[1]):
            audio_harmonic, _ = librosa.effects.hpss(
                y=np.asfortranarray(signal[:, i]))
            chroma_stft += librosa.feature.chroma_stft(y=audio_harmonic,
                                                       sr=sr,
                                                       n_fft=n_fft,
                                                       hop_length=hop_length)
        return chroma_stft
    elif feature == "pcp":
        norm = inf  # Columns normalization
        win_len_smooth = 82  # Size of the smoothing window
        n_octaves = 6
        bins_per_chroma = 3
        bins_per_octave = bins_per_chroma * 12
        if len(signal.shape) == 1:
            return librosa.feature.chroma_cens(y=np.asfortranarray(signal),
                                               sr=sr,
                                               hop_length=hop_length,
                                               fmin=fmin,
                                               n_chroma=12,
                                               n_octaves=n_octaves,
                                               bins_per_octave=bins_per_octave,
                                               norm=norm,
                                               win_len_smooth=win_len_smooth)

        pcp = librosa.feature.chroma_cens(y=np.asfortranarray(signal[:, 0]),
                                          sr=sr,
                                          hop_length=hop_length,
                                          fmin=fmin,
                                          n_chroma=12,
                                          n_octaves=n_octaves,
                                          bins_per_octave=bins_per_octave,
                                          norm=norm,
                                          win_len_smooth=win_len_smooth)
        for i in range(1, signal.shape[1]):
            pcp += librosa.feature.chroma_cens(y=np.asfortranarray(signal[:,
                                                                          i]),
                                               sr=sr,
                                               hop_length=hop_length,
                                               fmin=fmin,
                                               n_chroma=12,
                                               n_octaves=n_octaves,
                                               bins_per_octave=bins_per_octave,
                                               norm=norm,
                                               win_len_smooth=win_len_smooth)

        return pcp
    elif feature.lower() == "cqt":
        if len(signal.shape) == 1:
            constant_q_transf = librosa.core.cqt(np.asfortranarray(signal),
                                                 sr=sr,
                                                 hop_length=hop_length)
            power_cqt = np.abs(constant_q_transf)**2
            return power_cqt
        power_cqt = np.abs(
            librosa.core.cqt(np.asfortranarray(signal[:, 0]),
                             sr=sr,
                             hop_length=hop_length))**2
        for i in range(1, signal.shape[1]):
            power_cqt += np.abs(
                librosa.core.cqt(np.asfortranarray(signal[:, i]),
                                 sr=sr,
                                 hop_length=hop_length))**2
        return power_cqt
    elif feature.lower() == "log_cqt":
        if len(signal.shape) == 1:
            constant_q_transf = librosa.core.cqt(np.asfortranarray(signal),
                                                 sr=sr,
                                                 hop_length=hop_length)
            power_cqt = np.abs(constant_q_transf)**2
            log_cqt = ((1.0 / 80.0) * librosa.core.amplitude_to_db(
                np.abs(np.array(power_cqt)), ref=np.max)) + 1.0
            return log_cqt
        power_cqt = np.abs(
            librosa.core.cqt(np.asfortranarray(signal[:, 0]),
                             sr=sr,
                             hop_length=hop_length))**2
        for i in range(1, signal.shape[1]):
            power_cqt += np.abs(
                librosa.core.cqt(np.asfortranarray(signal[:, i]),
                                 sr=sr,
                                 hop_length=hop_length))**2
        log_cqt = ((1.0 / 80.0) * librosa.core.amplitude_to_db(
            np.abs(np.array(power_cqt)), ref=np.max)) + 1.0
        return log_cqt
    elif feature.lower() == "tonnetz":
        if len(signal.shape) == 1:
            return librosa.feature.tonnetz(np.asfortranarray(signal), sr=sr)
        tonnetz = librosa.feature.tonnetz(np.asfortranarray(signal[:, 0]),
                                          sr=sr)
        for i in range(1, signal.shape[1]):
            tonnetz += librosa.feature.tonnetz(np.asfortranarray(signal[:, i]),
                                               sr=sr)
        return tonnetz
    elif feature.lower() == "pcp_tonnetz":
        return librosa.feature.tonnetz(y=None,
                                       sr=None,
                                       chroma=get_spectrogram(signal,
                                                              sr,
                                                              "pcp",
                                                              hop_length,
                                                              fmin=fmin))
    elif feature.lower() == "hcqt":
        return my_compute_hcqt(np.asfortranarray(signal[:, 0]), sr)

    elif feature.lower() == "mfcc":
        if len(signal.shape) == 1:
            return librosa.feature.mfcc(np.asfortranarray(signal),
                                        sr=sr,
                                        hop_length=hop_length,
                                        n_mfcc=n_mfcc)
        mfcc = librosa.feature.mfcc(np.asfortranarray(signal[:, 0]),
                                    sr=sr,
                                    hop_length=hop_length,
                                    n_mfcc=n_mfcc)
        for i in range(1, signal.shape[1]):
            mfcc += librosa.feature.mfcc(np.asfortranarray(signal[:, i]),
                                         sr=sr,
                                         hop_length=hop_length,
                                         n_mfcc=n_mfcc)
        return mfcc

    # For Mel spectrograms, we use the same parameters as in [1].
    # [1] Grill, Thomas, and Jan Schlüter. "Music Boundary Detection Using Neural Networks on Combined Features and Two-Level Annotations." ISMIR. 2015.
    elif feature.lower() == "mel_grill":
        if len(signal.shape) == 1:
            return np.abs(
                librosa.feature.melspectrogram(np.asfortranarray(signal),
                                               sr=sr,
                                               n_fft=2048,
                                               hop_length=hop_length,
                                               n_mels=80,
                                               fmin=80.0,
                                               fmax=16000))
        mel = np.abs(
            librosa.feature.melspectrogram(np.asfortranarray(signal[:, 0]),
                                           sr=sr,
                                           n_fft=2048,
                                           hop_length=hop_length,
                                           n_mels=80,
                                           fmin=80.0,
                                           fmax=16000))
        for i in range(1, signal.shape[1]):
            mel += np.abs(
                librosa.feature.melspectrogram(np.asfortranarray(signal[:, i]),
                                               sr=sr,
                                               n_fft=2048,
                                               hop_length=hop_length,
                                               n_mels=80,
                                               fmin=80.0,
                                               fmax=16000))
        return mel

    elif feature == "log_mel_grill":
        if len(signal.shape) == 1:
            return librosa.power_to_db(
                np.abs(
                    librosa.feature.melspectrogram(np.asfortranarray(signal),
                                                   sr=sr,
                                                   n_fft=2048,
                                                   hop_length=hop_length,
                                                   n_mels=80,
                                                   fmin=80.0,
                                                   fmax=16000)))
        mel = np.abs(
            librosa.feature.melspectrogram(np.asfortranarray(signal[:, 0]),
                                           sr=sr,
                                           n_fft=2048,
                                           hop_length=hop_length,
                                           n_mels=80,
                                           fmin=80.0,
                                           fmax=16000))
        for i in range(1, signal.shape[1]):
            mel += np.abs(
                librosa.feature.melspectrogram(np.asfortranarray(signal[:, i]),
                                               sr=sr,
                                               n_fft=2048,
                                               hop_length=hop_length,
                                               n_mels=80,
                                               fmin=80.0,
                                               fmax=16000))
        return librosa.power_to_db(mel)

    elif feature == "nn_log_mel_grill":
        if len(signal.shape) == 1:
            mel = np.abs(
                librosa.feature.melspectrogram(np.asfortranarray(signal),
                                               sr=sr,
                                               n_fft=2048,
                                               hop_length=hop_length,
                                               n_mels=80,
                                               fmin=80.0,
                                               fmax=16000))
            return librosa.power_to_db(mel + np.ones(mel.shape))
        mel = np.abs(
            librosa.feature.melspectrogram(np.asfortranarray(signal[:, 0]),
                                           sr=sr,
                                           n_fft=2048,
                                           hop_length=hop_length,
                                           n_mels=80,
                                           fmin=80.0,
                                           fmax=16000))
        for i in range(1, signal.shape[1]):
            mel += np.abs(
                librosa.feature.melspectrogram(np.asfortranarray(signal[:, i]),
                                               sr=sr,
                                               n_fft=2048,
                                               hop_length=hop_length,
                                               n_mels=80,
                                               fmin=80.0,
                                               fmax=16000))
        return librosa.power_to_db(mel + np.ones(mel.shape))

    # elif feature == "padded_log_mel_grill":
    #     log_mel = get_spectrogram(signal, sr, "log_mel_grill", hop_length, n_fft = n_fft)
    #     return log_mel - np.amin(log_mel) * np.ones(log_mel.shape)

    elif feature == "mel" or feature == "log_mel":
        raise err.InvalidArgumentValueException(
            "Invalid mel parameter, are't you looking for mel_grill?")
    else:
        raise err.InvalidArgumentValueException(
            "Unknown signal representation.")
def compute_all_kernels(max_size, convolution_type="full"):
    """
    Precomputes all kernels of size 0 ([0]) to max_size, and feeds them to the Dynamic Programming algorithm.

    Parameters
    ----------
    max_size : integer
        The maximal size (included) for kernels.
    convolution_type: string
        The type of convolution kernel to use.
        Possibilities are:
            - "full" : square matrix entirely composed of ones, except on the diagonal where it is zero.
            The associated convolution cost for a segment $(b_1, b_2)$ will be:
            $c_{b_1,b_2} = \frac{1}{b_2 - b_1 + 1}\sum_{i,j = 0, i \ne j}^{n - 1}  a_{i + b_1, j + b_1}$
            - "eight_bands" : square matrix where the only nonzero values are ones on the
            8 subdiagonals surrounding the main diagonal.
            The associated convolution cost for a segment $(b_1, b_2)$ will be:
            $c_{b_1,b_2} = \frac{1}{b_2 - b_1 + 1}\sum_{i,j = 0, 1 \leq |i - j| \leq 4}^{n - 1}  a_{i + b_1, j + b_1}$
            - "mixed" : sum of both previous kernels, i.e. values are zero on the diagonal,
            2 on the 8 subdiagonals surrounding the main diagonal, and 1 elsewhere.
            The associated convolution cost for a segment $(b_1, b_2)$ will be:
            $c_{b_1,b_2} = \frac{1}{b_2 - b_1 + 1}\left(2\sum_{i,j = 0, 1 \leq |i - j| \leq 4}^{n - 1}  a_{i + b_1, j + b_1} + \sum_{i,j = 0, |i - j| > 4}^{n - 1}  a_{i + b_1, j + b_1}\right)$
        
    Returns
    -------
    kernels : list of arrays (which are kernels)
        All the kernels, of size 0 ([0]) to max_size.

    """
    kernels = [[0]]
    for p in range(1, max_size + 1):
        if p < 4:
            kern = np.ones((p, p)) - np.identity(p)
        else:
            if convolution_type == "full":
                # Full kernel (except for the diagonal)
                kern = np.ones((p, p)) - np.identity(p)
            elif convolution_type == "eight_bands":
                # Band matrix where only the eight subdiagonals surrounding the main diagonal are one
                k = np.array([
                    np.ones(p - 4),
                    np.ones(p - 3),
                    np.ones(p - 2),
                    np.ones(p - 1),
                    np.zeros(p),
                    np.ones(p - 1),
                    np.ones(p - 2),
                    np.ones(p - 3),
                    np.ones(p - 4)
                ])
                offset = [-4, -3, -2, -1, 0, 1, 2, 3, 4]
                kern = diags(k, offset).toarray()
            elif convolution_type == "mixed":
                # Sum of both previous kernels
                k = np.array([
                    np.ones(p - 4),
                    np.ones(p - 3),
                    np.ones(p - 2),
                    np.ones(p - 1),
                    np.zeros(p),
                    np.ones(p - 1),
                    np.ones(p - 2),
                    np.ones(p - 3),
                    np.ones(p - 4)
                ])
                offset = [-4, -3, -2, -1, 0, 1, 2, 3, 4]
                kern = np.ones(
                    (p, p)) - np.identity(p) + diags(k, offset).toarray()
            else:
                raise err.InvalidArgumentValueException(
                    "Convolution type not understood.")
        kernels.append(kern)
    return kernels
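# Illustration (added): structure of the "mixed" kernel for p = 6, as described
# in the docstring above: zero diagonal, 2 on the band |i - j| <= 4, 1 elsewhere.
def _example_mixed_kernel():
    kernels = compute_all_kernels(6, convolution_type="mixed")
    print(kernels[6])  # e.g. the (0, 5) entry is 1, the (0, 1) entry is 2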
def dynamic_convolution_computation(autosimilarity,
                                    mix=1,
                                    min_size=1,
                                    max_size=36,
                                    novelty_kernel_size=16,
                                    penalty_weight=1,
                                    penalty_func="modulo4",
                                    convolution_type="eight_bands"):
    """
    Dynamic programming algorithm, maximizing a global cost defined as the sum of the segments' costs on the autosimilarity.
    This cost is a combination of:
     - the convolutional cost of the segment, with a dynamic size,
     - a penalty cost, function of the size of the segment, to enforce specific sizes (with prior knowledge),
     - the novelty cost applied on the end of the segment, with a fixed kernel size.
        EDIT: Not supported anymore (left for potential comparison tests).

    The penalty cost is computed in the function "penalty_cost_from_arg()".
    See this function for further details.

    This trade-off is handled by the <mix> parameter, with:
        cost = mix * convolutional cost + (1 - mix) * novelty cost
    EDIT: this behavior is not supported anymore, but could be in the future.

    It returns the optimal segmentation according to this cost.

    Parameters
    ----------
    autosimilarity : list of list of float (list of columns)
        The autosimilarity to segment.
    mix : float \in (0,1], optional
        The trade-off parameter between the convolutional cost and the novelty cost.
        It shouldn't be set to zero, as that corresponds to the basic novelty cost.
        The default is 1.
    min_size : integer, optional
        The minimal length of segments.
        The default is 1.
    max_size : integer, optional
        The maximal length of segments.
        The default is 36.
    novelty_kernel_size : integer, optional
        The size of the novelty kernel.
        The default is 16.
        EDIT: Not supported anymore (left for potential comparison tests)
    penalty_weight : float, optional
        The weighting parameter for the penalty function.
        The default is 1.
    penalty_func : string
        The type of penalty function to use.
        See "penalty_cost_from_arg()" for further details.
    convolution_type : string
        The type of convolution we want to use in this computation.
        See "compute_all_kernels()" for a detailed list of possibilities.

    Raises
    ------
    InvalidArgumentValueException and ToDebugException
        If an argument is invalid, or if the computation reaches an inconsistent state.

    Returns
    -------
    list of tuples
        The segments, as a list of tuples (start, end).
    float
        The global cost of the returned segmentation (the maximal among all possible segmentations).

    """
    if novelty_kernel_size % 2 == 1:
        raise err.InvalidArgumentValueException(
            "The novelty kernel should be even.") from None
    if mix < 0 or mix > 1:
        raise err.InvalidArgumentValueException(
            "Mix is a weight between 0 and 1, mitigating between the convolutional and novelty costs."
        ) from None
    if mix == 0:
        raise err.InvalidArgumentValueException(
            "As the novelty cost uses a fixed kernel, mix = 0 (which would disable the convolutional cost) shouldn't be used."
        ) from None

    costs = [-math.inf for i in range(len(autosimilarity))]
    segments_best_ends = [None for i in range(len(autosimilarity))]
    segments_best_ends[0] = 0
    costs[0] = 0
    kernels = compute_all_kernels(max_size, convolution_type=convolution_type)
    #novelty = novelty_computation(autosimilarity, novelty_kernel_size)
    conv_eight = convolution_entire_matrix_computation(autosimilarity, kernels)

    for current_idx in range(
            1, len(autosimilarity)):  # Parse all indexes of the autosimilarity
        for possible_start_idx in possible_segment_start(current_idx,
                                                         min_size=min_size,
                                                         max_size=max_size):
            if possible_start_idx < 0:
                raise err.ToDebugException("Invalid value of start index.")

            # Convolutional cost between the possible start of the segment and the current index (entire segment)
            conv_cost = convolutionnal_cost(
                autosimilarity[possible_start_idx:current_idx,
                               possible_start_idx:current_idx], kernels)

            # Novelty cost, computed with a fixed kernel (doesn't make sense otherwise), on the end of the segment
            #nov_cost = novelty[current_idx]

            segment_length = current_idx - possible_start_idx
            penalty_cost = penalty_cost_from_arg(penalty_func, segment_length)

            # Formula with the mix argument
            #this_segment_cost = (mix * conv_cost + (1 - mix) * nov_cost) * segment_length - penalty_cost * penalty_weight * np.max(conv_eight)
            # Clean formula, to avoid errors.
            this_segment_cost = conv_cost * segment_length - penalty_cost * penalty_weight * np.max(
                conv_eight)
            # Note: the length of the segment does not appear in conv_eight (not a problem in itself as its size is constant, but generally not specified in formulas).

            # Avoiding errors, as segment_cost are initially set to -inf.
            if possible_start_idx == 0:
                if this_segment_cost > costs[current_idx]:
                    costs[current_idx] = this_segment_cost
                    segments_best_ends[current_idx] = 0
            else:
                if costs[possible_start_idx] + this_segment_cost > costs[
                        current_idx]:
                    costs[current_idx] = costs[
                        possible_start_idx] + this_segment_cost
                    segments_best_ends[current_idx] = possible_start_idx

    segments = [(segments_best_ends[len(autosimilarity) - 1],
                 len(autosimilarity) - 1)]
    precedent_end = segments_best_ends[len(autosimilarity) - 1]
    while precedent_end > 0:
        segments.append((segments_best_ends[precedent_end], precedent_end))
        precedent_end = segments_best_ends[precedent_end]
        if precedent_end is None:
            raise err.ToDebugException(
                "The backtracking of the optimal path failed (it reached an undefined segment start); this should be debugged."
            ) from None
    return segments[::-1], costs[-1]
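# Illustration (added): the dynamic programming recurrence above, restated on a
# toy 8x8 autosimilarity. The segment cost here is a stand-in (mean of the
# block, weighted by its length) instead of the actual kernel convolution cost.
def _example_toy_dynamic_segmentation():
    """
    Segments a block-diagonal autosimilarity made of two similar 4-bar blocks.
    Should return [(0, 4), (4, 7)] (the last index plays the role of the end).
    """
    autosimilarity = np.kron(np.eye(2), np.ones((4, 4)))
    n = len(autosimilarity)
    costs = [-math.inf] * n
    best_starts = [None] * n
    costs[0] = 0
    for end in range(1, n):
        for start in range(max(0, end - 8), end):
            block = autosimilarity[start:end, start:end]
            this_segment_cost = block.mean() * (end - start)  # stand-in cost
            total = this_segment_cost if start == 0 else costs[start] + this_segment_cost
            if total > costs[end]:
                costs[end], best_starts[end] = total, start
    # Backtracking, as in dynamic_convolution_computation()
    segments, end = [], n - 1
    while end > 0:
        segments.append((best_starts[end], end))
        end = best_starts[end]
    return segments[::-1]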
def fixed_conditions_feature(dataset,
                             feature,
                             penalty_weight,
                             annotations_type="MIREX10",
                             subdivision=96,
                             penalty_func="modulo8",
                             legend=", on test dataset.",
                             convolution_type="eight_bands"):
    """
    Segmentation results when segmenting the signal directly, with the penalty_weight fixed before computation.
    """
    if dataset == "full":
        dataset_path = paths.path_entire_rwc
    elif dataset == "odd_songs":
        dataset_path = paths.path_odd_songs_rwc
    elif dataset == "even_songs":
        dataset_path = paths.path_even_songs_rwc
    elif dataset == "debug":
        dataset_path = paths.path_debug_rwc
    else:
        raise err.InvalidArgumentValueException(
            f"Dataset type not understood: {dataset}") from None

    list_songs = scr.load_RWC_dataset(dataset_path, annotations_type)
    annotations_folder = "{}/{}".format(paths.path_annotation_rwc,
                                        annotations_type)
    hop_length = 32
    hop_length_seconds = 32 / 44100
    zero_five = []
    three = []

    for song_and_annotations in list_songs:
        song_name = song_and_annotations[0].replace(".wav", "")
        annot_path = "{}/{}".format(annotations_folder,
                                    song_and_annotations[1])
        annotations = dm.get_segmentation_from_txt(annot_path,
                                                   annotations_type)
        references_segments = np.array(annotations)[:, 0:2]

        bars, spectrogram = scr.load_or_save_spectrogram_and_bars(
            paths.path_data_persisted_rwc,
            "{}/{}".format(dataset_path, song_name), feature, hop_length)

        tensor_spectrogram = tf.tensorize_barwise(spectrogram, bars,
                                                  hop_length_seconds,
                                                  subdivision)

        unfolded = tl.unfold(tensor_spectrogram, 2)

        autosimilarity = as_seg.get_autosimilarity(unfolded,
                                                   transpose=True,
                                                   normalize=True)

        segments = as_seg.dynamic_convolution_computation(
            autosimilarity,
            penalty_weight=penalty_weight,
            penalty_func=penalty_func,
            convolution_type=convolution_type)[0]
        segments_in_time = dm.segments_from_bar_to_time(segments, bars)

        tp, fp, fn = dm.compute_rates_of_segmentation(references_segments,
                                                      segments_in_time,
                                                      window_length=0.5)
        # Precision, recall and F-measure, with a 0.5 seconds tolerance window
        prec, rap, f_mes = dm.compute_score_of_segmentation(
            references_segments, segments_in_time, window_length=0.5)
        zero_five.append(
            [tp, fp, fn,
             round(prec, 4),
             round(rap, 4),
             round(f_mes, 4)])

        tp, fp, fn = dm.compute_rates_of_segmentation(references_segments,
                                                      segments_in_time,
                                                      window_length=3)
        prec, rap, f_mes = dm.compute_score_of_segmentation(
            references_segments, segments_in_time, window_length=3)
        three.append(
            [tp, fp, fn,
             round(prec, 4),
             round(rap, 4),
             round(f_mes, 4)])

    final_res_sig_zero_five = np.array(
        [np.mean(np.array(zero_five)[:, i]) for i in range(6)])
    dataframe = pd.DataFrame(
        final_res_sig_zero_five,
        columns=["Results with 0.5 seconds tolerance window{}".format(legend)],
        index=np.array([
            'True Positives', 'False Positives', 'False Negatives',
            'Precision', 'Recall', 'F measure'
        ]))
    display(dataframe.T)

    final_res_sig_three = np.array(
        [np.mean(np.array(three)[:, i]) for i in range(6)])
    dataframe = pd.DataFrame(
        final_res_sig_three,
        columns=["Results with 3 seconds tolerance window{}".format(legend)],
        index=np.array([
            'True Positives', 'False Positives', 'False Negatives',
            'Precision', 'Recall', 'F measure'
        ]))
    display(dataframe.T)

    return final_res_sig_zero_five, final_res_sig_three
def final_results_fixed_conditions(dataset,
                                   feature,
                                   ranks,
                                   penalty_weight,
                                   init="tucker",
                                   update_rule="hals",
                                   beta=None,
                                   n_iter_max=1000,
                                   annotations_type="MIREX10",
                                   subdivision=96,
                                   penalty_func="modulo8",
                                   convolution_type="eight_bands",
                                   legend="in unknown conditions."):
    """
    Segmentation results when ranks and penalty_weight are fixed before computation.
    """
    annotations_folder = "{}/{}".format(paths.path_annotation_rwc,
                                        annotations_type)
    if dataset == "full":
        dataset_path = paths.path_entire_rwc
    elif dataset == "odd_songs":
        dataset_path = paths.path_odd_songs_rwc
    elif dataset == "even_songs":
        dataset_path = paths.path_even_songs_rwc
    elif dataset == "debug":
        dataset_path = paths.path_debug_rwc
    else:
        raise err.InvalidArgumentValueException(
            f"Dataset type not understood: {dataset}") from None

    list_songs = scr.load_RWC_dataset(dataset_path, annotations_type)
    hop_length = 32
    hop_length_seconds = 32 / 44100
    zero_five_results = []
    three_results = []
    deviation = []

    for song_and_annotations in list_songs:
        song_number = song_and_annotations[0].replace(".wav", "")

        annot_path = "{}/{}".format(annotations_folder,
                                    song_and_annotations[1])
        annotations = dm.get_segmentation_from_txt(annot_path,
                                                   annotations_type)
        references_segments = np.array(annotations)[:, 0:2]

        bars, spectrogram = scr.load_or_save_spectrogram_and_bars(
            paths.path_data_persisted_rwc, f"{dataset_path}/{song_number}",
            feature, hop_length)

        tensor_spectrogram = tf.tensorize_barwise(spectrogram, bars,
                                                  hop_length_seconds,
                                                  subdivision)
        if update_rule == "hals":
            persisted_arguments = f"_{song_number}_{feature}_{init}_{subdivision}"
        elif update_rule == "mu":
            persisted_arguments = f"mu_slow_{song_number}_beta{beta}_{feature}_{init}_{subdivision}_n_iter_max{n_iter_max}"
        else:
            raise err.InvalidArgumentValueException(
                f"Update rule type not understood: {update_rule}")

        q_factor = scr.NTD_decomp_as_script(paths.path_data_persisted_rwc,
                                            persisted_arguments,
                                            tensor_spectrogram,
                                            ranks,
                                            init=init,
                                            update_rule=update_rule,
                                            beta=beta)[1][2]
        autosimilarity = as_seg.get_autosimilarity(q_factor,
                                                   transpose=True,
                                                   normalize=True)

        segments = as_seg.dynamic_convolution_computation(
            autosimilarity,
            penalty_weight=penalty_weight,
            penalty_func=penalty_func,
            convolution_type=convolution_type)[0]
        segments_in_time = dm.segments_from_bar_to_time(segments, bars)

        tp, fp, fn = dm.compute_rates_of_segmentation(references_segments,
                                                      segments_in_time,
                                                      window_length=0.5)
        prec, rap, f_mes = dm.compute_score_of_segmentation(
            references_segments, segments_in_time, window_length=0.5)
        zero_five_results.append(
            [tp, fp, fn,
             round(prec, 4),
             round(rap, 4),
             round(f_mes, 4)])

        tp, fp, fn = dm.compute_rates_of_segmentation(references_segments,
                                                      segments_in_time,
                                                      window_length=3)
        prec, rap, f_mes = dm.compute_score_of_segmentation(
            references_segments, segments_in_time, window_length=3)
        three_results.append(
            [tp, fp, fn,
             round(prec, 4),
             round(rap, 4),
             round(f_mes, 4)])

        r_to_e, e_to_r = dm.compute_median_deviation_of_segmentation(
            references_segments, segments_in_time)
        deviation.append([r_to_e, e_to_r])

    results_at_zero_five = np.array(
        [np.mean(np.array(zero_five_results)[:, i]) for i in range(6)])
    dataframe_zero_five = pd.DataFrame(
        results_at_zero_five,
        index=[
            'True Positives', 'False Positives', 'False Negatives',
            'Precision', 'Recall', 'F measure'
        ],
        columns=[
            f"Results of {feature} with 0.5 seconds tolerance window {legend}"
        ])
    display(dataframe_zero_five.T)

    results_at_three = np.array(
        [np.mean(np.array(three_results)[:, i]) for i in range(6)])
    dataframe_three = pd.DataFrame(
        results_at_three,
        index=[
            'True Positives', 'False Positives', 'False Negatives',
            'Precision', 'Recall', 'F measure'
        ],
        columns=[
            f"Results of {feature} with 3 seconds tolerance window {legend}"
        ])
    display(dataframe_three.T)

    # mean_deviation = np.array([np.mean(np.array(deviation)[:,i]) for i in range(2)])
    # dataframe_deviation = pd.DataFrame(mean_deviation, index = ['Reference to Estimation mean deviation','Estimation to Reference mean deviation'], columns = ["Mean deviation between estimations and references{}".format(legend)])
    # display(dataframe_deviation.T)

    return results_at_zero_five, results_at_three
def several_ranks_with_cross_validation_of_param_RWC(
        learning_dataset,
        testing_dataset,
        feature,
        ranks_frequency,
        ranks_rhythm,
        ranks_pattern,
        penalty_range,
        init="tucker",
        update_rule="hals",
        beta=None,
        n_iter_max=1000,
        annotations_type="MIREX10",
        penalty_func="modulo8",
        convolution_type="eight_bands"):
    """
    Segmentation results when ranks and penalty parameter are fitted by cross validation.
    Results are shown for the test dataset.
    """
    if learning_dataset == "odd_songs":
        learning_dataset_path = paths.path_odd_songs_rwc
    elif learning_dataset == "even_songs":
        learning_dataset_path = paths.path_even_songs_rwc
    elif learning_dataset == "debug":
        learning_dataset_path = paths.path_debug_rwc
    else:
        raise err.InvalidArgumentValueException(
            f"Dataset type not understood: {learning_dataset}") from None

    if testing_dataset == "odd_songs":
        testing_dataset_path = paths.path_odd_songs_rwc
    elif testing_dataset == "even_songs":
        testing_dataset_path = paths.path_even_songs_rwc
    elif testing_dataset == "debug":
        testing_dataset_path = paths.path_debug_rwc
    else:
        raise err.InvalidArgumentValueException(
            f"Dataset type not understood: {testing_dataset}") from None

    if learning_dataset == testing_dataset:
        warnings.warn(
            "Careful: the same dataset is used for learning and testing. Is this intended?")

    annotations_folder = "{}/{}".format(paths.path_annotation_rwc,
                                        annotations_type)
    hop_length = 32
    hop_length_seconds = 32 / 44100
    subdivision = 96

    learning_dataset_songs = scr.load_RWC_dataset(learning_dataset_path,
                                                  annotations_type)

    zero_five = -math.inf * np.ones(
        (len(learning_dataset_songs), len(ranks_frequency), len(ranks_rhythm),
         len(ranks_pattern), len(penalty_range), 1))
    three = -math.inf * np.ones(
        (len(learning_dataset_songs), len(ranks_frequency), len(ranks_rhythm),
         len(ranks_pattern), len(penalty_range), 1))

    for song_idx, song_and_annotations in enumerate(learning_dataset_songs):
        #printmd('**Current song: {}**'.format(song_and_annotations[0]))
        song_number = song_and_annotations[0].replace(".wav", "")

        annot_path = "{}/{}".format(annotations_folder,
                                    song_and_annotations[1])
        annotations = dm.get_segmentation_from_txt(annot_path,
                                                   annotations_type)
        references_segments = np.array(annotations)[:, 0:2]

        bars, spectrogram = scr.load_or_save_spectrogram_and_bars(
            paths.path_data_persisted_rwc,
            "{}/{}".format(learning_dataset_path,
                           song_number), feature, hop_length)

        tensor_spectrogram = tf.tensorize_barwise(spectrogram, bars,
                                                  hop_length_seconds,
                                                  subdivision)

        for w, rank_W in enumerate(ranks_frequency):
            for h, rank_h in enumerate(ranks_rhythm):
                for q, rank_q in enumerate(ranks_pattern):
                    ranks = [rank_W, rank_h, rank_q]
                    if update_rule == "hals":
                        persisted_arguments = f"_{song_number}_{feature}_{init}_{subdivision}"
                    elif update_rule == "mu":
                        persisted_arguments = f"mu_slow_{song_number}_beta{beta}_{feature}_{init}_{subdivision}_n_iter_max{n_iter_max}"
                    else:
                        raise err.InvalidArgumentValueException(
                            f"Update rule type not understood: {update_rule}")

                    q_factor = scr.NTD_decomp_as_script(
                        paths.path_data_persisted_rwc,
                        persisted_arguments,
                        tensor_spectrogram,
                        ranks,
                        init=init,
                        update_rule=update_rule,
                        beta=beta)[1][2]

                    autosimilarity = as_seg.get_autosimilarity(q_factor,
                                                               transpose=True,
                                                               normalize=True)

                    for p, penalty in enumerate(penalty_range):
                        segments = as_seg.dynamic_convolution_computation(
                            autosimilarity,
                            penalty_weight=penalty,
                            penalty_func=penalty_func,
                            convolution_type=convolution_type)[0]
                        segments_in_time = dm.segments_from_bar_to_time(
                            segments, bars)

                        prec, rap, f_mes = dm.compute_score_of_segmentation(
                            references_segments,
                            segments_in_time,
                            window_length=0.5)
                        zero_five[song_idx, w, h, q, p] = round(f_mes, 4)

                        prec, rap, f_mes = dm.compute_score_of_segmentation(
                            references_segments,
                            segments_in_time,
                            window_length=3)
                        three[song_idx, w, h, q, p] = round(f_mes, 4)

    best_mean = 0
    best_params = []
    for w, rank_W in enumerate(ranks_frequency):
        for h, rank_h in enumerate(ranks_rhythm):
            for q, rank_q in enumerate(ranks_pattern):
                for p, penalty in enumerate(penalty_range):
                    this_avg = np.mean(zero_five[:, w, h, q, p])
                    if this_avg > best_mean:
                        best_mean = this_avg
                        best_params = [rank_W, rank_h, rank_q, penalty]

    display(
        pd.DataFrame(np.array(
            [best_params[0], best_params[1], best_params[2], best_params[3]]),
                     index=[
                         'Best rank for $W$', 'Best rank for $H$',
                         'Best rank for $Q$',
                         'Best lambda: weighting parameter.'
                     ],
                     columns=["Learned parameters"]).T)

    learned_ranks = [best_params[0], best_params[1], best_params[2]]
    results_at_zero_five, results_at_three = final_results_fixed_conditions(
        testing_dataset,
        feature,
        learned_ranks,
        best_params[3],
        init=init,
        update_rule=update_rule,
        beta=beta,
        n_iter_max=n_iter_max,
        annotations_type=annotations_type,
        penalty_func=penalty_func,
        legend="on test dataset.",
        convolution_type=convolution_type)

    return best_params, results_at_zero_five, results_at_three
def load_or_save_spectrogram(persisted_path,
                             song_path,
                             feature,
                             hop_length,
                             fmin=98,
                             n_fft=2048,
                             n_mfcc=20):
    """
    Computes the spectrogram for this song, or loads it if it has already been computed.

    Parameters
    ----------
    persisted_path : string
        Path where the spectrogram should be found.
    song_path : string
        The path of the signal of the song.
    feature : string
        Feature of the spectrogram, part of the identifier of the spectrogram.
    hop_length : integer
        hop_length of the spectrogram, part of the identifier of the spectrogram.
    fmin : integer, optional
        Minimal frequency for the spectrogram, part of the identifier of the spectrogram.
        The default is 98.
    n_fft : integer, optional
        FFT window size, part of the identifier of stft spectrograms.
        The default is 2048.
    n_mfcc : integer, optional
        Number of MFCC features, part of the identifier of mfcc spectrograms.
        The default is 20.

    Returns
    -------
    spectrogram : numpy array
        The pre-computed spectrogram.
    """
    song_name = song_path.split("/")[-1].replace(".wav",
                                                 "").replace(".mp3", "")
    try:
        if "stft" in feature:
            if "nfft" not in feature:
                spectrogram = np.load(
                    "{}/spectrograms/{}_{}-nfft{}_stereo_{}.npy".format(
                        persisted_path, song_name, feature, n_fft, hop_length))
            else:
                spectrogram = np.load(
                    "{}/spectrograms/{}_{}_stereo_{}.npy".format(
                        persisted_path, song_name, feature, hop_length))
        elif feature == "mel" or feature == "log_mel":
            raise err.InvalidArgumentValueException(
                "Invalid mel parameter, are't you looking for mel_grill?")
        elif "mfcc" in feature:
            if "nmfcc" not in feature:
                spectrogram = np.load(
                    "{}/spectrograms/{}_{}-nmfcc{}_stereo_{}.npy".format(
                        persisted_path, song_name, feature, n_mfcc,
                        hop_length))
            else:
                spectrogram = np.load(
                    "{}/spectrograms/{}_{}_stereo_{}.npy".format(
                        persisted_path, song_name, feature, hop_length))
        elif feature == "pcp":
            spectrogram = np.load(
                "{}/spectrograms/{}_{}_stereo_{}_{}.npy".format(
                    persisted_path, song_name, feature, hop_length, fmin))
        else:
            spectrogram = np.load("{}/spectrograms/{}_{}_stereo_{}.npy".format(
                persisted_path, song_name, feature, hop_length))

    except FileNotFoundError:
        the_signal, original_sampling_rate = sf.read(song_path)
        #the_signal, original_sampling_rate = librosa.load(song_path)
        if original_sampling_rate != 44100:
            the_signal = librosa.core.resample(np.asfortranarray(the_signal),
                                               original_sampling_rate, 44100)
        if "stft" in feature:
            if "nfft" not in feature:
                spectrogram = features.get_spectrogram(the_signal,
                                                       44100,
                                                       feature,
                                                       hop_length,
                                                       n_fft=n_fft)
                np.save(
                    "{}/spectrograms/{}_{}-nfft{}_stereo_{}".format(
                        persisted_path, song_name, feature, n_fft, hop_length),
                    spectrogram)
                return spectrogram
            else:
                n_fft_arg = int(feature.split("nfft")[1])
                spectrogram = features.get_spectrogram(the_signal,
                                                       44100,
                                                       feature,
                                                       hop_length,
                                                       n_fft=n_fft_arg)
                np.save(
                    "{}/spectrograms/{}_{}_stereo_{}".format(
                        persisted_path, song_name, feature, hop_length),
                    spectrogram)
                return spectrogram
        if feature == "mel" or feature == "log_mel":
            raise err.InvalidArgumentValueException(
                "Invalid mel parameter, are't you looking for mel_grill?")
        if "mfcc" in feature:
            if "nmfcc" not in feature:
                spectrogram = features.get_spectrogram(the_signal,
                                                       44100,
                                                       "mfcc",
                                                       hop_length,
                                                       n_mfcc=n_mfcc)
                np.save(
                    "{}/spectrograms/{}_{}-nmfcc{}_stereo_{}".format(
                        persisted_path, song_name, feature, n_mfcc,
                        hop_length), spectrogram)
                return spectrogram
            else:
                n_mfcc_arg = int(feature.split("nmfcc")[1])
                spectrogram = features.get_spectrogram(the_signal,
                                                       44100,
                                                       "mfcc",
                                                       hop_length,
                                                       n_mfcc=n_mfcc_arg)
                np.save(
                    "{}/spectrograms/{}_{}_stereo_{}".format(
                        persisted_path, song_name, feature, hop_length),
                    spectrogram)
                return spectrogram
        if feature == "pcp_tonnetz":
            # If chromas are already computed, try to load them instead of recomputing them.
            chromas = load_or_save_spectrogram(persisted_path,
                                               song_path,
                                               "pcp",
                                               hop_length,
                                               fmin=fmin)
            spectrogram = librosa.feature.tonnetz(y=None,
                                                  sr=None,
                                                  chroma=chromas)
            np.save(
                "{}/spectrograms/{}_{}_stereo_{}_{}".format(
                    persisted_path, song_name, feature, hop_length, fmin),
                spectrogram)
            return spectrogram
        # if feature == "tonnetz":
        #     hop_length = "fixed"
        #     fmin = "fixed"
        if feature == "pcp":
            # Compute the chromagram and save it (the pcp_tonnetz case was handled above).
            spectrogram = features.get_spectrogram(the_signal,
                                                   44100,
                                                   feature,
                                                   hop_length,
                                                   fmin=fmin)
            np.save(
                "{}/spectrograms/{}_{}_stereo_{}_{}".format(
                    persisted_path, song_name, feature, hop_length, fmin),
                spectrogram)
            return spectrogram

        spectrogram = features.get_spectrogram(the_signal, 44100, feature,
                                               hop_length)
        np.save(
            "{}/spectrograms/{}_{}_stereo_{}".format(persisted_path, song_name,
                                                     feature, hop_length),
            spectrogram)
        return spectrogram

    return spectrogram
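# A minimal usage sketch, added for illustration (not part of the original
# module). It assumes the signature suggested by the recursive call above:
# load_or_save_spectrogram(persisted_path, song_path, feature, hop_length,
# fmin=...). The paths below are hypothetical.
if __name__ == "__main__":
    demo_persisted_path = "/data/persisted"     # hypothetical cache root containing "spectrograms/"
    demo_song_path = "/data/audio/my_song.wav"  # hypothetical audio file

    # First call: the np.load in the try block fails, so the STFT is computed
    # (n_fft parsed from the feature name, "stft-nfft4096" -> 4096) and saved.
    spec = load_or_save_spectrogram(demo_persisted_path, demo_song_path,
                                    "stft-nfft4096", 512)

    # Identical call: the saved .npy file is found and loaded, nothing is recomputed.
    spec = load_or_save_spectrogram(demo_persisted_path, demo_song_path,
                                    "stft-nfft4096", 512)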
Example n. 12
def get_spectrogram(signal, sr, feature, hop_length, n_fft=2048, fmin=98):
    """
    Returns a spectrogram computed from the signal.
    Several types of spectrograms can be computed, as specified by the "feature" argument.
        
    All these spectrograms are computed by using the toolbox librosa [1].
    
    Parameters
    ----------
    signal : numpy array
        Signal of the song.
        Can be mono or multichannel; for multichannel signals, each channel is processed separately and the per-channel spectrograms are summed.
    sr : float
        Sampling rate of the signal, generally 44100 Hz.
    feature : String
        The types of spectrograms to compute.
            - stft : computes the Short-Time Fourier Transform of the signal.
            - pcp : computes a chromagram.
            NB: this chromagram has been specifically tuned by the team,
            and its arguments are deliberate technical choices rather than standard defaults.
            - pcp_stft : computes a chromagram from the stft of the song.
            - cqt : computes a Constant-Q transform of the song.
            - tonnetz : computes the tonnetz representation of the song.
            - pcp_tonnetz : computes the tonnetz representation of the song, starting from the chromas.
                It allows finer control over the parameters of the tonnetz computation,
                and avoids recomputing the chromas when they are already available (e.g., for scripts loading precomputed spectrograms).
    hop_length : integer
        The desired hop_length, which is the step between two frames (i.e., the time "discretization" step).
        It is expressed in terms of number of samples, which are defined by the sampling rate.
    n_fft : integer, optional
        Number of samples per STFT frame (the FFT window size).
        The default is 2048.
    fmin : integer, optional
        The minimal frequency to consider, used for denoising.
        The default is 98.

    Raises
    ------
    InvalidArgumentValueException
        If the "feature" argument is not presented above.

    Returns
    -------
    numpy array
        Spectrogram of the signal.
        
    References
    ----------
    [1] McFee, B., Raffel, C., Liang, D., Ellis, D. P., McVicar, M., Battenberg, E., & Nieto, O. (2015, July).
    librosa: Audio and music signal analysis in python. 
    In Proceedings of the 14th python in science conference (Vol. 8).
    
    [2] Nieto, O., & Bello, J. P. (2015). 
    MSAF: Music Structure Analysis Framework.
    In Proceedings of 16th International Society for Music Information Retrieval Conference (ISMIR 2015).

    """
    if feature.lower() == "stft":
        if len(signal.shape) == 1:
            stft = librosa.core.stft(np.asfortranarray(signal),
                                     n_fft=n_fft,
                                     hop_length=hop_length)
            power_spectrogram = np.abs(stft)**2
            return power_spectrogram

        # Multichannel signal: sum the per-channel power spectrograms.
        power_spectrogram = np.abs(
            librosa.core.stft(np.asfortranarray(signal[:, 0]),
                              n_fft=n_fft,
                              hop_length=hop_length))**2
        for i in range(1, signal.shape[1]):
            power_spectrogram += np.abs(
                librosa.core.stft(np.asfortranarray(signal[:, i]),
                                  n_fft=n_fft,
                                  hop_length=hop_length))**2
        return power_spectrogram

    elif feature.lower() == "pcp_stft":
        if len(signal.shape) == 1:
            audio_harmonic, _ = librosa.effects.hpss(
                y=np.asfortranarray(signal))
            chroma_stft = librosa.feature.chroma_stft(y=audio_harmonic,
                                                      sr=sr,
                                                      n_fft=n_fft,
                                                      hop_length=hop_length)
            return chroma_stft
        audio_harmonic, _ = librosa.effects.hpss(
            y=np.asfortranarray(signal[:, 0]))
        chroma_stft = librosa.feature.chroma_stft(y=audio_harmonic,
                                                  sr=sr,
                                                  n_fft=n_fft,
                                                  hop_length=hop_length)
        for i in range(1, signal.shape[1]):
            audio_harmonic, _ = librosa.effects.hpss(
                y=np.asfortranarray(signal[:, i]))
            chroma_stft += librosa.feature.chroma_stft(y=audio_harmonic,
                                                       sr=sr,
                                                       n_fft=n_fft,
                                                       hop_length=hop_length)
        return chroma_stft
    elif feature == "pcp":
        norm = inf  # Columns normalization
        win_len_smooth = 82  # Size of the smoothign window
        n_octaves = 6
        bins_per_chroma = 3
        bins_per_octave = bins_per_chroma * 12
        if len(signal.shape) == 1:
            return librosa.feature.chroma_cens(y=np.asfortranarray(signal),
                                               sr=sr,
                                               hop_length=hop_length,
                                               fmin=fmin,
                                               n_chroma=12,
                                               n_octaves=n_octaves,
                                               bins_per_octave=bins_per_octave,
                                               norm=norm,
                                               win_len_smooth=win_len_smooth)

        pcp = librosa.feature.chroma_cens(y=np.asfortranarray(signal[:, 0]),
                                          sr=sr,
                                          hop_length=hop_length,
                                          fmin=fmin,
                                          n_chroma=12,
                                          n_octaves=n_octaves,
                                          bins_per_octave=bins_per_octave,
                                          norm=norm,
                                          win_len_smooth=win_len_smooth)
        for i in range(1, signal.shape[1]):
            pcp += librosa.feature.chroma_cens(y=np.asfortranarray(signal[:, i]),
                                               sr=sr,
                                               hop_length=hop_length,
                                               fmin=fmin,
                                               n_chroma=12,
                                               n_octaves=n_octaves,
                                               bins_per_octave=bins_per_octave,
                                               norm=norm,
                                               win_len_smooth=win_len_smooth)

        return pcp
    elif feature.lower() == "cqt":
        if len(signal.shape) == 1:
            constant_q_transf = librosa.core.cqt(np.asfortranarray(signal),
                                                 sr=sr,
                                                 hop_length=hop_length)
            power_cqt = np.abs(constant_q_transf)**2
            return power_cqt
        power_cqt = np.abs(
            librosa.core.cqt(np.asfortranarray(signal[:, 0]),
                             sr=sr,
                             hop_length=hop_length))**2
        for i in range(1, signal.shape[1]):
            power_cqt += np.abs(
                librosa.core.cqt(np.asfortranarray(signal[:, i]),
                                 sr=sr,
                                 hop_length=hop_length))**2
        return power_cqt
    elif feature.lower() == "tonnetz":
        if len(signal.shape) == 1:
            return librosa.feature.tonnetz(np.asfortranarray(signal), sr=sr)
        tonnetz = librosa.feature.tonnetz(np.asfortranarray(signal[:, 0]),
                                          sr=sr)
        for i in range(1, signal.shape[1]):
            tonnetz += librosa.feature.tonnetz(np.asfortranarray(signal[:, i]),
                                               sr=sr)
        return tonnetz
    elif feature.lower() == "pcp_tonnetz":
        return librosa.feature.tonnetz(y=None,
                                       sr=None,
                                       chroma=get_spectrogram(signal,
                                                              sr,
                                                              "pcp",
                                                              hop_length,
                                                              fmin=fmin))
    else:
        raise err.InvalidArgumentValueException(
            "Unknown signal representation.")