Example #1
def freq_slice(fmin, fmax, sr, n_fft):
    '''Calculate the slice needed to select a frequency band.

    Arguments:
        fmin, fmax (int): the frequency bounds
        sr (int): the sample rate
        n_fft (int): the fft size

    Returns:
        slice(i[fmin], i[fmax])
    '''
    if not sr or not n_fft:
        raise ParameterError(
            "Both sr and n_fft must be set (got sr={}, n_fft={})".format(
                sr, n_fft))

    if fmin and fmin < 0:
        raise ParameterError("fmin={} must be nonnegative".format(fmin))

    if fmax and fmax > (sr / 2):
        raise ParameterError(
            "fmax={} must not exceed the Nyquist frequency, {}".format(
                fmax, sr / 2))

    fft_frequencies = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
    bin_start = np.where(fft_frequencies >= fmin)[0][0] if fmin else None
    # +1 so the slice includes the last bin below fmax
    bin_stop = np.where(fft_frequencies < fmax)[0][-1] + 1 if fmax else None
    return slice(bin_start, bin_stop)
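A minimal usage sketch for the helper above (assuming numpy and librosa are available; the 300-3400 Hz speech band is an arbitrary illustration):

import numpy as np
import librosa

# Select the STFT rows covering roughly the telephone speech band
sr, n_fft = 22050, 2048
y = np.random.randn(sr)                    # one second of noise
S = np.abs(librosa.stft(y, n_fft=n_fft))   # shape: (1 + n_fft // 2, t)
band = S[freq_slice(300, 3400, sr, n_fft)]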
Example #2
    def __init__(self, lag=False, unit=None):

        if unit not in ["s", "ms", None]:
            raise ParameterError("Unknown time unit: {}".format(unit))

        self.unit = unit
        self.lag = lag
Example #3
def __early_downsample_tf(y, sr, hop_length, res_type, n_octaves, nyquist,
                          filter_cutoff, scale, use_smoothing):
    '''Perform early downsampling on an audio signal, if it applies.'''

    downsample_count = __early_downsample_count(nyquist, filter_cutoff,
                                                hop_length, n_octaves)

    if downsample_count > 0 and res_type == 'kaiser_fast':
        downsample_factor = 2**(downsample_count)

        hop_length //= downsample_factor

        sig_len = y.get_shape().as_list()[1]
        if sig_len < downsample_factor:
            raise ParameterError('Input signal length={:d} is too short for '
                                 '{:d}-octave CQT'.format(sig_len, n_octaves))

        new_sr = sr / float(downsample_factor)

        print('Early downsample, from sr:', sr, 'to new_sr:', new_sr)
        y = audio_resample_tf(y,
                              sr,
                              new_sr,
                              scale=scale,
                              use_smoothing=use_smoothing)

        # If we're not going to length-scale after CQT, we
        # need to compensate for the downsampling factor here
        if not scale:
            y *= np.sqrt(downsample_factor)

        sr = new_sr

    return y, sr, hop_length
Example #4
def __early_downsample(y, sr, hop_length, res_type, n_octaves, nyquist,
                       filter_cutoff, scale):
    '''Perform early downsampling on an audio signal, if it applies.'''

    downsample_count = __early_downsample_count(nyquist, filter_cutoff,
                                                hop_length, n_octaves)

    if downsample_count > 0 and res_type == 'kaiser_fast':
        downsample_factor = 2**(downsample_count)

        hop_length //= downsample_factor

        if len(y) < downsample_factor:
            raise ParameterError('Input signal length={:d} is too short for '
                                 '{:d}-octave CQT'.format(len(y), n_octaves))

        new_sr = sr / float(downsample_factor)
        y = audio.resample(y, sr, new_sr, res_type=res_type, scale=True)

        # If we're not going to length-scale after CQT, we
        # need to compensate for the downsampling factor here
        if not scale:
            y *= np.sqrt(downsample_factor)

        sr = new_sr

    return y, sr, hop_length
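Both early-downsampling variants above call the private helpers __early_downsample_count and __num_two_factors, which this excerpt omits. The following is a sketch of how librosa (circa 0.6/0.7) implements them; BW_FASTEST is librosa's bandwidth constant for the 'kaiser_fast' resampling filter:

import numpy as np

BW_FASTEST = 0.85  # bandwidth of the 'kaiser_fast' filter, per librosa

def __num_two_factors(x):
    """Return how many times 2 divides x (0 for non-positive x)."""
    if x <= 0:
        return 0
    num_twos = 0
    while x % 2 == 0:
        num_twos += 1
        x //= 2
    return num_twos

def __early_downsample_count(nyquist, filter_cutoff, hop_length, n_octaves):
    """How many octaves can safely be downsampled before the CQT."""
    downsample_count1 = max(0, int(np.ceil(np.log2(BW_FASTEST * nyquist /
                                                   filter_cutoff)) - 1) - 1)
    num_twos = __num_two_factors(hop_length)
    downsample_count2 = max(0, num_twos - n_octaves + 1)
    return min(downsample_count1, downsample_count2)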
Example #5
def tempo(y=None, sr=22050, onset_envelope=None, hop_length=512, start_bpm=120,
          std_bpm=1.0, ac_size=8.0, max_tempo=320.0, aggregate=np.mean):

    if start_bpm <= 0:
        raise ParameterError('start_bpm must be strictly positive')

    win_length = int(core.time_to_frames(ac_size, sr=sr,
                                         hop_length=hop_length))

    tg = tempogram(y=y, sr=sr,
                   onset_envelope=onset_envelope,
                   hop_length=hop_length,
                   win_length=win_length)

    # Eventually, we want this to work for time-varying tempo
    if aggregate is not None:
        tg = aggregate(tg, axis=1, keepdims=True)

    # Get the BPM values for each bin, skipping the 0-lag bin
    bpms = core.tempo_frequencies(tg.shape[0], hop_length=hop_length, sr=sr)

    # Weight the autocorrelation by a log-normal distribution
    prior = np.exp(-0.5 * ((np.log2(bpms) - np.log2(start_bpm)) / std_bpm)**2)

    # Kill everything above the max tempo
    if max_tempo is not None:
        max_idx = np.argmax(bpms < max_tempo)
        prior[:max_idx] = 0

    # Really, instead of multiplying by the prior, we should set up a
    # probabilistic model for tempo and add log-probabilities.
    # This would give us a chance to recover from null signals and
    # rely on the prior.
    # it would also make time aggregation much more natural

    # Get the maximum, weighted by the prior

    period = tg * prior[:, np.newaxis]
    best_period = np.argmax(period, axis=0)
    # Second-best period, for the secondary tempo estimate
    second_period = np.argsort(period, axis=0)[-2]
    tempi = bpms[best_period]
    tempi2 = bpms[second_period]
    # Wherever the best tempo is index 0, return start_bpm
    tempi[best_period == 0] = start_bpm
    tempi2[second_period == 0] = start_bpm
    return (tempi2.astype(float)[0].item(), tempi.astype(float)[0].item())
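A usage sketch for the modified tempo function above, which, unlike librosa's, returns a (runner-up, best) pair of tempo estimates (librosa.ex('choice') assumes librosa >= 0.8):

import librosa

y, sr = librosa.load(librosa.ex('choice'), duration=10)
runner_up, best = tempo(y=y, sr=sr)
print('best: {:.1f} BPM, runner-up: {:.1f} BPM'.format(best, runner_up))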
Example #6
def __check_axes(axes):
    """Check if "axes" is an instance of an axis object. If not, use `gca`."""
    if axes is None:
        import matplotlib.pyplot as plt

        axes = plt.gca()
    elif not isinstance(axes, Axes):
        raise ParameterError(
            "`axes` must be an instance of matplotlib.axes.Axes. "
            "Found type(axes)={}".format(type(axes)))
    return axes
Example #7
    def __init__(self, Sa, octave=True, major=True, abbr=False, mela=None):

        if Sa is None:
            raise ParameterError(
                "Sa frequency is required for svara display formatting")

        self.Sa = Sa
        self.octave = octave
        self.major = major
        self.abbr = abbr
        self.mela = mela
Example #8
def __mesh_coords(ax_type, coords, n, **kwargs):
    """Compute axis coordinates"""

    if coords is not None:
        if len(coords) < n:
            raise ParameterError("Coordinate shape mismatch: "
                                 "{}<{}".format(len(coords), n))
        return coords

    coord_map = {
        "linear": __coord_fft_hz,
        "fft": __coord_fft_hz,
        "fft_note": __coord_fft_hz,
        "fft_svara": __coord_fft_hz,
        "hz": __coord_fft_hz,
        "log": __coord_fft_hz,
        "mel": __coord_mel_hz,
        "cqt": __coord_cqt_hz,
        "cqt_hz": __coord_cqt_hz,
        "cqt_note": __coord_cqt_hz,
        "cqt_svara": __coord_cqt_hz,
        "chroma": __coord_chroma,
        "chroma_c": __coord_chroma,
        "chroma_h": __coord_chroma,
        "time": __coord_time,
        "s": __coord_time,
        "ms": __coord_time,
        "lag": __coord_time,
        "lag_s": __coord_time,
        "lag_ms": __coord_time,
        "tonnetz": __coord_n,
        "off": __coord_n,
        "tempo": __coord_tempo,
        "fourier_tempo": __coord_fourier_tempo,
        "frames": __coord_n,
        None: __coord_n,
    }

    if ax_type not in coord_map:
        raise ParameterError("Unknown axis type: {}".format(ax_type))
    return coord_map[ax_type](n, **kwargs)
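The dictionary dispatch above replaces a long if/elif chain; here is a self-contained sketch of the same pattern, with simplified stand-ins for the private coordinate helpers:

import numpy as np

def _coord_n(n, **_):
    # n + 1 cell boundaries, in frame indices
    return np.arange(n + 1)

def _coord_time(n, sr=22050, hop_length=512, **_):
    # n + 1 cell boundaries, in seconds
    return np.arange(n + 1) * hop_length / sr

coord_map = {"time": _coord_time, "frames": _coord_n, None: _coord_n}

print(coord_map["time"](4))    # boundary times for 4 frames
print(coord_map["frames"](4))  # [0 1 2 3 4]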
Example #9
    def power_to_db(self, input):
        """Power to db, this function is the pytorch implementation of 
        librosa.core.power_to_lb
        """
        ref_value = self.ref
        log_spec = 10.0 * torch.log10(torch.clamp(input, min=self.amin, max=np.inf))
        log_spec -= 10.0 * np.log10(np.maximum(self.amin, ref_value))

        if self.top_db is not None:
            if self.top_db < 0:
                raise ParameterError('top_db must be non-negative')
            log_spec = torch.clamp(log_spec, min=log_spec.max().item() - self.top_db, max=np.inf)

        return log_spec
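For reference, the same conversion written as a standalone function; the defaults (ref=1.0, amin=1e-10, top_db=80.0) mirror librosa's power_to_db and stand in for self.ref, self.amin, and self.top_db above:

import numpy as np
import torch

def power_to_db(power, ref=1.0, amin=1e-10, top_db=80.0):
    # 10 * log10(power / ref), floored at amin and clipped to a top_db range
    log_spec = 10.0 * torch.log10(torch.clamp(power, min=amin))
    log_spec -= 10.0 * np.log10(np.maximum(amin, ref))
    if top_db is not None:
        if top_db < 0:
            raise ValueError('top_db must be non-negative')
        log_spec = torch.clamp(log_spec, min=log_spec.max().item() - top_db)
    return log_spec

print(power_to_db(torch.tensor([1.0, 0.1, 1e-12])))
# tensor([  0., -10., -80.])  -- the tiny value is clipped by top_db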
Example #10
def lpc(y, order):
    """Linear Prediction Coefficients via Burg's method

    This function applies Burg's method to estimate coefficients of a linear
    filter on `y` of order `order`.  Burg's method is an extension to the
    Yule-Walker approach, which are both sometimes referred to as LPC parameter
    estimation by autocorrelation.

    It follows the description and implementation approach described in the
    introduction in [1]_.  N.B. This paper describes a different method, which
    is not implemented here, but has been chosen for its clear explanation of
    Burg's technique in its introduction.

    .. [1] Larry Marple
           A New Autoregressive Spectrum Analysis Algorithm
           IEEE Transactions on Acoustics, Speech, and Signal Processing
           vol 28, no. 4, 1980

    Parameters
    ----------
    y : np.ndarray
        Time series to fit

    order : int > 0
        Order of the linear filter

    Returns
    -------
    a : np.ndarray of length order + 1
        LP prediction error coefficients, i.e. filter denominator polynomial

    Raises
    ------
    ParameterError
        - If y is not valid audio as per `util.valid_audio`
        - If order < 1 or not integer
    FloatingPointError
        - If y is ill-conditioned

    See also
    --------
    scipy.signal.lfilter

    Examples
    --------
    Compute LP coefficients of y at order 16 on entire series

    >>> y, sr = librosa.load(librosa.util.example_audio_file(), offset=30,
    ...                      duration=10)
    >>> librosa.lpc(y, 16)

    Compute LP coefficients, and plot LP estimate of original series

    >>> import matplotlib.pyplot as plt
    >>> import numpy as np
    >>> import scipy
    >>> y, sr = librosa.load(librosa.util.example_audio_file(), offset=30,
    ...                      duration=0.020)
    >>> a = librosa.lpc(y, 2)
    >>> y_hat = scipy.signal.lfilter(np.hstack([[0], -1 * a[1:]]), [1], y)
    >>> plt.figure()
    >>> plt.plot(y)
    >>> plt.plot(y_hat, linestyle='--')
    >>> plt.legend(['y', 'y_hat'])
    >>> plt.title('LP Model Forward Prediction')
    >>> plt.show()

    """
    if not isinstance(order, int) or order < 1:
        raise ParameterError("order must be an integer > 0")

    util.valid_audio(y, mono=True)

    return __lpc(y, order)
Example #11
def block_stream(filename=None,
                 y=None,
                 sr=None,
                 frame_length=2048,
                 hop_length=512,
                 segment_duration=1,
                 n_blocks=1,
                 full_frames=True):
    '''Load audio into frames. If given a filename, load from the file in blocks.
    If y and sr are given, slice the array into blocks.

    Arguments:
        filename (str, optional): path of an audio file to stream from disk
        y, sr (np.ndarray, int, optional): audio buffer and its sample rate
        frame_length (int): frame size, in samples
        hop_length (int): hop size, in samples
        segment_duration (float): duration of each segment, in seconds
        n_blocks (int): number of segments per block
        full_frames (bool): if True, drop incomplete trailing blocks

    '''
    # load audio frame generator

    if filename is not None:
        if y is not None:
            raise ParameterError('Only one of y and filename may be provided')

        # get blocks from file
        duration = librosa.get_duration(filename=filename)
        orig_sr = librosa.get_samplerate(filename)
        sr = sr or orig_sr

        # see: https://librosa.github.io/librosa/_modules/librosa/core/audio.html#stream
        # block_length is in units of `frames` so reverse calculation
        block_length = max(segment_duration * orig_sr, frame_length)
        block_n_frames = librosa.core.samples_to_frames(
            block_length, frame_length, hop_length)

        n_total = duration * orig_sr / librosa.core.frames_to_samples(
            block_n_frames, frame_length, hop_length)
        n_total = int(n_total / n_blocks)

        y_blocks = librosa.stream(filename,
                                  block_length=block_n_frames * n_blocks,
                                  frame_length=frame_length,
                                  hop_length=hop_length)

        # will throw an error if audio is not valid
        y_blocks = (y for y in y_blocks
                    if librosa.util.valid_audio(y, mono=True))

        if sr != orig_sr:  # resample if we have a different sr
            y_blocks = (librosa.resample(y, orig_sr, sr) for y in y_blocks)

    else:
        if y is None or sr is None:
            raise ParameterError(
                'At least one of (y, sr) or filename must be provided')

        librosa.util.valid_audio(y, mono=True)

        # get block length, make it evenly divisible into frames (with hop)
        block_length = max(segment_duration * sr,
                           frame_length) * n_blocks  # min block size = 1 frame
        block_length = librosa.core.samples_to_frames(
            block_length, frame_length, hop_length)  # convert to even frames
        block_length = librosa.core.frames_to_samples(
            block_length, frame_length, hop_length)  # convert back

        # get frames from array
        y_blocks = librosa.util.frame(y, block_length, block_length).T

        n_total = len(y_blocks)

    if full_frames:  # drop any frames that are incomplete
        y_blocks = (y for y in y_blocks if y.size == block_length)

    return y_blocks, n_total, sr
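A hedged usage sketch (the file path is hypothetical; librosa is imported as in the function above):

# Stream a file in ~1-second blocks, keeping only complete blocks
y_blocks, n_total, sr = block_stream('audio/example.wav',
                                     frame_length=2048,
                                     hop_length=512,
                                     segment_duration=1)
for block in y_blocks:
    S = librosa.stft(block, n_fft=2048, hop_length=512)
    # ... process each block's spectrogram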
Example #12
def waveplot(
    y,
    sr=22050,
    max_points=5e4,
    x_axis="time",
    offset=0.0,
    max_sr=1000,
    ax=None,
    **kwargs,
):
    """Plot the amplitude envelope of a waveform.

    If ``y`` is monophonic, a filled curve is drawn between ``[-abs(y), abs(y)]``.

    If ``y`` is stereo, the curve is drawn between ``[-abs(y[1]), abs(y[0])]``,
    so that the left and right channels are drawn above and below the axis,
    respectively.

    Long signals (``duration >= max_points``) are down-sampled to at
    most ``max_sr`` before plotting.

    .. warning::
        This function is deprecated in librosa 0.8.1 and will be removed
        in 0.9.0.  Its functionality is replaced and extended by `waveshow`.

    Parameters
    ----------
    y : np.ndarray [shape=(n,) or (2,n)]
        audio time series (mono or stereo)

    sr : number > 0 [scalar]
        sampling rate of ``y``

    max_points : positive number or None
        Maximum number of time-points to plot: if ``max_points`` exceeds
        the duration of ``y``, then ``y`` is downsampled.

        If `None`, no downsampling is performed.

    x_axis : str or None
        Display of the x-axis ticks and tick markers. Accepted values are:

        - 'time' : markers are shown as milliseconds, seconds, minutes, or hours.
                    Values are plotted in units of seconds.

        - 's' : markers are shown as seconds.

        - 'ms' : markers are shown as milliseconds.

        - 'lag' : like time, but past the halfway point counts as negative values.

        - 'lag_s' : same as lag, but in seconds.

        - 'lag_ms' : same as lag, but in milliseconds.

        - `None`, 'none', or 'off': ticks and tick markers are hidden.


    ax : matplotlib.axes.Axes or None
        Axes to plot on instead of the default `plt.gca()`.

    offset : float
        Horizontal offset (in seconds) to start the waveform plot

    max_sr : number > 0 [scalar]
        Maximum sampling rate for the visualization

    kwargs
        Additional keyword arguments to `matplotlib.pyplot.fill_between`

    Returns
    -------
    pc : matplotlib.collections.PolyCollection
        The PolyCollection created by `fill_between`.

    See also
    --------
    waveshow
    librosa.resample
    matplotlib.pyplot.fill_between


    Examples
    --------
    Plot a monophonic waveform

    >>> import matplotlib.pyplot as plt
    >>> y, sr = librosa.load(librosa.ex('choice'), duration=10)
    >>> fig, ax = plt.subplots(nrows=3, sharex=True, sharey=True)
    >>> librosa.display.waveplot(y, sr=sr, ax=ax[0])
    >>> ax[0].set(title='Monophonic')
    >>> ax[0].label_outer()

    Or a stereo waveform

    >>> y, sr = librosa.load(librosa.ex('choice', hq=True), mono=False, duration=10)
    >>> librosa.display.waveplot(y, sr=sr, ax=ax[1])
    >>> ax[1].set(title='Stereo')
    >>> ax[1].label_outer()

    Or harmonic and percussive components with transparency

    >>> y, sr = librosa.load(librosa.ex('choice'), duration=10)
    >>> y_harm, y_perc = librosa.effects.hpss(y)
    >>> librosa.display.waveplot(y_harm, sr=sr, alpha=0.25, ax=ax[2])
    >>> librosa.display.waveplot(y_perc, sr=sr, color='r', alpha=0.5, ax=ax[2])
    >>> ax[2].set(title='Harmonic + Percussive')
    """

    util.valid_audio(y, mono=False)

    if not (isinstance(max_sr, (int, np.integer)) and max_sr > 0):
        raise ParameterError("max_sr must be a positive integer")

    target_sr = sr
    hop_length = 1

    # Pad an extra channel dimension, if necessary
    if y.ndim == 1:
        y = y[np.newaxis, :]

    if max_points is not None:
        if max_points <= 0:
            raise ParameterError("max_points must be strictly positive")

        if max_points < y.shape[-1]:
            target_sr = min(max_sr, (sr * y.shape[-1]) // max_points)

        hop_length = sr // target_sr

    # Reduce by envelope calculation
    y = __envelope(y, hop_length)

    y_top = y[0]
    y_bottom = -y[-1]

    axes = __check_axes(ax)

    kwargs.setdefault("color", next(axes._get_lines.prop_cycler)["color"])

    locs = offset + core.times_like(y_top, sr=sr, hop_length=hop_length)

    out = axes.fill_between(locs, y_bottom, y_top, **kwargs)

    axes.set_xlim([locs.min(), locs.max()])

    # Construct tickers and locators
    __decorate_axis(axes.xaxis, x_axis)

    return out
Example #13
def waveshow(
    y,
    sr=22050,
    max_points=11025,
    x_axis="time",
    offset=0.0,
    marker="",
    where="post",
    label=None,
    ax=None,
    **kwargs,
):
    """Visualize a waveform in the time domain.

    This function constructs a plot which adaptively switches between a raw
    samples-based view of the signal (`matplotlib.pyplot.step`) and an
    amplitude-envelope view of the signal (`matplotlib.pyplot.fill_between`)
    depending on the time extent of the plot's viewport.

    More specifically, when the plot spans a time interval of less than ``max_points /
    sr`` (by default, 1/2 second), the samples-based view is used, and otherwise a
    downsampled amplitude envelope is used.
    This is done to limit the complexity of the visual elements to guarantee an
    efficient, visually interpretable plot.

    When using interactive rendering (e.g., in a Jupyter notebook or IPython
    console), the plot will automatically update as the view-port is changed, either
    through widget controls or programmatic updates.

    .. note:: When visualizing stereo waveforms, the amplitude envelope will be generated
        so that the upper limits derive from the left channel, and the lower limits derive
        from the right channel, which can produce a vertically asymmetric plot.

        When zoomed in to the sample view, only the first channel will be shown.
        If you want to visualize both channels at the sample level, it is recommended to
        plot each signal independently.


    Parameters
    ----------
    y : np.ndarray [shape=(n,) or (2,n)]
        audio time series (mono or stereo)

    sr : number > 0 [scalar]
        sampling rate of ``y`` (samples per second)

    max_points : positive integer
        Maximum number of samples to draw.  When the plot covers a time extent
        smaller than ``max_points / sr`` (default: 1/2 second), samples are drawn.

        If drawing raw samples would exceed `max_points`, then a downsampled
        amplitude envelope extracted from non-overlapping windows of `y` is
        visualized instead.  The parameters of the amplitude envelope are defined so
        that the resulting plot cannot produce more than `max_points` frames.

    x_axis : str or None
        Display of the x-axis ticks and tick markers. Accepted values are:

        - 'time' : markers are shown as milliseconds, seconds, minutes, or hours.
                    Values are plotted in units of seconds.

        - 's' : markers are shown as seconds.

        - 'ms' : markers are shown as milliseconds.

        - 'lag' : like time, but past the halfway point counts as negative values.

        - 'lag_s' : same as lag, but in seconds.

        - 'lag_ms' : same as lag, but in milliseconds.

        - `None`, 'none', or 'off': ticks and tick markers are hidden.


    ax : matplotlib.axes.Axes or None
        Axes to plot on instead of the default `plt.gca()`.

    offset : float
        Horizontal offset (in seconds) to start the waveform plot

    marker : string
        Marker symbol to use for sample values. (default: no markers)

        See also: `matplotlib.markers`.

    where : string, {'pre', 'mid', 'post'}
        This setting determines how both waveform and envelope plots interpolate
        between observations.

        See `matplotlib.pyplot.step` for details.

        Default: 'post'

    label : string [optional]
        The label string applied to this plot.
        Note that the label is attached only to the envelope
        (`fill_between`) element, not to the sample-level `step` plot.

    kwargs
        Additional keyword arguments to `matplotlib.pyplot.fill_between` and
        `matplotlib.pyplot.step`.

        Note that only those arguments which are common to both functions will be
        supported.

    Returns
    -------
    librosa.display.AdaptiveWaveplot
        An object of type `librosa.display.AdaptiveWaveplot`

    See also
    --------
    AdaptiveWaveplot
    matplotlib.pyplot.step
    matplotlib.pyplot.fill_between
    matplotlib.markers


    Examples
    --------
    Plot a monophonic waveform with an envelope view

    >>> import matplotlib.pyplot as plt
    >>> y, sr = librosa.load(librosa.ex('choice'), duration=10)
    >>> fig, ax = plt.subplots(nrows=3, sharex=True)
    >>> librosa.display.waveshow(y, sr=sr, ax=ax[0])
    >>> ax[0].set(title='Envelope view, mono')
    >>> ax[0].label_outer()

    Or a stereo waveform

    >>> y, sr = librosa.load(librosa.ex('choice', hq=True), mono=False, duration=10)
    >>> librosa.display.waveshow(y, sr=sr, ax=ax[1])
    >>> ax[1].set(title='Envelope view, stereo')
    >>> ax[1].label_outer()

    Or harmonic and percussive components with transparency

    >>> y, sr = librosa.load(librosa.ex('choice'), duration=10)
    >>> y_harm, y_perc = librosa.effects.hpss(y)
    >>> librosa.display.waveshow(y_harm, sr=sr, alpha=0.5, ax=ax[2], label='Harmonic')
    >>> librosa.display.waveshow(y_perc, sr=sr, color='r', alpha=0.5, ax=ax[2], label='Percussive')
    >>> ax[2].set(title='Multiple waveforms')
    >>> ax[2].legend()

    Zooming in on a plot to show raw sample values

    >>> fig, (ax, ax2) = plt.subplots(nrows=2, sharex=True)
    >>> ax.set(xlim=[6.0, 6.01], title='Sample view', ylim=[-0.2, 0.2])
    >>> librosa.display.waveshow(y, sr=sr, ax=ax, marker='.', label='Full signal')
    >>> librosa.display.waveshow(y_harm, sr=sr, alpha=0.5, ax=ax2, label='Harmonic')
    >>> librosa.display.waveshow(y_perc, sr=sr, color='r', alpha=0.5, ax=ax2, label='Percussive')
    >>> ax.label_outer()
    >>> ax.legend()
    >>> ax2.legend()

    """
    util.valid_audio(y, mono=False)

    # Pad an extra channel dimension, if necessary
    if y.ndim == 1:
        y = y[np.newaxis, :]

    if max_points <= 0:
        raise ParameterError(
            "max_points={} must be strictly positive".format(max_points))

    # Create the adaptive drawing object
    axes = __check_axes(ax)

    if "color" not in kwargs:
        kwargs.setdefault("color", next(axes._get_lines.prop_cycler)["color"])

    # Reduce by envelope calculation
    # this choice of hop ensures that the envelope has at most max_points values
    hop_length = max(1, y.shape[-1] // max_points)
    y_env = __envelope(y, hop_length)

    # Split the envelope into top and bottom
    y_bottom, y_top = -y_env[-1], y_env[0]

    times = offset + core.times_like(y, sr=sr, hop_length=1)

    # Only plot up to max_points worth of data here
    (steps, ) = axes.step(times[:max_points],
                          y[0, :max_points],
                          marker=marker,
                          where=where,
                          **kwargs)

    envelope = axes.fill_between(
        times[:len(y_top) * hop_length:hop_length],
        y_bottom,
        y_top,
        step=where,
        label=label,
        **kwargs,
    )
    adaptor = AdaptiveWaveplot(times,
                               y[0],
                               steps,
                               envelope,
                               sr=sr,
                               max_samples=max_points)

    axes.callbacks.connect("xlim_changed", adaptor.update)

    # Force an initial update to ensure the state is consistent
    adaptor.update(axes)

    # Construct tickers and locators
    __decorate_axis(axes.xaxis, x_axis)

    return adaptor
Example #14
def cqt_tf(y,
           sr=22050,
           hop_length=512,
           fmin=None,
           n_bins=84,
           bins_per_octave=12,
           filter_scale=1,
           norm=1,
           sparsity=0.01,
           window='hann',
           scale=True,
           pad_mode='reflect',
           use_smoothing=True,
           return_added_samples=False,
           debug=False):

    tuning = 0.0
    # How many octaves are we dealing with?
    n_octaves = int(np.ceil(float(n_bins) / bins_per_octave))
    n_filters = min(bins_per_octave, n_bins)

    len_orig = y.get_shape().as_list()[1]

    added_samples = []

    if fmin is None:
        # C1 by default
        fmin = note_to_hz('C1')

    # First thing, get the freqs of the top octave
    freqs = cqt_frequencies(n_bins, fmin,
                            bins_per_octave=bins_per_octave)[-bins_per_octave:]

    fmin_t = np.min(freqs)
    fmax_t = np.max(freqs)

    # Determine required resampling quality
    Q = float(filter_scale) / (2.0**(1. / bins_per_octave) - 1)
    filter_cutoff = fmax_t * (1 + 0.5 * filters.window_bandwidth(window) / Q)
    nyquist = sr / 2.0
    if filter_cutoff < audio.BW_FASTEST * nyquist:
        res_type = 'kaiser_fast'
    else:
        res_type = 'kaiser_best'

    y, sr, hop_length = __early_downsample_tf(y, sr, hop_length, res_type,
                                              n_octaves, nyquist,
                                              filter_cutoff, scale,
                                              use_smoothing)

    cqt_resp = []

    if res_type != 'kaiser_fast':

        # Do the top octave before resampling to allow for fast resampling
        fft_basis, n_fft, _ = __cqt_filter_fft(sr,
                                               fmin_t,
                                               n_filters,
                                               bins_per_octave,
                                               tuning,
                                               filter_scale,
                                               norm,
                                               sparsity,
                                               window=window)

        fft_basis = fft_basis.astype('complex64')

        fft_basis_tf = tf.constant(fft_basis, dtype=tf.complex64)
        fft_basis_tf = tf.transpose(fft_basis_tf)
        # Compute the CQT filter response and append it to the stack
        cqt_res, add_samples = __cqt_response_tf(y, n_fft, hop_length,
                                                 fft_basis_tf, pad_mode, debug)
        cqt_resp.append(cqt_res)
        added_samples += [add_samples]

        fmin_t /= 2
        fmax_t /= 2
        n_octaves -= 1

        filter_cutoff = fmax_t * (1 +
                                  0.5 * filters.window_bandwidth(window) / Q)

        res_type = 'kaiser_fast'

    # Make sure our hop is long enough to support the bottom octave
    num_twos = __num_two_factors(hop_length)
    if num_twos < n_octaves - 1:
        raise ParameterError('hop_length must be a positive integer '
                             'multiple of 2^{0:d} for {1:d}-octave CQT'.format(
                                 n_octaves - 1, n_octaves))

    # Now do the recursive bit
    fft_basis, n_fft, _ = __cqt_filter_fft(sr,
                                           fmin_t,
                                           n_filters,
                                           bins_per_octave,
                                           tuning,
                                           filter_scale,
                                           norm,
                                           sparsity,
                                           window=window)

    fft_basis = fft_basis.astype('complex64')

    fft_basis_tf = tf.constant(fft_basis, dtype=tf.complex64)
    fft_basis_tf = tf.transpose(fft_basis_tf)

    my_y, my_sr, my_hop = y, sr, hop_length

    # Iterate down the octaves
    for i in range(n_octaves):

        # Resample (except first time)
        if i > 0:
            if my_y.get_shape().as_list()[1] < 2:
                raise ParameterError('Input signal length={} is too short for '
                                     '{:d}-octave CQT'.format(
                                         len_orig, n_octaves))

            my_y = audio_resample_tf(my_y,
                                     my_sr,
                                     my_sr / 2.0,
                                     res_type=res_type,
                                     scale=True,
                                     use_smoothing=use_smoothing)
            # Then rescale the filters to compensate for downsampling
            fft_basis_tf *= np.sqrt(2)

            my_sr /= 2.0
            my_hop //= 2

        # Compute the cqt filter response and append to the stack
        cqt_res, add_samples = __cqt_response_tf(my_y, n_fft, my_hop,
                                                 fft_basis_tf, pad_mode, debug)
        cqt_resp.append(cqt_res)
        added_samples += [add_samples]

    C = __trim_stack_tf(cqt_resp, n_bins)

    if scale:
        lengths = filters.constant_q_lengths(sr,
                                             fmin,
                                             n_bins=n_bins,
                                             bins_per_octave=bins_per_octave,
                                             tuning=tuning,
                                             window=window,
                                             filter_scale=filter_scale)
        lengths_tf = tf.constant(lengths.astype('complex64'),
                                 dtype=tf.complex64)
        C /= tf.sqrt(lengths_tf[:, tf.newaxis])

    if return_added_samples:
        return C, added_samples
    else:
        return C
Example #15
def __decorate_axis(axis,
                    ax_type,
                    key="C:maj",
                    Sa=None,
                    mela=None,
                    thaat=None):
    """Configure axis tickers, locators, and labels"""

    if ax_type == "tonnetz":
        axis.set_major_formatter(TonnetzFormatter())
        axis.set_major_locator(FixedLocator(0.5 + np.arange(6)))
        axis.set_label_text("Tonnetz")

    elif ax_type == "chroma":
        axis.set_major_formatter(ChromaFormatter(key=key))
        degrees = core.key_to_degrees(key)
        axis.set_major_locator(
            FixedLocator(0.5 +
                         np.add.outer(12 * np.arange(10), degrees).ravel()))
        axis.set_label_text("Pitch class")

    elif ax_type == "chroma_h":
        if Sa is None:
            Sa = 0
        axis.set_major_formatter(ChromaSvaraFormatter(Sa=Sa))
        if thaat is None:
            # If no thaat is given, show all svara
            degrees = np.arange(12)
        else:
            degrees = core.thaat_to_degrees(thaat)
        # Rotate degrees relative to Sa
        degrees = np.mod(degrees + Sa, 12)
        axis.set_major_locator(
            FixedLocator(0.5 +
                         np.add.outer(12 * np.arange(10), degrees).ravel()))
        axis.set_label_text("Svara")

    elif ax_type == "chroma_c":
        if Sa is None:
            Sa = 0
        axis.set_major_formatter(ChromaSvaraFormatter(Sa=Sa, mela=mela))
        degrees = core.mela_to_degrees(mela)
        # Rotate degrees relative to Sa
        degrees = np.mod(degrees + Sa, 12)
        axis.set_major_locator(
            FixedLocator(0.5 +
                         np.add.outer(12 * np.arange(10), degrees).ravel()))
        axis.set_label_text("Svara")

    elif ax_type in ["tempo", "fourier_tempo"]:
        axis.set_major_formatter(ScalarFormatter())
        axis.set_major_locator(LogLocator(base=2.0))
        axis.set_label_text("BPM")

    elif ax_type == "time":
        axis.set_major_formatter(TimeFormatter(unit=None, lag=False))
        axis.set_major_locator(
            MaxNLocator(prune=None, steps=[1, 1.5, 5, 6, 10]))
        axis.set_label_text("Time")

    elif ax_type == "s":
        axis.set_major_formatter(TimeFormatter(unit="s", lag=False))
        axis.set_major_locator(
            MaxNLocator(prune=None, steps=[1, 1.5, 5, 6, 10]))
        axis.set_label_text("Time (s)")

    elif ax_type == "ms":
        axis.set_major_formatter(TimeFormatter(unit="ms", lag=False))
        axis.set_major_locator(
            MaxNLocator(prune=None, steps=[1, 1.5, 5, 6, 10]))
        axis.set_label_text("Time (ms)")

    elif ax_type == "lag":
        axis.set_major_formatter(TimeFormatter(unit=None, lag=True))
        axis.set_major_locator(
            MaxNLocator(prune=None, steps=[1, 1.5, 5, 6, 10]))
        axis.set_label_text("Lag")

    elif ax_type == "lag_s":
        axis.set_major_formatter(TimeFormatter(unit="s", lag=True))
        axis.set_major_locator(
            MaxNLocator(prune=None, steps=[1, 1.5, 5, 6, 10]))
        axis.set_label_text("Lag (s)")

    elif ax_type == "lag_ms":
        axis.set_major_formatter(TimeFormatter(unit="ms", lag=True))
        axis.set_major_locator(
            MaxNLocator(prune=None, steps=[1, 1.5, 5, 6, 10]))
        axis.set_label_text("Lag (ms)")

    elif ax_type == "cqt_note":
        axis.set_major_formatter(NoteFormatter(key=key))
        # Where is C1 relative to 2**k hz?
        log_C1 = np.log2(core.note_to_hz("C1"))
        C_offset = 2.0**(log_C1 - np.floor(log_C1))
        axis.set_major_locator(LogLocator(base=2.0, subs=(C_offset, )))
        axis.set_minor_formatter(NoteFormatter(key=key, major=False))
        axis.set_minor_locator(
            LogLocator(base=2.0,
                       subs=C_offset * 2.0**(np.arange(1, 12) / 12.0)))
        axis.set_label_text("Note")

    elif ax_type == "cqt_svara":
        axis.set_major_formatter(SvaraFormatter(Sa=Sa, mela=mela))
        # Find the offset of Sa relative to 2**k Hz
        sa_offset = 2.0**(np.log2(Sa) - np.floor(np.log2(Sa)))

        axis.set_major_locator(LogLocator(base=2.0, subs=(sa_offset, )))
        axis.set_minor_formatter(SvaraFormatter(Sa=Sa, mela=mela, major=False))
        axis.set_minor_locator(
            LogLocator(base=2.0,
                       subs=sa_offset * 2.0**(np.arange(1, 12) / 12.0)))
        axis.set_label_text("Svara")

    elif ax_type in ["cqt_hz"]:
        axis.set_major_formatter(LogHzFormatter())
        log_C1 = np.log2(core.note_to_hz("C1"))
        C_offset = 2.0**(log_C1 - np.floor(log_C1))
        axis.set_major_locator(LogLocator(base=2.0))
        axis.set_minor_formatter(LogHzFormatter(major=False))
        axis.set_minor_locator(
            LogLocator(base=2.0,
                       subs=C_offset * 2.0**(np.arange(1, 12) / 12.0)))
        axis.set_label_text("Hz")

    elif ax_type == "fft_note":
        axis.set_major_formatter(NoteFormatter(key=key))
        # Where is C1 relative to 2**k hz?
        log_C1 = np.log2(core.note_to_hz("C1"))
        C_offset = 2.0**(log_C1 - np.floor(log_C1))
        axis.set_major_locator(SymmetricalLogLocator(axis.get_transform()))
        axis.set_minor_formatter(NoteFormatter(key=key, major=False))
        axis.set_minor_locator(
            LogLocator(base=2.0, subs=2.0**(np.arange(1, 12) / 12.0)))
        axis.set_label_text("Note")

    elif ax_type == "fft_svara":
        axis.set_major_formatter(SvaraFormatter(Sa=Sa, mela=mela))
        # Find the offset of Sa relative to 2**k Hz
        log_Sa = np.log2(Sa)
        sa_offset = 2.0**(log_Sa - np.floor(log_Sa))

        axis.set_major_locator(
            SymmetricalLogLocator(axis.get_transform(),
                                  base=2.0,
                                  subs=[sa_offset]))
        axis.set_minor_formatter(SvaraFormatter(Sa=Sa, mela=mela, major=False))
        axis.set_minor_locator(
            LogLocator(base=2.0,
                       subs=sa_offset * 2.0**(np.arange(1, 12) / 12.0)))
        axis.set_label_text("Svara")

    elif ax_type in ["mel", "log"]:
        axis.set_major_formatter(ScalarFormatter())
        axis.set_major_locator(SymmetricalLogLocator(axis.get_transform()))
        axis.set_label_text("Hz")

    elif ax_type in ["linear", "hz", "fft"]:
        axis.set_major_formatter(ScalarFormatter())
        axis.set_label_text("Hz")

    elif ax_type in ["frames"]:
        axis.set_label_text("Frames")

    elif ax_type in ["off", "none", None]:
        axis.set_label_text("")
        axis.set_ticks([])

    else:
        raise ParameterError("Unsupported axis type: {}".format(ax_type))
Example #16
def cqt(y,
        sr=22050,
        hop_length=512,
        fmin=None,
        n_bins=84,
        bins_per_octave=12,
        tuning=0.0,
        filter_scale=1,
        norm=1,
        sparsity=0.01,
        window='hann',
        scale=True,
        pad_mode='reflect',
        res_type='scipy'):
    '''Compute the constant-Q transform of an audio signal.

    This implementation is based on the recursive sub-sampling method
    described by [1]_.

    .. [1] Schoerkhuber, Christian, and Anssi Klapuri.
        "Constant-Q transform toolbox for music processing."
        7th Sound and Music Computing Conference, Barcelona, Spain. 2010.

    Parameters
    ----------
    y : np.ndarray [shape=(n,)]
        audio time series

    sr : number > 0 [scalar]
        sampling rate of `y`

    hop_length : int > 0 [scalar]
        number of samples between successive CQT columns.

    fmin : float > 0 [scalar]
        Minimum frequency. Defaults to C1 ~= 32.70 Hz

    n_bins : int > 0 [scalar]
        Number of frequency bins, starting at `fmin`

    bins_per_octave : int > 0 [scalar]
        Number of bins per octave

    tuning : None or float in `[-0.5, 0.5)`
        Tuning offset in fractions of a bin (cents).
        If `None`, tuning will be automatically estimated from the signal.

    filter_scale : float > 0
        Filter scale factor. Small values (<1) use shorter windows
        for improved time resolution.

    norm : {inf, -inf, 0, float > 0}
        Type of norm to use for basis function normalization.
        See `librosa.util.normalize`.

    sparsity : float in [0, 1)
        Sparsify the CQT basis by discarding up to `sparsity`
        fraction of the energy in each basis.
        Set `sparsity=0` to disable sparsification.

    window : str, tuple, number, or function
        Window specification for the basis filters.
        See `filters.get_window` for details.

    scale : bool
        If `True`, scale the CQT response by the square root of the length
        of each channel's filter.  This is analogous to `norm='ortho'` in FFT.
        If `False`, do not scale the CQT. This is analogous to
        `norm=None` in FFT.

    pad_mode : string
        Padding mode for centered frame analysis.
        See also: `librosa.core.stft` and `np.pad`.

    Returns
    -------
    CQT : np.ndarray [shape=(n_bins, t), dtype=np.complex or np.float]
        Constant-Q value for each frequency at each time.

    Raises
    ------
    ParameterError
        If `hop_length` is not an integer multiple of
        `2**(n_bins / bins_per_octave)`, or if `y` is too short to support
        the frequency range of the CQT.

    See Also
    --------
    librosa.core.resample
    librosa.util.normalize

    Notes
    -----
    This function caches at level 20.

    Examples
    --------
    Generate and plot a constant-Q power spectrum

    >>> import matplotlib.pyplot as plt
    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> C = np.abs(librosa.cqt(y, sr=sr))
    >>> librosa.display.specshow(librosa.amplitude_to_db(C, ref=np.max),
    ...                          sr=sr, x_axis='time', y_axis='cqt_note')
    >>> plt.colorbar(format='%+2.0f dB')
    >>> plt.title('Constant-Q power spectrum')
    >>> plt.tight_layout()

    Limit the frequency range

    >>> C = np.abs(librosa.cqt(y, sr=sr, fmin=librosa.note_to_hz('C2'),
    ...                 n_bins=60))
    >>> C
    array([[  8.827e-04,   9.293e-04, ...,   3.133e-07,   2.942e-07],
           [  1.076e-03,   1.068e-03, ...,   1.153e-06,   1.148e-06],
           ...,
           [  1.042e-07,   4.087e-07, ...,   1.612e-07,   1.928e-07],
           [  2.363e-07,   5.329e-07, ...,   1.294e-07,   1.611e-07]])

    Using a higher frequency resolution

    >>> C = np.abs(librosa.cqt(y, sr=sr, fmin=librosa.note_to_hz('C2'),
    ...                 n_bins=60 * 2, bins_per_octave=12 * 2))
    >>> C
    array([[  1.536e-05,   5.848e-05, ...,   3.241e-07,   2.453e-07],
           [  1.856e-03,   1.854e-03, ...,   2.397e-08,   3.549e-08],
           ...,
           [  2.034e-07,   4.245e-07, ...,   6.213e-08,   1.463e-07],
           [  4.896e-08,   5.407e-07, ...,   9.176e-08,   1.051e-07]])
    '''

    # How many octaves are we dealing with?
    n_octaves = int(np.ceil(float(n_bins) / bins_per_octave))
    n_filters = min(bins_per_octave, n_bins)

    len_orig = len(y)

    if fmin is None:
        # C1 by default
        fmin = note_to_hz('C1')

    if tuning is None:
        tuning = estimate_tuning(y=y, sr=sr)

    # First thing, get the freqs of the top octave
    freqs = cqt_frequencies(n_bins, fmin,
                            bins_per_octave=bins_per_octave)[-bins_per_octave:]

    fmin_t = np.min(freqs)
    fmax_t = np.max(freqs)

    # Determine required resampling quality
    Q = float(filter_scale) / (2.0**(1. / bins_per_octave) - 1)
    filter_cutoff = fmax_t * (1 + 0.5 * filters.window_bandwidth(window) / Q)
    nyquist = sr / 2.0

    y, sr, hop_length = __early_downsample(y, sr, hop_length, res_type,
                                           n_octaves, nyquist, filter_cutoff,
                                           scale)

    cqt_resp = []

    if res_type != 'kaiser_fast':

        # Do the top octave before resampling to allow for fast resampling
        fft_basis, n_fft, _ = __cqt_filter_fft(sr,
                                               fmin_t,
                                               n_filters,
                                               bins_per_octave,
                                               tuning,
                                               filter_scale,
                                               norm,
                                               sparsity,
                                               window=window)

        # Compute the CQT filter response and append it to the stack
        cqt_resp.append(
            __cqt_response(y, n_fft, hop_length, fft_basis, pad_mode))

        fmin_t /= 2
        fmax_t /= 2
        n_octaves -= 1

        filter_cutoff = fmax_t * (1 +
                                  0.5 * filters.window_bandwidth(window) / Q)

        res_type = 'kaiser_fast'

    # Make sure our hop is long enough to support the bottom octave
    num_twos = __num_two_factors(hop_length)
    if num_twos < n_octaves - 1:
        raise ParameterError('hop_length must be a positive integer '
                             'multiple of 2^{0:d} for {1:d}-octave CQT'.format(
                                 n_octaves - 1, n_octaves))

    # Now do the recursive bit
    fft_basis, n_fft, _ = __cqt_filter_fft(sr,
                                           fmin_t,
                                           n_filters,
                                           bins_per_octave,
                                           tuning,
                                           filter_scale,
                                           norm,
                                           sparsity,
                                           window=window)

    my_y, my_sr, my_hop = y, sr, hop_length

    # Iterate down the octaves
    for i in range(n_octaves):

        # Resample (except first time)
        if i > 0:
            if len(my_y) < 2:
                raise ParameterError('Input signal length={} is too short for '
                                     '{:d}-octave CQT'.format(
                                         len_orig, n_octaves))

            my_y = audio.resample(my_y,
                                  my_sr,
                                  my_sr / 2.0,
                                  res_type=res_type,
                                  scale=True)
            # Then rescale the filters to compensate for downsampling
            fft_basis[:] *= np.sqrt(2)

            my_sr /= 2.0
            my_hop //= 2

        # Compute the cqt filter response and append to the stack
        cqt_resp.append(
            __cqt_response(my_y, n_fft, my_hop, fft_basis, pad_mode))

    C = __trim_stack(cqt_resp, n_bins)

    if scale:
        lengths = filters.constant_q_lengths(sr,
                                             fmin,
                                             n_bins=n_bins,
                                             bins_per_octave=bins_per_octave,
                                             tuning=tuning,
                                             window=window,
                                             filter_scale=filter_scale)
        C /= np.sqrt(lengths[:, np.newaxis])

    return C
Example #17
def icqt_tf(C,
            y,
            added_samples,
            sr=22050,
            hop_length=512,
            fmin=None,
            n_bins=84,
            bins_per_octave=12,
            filter_scale=1,
            norm=1,
            sparsity=0.01,
            window='hann',
            scale=True,
            pad_mode='reflect',
            use_smoothing=True,
            n_samples_total=None):

    tuning = 0.0
    # How many octaves are we dealing with?
    n_octaves = int(np.ceil(float(n_bins) / bins_per_octave))
    n_filters = min(bins_per_octave, n_bins)

    if fmin is None:
        # C1 by default
        fmin = note_to_hz('C1')

    # fmin must be resolved before the filter lengths can be computed
    if scale:
        lengths = filters.constant_q_lengths(sr,
                                             fmin,
                                             n_bins=n_bins,
                                             bins_per_octave=bins_per_octave,
                                             tuning=tuning,
                                             window=window,
                                             filter_scale=filter_scale)
        lengths_tf = tf.constant(lengths.astype('complex64'),
                                 dtype=tf.complex64)
        C *= tf.sqrt(lengths_tf[:, tf.newaxis])

    # First thing, get the freqs of the top octave
    freqs = cqt_frequencies(n_bins, fmin,
                            bins_per_octave=bins_per_octave)[-bins_per_octave:]

    fmin_t = np.min(freqs)
    fmax_t = np.max(freqs)

    # Determine required resampling quality
    Q = float(filter_scale) / (2.0**(1. / bins_per_octave) - 1)
    filter_cutoff = fmax_t * (1 + 0.5 * filters.window_bandwidth(window) / Q)
    nyquist = sr / 2.0
    if filter_cutoff < audio.BW_FASTEST * nyquist:
        res_type = 'kaiser_fast'
    else:
        res_type = 'kaiser_best'

    y, sr, hop_length = __early_downsample(y, sr, hop_length, res_type,
                                           n_octaves, nyquist, filter_cutoff,
                                           scale)

    cqt_resp = []

    for i in range(n_octaves):
        cqt_resp += [
            C[:, i * bins_per_octave:i * bins_per_octave + bins_per_octave, :]
        ]

    cqt_resp = cqt_resp[::-1]

    if n_samples_total is None:
        n_frames = cqt_resp[0].get_shape().as_list()[-1]
        n_samples_total = hop_length * n_frames
    print('n_samples_total:', n_samples_total)

    if res_type != 'kaiser_fast':

        # Do the top octave before resampling to allow for fast resampling
        fft_basis, n_fft, _ = __cqt_filter_fft(sr,
                                               fmin_t,
                                               n_filters,
                                               bins_per_octave,
                                               tuning,
                                               filter_scale,
                                               norm,
                                               sparsity,
                                               window=window)

        fft_basis = np.linalg.pinv(fft_basis)
        fft_basis_tf = tf.transpose(tf.constant(fft_basis.astype(
            np.complex64)))
        # Compute the CQT filter response and append it to the stack
        y = __icqt_response_tf(cqt_resp[0], n_fft, hop_length, fft_basis_tf,
                               pad_mode, added_samples[0])

        y = tf.image.resize_images(y[:, :, tf.newaxis, tf.newaxis],
                                   [n_samples_total, 1])[:, :, 0, 0]
        fmin_t /= 2
        fmax_t /= 2
        n_octaves -= 1

        filter_cutoff = fmax_t * (1 +
                                  0.5 * filters.window_bandwidth(window) / Q)

        res_type = 'kaiser_fast'

    # Make sure our hop is long enough to support the bottom octave
    num_twos = __num_two_factors(hop_length)
    if num_twos < n_octaves - 1:
        raise ParameterError('hop_length must be a positive integer '
                             'multiple of 2^{0:d} for {1:d}-octave CQT'.format(
                                 n_octaves - 1, n_octaves))

    # Now do the recursive bit
    fft_basis, n_fft, _ = __cqt_filter_fft(sr,
                                           fmin_t,
                                           n_filters,
                                           bins_per_octave,
                                           tuning,
                                           filter_scale,
                                           norm,
                                           sparsity,
                                           window=window)

    fft_basis_tf = tf.transpose(
        tf.constant(np.linalg.pinv(fft_basis.astype(np.complex64))))

    my_y, my_sr, my_hop = y, sr, hop_length

    # Iterate down the octaves
    for i in range(n_octaves):

        # Halve the rate (except the first time)
        if i > 0:
            # Match the forward transform's per-octave downsampling; the
            # signal itself is stretched back to n_samples_total below via
            # tf.image.resize_images rather than audio_resample_tf.
            my_sr /= 2.0
            my_hop //= 2

            ratio = float(sr) / my_sr

            # Compute the cqt filter response and append to the stack
            my_y = __icqt_response_tf(cqt_resp[i + 1], n_fft, my_hop,
                                      fft_basis_tf / np.sqrt(ratio), pad_mode,
                                      added_samples[i + 1])
            my_y = tf.image.resize_images(
                my_y[:, :, tf.newaxis, tf.newaxis],
                [n_samples_total, 1])[:, :, 0, 0] / np.sqrt(ratio)

            y += my_y

        else:
            my_y = __icqt_response_tf(cqt_resp[i + 1], n_fft, my_hop,
                                      fft_basis_tf, pad_mode,
                                      added_samples[i + 1])
            my_y = tf.image.resize_images(my_y[:, :, tf.newaxis, tf.newaxis],
                                          [n_samples_total, 1])[:, :, 0, 0]
            y += my_y

    return y
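The inversion above multiplies each octave's CQT response by the pseudo-inverse of the forward filter bank. A minimal numpy sketch of that idea, with small hypothetical matrices instead of the actual CQT basis:

import numpy as np

rng = np.random.default_rng(0)
basis = rng.standard_normal((12, 64))   # 12 filters over 64-sample frames
frames = rng.standard_normal((64, 100))

responses = basis @ frames                       # analysis step
frames_hat = np.linalg.pinv(basis) @ responses   # least-squares synthesis

# Only the component of each frame within the filters' span is recovered
print(np.allclose(frames, frames_hat))  # False: 12 filters can't span R^64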