def __test(tuning, accidental, octave, round_midi):

        note = 'A{:s}'.format(accidental)

        if octave is not None:
            note = '{:s}{:d}'.format(note, octave)
        else:
            octave = 0

        if tuning is not None:
            note = '{:s}{:+d}'.format(note, tuning)
        else:
            tuning = 0

        if round_midi:
            tuning = np.around(tuning, -2)

        hz_true = 440.0 * (2.0**(tuning * 0.01 / 12)) * (2.0**(octave - 4))

        if accidental == '#':
            hz_true *= 2.0**(1./12)
        elif accidental in list('b!'):
            hz_true /= 2.0**(1./12)

        hz = librosa.note_to_hz(note, round_midi=round_midi)
        assert np.allclose(hz, hz_true)

        hz = librosa.note_to_hz([note], round_midi=round_midi)
        assert np.allclose(hz[0], hz_true)
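For orientation, the identities this test exercises can be checked directly. A minimal sketch, assuming only that A4 is the 440 Hz reference and that adjacent semitones differ by a factor of 2**(1/12):

import numpy as np
import librosa

assert np.isclose(librosa.note_to_hz('A4'), 440.0)                     # reference pitch
assert np.isclose(librosa.note_to_hz('A5'), 880.0)                     # one octave doubles the frequency
assert np.isclose(librosa.note_to_hz('A#4'), 440.0 * 2.0**(1. / 12))   # sharp raises one semitone
assert np.isclose(librosa.note_to_hz('Ab4'), 440.0 / 2.0**(1. / 12))   # flat lowers one semitone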
Example #2
def make_signal(sr, duration, fmax="C8"):
    """ Generates a linear sine sweep """

    fmin = librosa.note_to_hz("C1") / sr
    if fmax is None:
        fmax = 0.5
    else:
        fmax = librosa.note_to_hz(fmax) / sr

    return np.sin(np.cumsum(2 * np.pi * np.logspace(np.log10(fmin), np.log10(fmax), num=duration * sr)))
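A quick sanity check of this helper (a sketch; np.logspace requires an integer num, so pass duration as an int here):

sr = 22050
y = make_signal(sr, 2)     # two-second sweep from C1 up toward C8
assert len(y) == 2 * sr    # one output sample per phase increment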
Example #4
def test_cqt():

    sr = 11025

    # Impulse train
    y = np.zeros(int(5.0 * sr))
    y[::sr] = 1.0


    # Hop size not long enough for num octaves
    # num_octaves = 6, 2**6 = 64 > 32
    yield (raises(librosa.ParameterError)(__test_cqt_size), y, sr, 32, None, 72,
           12, 0.0, 2, None, 1, 0.01)

    # Filters go beyond Nyquist. 500 Hz -> 4 octaves = 8000 Hz > 11000 Hz
    yield (raises(librosa.ParameterError)(__test_cqt_size), y, sr, 512, 500, 48,
           12, 0.0, 2, None, 1, 0.01)

    # Test with fmin near Nyquist
    for fmin in [3000, 4800]:
        for n_bins in [1, 2]:
            for bins_per_octave in [12]:
                yield (__test_cqt_size, y, sr, 512, fmin, n_bins,
                       bins_per_octave, 0.0, 2, None, 1, 0.01)

    # Test for no errors and correct output size
    for fmin in [None, librosa.note_to_hz('C2')]:
        for n_bins in [1, 12, 24, 48, 72, 74, 76]:
            for bins_per_octave in [12, 24]:
                for tuning in [None, 0, 0.25]:
                    for resolution in [1, 2]:
                        for norm in [1, 2]:
                            yield (__test_cqt_size, y, sr, 512, fmin, n_bins,
                                   bins_per_octave, tuning,
                                   resolution, None, norm, 0.01)
Example #5
def test_cqt_white_noise():

    def __test(fmin, n_bins, scale, sr, y):

        C = np.abs(librosa.cqt(y=y, sr=sr,
                               fmin=fmin,
                               n_bins=n_bins,
                               scale=scale))

        if not scale:
            lengths = librosa.filters.constant_q_lengths(sr, fmin,
                                                         n_bins=n_bins)
            C /= np.sqrt(lengths[:, np.newaxis])

        # Only compare statistics across the time dimension
        # we want ~ constant mean and variance across frequencies
        assert np.allclose(np.mean(C, axis=1), 1.0, atol=2.5e-1), np.mean(C, axis=1)
        assert np.allclose(np.std(C, axis=1), 0.5, atol=5e-1), np.std(C, axis=1)

    srand()
    for sr in [22050]:
        y = np.random.randn(30 * sr)

        for scale in [False, True]:
            for fmin in librosa.note_to_hz(['C1', 'C2']):
                for n_octaves in range(2, 4):
                    yield __test, fmin, n_octaves * 12, scale, sr, y
Example #6
def test_hcqt_white_noise():

    def __test(fmin, n_bins, scale, sr, y):

        C = librosa.hybrid_cqt(y=y, sr=sr,
                               fmin=fmin,
                               n_bins=n_bins,
                               scale=scale)

        if not scale:
            lengths = librosa.filters.constant_q_lengths(sr, fmin,
                                                         n_bins=n_bins)
            C /= np.sqrt(lengths[:, np.newaxis])

        assert np.allclose(np.mean(C, axis=1), 1.0, atol=2.5e-1), np.mean(C, axis=1)
        assert np.allclose(np.std(C, axis=1), 0.5, atol=5e-1), np.std(C, axis=1)

    srand()
    for sr in [22050]:
        y = np.random.randn(30 * sr)

        for scale in [False, True]:
            for fmin in librosa.note_to_hz(['C1', 'C2']):
                for n_octaves in [6, 7]:
                    yield __test, fmin, n_octaves * 12, scale, sr, y
Example #7
def test_constant_q():

    def __test(sr, fmin, n_bins, bins_per_octave, tuning, resolution,
               pad_fft, norm):

        F, lengths = librosa.filters.constant_q(sr,
                                                fmin=fmin,
                                                n_bins=n_bins,
                                                bins_per_octave=bins_per_octave,
                                                tuning=tuning,
                                                resolution=resolution,
                                                pad_fft=pad_fft,
                                                norm=norm)

        assert np.all(lengths <= F.shape[1])

        eq_(len(F), n_bins)

        if not pad_fft:
            return

        eq_(np.mod(np.log2(F.shape[1]), 1.0), 0.0)

        # Check for vanishing negative frequencies
        F_fft = np.abs(np.fft.fft(F, axis=1))
        # Normalize by row-wise peak
        F_fft = F_fft / np.max(F_fft, axis=1, keepdims=True)
        assert not np.any(F_fft[:, -F_fft.shape[1]//2:] > 1e-4)

    sr = 11025

    # Try to make a cq basis too close to nyquist
    yield (raises(librosa.ParameterError)(__test), sr, sr/2.0, 1, 12, 0, 1, True, 1)

    # with negative fmin
    yield (raises(librosa.ParameterError)(__test), sr, -60, 1, 12, 0, 1, True, 1)

    # with negative bins_per_octave
    yield (raises(librosa.ParameterError)(__test), sr, 60, 1, -12, 0, 1, True, 1)

    # with negative bins
    yield (raises(librosa.ParameterError)(__test), sr, 60, -1, 12, 0, 1, True, 1)

    # with negative resolution
    yield (raises(librosa.ParameterError)(__test), sr, 60, 1, 12, 0, -1, True, 1)

    # with negative norm
    yield (raises(librosa.ParameterError)(__test), sr, 60, 1, 12, 0, 1, True, -1)

    for fmin in [None, librosa.note_to_hz('C3')]:
        for n_bins in [12, 24]:
            for bins_per_octave in [12, 24]:
                for tuning in [0, 0.25]:
                    for resolution in [1, 2]:
                        for norm in [1, 2]:
                            for pad_fft in [False, True]:
                                yield (__test, sr, fmin, n_bins,
                                       bins_per_octave, tuning,
                                       resolution, pad_fft,
                                       norm)
Example #8
def test_cqt():

    sr = 11025
    duration = 5.0

    y = make_signal(sr, duration)

    # incorrect hop length for a 6-octave analysis
    # num_octaves = 6, 2**(6-1) = 32 > 16
    for hop_length in [-1, 0, 16, 63, 65]:
        yield (raises(librosa.ParameterError)(__test_cqt_size), y, sr, hop_length, None, 72,
               12, 0.0, 2, None, 1, 0.01)

    # Filters go beyond Nyquist. 500 Hz -> 4 octaves = 8000 Hz > 11000 Hz
    yield (raises(librosa.ParameterError)(__test_cqt_size), y, sr, 512, 500, 4 * 12,
           12, 0.0, 2, None, 1, 0.01)

    # Test with fmin near Nyquist
    for fmin in [3000, 4800]:
        for n_bins in [1, 2]:
            for bins_per_octave in [12]:
                yield (__test_cqt_size, y, sr, 512, fmin, n_bins,
                       bins_per_octave, 0.0, 2, None, 1, 0.01)

    # Test for no errors and correct output size
    for fmin in [None, librosa.note_to_hz('C2')]:
        for n_bins in [1, 12, 24, 48, 72, 74, 76]:
            for bins_per_octave in [12, 24]:
                for tuning in [None, 0, 0.25]:
                    for resolution in [1, 2]:
                        for norm in [1, 2]:
                            yield (__test_cqt_size, y, sr, 512, fmin, n_bins,
                                   bins_per_octave, tuning,
                                   resolution, None, norm, 0.01)
Example #9
    def __test(target_hz, resolution, bins_per_octave, tuning):

        y = np.sin(2 * np.pi * target_hz * t)
        tuning_est = librosa.estimate_tuning(resolution=resolution,
                                             bins_per_octave=bins_per_octave,
                                             y=y,
                                             sr=sr,
                                             n_fft=2048,
                                             fmin=librosa.note_to_hz('C4'),
                                             fmax=librosa.note_to_hz('G#9'))

        # Round to the proper number of decimals
        deviation = np.around(np.abs(tuning - tuning_est),
                              int(-np.log10(resolution)))

        # We'll accept an answer within three bins of the resolution
        assert deviation <= 3 * resolution
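For concreteness: with resolution = 0.01, the deviation above is rounded to int(-np.log10(0.01)) = 2 decimal places, and the acceptance band is 3 * 0.01 = 0.03 in fractional-bin tuning units.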
Example #10
def test_hybrid_cqt():
    # This test verifies that hybrid and full cqt agree to within 1e-3
    # (relative to peak) at the 99th percentile, over bins that are
    # significant (> 1e-4 of peak) in either representation.

    sr = 11025
    duration = 5.0

    y = make_signal(sr, duration, None)

    def __test(hop_length, fmin, n_bins, bins_per_octave,
               tuning, resolution, norm, sparsity, res_type):

        C2 = librosa.hybrid_cqt(y, sr=sr,
                                hop_length=hop_length,
                                fmin=fmin, n_bins=n_bins,
                                bins_per_octave=bins_per_octave,
                                tuning=tuning, filter_scale=resolution,
                                norm=norm,
                                sparsity=sparsity, res_type=res_type)

        C1 = np.abs(librosa.cqt(y, sr=sr,
                                hop_length=hop_length,
                                fmin=fmin, n_bins=n_bins,
                                bins_per_octave=bins_per_octave,
                                tuning=tuning, filter_scale=resolution,
                                norm=norm,
                                sparsity=sparsity, res_type=res_type))

        assert C1.shape == C2.shape

        # Check for numerical comparability
        idx1 = (C1 > 1e-4 * C1.max())
        idx2 = (C2 > 1e-4 * C2.max())

        perc = 99  # 99th percentile (np.percentile expects q in [0, 100])

        thresh = 1e-3

        idx = idx1 | idx2

        assert np.percentile(np.abs(C1[idx] - C2[idx]),
                             perc) < thresh * max(C1.max(), C2.max())

    for fmin in [None, librosa.note_to_hz('C2')]:
        for n_bins in [1, 12, 24, 48, 72, 74, 76]:
            for bins_per_octave in [12, 24]:
                for tuning in [None, 0, 0.25]:
                    for resolution in [1, 2]:
                        for norm in [1, 2]:
                            for res_type in [None, 'polyphase']:
                                yield (__test, 512, fmin, n_bins,
                                        bins_per_octave, tuning,
                                        resolution, norm, 0.01, res_type)
Example #11
def test_hybrid_cqt():

    sr = 11025

    # Impulse train
    y = np.zeros(int(5.0 * sr))
    y[::sr] = 1.0

    def __test(hop_length, fmin, n_bins, bins_per_octave, tuning, resolution, norm, sparsity):

        C2 = librosa.hybrid_cqt(
            y,
            sr=sr,
            hop_length=hop_length,
            fmin=fmin,
            n_bins=n_bins,
            bins_per_octave=bins_per_octave,
            tuning=tuning,
            resolution=resolution,
            norm=norm,
            sparsity=sparsity,
        )

        C1 = librosa.cqt(
            y,
            sr=sr,
            hop_length=hop_length,
            fmin=fmin,
            n_bins=n_bins,
            bins_per_octave=bins_per_octave,
            tuning=tuning,
            resolution=resolution,
            norm=norm,
            sparsity=sparsity,
        )

        eq_(C1.shape, C2.shape)

        # Check for numerical comparability
        assert np.mean(np.abs(C1 - C2)) < 1e-3

    # Hop size not long enough for num octaves
    # num_octaves = 6, 2**(72/12) = 64 > 32
    yield (raises(librosa.ParameterError)(__test), 32, None, 72, 12, 0.0, 2, 1, 0.01)

    for fmin in [None, librosa.note_to_hz("C2")]:
        for n_bins in [1, 12, 24, 48, 72, 74, 76]:
            for bins_per_octave in [12, 24]:
                for tuning in [None, 0, 0.25]:
                    for resolution in [1, 2]:
                        for norm in [1, 2]:
                            yield (__test, 512, fmin, n_bins, bins_per_octave, tuning, resolution, norm, 0.01)
Example #12
    def __init__(self, name, sr, hop_length, n_octaves=8, over_sample=3, fmin=None):

        super(CQT, self).__init__(name, sr, hop_length)

        if fmin is None:
            fmin = librosa.note_to_hz('C1')

        self.n_octaves = n_octaves
        self.over_sample = over_sample
        self.fmin = fmin

        self.register('mag', [None, n_octaves * 12 * over_sample], np.float32)
        self.register('phase', [None, n_octaves * 12 * over_sample], np.float32)
Example #13
File: pre.py Project: EQ4/crema
    def __init__(self, sr=32768, hop_length=1024, n_octaves=8, over_sample=3, fmin=None, dtype=np.float32):

        self.sr = sr
        self.hop_length = hop_length
        self.n_octaves = n_octaves
        self.over_sample = over_sample

        if fmin is None:
            fmin = librosa.note_to_hz('C1')

        self.fmin = fmin

        self.dtype = dtype
Example #14
def test_hybrid_cqt():

    sr = 11025

    # Impulse train
    y = np.zeros(int(5.0 * sr))
    y[::sr] = 1.0

    def __test(hop_length, fmin, n_bins, bins_per_octave,
               tuning, resolution, norm, sparsity):

        C2 = librosa.hybrid_cqt(y, sr=sr,
                                hop_length=hop_length,
                                fmin=fmin, n_bins=n_bins,
                                bins_per_octave=bins_per_octave,
                                tuning=tuning, resolution=resolution,
                                norm=norm,
                                sparsity=sparsity)

        C1 = librosa.cqt(y, sr=sr,
                         hop_length=hop_length,
                         fmin=fmin, n_bins=n_bins,
                         bins_per_octave=bins_per_octave,
                         tuning=tuning, resolution=resolution,
                         norm=norm,
                         sparsity=sparsity)

        eq_(C1.shape, C2.shape)

        # Check for numerical comparability
        assert np.mean(np.abs(C1 - C2)) < 1e-3

    for fmin in [None, librosa.note_to_hz('C2')]:
        for n_bins in [1, 12, 24, 48, 72, 74, 76]:
            for bins_per_octave in [12, 24]:
                for tuning in [None, 0, 0.25]:
                    for resolution in [1, 2]:
                        for norm in [1, 2]:
                            yield (__test, 512, fmin, n_bins,
                                   bins_per_octave, tuning,
                                   resolution, norm, 0.01)
Example #15
def fcqt(time_signal,
         fs,
         q_rate=q_rate_def,
         fmin=None,
         fratio=note_resolution,
         spThresh=0.0054,
         num_oct=6):
    # Define fmin
    if fmin is None:
        fmin = librosa.note_to_hz("C1")

    # The hop size is fixed
    nhop = CQTHOP

    # Calculate Constant-Q Properties
    freqs = GenMusicalScale(fmin, note_resolution, num_oct)  # center frequency of each bin
    nfreq = int(num_oct * note_resolution)  # number of frequency bins
    Q = int(
        (1. / ((2**(1. / fratio)) - 1)) * q_rate)  # Eq.(2): Q value from the 1992 paper

    sig_len = len(time_signal)  # number of samples
    nframe = int(sig_len / nhop)  # number of frames

    # N > max(N_k)
    fftLen = int(2**(ceil(log2(int(float(fs * Q) / freqs[0])))))
    h_fftLen = int(fftLen / 2)

    # ===================
    #  Kernel matrix computation
    # ===================
    sparseKernel = zeros([nfreq, fftLen], dtype=complex128)
    for k in range(nfreq):
        tmpKernel = zeros(fftLen, dtype=complex128)
        freq = freqs[k]
        # N_k
        N_k = int(float(fs * Q) / freq)
        # Center the FFT window on the analysis segment.
        startWin = int((fftLen - N_k) / 2)
        tmpKernel[startWin:startWin + N_k] = (hammingWindow(N_k) / N_k) * exp(
            two_pi_j * Q * arange(N_k, dtype=float64) / N_k)
        # FFT (kernel matrix)
        sparseKernel[k] = np.fft.fft(tmpKernel)

    ### Zero out sufficiently small values
    sparseKernel[abs(sparseKernel) <= spThresh] = 0

    ### Convert to a sparse matrix
    sparseKernel = csr_matrix(sparseKernel)
    ### Take the complex conjugate
    sparseKernel = sparseKernel.conjugate() / fftLen

    # ===========
    #  Execution
    # ===========
    ### New signal (for Calculation)
    new_sig = zeros(len(time_signal) + fftLen, dtype=float64)
    new_sig[h_fftLen:-h_fftLen] = time_signal

    ret = zeros([nframe, nfreq], dtype=complex128)
    for iiter in tqdm(range(nframe)):
        istart = iiter * nhop
        iend = istart + fftLen
        # FFT (input signal)?
        sig_fft = np.fft.fft(new_sig[istart:iend])
        # Matrix product with the sparse kernel
        ret[iiter] = sig_fft * sparseKernel.T

    return ret, freqs
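As a worked check of the Q value above (Eq. (2), presumably from Brown and Puckette's 1992 fast constant-Q paper), with the illustrative values fratio = 12 and q_rate = 1:

Q = 1.0 / (2.0**(1.0 / 12) - 1.0)   # ~16.82 cycles per analysis window
N_k = int(44100 * Q / 440.0)        # kernel length at 440 Hz, fs = 44100: ~1685 samples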
Example #16
def get_first_null_f0(items_handler: ItemsHandler,
                      start_offset: float,
                      min_duration: float,
                      end_offset: ty.Optional[float] = None,
                      min_note: str = 'C1',
                      max_note: str = 'C7',
                      frame_length: float = 2048,
                      win_length: ty.Optional[float] = None,
                      offset_units: LengthUnit = LengthUnit.ms,
                      length_units: LengthUnit = LengthUnit.samples) -> float:
    audio = items_handler.load_audio()[0]
    sr = items_handler.sr

    if length_units != LengthUnit.samples:
        if length_units != LengthUnit.ms:
            raise TypeError('length_units can be only of ms or samples')
        frame_length = length_convert(frame_length, sr, length_units,
                                      LengthUnit.samples)

        if win_length:
            win_length = length_convert(win_length, sr, length_units,
                                        LengthUnit.samples)
    hop_length = int(frame_length // 4)
    start_offset_int = ty.cast(
        int, length_convert(start_offset, sr, offset_units,
                            LengthUnit.samples))

    if start_offset_int:
        audio = audio[start_offset_int:]  # type:ignore
    if end_offset:
        end_offset_int = ty.cast(
            int,
            length_convert(end_offset, sr, offset_units, LengthUnit.samples))
        audio = audio[:end_offset_int - start_offset_int]  # type:ignore
    min_duration_frms = length_convert(min_duration,
                                       sr,
                                       offset_units,
                                       LengthUnit.frames,
                                       hop_length=hop_length)
    fmin, fmax = lr.note_to_hz(min_note), lr.note_to_hz(max_note)
    f0s, v_flag, v_prob = lr.pyin(
        audio,
        fmin=fmin,
        fmax=fmax,
        sr=sr,
        win_length=None if win_length is None else win_length,
        frame_length=frame_length,
    )
    # print(list(zip(f0s, v_flag)))
    nulls = np.where(~v_flag)
    # print(nulls)
    for idx, val in enumerate(nulls[0]):
        # print(val)
        if val >= min_duration_frms:
            # print(val, v_flag[val + 1])
            if v_flag[val + 1]:
                # print(f'skipping {val}')
                continue
            break

    if val < 5:
        raise PitchError(
            f'Cannot find null f0 at the reasonable frame (>=5): {v_flag}')
    val_normalized = length_convert(val,
                                    sr,
                                    LengthUnit.frames,
                                    offset_units,
                                    hop_length=hop_length)
    # print(val_normalized, )
    return start_offset + val_normalized
Example #17
def save_rainbowgram_plot(audio,
                          sample_rate: int = 16000,
                          filename: str = None,
                          output_dir: str = "output") -> None:
    """
  Saves the spectrogram plot of the given audio to the given filename in
  the given output_dir. The resulting plot is a Constant-Q transform (CQT)
  spectrogram with the amplitude converted to dB scale, the intensity of
  lines proportional to the log magnitude of the power spectrum, and the
  color given by the derivative of the phase, making the phase visible as
  "rainbow colors", hence the name "rainbowgrams" (coined by the Magenta
  team).

  :param audio: the audio content, as a floating point time series
  :param sample_rate: the sampling rate of the file
  :param filename: the optional filename, set to "%Y-%m-%d_%H%M%S".png if None
  :param output_dir: the output dir
  """
    os.makedirs(output_dir, exist_ok=True)

    # Configuration from https://arxiv.org/abs/1704.01279
    # and https://gist.github.com/jesseengel/e223622e255bd5b8c9130407397a0494
    peak = 70
    hop_length = 256
    over_sample = 4
    res_factor = 0.8
    octaves = 6
    notes_per_octave = 10
    color_dict = {
        "red": ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        "green": ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        "blue": ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        "alpha": ((0.0, 1.0, 1.0), (1.0, 0.0, 0.0))
    }
    color_mask = LinearSegmentedColormap("ColorMask", color_dict)
    plt.register_cmap(cmap=color_mask)

    # Init subplots; there is only one plot, but we have to use 2 cmaps,
    # which means 2 calls to ax.matshow that wouldn't work with a single plot.
    fig, ax = plt.subplots()
    plt.axis("off")

    bins_per_octave = int(notes_per_octave * over_sample)
    num_bins = int(octaves * notes_per_octave * over_sample)
    constant_q_transform = librosa.cqt(audio,
                                       sr=sample_rate,
                                       hop_length=hop_length,
                                       bins_per_octave=bins_per_octave,
                                       n_bins=num_bins,
                                       filter_scale=res_factor,
                                       fmin=librosa.note_to_hz("C2"))
    mag, phase = librosa.core.magphase(constant_q_transform)
    phase_angle = np.angle(phase)
    phase_unwrapped = np.unwrap(phase_angle)
    dphase = phase_unwrapped[:, 1:] - phase_unwrapped[:, :-1]
    dphase = np.concatenate([phase_unwrapped[:, 0:1], dphase], axis=1) / np.pi
    mag = (librosa.amplitude_to_db(mag, amin=1e-13, top_db=peak, ref=np.max) /
           peak) + 1
    ax.matshow(dphase[::-1, :], cmap=plt.cm.rainbow)
    ax.matshow(mag[::-1, :], cmap=color_mask)

    if not filename:
        date_and_time = time.strftime("%Y-%m-%d_%H%M%S")
        filename = f"{date_and_time}.png"
    path = os.path.join(output_dir, filename)
    plt.savefig(fname=path, dpi=600)
    plt.close(fig)
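A minimal usage sketch, assuming a mono input and a hypothetical file name:

y, sr = librosa.load('example.wav', sr=16000)
save_rainbowgram_plot(y, sample_rate=sr, filename='example_rainbowgram.png')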
Example #18
def probabilities(y, note_min, note_max, sr, frame_length, window_length,
                  hop_length, pitch_acc, voiced_acc, onset_acc, spread):
    """
    Estimate prior (observed) probabilities from audio signal
    

    Parameters
    ----------
    y : 1-D numpy array
        Array containing audio samples
        
    note_min : string, 'A#4' format
        Lowest note supported by this estimator
    note_max : string, 'A#4' format
        Highest note supported by this estimator
    sr : int
        Sample rate.
    frame_length : int 
    window_length : int
    hop_length : int
        Parameters for FFT estimation
    pitch_acc : float, between 0 and 1
        Probability (estimated) that the pitch estimator is correct.
    voiced_acc : float, between 0 and 1
        Estimated accuracy of the "voiced" parameter.
    onset_acc : float, between 0 and 1
        Estimated accuracy of the onset detector.
    spread : float, between 0 and 1
        Probability that the singer/musician had a one-semitone deviation
        due to vibrato or glissando.

    Returns
    -------
    P : 2D numpy array.
        P[j,t] is the prior probability of being in state j at time t.

    """

    fmin = librosa.note_to_hz(note_min)
    fmax = librosa.note_to_hz(note_max)
    midi_min = librosa.note_to_midi(note_min)
    midi_max = librosa.note_to_midi(note_max)
    n_notes = midi_max - midi_min + 1

    # F0 and voicing
    f0, voiced_flag, voiced_prob = librosa.pyin(y, fmin * 0.9, fmax * 1.1, sr,
                                                frame_length, window_length,
                                                hop_length)
    tuning = librosa.pitch_tuning(f0)
    f0_ = np.round(librosa.hz_to_midi(f0 - tuning)).astype(int)
    onsets = librosa.onset.onset_detect(y,
                                        sr=sr,
                                        hop_length=hop_length,
                                        backtrack=True)

    P = np.ones((n_notes * 2 + 1, len(f0)))

    for t in range(len(f0)):
        # probability of silence or onset = 1 - voiced_prob
        # probability of the estimated note = voiced_prob * pitch_acc
        # probability of any other note = voiced_prob * (1 - pitch_acc)
        if not voiced_flag[t]:
            P[0, t] = voiced_acc
        else:
            P[0, t] = 1 - voiced_acc

        for j in range(n_notes):
            if t in onsets:
                P[(j * 2) + 1, t] = onset_acc
            else:
                P[(j * 2) + 1, t] = 1 - onset_acc

            if j + midi_min == f0_[t]:
                P[(j * 2) + 2, t] = pitch_acc

            elif np.abs(j + midi_min - f0_[t]) == 1:
                P[(j * 2) + 2, t] = pitch_acc * spread

            else:
                P[(j * 2) + 2, t] = 1 - pitch_acc

    return P
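The rows of P interleave one onset and one sustain state per MIDI note after a single silence row (row 0; onset rows 2*j + 1; sustain rows 2*j + 2). A small helper like the following, not part of the original estimator, makes that mapping explicit:

def state_label(row, midi_min):
    """Map a row index of P back to a readable state label (sketch)."""
    if row == 0:
        return 'silence'
    note = librosa.midi_to_note(midi_min + (row - 1) // 2)
    return ('onset ' if (row - 1) % 2 == 0 else 'sustain ') + note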
Example #19
from keras.layers import *
from keras.models import Model
from keras.utils import plot_model
from numpy.lib.stride_tricks import as_strided  
import matplotlib.pyplot as plt
import librosa.display
import IPython.display as ipd
# Reference [d]: Classical MIDI Files, https://www.mfiles.co.uk/classical-midi.htm

model = None
history = None
trainx, trainy = ([], [])
validx, validy = ([], [])

BINS = 88
FREF = librosa.note_to_hz('A0')  # Reference frequency = 27.50 Hz -> A0
# For 84 frequency bins:
# FREF = librosa.note_to_hz('C1')  # Reference frequency = 32.70 Hz -> C1

hPi = np.zeros(BINS) # HMM Initial state (note) probabilities
hSteps = np.zeros(BINS*2-1) # Transition steps
hA = np.zeros([BINS, BINS]) # Transition matrix
hB = np.zeros([BINS, BINS]) # Emission matrix

fbins = librosa.cqt_frequencies(BINS, fmin=FREF)
FMAX = fbins[BINS-1]
STD = 25 # Standard deviation for the probability vector
Sr = 16000

def sample(filename):
    global trainx, trainy, validx, validy, Sr
Example #20
def detectionOnsets(y):
    fmin = librosa.note_to_hz(Notemin)
    fmax = librosa.note_to_hz(Notemax)
    #Nmin = int((sr/(fmax*(2**(1/BINS_PER_OCTAVE)-1))))
    #Nmax = int((sr/(fmin*(2**(1/BINS_PER_OCTAVE)-1))))
    n_bins = int(
        (librosa.note_to_midi(Notemax) - librosa.note_to_midi(Notemin)) *
        BINS_PER_OCTAVE / 12)
    Chrom = librosa.amplitude_to_db(np.abs(
        librosa.cqt(y=y,
                    sr=sr,
                    hop_length=STEP,
                    fmin=fmin,
                    bins_per_octave=BINS_PER_OCTAVE,
                    n_bins=n_bins)),
                                    ref=np.max)
    Nf = len(Chrom)
    N = len(Chrom[0])
    Diff = np.zeros((Nf, N))
    Dev = np.zeros(N)
    for j in range(1, N):
        for i in range(Nf):
            Diff[i, j] = np.abs(Chrom[i, j] - Chrom[i, j - 1])
            Dev[j] = sum(Diff[:, j])

    # THRESHOLD FUNCTION
    # Pad the head and tail with zeros
    l = []
    Seuil = []
    Onsets = []
    for k in range(int(H / 2)):
        l.append(0)
    for val in Dev:
        l.append(val)
    for k in range(int(H / 2)):
        l.append(0)
    # Compute the median
    for i in range(N):
        Seuil.append(ALPHA + BETA * stat.median(l[i:i + H]))
        if Dev[i] > Seuil[i]:
            Onsets.append(i)

    times = librosa.frames_to_time(np.arange(N), sr=sr, hop_length=STEP)

    # ONSET PRUNING FUNCTION
    i = 0
    while i < (len(Onsets) - 1):
        while (i < (len(Onsets) - 1)) and (times[Onsets[i + 1]] <
                                           times[Onsets[i]] + T):
            if Dev[Onsets[i + 1]] < Dev[Onsets[i]]: del Onsets[i + 1]
            else: del Onsets[i]
        i = i + 1

    onset_frames = librosa.util.fix_frames(Onsets,
                                           x_min=0,
                                           x_max=Chrom.shape[1] - 1)
    onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=STEP)

    # Synchronize on the onsets, trimming the start and end of long frames
    ChromSync = np.zeros((Nf, len(onset_frames) - 1))
    n_att = int(librosa.time_to_frames(T_att, sr=sr, hop_length=STEP))
    for j in range(len(onset_frames) - 1):
        for i in range(Nf):
            ChromSync[i, j] = np.mean(Chrom[i][(onset_frames[j] +
                                                n_att):(onset_frames[j + 1] -
                                                        n_att)])

    # Spectrum normalization


#    ChromSync[:,1] = librosa.power_to_db(librosa.db_to_power(ChromSync[:,1]) / np.sum(librosa.db_to_power(ChromSync[:,1])))
    if norm_spectre:
        for j in range(ChromSync.shape[1]):
            ChromSync[:, j] = librosa.power_to_db(
                librosa.db_to_power(ChromSync[:, j]) /
                np.sum(librosa.db_to_power(ChromSync[:, j])))

    # Display
    if plot_onsets:
        plt.figure(figsize=(13, 7))
        ax1 = plt.subplot(3, 1, 1)
        librosa.display.specshow(Chrom,
                                 bins_per_octave=BINS_PER_OCTAVE,
                                 fmin=fmin,
                                 y_axis='cqt_note',
                                 x_axis='time',
                                 x_coords=times)
        plt.title('CQT spectrogram')

        plt.subplot(3, 1, 2, sharex=ax1)
        plt.plot(times, Dev, label='Deviation')
        plt.plot(times, Seuil, color='g', label='Seuil')
        plt.vlines(times[Onsets],
                   0,
                   Dev.max(),
                   color='r',
                   alpha=0.9,
                   linestyle='--',
                   label='Onsets')
        plt.axis('tight')
        plt.legend(frameon=True, framealpha=0.75)

        ax1 = plt.subplot(3, 1, 3, sharex=ax1)
        librosa.display.specshow(ChromSync,
                                 bins_per_octave=BINS_PER_OCTAVE,
                                 fmin=fmin,
                                 y_axis='cqt_note',
                                 x_axis='time',
                                 x_coords=onset_times)
        plt.show()

    return onset_times
Example #21
print(pw_l.shape)

ms_l = librosa.feature.melspectrogram(S=pw_l, n_mels=256)
ms_r = librosa.feature.melspectrogram(S=pw_r, n_mels=256)
#by default n_mels=128
print(ms_l.shape)

transform = np.empty((2, 256, 431))
transform[0] = ms_l
transform[1] = ms_r

path_save = data_path + "\\" + save1_path + "\\"
np.save(path_save + 'airport-barcelona-0-0-a.npy', transform)

#CQT
Cqt = librosa.cqt(y, sr=sr, fmin=librosa.note_to_hz('A1'))
print(Cqt.size)
C = np.abs(Cqt)
freqs = librosa.cqt_frequencies(C.shape[0], fmin=librosa.note_to_hz('A1'))
print(freqs.size)
perceptual_Cqt = librosa.perceptual_weighting(C**2, freqs, ref=np.max)

plt.figure()
plt.subplot(2, 1, 1)
librosa.display.specshow(librosa.amplitude_to_db(C, ref=np.max),
                         fmin=librosa.note_to_hz('A1'),
                         y_axis='cqt_hz')
plt.title('Log CQT power')
plt.colorbar(format='%+2.0f dB')
plt.subplot(2, 1, 2)
librosa.display.specshow(perceptual_Cqt,
Example #22
def demo(instrument):
    fund_freq = librosa.note_to_hz("C4")
    fname = os.path.join(AUDIO_SAMPLES_DIR, "{}_C4.wav".format(instrument))
    y, sr = librosa.load(fname)
    plot_signal_harmonics(y, sr, fund_freq)
    return Audio(fname)
Example #23
def notes_to_audio(automaton=False,
                   function=None,
                   deterministic=True,
                   maxsamplesize=44100):
    if function is not None:
        print("###################################################")
        print("Function to Audio")
        print("###################################################")
        print("Function:", function)
        # Example:
        # >>> [eval('x*x+x+1') for x in range(1, 10)]
        # [3, 7, 13, 21, 31, 43, 57, 73, 91]
        notes = [eval(function) for x in range(0, 44100)]
        npnotes = np.asarray(notes)
        # scalednpnotes = np.int16(npnotes / np.max(npnotes) * 32767)
        scalednpnotes = npnotes
        print("Notes :", scalednpnotes)
        print("Size of scaled notes:", len(scalednpnotes))
        write("function_synthesized_music.wav", maxsamplesize, scalednpnotes)
        return
    if function is None and not automaton:
        print("###################################################")
        print("Notes to Audio")
        print("###################################################")
        npnotes = np.random.uniform(10, 100, 44100)
        # scalednpnotes = np.int16(npnotes / np.max(npnotes) * 32767)
        scalednpnotes = npnotes
        print("Notes :", scalednpnotes)
        print("Size of scaled notes:", len(scalednpnotes))
        write("notes_synthesized_music.wav", maxsamplesize, scalednpnotes)
        return
    if automaton:
        print("###################################################")
        print("Automaton to Audio")
        print("###################################################")
        states2notes_machine_file = open("NotesStateMachine.txt", "r")
        states2notes_machine = ast.literal_eval(
            states2notes_machine_file.read())
        dfanotes = [
            int(librosa.note_to_hz(states2notes_machine['start-s1']) * 1000)
        ]
        prevstates = ['start']
        iter = 0
        while iter < maxsamplesize - 1:
            possibletransitions = []
            prevprevstates = prevstates
            prevstates = []
            # print("prevstate:", prevstate)
            # if 'fs' in prevstate:
            #     break
            for k, v in states2notes_machine.items():
                statetransition = k.split("-")
                if statetransition[0] in prevprevstates:
                    possibletransitions.append(states2notes_machine[k])
                    prevstates.append(statetransition[1])
                    if deterministic:
                        break
            for note in possibletransitions:
                hertz = librosa.note_to_hz(note)
                # print("Hertz:", hertz)
                dfanotes.append(int(hertz * 1000))
                # break
            iter += 1
        npnotes = np.array(dfanotes)
        # scalednpnotes = np.int16(npnotes / np.max(npnotes) * 32767)
        scalednpnotes = npnotes
        print("Notes :", scalednpnotes)
        print("Size of scaled dfanotes:", len(scalednpnotes))
        write("automaton_synthesized_music.wav", maxsamplesize, scalednpnotes)
        return
Example #24
def test_constant_q():
    def __test(sr, fmin, n_bins, bins_per_octave, tuning, filter_scale,
               pad_fft, norm):

        F, lengths = librosa.filters.constant_q(
            sr,
            fmin=fmin,
            n_bins=n_bins,
            bins_per_octave=bins_per_octave,
            tuning=tuning,
            filter_scale=filter_scale,
            pad_fft=pad_fft,
            norm=norm)

        assert np.all(lengths <= F.shape[1])

        eq_(len(F), n_bins)

        if not pad_fft:
            return

        eq_(np.mod(np.log2(F.shape[1]), 1.0), 0.0)

        # Check for vanishing negative frequencies
        F_fft = np.abs(np.fft.fft(F, axis=1))
        # Normalize by row-wise peak
        F_fft = F_fft / np.max(F_fft, axis=1, keepdims=True)
        assert not np.any(F_fft[:, -F_fft.shape[1] // 2:] > 1e-4)

    sr = 11025

    # Try to make a cq basis too close to nyquist
    yield (raises(librosa.ParameterError)(__test), sr, sr / 2.0, 1, 12, 0, 1,
           True, 1)

    # with negative fmin
    yield (raises(librosa.ParameterError)(__test), sr, -60, 1, 12, 0, 1, True,
           1)

    # with negative bins_per_octave
    yield (raises(librosa.ParameterError)(__test), sr, 60, 1, -12, 0, 1, True,
           1)

    # with negative bins
    yield (raises(librosa.ParameterError)(__test), sr, 60, -1, 12, 0, 1, True,
           1)

    # with negative filter_scale
    yield (raises(librosa.ParameterError)(__test), sr, 60, 1, 12, 0, -1, True,
           1)

    # with negative norm
    yield (raises(librosa.ParameterError)(__test), sr, 60, 1, 12, 0, 1, True,
           -1)

    for fmin in [None, librosa.note_to_hz('C3')]:
        for n_bins in [12, 24]:
            for bins_per_octave in [12, 24]:
                for tuning in [0, 0.25]:
                    for filter_scale in [1, 2]:
                        for norm in [1, 2]:
                            for pad_fft in [False, True]:
                                yield (__test, sr, fmin, n_bins,
                                       bins_per_octave, tuning, filter_scale,
                                       pad_fft, norm)
Example #25
def perceptual_cqt(y, sr):
    C = np.abs(librosa.cqt(y, sr=sr, fmin=librosa.note_to_hz('A1')))
    freqs = librosa.cqt_frequencies(C.shape[0], fmin=librosa.note_to_hz('A1'))  # adapted to music
    perceptual_CQT = librosa.perceptual_weighting(C**2, freqs, ref=np.max)
    return perceptual_CQT
Example #26
    # print some signal stuff
    print("x: ", x.shape)
    print("fs: ", fs)
    print("hop: ", hop)
    print("frame length: ", len(x) // hop)

    # extract filename label
    label = extract_label(file)


    # --
    # chroma features

    # calc chroma
    fmin = librosa.note_to_hz('C2')
    chroma = calc_chroma(x, fs, hop, n_octaves=4, bins_per_octave=36, fmin=fmin)


    # --
    # onsets

    h, a, b = 5, 0.09, 0.7

    param_str = '{}_h-{}_a-{}_b-{}'.format(label, h, a, b).replace('.', 'p')

    # calc onsets
    #onsets, onset_times, c, thresh = calc_onsets(x, fs, N=N, hop=hop, adapt_frames=h, adapt_alpha=a, adapt_beta=b)


    # --
Example #27
def generate_dataset(mfw, base_path, audio_preview=False, use_cqt=True, interactive_plots=False):
    utils.ensure_parent_exists(base_path)

    path_midi = "{}.mid".format(base_path)
    path_wave = "{}.wav".format(base_path)
    store_midi_and_wave(mfw.midi_file, path_midi, path_wave)

    if audio_preview:
        os.system("audacious '{}' &".format(path_wave))

    sr, wave_data = read_wave(path_wave)

    if use_cqt:
        print("Transforming data")
        bins_per_note = 4
        bins_per_octave = 12 * bins_per_note
        n_octaves = 9
        n_bins = n_octaves * bins_per_octave
        hop_length = 512
        lowest_note_name = "C1"  # cqt default
        lowest_note_hz = librosa.note_to_hz(lowest_note_name)
        lowest_note_midi = librosa.note_to_midi(lowest_note_name)
        # https://librosa.github.io/librosa/generated/librosa.core.cqt.html
        C = cqt(
            wave_data,
            sr=sr,
            fmin=lowest_note_hz,
            n_bins=n_bins,
            bins_per_octave=bins_per_octave,
            hop_length=hop_length,
            filter_scale=1.0,
            #sparsity=0.0,
            tuning=0.0,     # we don't want automatic tuning estimation
        )

        mag = np.abs(C).astype(np.float32)

        # 16th notes at 200 bpm are 800 notes/min = 13.3 notes/sec => note duration = 75 ms
        print("Sample rate: {}".format(sr))
        print("Hop duration: {:.1f} ms".format(hop_length / sr * 1000))
        print("Length audio: {:.1f} sec".format(len(wave_data) / sr))
        print("Shape audio:       {} [{}, {:.1f} MB]".format(
            wave_data.shape, wave_data.dtype, wave_data.nbytes / 1e6))
        print("Shape transformed: {} [{}, {:.1f} MB]".format(
            mag.shape, mag.dtype, mag.nbytes / 1e6))

        # Groundtruth extraction with same shape
        groundtruth = mfw.extract_groundtruth(
            raw_length=len(wave_data),
            sample_rate=sr,
            lowest_note=lowest_note_midi,
            highest_note=lowest_note_midi + n_octaves * 12,
            hop_length=hop_length,
            bins_per_note=4,
        )

        print("Storing dataset")
        path_X = "{}_X.npy".format(base_path)
        path_Y = "{}_Y.npy".format(base_path)
        np.save(path_X, mag)
        np.save(path_Y, groundtruth)

        print("Generating plots")
        plot_dataset(C, groundtruth, base_path, sr, lowest_note_hz, hop_length, bins_per_octave, interactive_plots)

    else:
        np.save("wavedata.npy", data)
        os.system("nim -r c ./src/process_wave_data wavedata.npy")
        data = np.load("wavedata_preprocessed.npy")
        data = data[::-1, :]

        fig, ax = plt.subplots(1, 1, figsize=(16, 10))
        plt.subplots_adjust(left=0.05, bottom=0.05, top=0.95, right=0.95)
        plt.imshow(data, aspect='auto')
        plt.show()
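For reference on the timing comment above: at a typical sr of 44100, the printed hop duration is 512 / 44100 * 1000, roughly 11.6 ms, comfortably finer than the ~75 ms sixteenth-note duration estimated in the code.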
Example #28
import os
import os.path as osp
from hashlib import sha1
from subprocess import DEVNULL, call, check_call

import librosa
import numpy as np
from scipy.spatial import ConvexHull

cqt_fmin = librosa.note_to_hz('A1')
ffmpeg_path = './codecs/ffmpeg.exe'


class Codec:  # TODO: add flag indicating whether supporting hires files
    def __init__(self, path, cmd_args):
        '''
        path: path to the codec binary
        cmd_args: (extr) command line args for encoding, used for adjusting compression level
        '''
        self.path = osp.abspath(path)
        if cmd_args is None:
            self.cmd_args = []
        elif isinstance(cmd_args, (list, tuple)):
            self.cmd_args = list(cmd_args)
        else:
            self.cmd_args = [cmd_args]

    def encode(self, fin):
        pass

    def decode(self, fin):
        pass
Example #29
def beatract(dir_name, file_name=-1, save_dir=-1, addable_option="-n", \
specific=4, threshold_length=8, show_graph=-1, save_graph=-1, debugmode=-1, \
time_variation=0.5):
    '''
    At dir_name/file_name, extract beats and save them to a text file in save_dir.
    Args:
    Return:
    Raise:
        nothing.
    '''
    # if file_name is default value, check all file in directory.
    if file_name == -1:
        file_names = os.listdir(dir_name)
    else:
        file_names = [file_name]

    # if save_dir is default value, save_dir is in source directory.
    if save_dir == -1:
        save_dir = dir_name

    # now is now beat extracting number.
    now = 0
    for file_name in file_names:
        now += 1
        if debugmode != -1:
            # if debugmode on, write debugging message to console.
            print "Strat extracting " + file_name + "... Now " + str(now) + " / "+  \
            str(len(file_names))

        dest_file = to_wav(dir_name, dir_name, file_name, addable_option)
        # if want to extract some given length, give load to duration value.
        audio_list, sampling_rate = lb.load(dest_file, offset=0.0)
        if debugmode != -1:
            # if debugmode on, write debugging message to console.
            print "file opend..." + "... Now " + str(now) + " / "+  str(len(file_names))

        music = lb.cqt(audio_list, sr=sampling_rate, fmin=lb.note_to_hz('C1'), n_bins=60*specific, \
        bins_per_octave=12*specific)

        if debugmode != -1:
            # if debugmode on, write debugging message to console.
            print "file CQT finished..." + "... Now " + str(now) + " / "+  str(len(file_names))

        threshold = get_threshold(music)
        _, r_harmonic = parse_noise(music, threshold)

        if debugmode != -1:
            # if debugmode on, write debugging message to console.
            print "file CQT harmonics extracted..." + "... Now " + str(now) + " / " + \
            str(len(file_names))

        note = stage_note(r_harmonic)

        _, note_list, icoef_table, _ = bt2.tie_note(note, threshold_length, debug_mode=1)
        weights = bt2.weightract(r_harmonic, note, note_list, icoef_table)

        # Set Time variation for input values.
        real_weights = bt2.set_time_variation(weights, \
        get_music_time(sampling_rate, len(audio_list)), sampling_rate, \
        time_variation=time_variation)

        save_to(save_dir, file_name.split(".")[0] + ".txt", real_weights)

        if debugmode != -1:
            print "finished extract file..." + "... Now " + str(now) + " / "+  str(len(file_names))

        if show_graph != -1:
            # if show graph is on...
            plt.figure()
            plt.plot(real_weights)
            plt.show()

        if save_graph != -1:
            # if save graph is on...
            plt.figure()
            plt.plot(real_weights)
            plt.savefig(str(dir_name)+"/"+str(file_name.split(".")[0] + ".png"))
Example #30
import os

import numpy
import librosa
import pyaudio
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras import utils

# Define global variables.
CHANNELS = 1
RATE = 44100
FRAMES_PER_BUFFER = 1024 * 17
N_FFT = 4096
SCREEN_WIDTH = 178
ENERGY_THRESHOLD = 0.4
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Choose the frequency range of your log-spectrogram.
F_LO = librosa.note_to_hz('C2')
F_HI = librosa.note_to_hz('C9')
M = librosa.filters.mel(RATE, N_FFT, SCREEN_WIDTH, fmin=F_LO, fmax=F_HI)
p = pyaudio.PyAudio()
loaded_model = tf.keras.models.load_model(
    '/Users/Dodanto/Documents/GitHub/SnapPoint/SnapPoint.h5')


def mfcc(y):
    sample = librosa.feature.mfcc(y=y, sr=44100, n_mfcc=40)
    sample = numpy.expand_dims(sample.T, axis=0)
    return sample


def test(sample):
    data = mfcc(sample)
Example #31
    window = getattr(scipy.signal.windows, window_name)

    wdec = librosa.filters.__float_window(window)

    if n == int(n):
        n = int(n)
        assert np.allclose(wdec(n), window(n))
    else:
        wf = wdec(n)
        fn = int(np.floor(n))
        assert not np.any(wf[fn:])


@pytest.mark.parametrize("sr", [11025])
@pytest.mark.parametrize("fmin", [None, librosa.note_to_hz("C3")])
@pytest.mark.parametrize("n_bins", [12, 24])
@pytest.mark.parametrize("bins_per_octave", [12, 24])
@pytest.mark.parametrize("filter_scale", [1, 2])
@pytest.mark.parametrize("norm", [1, 2])
@pytest.mark.parametrize("pad_fft", [False, True])
def test_constant_q(sr, fmin, n_bins, bins_per_octave, filter_scale, pad_fft,
                    norm):

    F, lengths = librosa.filters.constant_q(
        sr=sr,
        fmin=fmin,
        n_bins=n_bins,
        bins_per_octave=bins_per_octave,
        filter_scale=filter_scale,
        pad_fft=pad_fft,
Example #32
def segmentation(song, display=False):
    '''
    Takes in a song and returns a dictionary of its major segments, computed from the spectrogram, bpm, and beat track.
    It also fills the song's beatTrack and uses it in the segmentation algorithm.
    Algorithm written by: Brian McFee https://bmcfee.github.io/

    :param song: (Song)      | song to segment
    :param display: (bool)   | optional argument to display graph of segments using matPlotLib
    :return: seg_dict (dict) | dictionary of segments
    '''
    import numpy as np
    import scipy
    import matplotlib.pyplot as plt
    import sklearn.cluster

    y = song.load.y
    sr = song.load.sr
    beat_track = song.beat_track

    BINS_PER_OCTAVE = 12 * 3
    N_OCTAVES = 7
    C = librosa.amplitude_to_db(librosa.cqt(y=y,
                                            sr=sr,
                                            bins_per_octave=BINS_PER_OCTAVE,
                                            n_bins=N_OCTAVES *
                                            BINS_PER_OCTAVE),
                                ref=np.max)

    # To reduce dimensionality, we'll beat-synchronize the CQT
    tempo, beats = tuple(beat_track)

    Csync = librosa.util.sync(C, beats, aggregate=np.median)

    #####################################################################
    # Let's build a weighted recurrence matrix using beat-synchronous CQT
    # width=3 prevents links within the same bar
    # mode='affinity' here implements S_rep
    R = librosa.segment.recurrence_matrix(Csync,
                                          width=3,
                                          mode='affinity',
                                          sym=True)

    # Enhance diagonals with a median filter (Equation 2)
    df = librosa.segment.timelag_filter(scipy.ndimage.median_filter)
    Rf = df(R, size=(1, 7))

    ###################################################################
    # Now let's build the sequence matrix (S_loc) using mfcc-similarity

    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    Msync = librosa.util.sync(mfcc, beats)

    path_distance = np.sum(np.diff(Msync, axis=1)**2, axis=0)
    sigma = np.median(path_distance)
    path_sim = np.exp(-path_distance / sigma)

    R_path = np.diag(path_sim, k=1) + np.diag(path_sim, k=-1)

    ##########################################################
    # And compute the balanced combination

    deg_path = np.sum(R_path, axis=1)
    deg_rec = np.sum(Rf, axis=1)

    mu = deg_path.dot(deg_path + deg_rec) / np.sum((deg_path + deg_rec)**2)

    A = mu * Rf + (1 - mu) * R_path

    #####################################################
    # Now let's compute the normalized Laplacian
    L = scipy.sparse.csgraph.laplacian(A, normed=True)

    # and its spectral decomposition
    evals, evecs = scipy.linalg.eigh(L)

    # We can clean this up further with a median filter.
    # This can help smooth over small discontinuities
    evecs = scipy.ndimage.median_filter(evecs, size=(9, 1))

    # cumulative normalization is needed for symmetric normalized Laplacian eigenvectors
    Cnorm = np.cumsum(evecs**2, axis=1)**0.5

    # If we want k clusters, use the first k normalized eigenvectors.
    k = 5

    X = evecs[:, :k] / Cnorm[:, k - 1:k]

    #############################################################
    # Let's use these k components to cluster beats into segments
    KM = sklearn.cluster.KMeans(n_clusters=k)

    seg_ids = KM.fit_predict(X)

    bound_beats = 1 + np.flatnonzero(seg_ids[:-1] != seg_ids[1:])

    bound_beats = librosa.util.fix_frames(bound_beats, x_min=0)

    bound_segs = list(seg_ids[bound_beats])

    bound_frames = beats[bound_beats]

    bound_frames = librosa.util.fix_frames(bound_frames,
                                           x_min=None,
                                           x_max=C.shape[1] - 1)

    bound_tuples = []
    for i in range(1, len(bound_frames)):
        bound_tuples.append((bound_frames[i - 1], bound_frames[i] - 1))
    bound_tuples = tuple(map(lambda x: librosa.frames_to_time(x),
                             bound_tuples))

    pairs = zip(bound_segs, bound_tuples)
    seg_dict = dict()
    for seg, frame in pairs:
        seg_dict.setdefault(seg, []).append(frame)

    if display:
        import matplotlib.patches as patches
        plt.figure(figsize=(12, 4))
        colors = plt.get_cmap('Paired', k)

        bound_times = librosa.frames_to_time(bound_frames)
        freqs = librosa.cqt_frequencies(n_bins=C.shape[0],
                                        fmin=librosa.note_to_hz('C1'),
                                        bins_per_octave=BINS_PER_OCTAVE)

        librosa.display.specshow(C,
                                 y_axis='cqt_hz',
                                 sr=sr,
                                 bins_per_octave=BINS_PER_OCTAVE,
                                 x_axis='time')
        ax = plt.gca()

        for interval, label in zip(zip(bound_times, bound_times[1:]),
                                   bound_segs):
            ax.add_patch(
                patches.Rectangle((interval[0], freqs[0]),
                                  interval[1] - interval[0],
                                  freqs[-1],
                                  facecolor=colors(label),
                                  alpha=0.50))

        plt.tight_layout()
        plt.show()

    return seg_dict
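A minimal usage sketch, assuming a song object whose load.y, load.sr, and beat_track attributes are populated as the function expects; each value in seg_dict is a list of [start_time, end_time] arrays for one cluster label:

seg_dict = segmentation(song, display=False)
for label, intervals in seg_dict.items():
    print('segment class', label, '->', len(intervals), 'intervals')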
Example #33
def test_note_to_hz_badnote():
    librosa.note_to_hz("does not pass")
Example #34
n_components = None
if len(argv) > 3:
    n_components = int(argv[3])

from librosa import load, cqt, logamplitude, note_to_midi, note_to_hz
import numpy as np

# load an audio file (with samplerate)
x, sr = load(filename)

# compute constant-Q transform (~ pitch-based STFT)
#hop_size = 512
pitch_max = note_to_midi('D5')
pitch_min = 'B3'
pitch_min_number = note_to_midi(pitch_min)
C = cqt(x, sr=sr, fmin=note_to_hz(pitch_min), n_bins=pitch_max-pitch_min_number)

# try some midi visualization
from Midi import midi_matrix

midi_mat = midi_matrix(midi_filename, min_pitch=note_to_midi(pitch_min))

# NMF

#V = np.log10(1 + 100000 * C**2)
V = np.abs(C).transpose()

W_zero = np.zeros((pitch_max - pitch_min_number, pitch_max - pitch_min_number))
pitch = pitch_min_number
for comp in W_zero:
    comp[pitch-pitch_min_number] = 1.0
Example #35
    def chroma_cqt_processed(self,
                             n_chroma=12,
                             n_octaves=7,
                             bins_per_octave=12,
                             fmin='C1',
                             margin=8,
                             kernel_size=31,
                             power=2.0,
                             mask=False,
                             params=None):
        """
        Adapted from librosa docs
        https://librosa.github.io/librosa_gallery/auto_examples/plot_chroma.html
        """
        from scipy.ndimage import median_filter

        if params is not None:
            if 'fmin' in params.keys():
                if isinstance(params['fmin'], str):
                    fmin = librosa.note_to_hz(params['fmin'])
                else:
                    fmin = params['fmin']
            if 'n_chroma' in params.keys():
                n_chroma = params['n_chroma']
            if 'n_octaves' in params.keys():
                n_octaves = params['n_octaves']
            if 'bins_per_octave' in params.keys():
                bins_per_octave = params['bins_per_octave']
            if 'margin' in params.keys():
                margin = params['margin']
            if 'kernel_size' in params.keys():
                kernel_size = params['kernel_size']
            if 'power' in params.keys():
                power = params['power']
            if 'mask' in params.keys():
                mask = params['mask']
        else:
            if fmin and isinstance(fmin, str):
                fmin = librosa.note_to_hz(fmin)

        harmonic = librosa.effects.harmonic(y=self.audio_vector,
                                            margin=margin,
                                            kernel_size=kernel_size,
                                            power=power,
                                            mask=mask)
        chroma_cqt_harm = librosa.feature.chroma_cqt(
            y=harmonic,
            sr=self.fs,
            bins_per_octave=bins_per_octave,
            hop_length=self.hop_length,
            fmin=fmin,
            n_chroma=n_chroma,
            n_octaves=n_octaves)
        chroma_filter = np.minimum(
            chroma_cqt_harm,
            librosa.decompose.nn_filter(chroma_cqt_harm,
                                        aggregate=np.median,
                                        metric='cosine'))
        chroma_smooth = median_filter(chroma_filter, size=(1, 9))
        return {
            'chroma_filtered': chroma_filter,
            'chroma_smoothed': chroma_smooth
        }
Example #36
    def add_pitch(self, **kwargs):
        self.pitch_fmin = kwargs.pop("pitch_fmin", librosa.note_to_hz('C2'))
        self.pitch_fmax = kwargs.pop("pitch_fmax", librosa.note_to_hz('C7'))
        self.pitch_avg = kwargs.pop("pitch_avg", None)
        self.pitch_std = kwargs.pop("pitch_std", None)
        self.pitch_norm = kwargs.pop("pitch_norm", False)
Example #37
def make_ground():

    # ground_path = '/mnt/Stuff/Acads/UGP/mycode/ground_voc'
    # ground_list = [x.split('.')[0].strip() for x in os.listdir(ground_path)]

    #os.chdir("./ground_voc")

    Fmin = librosa.note_to_hz('C2')
    Fmax = librosa.note_to_hz('C7')

    # uf (defined elsewhere) presumably builds the 61 semitone-spaced
    # reference frequencies from C2 to C7: log2(Fmax/Fmin) * 12 + 1 points
    note = uf(np.arange(int(np.log2(Fmax / Fmin) * 12 + 1)), Fmin, 1)

    # melody2 = "/mnt/Stuff/Acads/UGP/medleydb/medleydb/data/Annotations/Melody/\
    #             Melody2/"

    # mirk = "/mnt/data/datasets/MIR-1K/"

    bach = "/mnt/data/datasets/Bach10_v1.1/"

    # for song in os.listdir(melody2):
    #     print(song.rsplit(".")[0][:-8])
    #     path = melody2 + song
    #     liz = pd.read_csv(path,names = ['time','freq'])
    #     liz = liz.to_numpy()
    #
    #     #print(liz.shape[0])
    #
    #     N = int(liz.shape[0]/2 +1)
    #     #print(N)
    #
    #     i=0
    #     #ground_liz = np.zeros([61,N])
    #     gl = np.zeros(N)
    #     for x in liz:
    #         if i%2==0:
    #             gl[int(i/2)] = np.argwhere(note == find_nearest(note,x[1]))
    #         #ground_liz[int(gl[int(i/2)]) , int(i/2)] = 1
    #         i+=1
    #     save_path = song.rsplit(".")[0][:-8] + ".npy"
    #     np.save(save_path , gl)
    #     print(" Done.")

    # for file in os.listdir(mirk):
    #     if file.split('.')[1] == 'pv':
    #         path = mirk + file
    #         liz = []
    #         ff = open(path , 'r')
    #         freq = [float(x) for x in ff]
    #
    #         N = len(freq)
    #
    #         print(N)
    #
    #         i = 0
    #         gl = np.zeros(N)
    #         for x in freq:
    #             gl[i] = round(x) #np.argwhere(note == find_nearest(note , x))
    #             i+=1
    #         save_path = file.split('.')[0] + '.npy'
    #         np.save(save_path , gl)
    #         print('Done.')

    for song in os.listdir(bach):
        print(song)
        file = bach + song + '/' + song + '-GTF0s.mat'
        f = loadmat(file)
        f = f['GTF0s']
        # One ground-truth F0 track per instrument part
        # (the values appear to be MIDI note numbers)
        for i, ch in enumerate(['violin', 'clarinet', 'saxophone', 'bassoon']):
            fr = f[i]
            gl = fr.astype(float) - 36 + 1  # shift so MIDI 36 (C2) maps to bin 1
            save_path = 'ground_bach/' + song + '-' + ch + '.npy'
            np.save(save_path, gl)
            print('Done.')
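For reference, a hedged reconstruction of the nearest-note quantisation that the commented-out blocks (and the uf grid) rely on; find_nearest_idx is an assumed variant that returns an index rather than a frequency value:

import numpy as np
import librosa

Fmin = librosa.note_to_hz('C2')
Fmax = librosa.note_to_hz('C7')
# 61 semitone-spaced frequencies from C2 up to C7 inclusive
note = Fmin * 2.0 ** (np.arange(int(np.log2(Fmax / Fmin) * 12 + 1)) / 12.0)

def find_nearest_idx(grid, freq):
    """Index of the grid frequency closest to freq, in log-frequency terms."""
    return int(np.argmin(np.abs(np.log2(grid / freq))))

idx = find_nearest_idx(note, 440.0)
print(idx, librosa.hz_to_note(note[idx]))   # 33 A4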
Example #38
0
def test_hybrid_cqt():
    # This test verifies that hybrid and full cqt agree to within 1e-3
    # (relative to peak magnitude) on 99% of the bins which are
    # non-negligible (> 1e-4 of peak) in either representation.

    sr = 11025
    duration = 5.0

    y = make_signal(sr, duration, None)

    def __test(hop_length, fmin, n_bins, bins_per_octave, tuning, resolution,
               norm, sparsity, res_type):

        C2 = librosa.hybrid_cqt(y,
                                sr=sr,
                                hop_length=hop_length,
                                fmin=fmin,
                                n_bins=n_bins,
                                bins_per_octave=bins_per_octave,
                                tuning=tuning,
                                filter_scale=resolution,
                                norm=norm,
                                sparsity=sparsity,
                                res_type=res_type)

        C1 = np.abs(
            librosa.cqt(y,
                        sr=sr,
                        hop_length=hop_length,
                        fmin=fmin,
                        n_bins=n_bins,
                        bins_per_octave=bins_per_octave,
                        tuning=tuning,
                        filter_scale=resolution,
                        norm=norm,
                        sparsity=sparsity,
                        res_type=res_type))

        assert C1.shape == C2.shape

        # Check for numerical comparability
        idx1 = (C1 > 1e-4 * C1.max())
        idx2 = (C2 > 1e-4 * C2.max())

        perc = 99  # 99th percentile; np.percentile expects values in [0, 100]

        thresh = 1e-3

        idx = idx1 | idx2

        assert np.percentile(np.abs(C1[idx] - C2[idx]),
                             perc) < thresh * max(C1.max(), C2.max())

    for fmin in [None, librosa.note_to_hz('C2')]:
        for n_bins in [1, 12, 24, 48, 72, 74, 76]:
            for bins_per_octave in [12, 24]:
                for tuning in [None, 0, 0.25]:
                    for resolution in [1, 2]:
                        for norm in [1, 2]:
                            for res_type in [None, 'polyphase']:
                                yield (__test, 512, fmin, n_bins,
                                       bins_per_octave, tuning, resolution,
                                       norm, 0.01, res_type)
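A standalone hedged sketch of the same comparison outside the yield-style harness, assuming a librosa version that still ships hybrid_cqt (it was removed in librosa 0.10):

import numpy as np
import librosa

sr = 11025
y = librosa.chirp(fmin=librosa.note_to_hz('C1'), fmax=sr / 3, sr=sr, duration=5.0)

C_full = np.abs(librosa.cqt(y, sr=sr, hop_length=512, n_bins=48))
C_hyb = librosa.hybrid_cqt(y, sr=sr, hop_length=512, n_bins=48)

idx = (C_full > 1e-4 * C_full.max()) | (C_hyb > 1e-4 * C_hyb.max())
print(np.percentile(np.abs(C_full[idx] - C_hyb[idx]), 99) / C_full.max())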
Example #39
0
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt

y, sr = librosa.load(
    r"C:\Users\theko\Documents\Dataset\022035001\Tukiya_Tiruvadi.mp3",
    sr=22050)

cqt = librosa.cqt(y, sr=sr, fmin=librosa.note_to_hz("C2"), n_bins=48)

c = np.abs(cqt)

# fig, ax = plt.subplots()
#
# img = librosa.display.specshow(librosa.amplitude_to_db(c, ref=np.max), sr=sr, x_axis='time', y_axis='cqt_note', ax=ax)
#
# ax.set_title('Constant-Q power spectrum')
#
# fig.colorbar(img, ax=ax, format="%+2.0f dB")
# Most energetic CQT bin in each frame
peak_bins = np.argmax(c, axis=0)

# Histogram of peak bins across frames
notes = np.bincount(peak_bins, minlength=48)

# fmin='C2' is MIDI 36, so the 48 bins span MIDI notes 36..83
notes_X = np.arange(36, 36 + 48)
plt.plot(notes_X, notes)
plt.show()
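Since fmin='C2' is MIDI 36, the histogram peak can be labelled with a note name directly:

# Label the most frequent CQT bin (C2 == MIDI 36 at 12 bins per octave)
top_bin = int(np.argmax(notes))
print(librosa.midi_to_note(36 + top_bin))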
Example #40
0
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        args, _ = parser.parse_known_args()

        if args.samplerate is None:
            self.samplerate = \
                int(sd.query_devices(args.input_device)['default_samplerate'])
        else:
            self.samplerate = int(args.samplerate)
        print(f"INFO -- Sampling rate at {self.samplerate} Hz")

        self.threadpool = QtCore.QThreadPool()

        self.q = queue.Queue()

        self.setFixedSize(args.width, args.height)
        self.mainbox = QtWidgets.QWidget()
        self.setCentralWidget(self.mainbox)
        self.layout = QtWidgets.QGridLayout()
        self.mainbox.setLayout(self.layout)

        # Widgets
        self.spec_plot = SpectrogramWidget()
        self.wave_plot = WaveFormWidget()

        for i, widget in enumerate([self.spec_plot, self.wave_plot]):
            self.layout.addWidget(widget, i, 0)

        # Initialize x and y
        self.length = self.samplerate * args.duration
        self.y = np.random.rand(self.length, len(args.channels))
        self.x = np.linspace(0, args.duration, num=self.length)

        self.zcr = librosa.feature.zero_crossing_rate(self.y.mean(axis=1))[0]

        # Wave Plot
        self.waveline_1 = self.wave_plot.plot(x=self.x,
                                              y=self.y[:, 0],
                                              pen=pg.mkPen('g', width=0.5),
                                              name='channel_1')
        self.waveline_2 = self.wave_plot.plot(x=self.x,
                                              y=self.y[:, 1],
                                              pen=pg.mkPen('y', width=0.5),
                                              name='channel_2')
        self.waveline_3 = self.wave_plot.plot(x=np.linspace(
            0, args.duration, self.zcr.shape[0]),
                                              y=self.zcr,
                                              pen=pg.mkPen('r', width=2),
                                              name='zcr')

        # Spectrogram
        self.fmax = int(
            librosa.core.fft_frequencies(sr=self.samplerate,
                                         n_fft=args.n_fft)[-1])
        D = librosa.stft(y=self.y.mean(axis=1), n_fft=args.n_fft, center=False)
        self.specdata = librosa.amplitude_to_db(np.abs(D), ref=np.max)

        # M = librosa.feature.melspectrogram(
        #             y=self.y.mean(axis=1),
        #             sr=self.samplerate,
        #             n_fft=args.n_fft,
        #             n_mels=args.n_mels)
        # self.specdata = librosa.power_to_db(S=M, ref=np.max)

        self.F0 = librosa.yin(y=self.y.mean(axis=1),
                              sr=self.samplerate,
                              frame_length=2048,
                              fmin=librosa.note_to_hz('C2'),
                              fmax=librosa.note_to_hz('C5'),
                              center=False)
        self.spec_image = pg.ImageItem(item=self.specdata.T)
        self.spec_plot.addItem(item=self.spec_image)
        self.f0_line = self.spec_plot.plot(x=np.linspace(
            0, args.duration, self.F0.shape[0]),
                                           y=self.F0,
                                           pen=pg.mkPen('r', width=2),
                                           name='f0')
        self.bar = pg.ColorBarItem(values=(librosa.note_to_hz('C2'),
                                           librosa.note_to_hz('C5')),
                                   cmap=pg.colormap.get('CET-L9'))
        self.bar.setImageItem(self.spec_image)

        # Start audio stream and animations
        self.start_stream()
        if args.input_device == 'Virtual Input (VB-Audio Virtual Cable), Windows DirectSound':
            self.play_media(media_url=args.media_url,
                            type='stream',
                            volume=100)
        self.animate()
        self.show()
Example #41
0
def main():
    """
    Compare the real variant with different max pooling settings to librosa HVQT.
    """

    # Select parameters to use across all implementations
    n_bins = 216  # 6 octaves
    gamma = None  # default gamma
    hop_length = 512
    bins_per_octave = 36
    fmin = librosa.note_to_hz('C1')
    harmonics = [0.5, 1, 2, 3, 4, 5]

    # Load an example piece of audio
    y, sr = librosa.load(librosa.util.example_audio_file())

    # Calculate the HVQT using librosa
    lib_start = time()
    lib_hvqt = librosa_hvqt(y, harmonics, sr, hop_length, fmin, n_bins,
                            bins_per_octave, gamma)
    print(f'Processing Time (Librosa): {time() - lib_start}')

    # Print a new line
    print()

    # Convert librosa HVQT to decibels
    lib_hvqt = librosa.amplitude_to_db(lib_hvqt, ref=np.max)

    # Set the device for the convolutional implementations
    device = 1
    device = torch.device(
        f'cuda:{device}' if torch.cuda.is_available() else 'cpu')

    # Add a batch and channel dimension to the audio, and make it a tensor
    y = torch.Tensor([[y]]).to(device)

    # Construct and evaluate the real-only variant over a range of max
    # pooling settings (random/update pinned off for every setting, as in
    # the original max_p=1 configuration)
    for max_p in [1, 2, 4, 8, 16, 32]:
        lhvqt_real = LHVQT(lvqt=LVQT_R,
                           harmonics=harmonics,
                           fs=sr,
                           hop_length=hop_length,
                           fmin=fmin,
                           n_bins=n_bins,
                           bins_per_octave=bins_per_octave,
                           gamma=gamma,
                           max_p=max_p,
                           random=False,
                           update=False,
                           to_db=False,
                           db_to_prob=False,
                           batch_norm=False,
                           var_drop=False).to(device)

        # Compute the response from the real variant
        rea_start = time()
        rea_hvqt = lhvqt_real(y)
        print(f'Processing Time (Real w/ MP={max_p}): {time() - rea_start}')

        # Remove from the device and convert back to ndarray
        rea_hvqt = rea_hvqt.squeeze(0).cpu().detach().numpy()

        # Convert HVQT to decibels
        rea_hvqt = librosa.amplitude_to_db(rea_hvqt, ref=np.max)

        # Compute similarities after putting all transforms on dB scale
        print('Real Variant Similarity (MP=%d): %1.2f%%' %
              (max_p, 100 * hvqt_similarity(rea_hvqt, lib_hvqt)))

        # Print a new line
        print()
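For orientation, librosa_hvqt above (from the lhvqt repo, not librosa itself) is presumably a stack of harmonically shifted transforms; a hedged reference construction using plain librosa.vqt:

import numpy as np
import librosa

def hvqt_reference(y, harmonics, sr, hop_length, fmin, n_bins, bins_per_octave):
    """Hedged stand-in: one magnitude VQT per harmonic, stacked on axis 0."""
    return np.stack([np.abs(librosa.vqt(y, sr=sr, hop_length=hop_length,
                                        fmin=h * fmin, n_bins=n_bins,
                                        bins_per_octave=bins_per_octave))
                     for h in harmonics])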
Example #42
0
import ast

import numpy as np
import librosa
from scipy.io.wavfile import write


def notes_to_audio(automaton=False, function=None, deterministic=True,
                   maxsamplesize=44100):
    if function is not None:
        print("###################################################")
        print("Function to Audio")
        print("###################################################")
        print("Function:", function)
        # Example:
        # >>> [eval('x*x+x+1') for x in range(1, 10)]
        # [3, 7, 13, 21, 31, 43, 57, 73, 91]
        notes = [eval(function) for x in range(0, 44100)]
        npnotes = np.asarray(notes)
        # scalednpnotes = np.int16(npnotes / np.max(npnotes) * 32767)
        scalednpnotes = npnotes
        print("Notes :", scalednpnotes)
        print("Size of scaled notes:", len(scalednpnotes))
        write("function_synthesized_music.wav", maxsamplesize, scalednpnotes)
        return
    if function is None and automaton is False:
        print("###################################################")
        print("Notes to Audio")
        print("###################################################")
        npnotes = np.random.uniform(10, 100, 44100)
        # scalednpnotes = np.int16(npnotes / np.max(npnotes) * 32767)
        scalednpnotes = npnotes
        print("Notes :", scalednpnotes)
        print("Size of scaled notes:", len(scalednpnotes))
        write("notes_synthesized_music.wav", maxsamplesize, scalednpnotes)
        return
    if automaton is True:
        print("###################################################")
        print("Automaton to Audio")
        print("###################################################")
        with open("NotesStateMachine.txt", "r") as machine_file:
            states2notes_machine = ast.literal_eval(machine_file.read())
        dfanotes = [int(librosa.note_to_hz(states2notes_machine['start-s1']) * 1000)]
        prevstates = ['start']
        iteration = 0
        while iteration < maxsamplesize - 1:
            possibletransitions = []
            prevprevstates = prevstates
            prevstates = []
            # Collect every transition leaving any of the previous states
            for k, v in states2notes_machine.items():
                statetransition = k.split("-")
                if statetransition[0] in prevprevstates:
                    possibletransitions.append(v)
                    prevstates.append(statetransition[1])
                    if deterministic:
                        break
            for note in possibletransitions:
                hertz = librosa.note_to_hz(note)
                # print("Hertz:", hertz)
                dfanotes.append(int(hertz * 1000))
            iteration += 1
        npnotes = np.array(dfanotes)
        # scalednpnotes = np.int16(npnotes / np.max(npnotes) * 32767)
        scalednpnotes = npnotes
        print("Notes :", scalednpnotes)
        print("Size of scaled dfanotes:", len(scalednpnotes))
        write("automaton_synthesized_music.wav", maxsamplesize, scalednpnotes)
        return
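Note that the snippet above writes raw frequency values as PCM samples, so the result is noise-like rather than pitched; a hedged sketch of rendering each note as a short sine tone instead:

import numpy as np
import librosa
from scipy.io.wavfile import write

def render_notes(note_names, sr=44100, dur=0.25):
    """Concatenate one short sine tone per note name and write a wav file."""
    t = np.arange(int(sr * dur)) / sr
    tones = [np.sin(2 * np.pi * librosa.note_to_hz(n) * t) for n in note_names]
    audio = np.concatenate(tones)
    write('rendered_notes.wav', sr, np.int16(audio / np.max(np.abs(audio)) * 32767))

render_notes(['C4', 'E4', 'G4', 'C5'])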
Example #43
0
bound_frames = librosa.util.fix_frames(bound_frames,
                                       x_min=None,
                                       x_max=C.shape[1]-1)

###################################################
# And plot the final segmentation over original CQT


# sphinx_gallery_thumbnail_number = 5

import matplotlib.patches as patches
plt.figure(figsize=(12, 4))

bound_times = librosa.frames_to_time(bound_frames)
freqs = librosa.cqt_frequencies(n_bins=C.shape[0],
                                fmin=librosa.note_to_hz('C1'),
                                bins_per_octave=BINS_PER_OCTAVE)

librosa.display.specshow(C, y_axis='cqt_hz', sr=sr,
                         bins_per_octave=BINS_PER_OCTAVE,
                         x_axis='time')
ax = plt.gca()

for interval, label in zip(zip(bound_times, bound_times[1:]), bound_segs):
    ax.add_patch(patches.Rectangle((interval[0], freqs[0]),
                                   interval[1] - interval[0],
                                   freqs[-1],
                                   facecolor=colors(label),
                                   alpha=0.50))

plt.tight_layout()
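For context, librosa.util.fix_frames clips out-of-range frames, merges duplicates, and (with the default pad=True) appends the supplied endpoints; a quick illustration:

import numpy as np
import librosa

frames = np.array([10, 10, 50, 120])
print(librosa.util.fix_frames(frames, x_min=None, x_max=99))   # [10 50 99]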
Example #44
0
    # file name to mat file with onsets and midi notes-------------------------
    mat_file_name = '01-AchGottundHerr-GTF0s.mat'

    # Loading file in memory---------------------------------------------------
    file_name = '01-AchGottundHerr_4Kanal.wav'
    file_path = 'ignore/sounds/'
    full_name = file_path + file_name
    audio_data, sampling_rate = libr.load(full_name, sr=None, duration=5)

    # CQT Params---------------------------------------------------------------
    hop = 256
    start_note = 'C2'
    cqt = libr.cqt(audio_data,
                   sr=sampling_rate,
                   hop_length=hop,
                   fmin=libr.note_to_hz(start_note),
                   n_bins=48,
                   bins_per_octave=12)

    # Define common harmonic structure-----------------------------------------
    cqt_bins = 48
    list_chs = [0, 12, 19, 24, 28, 31]

    # First initialisation of fundamental frequency distribution---------------
    chs = initial_harmonics(list_chs, np.zeros((cqt_bins, 1)), option=1)
    u, v = inverse_filter(cqt, chs, cqt_bins)
    u_bar = non_linear_mapping(u)
    len_u = len(u_bar[:, 0])

    # iterative algorithm------------------------------------------------------
    (num_rows, num_cols) = cqt.shape
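The "common harmonic structure" list_chs above is just the first six harmonics expressed as rounded semitone offsets above the fundamental, which a one-liner reproduces:

import numpy as np
# 12 * log2(h) semitones for harmonics h = 1..6 -> [ 0 12 19 24 28 31]
print(np.round(12 * np.log2(np.arange(1, 7))).astype(int))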
Example #45
0
import json
import numpy as np
import sys
import tensorflow as tf
import random
import os
import matplotlib.pyplot as plt
import librosa
from scipy import signal
import pickle

from wavenet import (WaveNetModel, time_to_batch, batch_to_time, causal_conv,
                     optimizer_factory, mu_law_decode, image2vector)

NOTES = ['D#3', 'G3', 'A#3']  # E-flat major triad (Eb, G, Bb spelled with sharps)
NOTES_HZ = librosa.note_to_hz(NOTES)

SAMPLE_RATE_HZ = 2000.0  # Hz
TRAIN_ITERATIONS = 400
SAMPLE_DURATION = 0.5  # Seconds
SAMPLE_PERIOD_SECS = 1.0 / SAMPLE_RATE_HZ
MOMENTUM = 0.95
GENERATE_SAMPLES = 900
QUANTIZATION_CHANNELS = 256
NUM_SPEAKERS = 3
F1 = 155.56  # E-flat frequency in hz
F2 = 196.00  # G frequency in hz
F3 = 233.08  # B-flat frequency in hz
receptive_field = 256
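A quick consistency check that NOTES_HZ agrees with the hand-entered F1-F3 constants above:

import numpy as np
import librosa

assert np.allclose(librosa.note_to_hz(['D#3', 'G3', 'A#3']),
                   [155.56, 196.00, 233.08], atol=0.01)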

Example #46
0
from __future__ import print_function

import librosa
from magenta.common import tf_utils
from magenta.models.onsets_frames_transcription import audio_transform
import tensorflow as tf

DEFAULT_SAMPLE_RATE = 16000

DEFAULT_SPEC_TYPE = 'cqt'
DEFAULT_SPEC_LOG_AMPLITUDE = False
DEFAULT_SPEC_MEL_HTK = False

DEFAULT_SPEC_HOP_LENGTH = 512
DEFAULT_SPEC_N_BINS = 264  # 88 piano keys: (88 / 12) octaves * 36 bins per octave
DEFAULT_SPEC_FMIN = librosa.note_to_hz(['A0'])[0]

DEFAULT_CQT_BINS_PER_OCTAVE = 36

DEFAULT_FRAMES_PER_SECOND = DEFAULT_SAMPLE_RATE / DEFAULT_SPEC_HOP_LENGTH

MIN_MIDI_PITCH = librosa.note_to_midi('A0')
MAX_MIDI_PITCH = librosa.note_to_midi('C8')
MIDI_PITCHES = MAX_MIDI_PITCH - MIN_MIDI_PITCH + 1

MAX_MIDI_VELOCITY = 127

DEFAULT_CROP_TRAINING_SEQUENCE_TO_NOTES = False
DEFAULT_ONSET_MODE = 'length_ms'
DEFAULT_ONSET_LENGTH = 100
DEFAULT_ONSET_DELAY = 0
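The piano-range arithmetic behind these constants, spelled out: A0 is MIDI 21 and C8 is MIDI 108, so the range covers 88 pitches and 88/12 octaves * 36 bins per octave = 264 CQT bins:

import librosa

lo, hi = librosa.note_to_midi('A0'), librosa.note_to_midi('C8')
print(lo, hi, hi - lo + 1)           # 21 108 88
print((hi - lo + 1) * 36 // 12)      # 264 bins at 36 bins per octave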
Example #47
0
from __future__ import print_function

import librosa
from magenta.common import tf_utils
from magenta.models.onsets_frames_transcription import audio_transform
import tensorflow as tf

DEFAULT_SAMPLE_RATE = 16000

DEFAULT_SPEC_TYPE = 'cqt'
DEFAULT_SPEC_LOG_AMPLITUDE = False
DEFAULT_SPEC_MEL_HTK = False

DEFAULT_SPEC_HOP_LENGTH = 512
DEFAULT_SPEC_N_BINS = 264  # 88 piano keys: (88 / 12) octaves * 36 bins per octave
DEFAULT_SPEC_FMIN = librosa.note_to_hz(['A0'])[0]

DEFAULT_CQT_BINS_PER_OCTAVE = 36

DEFAULT_FRAMES_PER_SECOND = DEFAULT_SAMPLE_RATE / DEFAULT_SPEC_HOP_LENGTH

MIN_MIDI_PITCH = librosa.note_to_midi('A0')
MAX_MIDI_PITCH = librosa.note_to_midi('C8')
MIDI_PITCHES = MAX_MIDI_PITCH - MIN_MIDI_PITCH + 1

MAX_MIDI_VELOCITY = 127

DEFAULT_ONSET_MODE = 'length_ms'
DEFAULT_ONSET_LENGTH = 100
DEFAULT_ONSET_DELAY = 0
DEFAULT_MIN_FRAME_OCCUPANCY_FOR_LABEL = 0.0