Ejemplo n.º 1
0
def freq_correct(src,
                 dst,
                 fs=44100,
                 fc=3000,
                 mode='normal',
                 win_length=1024,
                 alpha=1):
    """Pitch-shift ``src`` so that its base frequency follows that of ``dst``.

    The base frequency of each signal is obtained from ``basefreq(x, fs, fc)``
    (presumably a fundamental-frequency estimate limited by ``fc`` -- confirm
    against that helper), converted to octaves, and the difference is turned
    into a number of semitone steps for ``librosa.effects.pitch_shift``.

    :parameters:
      - src        : array-like audio signal to be corrected
      - dst        : array-like audio signal providing the target pitch
      - fs         : sampling rate, forwarded to ``basefreq`` and to
                     ``pitch_shift``
      - fc         : forwarded to ``basefreq``
      - mode       : ``'normal'`` shifts the whole signal by a single offset;
                     ``'track'`` shifts consecutive windows of ``win_length``
                     samples independently
      - win_length : window size in samples, used in ``'track'`` mode only
      - alpha      : scale factor applied to the semitone offset

    :returns:
      The shifted signal.  In ``'track'`` mode only whole windows inside
      ``min(len(src), len(dst))`` are written; the remaining samples keep the
      zeros of the initial buffer.  For any other ``mode`` value the zero
      buffer is returned unchanged.  If anything raises during processing,
      ``src`` is returned as-is (best-effort fallback).
    """
    out = np.zeros_like(src)
    try:
        if mode == 'normal':
            src_oct = librosa.hz_to_octs(basefreq(src, fs, fc))
            dst_oct = librosa.hz_to_octs(basefreq(dst, fs, fc))
            # 12 semitones per octave; alpha scales the correction strength.
            offset = (dst_oct - src_oct) * 12 * alpha
            # Use the caller's sampling rate rather than a fixed 44100.
            out = librosa.effects.pitch_shift(src, sr=fs, n_steps=offset)
        elif mode == 'track':
            length = min(len(src), len(dst))
            for i in range(length // win_length):
                window = slice(i * win_length, (i + 1) * win_length)
                src_oct = librosa.hz_to_octs(basefreq(src[window], fs, fc))
                dst_oct = librosa.hz_to_octs(basefreq(dst[window], fs, fc))

                offset = (dst_oct - src_oct) * 12 * alpha
                out[window] = librosa.effects.pitch_shift(src[window],
                                                          sr=fs,
                                                          n_steps=offset)
        return out
    except Exception:
        # Deliberate best-effort: on any failure hand back the input untouched.
        return src
Ejemplo n.º 2
0
    def __init__(self,
                 frame_size,
                 fmax,
                 fps,
                 oct_width,
                 center_note,
                 log_eta,
                 sample_rate=44100,
                 fold=None):
        """Store the configuration and build a chroma filterbank masked above ``fmax``.

        :parameters:
          - frame_size  : FFT size; passed as ``n_fft`` to
                          ``librosa.filters.chroma``
          - fmax        : frequency limit in Hz; bins at or above it are
                          masked out of the filterbank (with a smoothed edge)
          - fps         : stored on the instance, not used here
          - oct_width   : passed as ``octwidth`` to ``librosa.filters.chroma``
          - center_note : note name converted to octaves and passed as
                          ``ctroct``; ``None`` passes ``ctroct=None``
          - log_eta     : stored on the instance, not used here
          - sample_rate : audio sampling rate in Hz
          - fold        : accepted but not used in this constructor
        """
        self.fps = fps
        self.fmax = fmax
        self.sample_rate = sample_rate
        self.oct_width = oct_width
        self.center_note = center_note
        self.frame_size = frame_size
        self.log_eta = log_eta

        # parameters are based on Cho and Bello, 2014.
        import librosa
        ctroct = (librosa.hz_to_octs(librosa.note_to_hz(center_note))
                  if center_note is not None else None)

        # Transpose to (bins, chroma) and drop the last bin so the row count
        # matches the frame_size // 2 mask built below.
        self.filterbank = librosa.filters.chroma(sr=sample_rate,
                                                 n_fft=frame_size,
                                                 octwidth=oct_width,
                                                 ctroct=ctroct).T[:-1]

        # mask out everything above fmax
        from bottleneck import move_mean
        # Integer division: a float slice bound raises TypeError on Python 3.
        m = np.fft.fftfreq(frame_size,
                           1. / sample_rate)[:frame_size // 2] < fmax
        # A 10-bin moving average softens the hard cut-off at fmax.
        mask_smooth = move_mean(m, window=10, min_count=1)
        self.filterbank *= mask_smooth[:, np.newaxis]
Ejemplo n.º 3
0
def test_hz_to_octs(tuning, bins_per_octave):
    """Check that detuned 55/110/220/440 Hz map onto octaves 1 through 4."""
    # Detune the reference pitches by `tuning` bins of a `bins_per_octave` grid.
    detune = 2.0**(tuning / bins_per_octave)
    hz = np.asarray([55, 110, 220, 440]) * detune
    expected = [1, 2, 3, 4]

    actual = librosa.hz_to_octs(hz,
                                tuning=tuning,
                                bins_per_octave=bins_per_octave)
    assert np.allclose(expected, actual)
Ejemplo n.º 4
0
def test_hz_to_octs_dep(a440):
    """Check the deprecated ``A440`` keyword still works and emits a warning.

    The frequencies 55/110/220/440 Hz are rescaled by ``a440 / 440`` so that,
    with the matching reference, they must map onto octaves 1 through 4.
    """
    with warnings.catch_warnings(record=True) as out:
        # Record every warning: under the default filters a warning already
        # issued from the same location can be suppressed, which would leave
        # `out` empty and make out[0] fail with an unrelated IndexError.
        warnings.simplefilter('always')

        freq = np.asarray([55, 110, 220, 440]) * (float(a440) / 440.0)
        octs = [1, 2, 3, 4]
        oct_out = librosa.hz_to_octs(freq, A440=a440)

        assert np.allclose(octs, oct_out)

        # A warning must have been raised at all ...
        assert len(out) > 0

        # And that it says the right thing (roughly)
        assert 'deprecated' in str(out[0].message).lower()
Ejemplo n.º 5
0
def main():
    """Demo driver: analyse a recording and rebuild it from a synthetic tone.

    Clears temporary files, creates the "A" signal with ``dsp.sin``, runs
    ``process_audio`` on the "B" recording, prints and plots the base
    frequency of every extracted syllable, and finally calls ``make_B_by_A``.
    """
    util.clean_tempfiles()

    # "A" source: generated signal scaled by 10000 (the original also carried
    # a disabled alternative that loaded it from a wav file via sound.load).
    tone_a = dsp.sin(1000, 44100, 0.1) * 10000

    # "B" material: process_audio yields five values that are forwarded
    # together to make_B_by_A at the end.
    b_audio, b_syllables, b_features, b_peaks, b_bias = process_audio(
        './music/小星星.wav',
        './tmp/audio1',
        0.07,
        'time',
        crop_time=0.1,
        hpss='')

    # Inspect each syllable: base frequency (Hz and octaves) plus a spectrum
    # plot; plt.show() is called once per syllable.
    for chunk in b_syllables:
        base_hz = dsp.basefreq(chunk, fs=44100, fc=3000)
        print(base_hz, librosa.hz_to_octs(base_hz))
        freq_axis, spectrum = dsp.showfreq(chunk, fs=44100, fc=3000)
        plt.plot(freq_axis, spectrum)
        plt.show()

    make_B_by_A(tone_a,
                [b_audio, b_syllables, b_features, b_peaks, b_bias],
                fc=[400, 8000])
Ejemplo n.º 6
0
    def __test_to_octs(infile):
        """Compare ``hz_to_octs`` output against the fixture stored in ``infile``."""
        fixture = load(infile)
        # 'f' holds the input frequencies, 'result' the reference output.
        actual = librosa.hz_to_octs(fixture['f'])
        expected = fixture['result']

        assert np.allclose(actual, expected)
Ejemplo n.º 7
0
def chroma(sr, n_fft, n_chroma=12, A440=440.0, ctroct=5.0, octwidth=2):
    """Create a Filterbank matrix to convert STFT to chroma

    :usage:
        >>> # Build a simple chroma filter bank
        >>> chroma_fb   = librosa.filters.chroma(22050, 4096)

        >>> # Use quarter-tones instead of semitones
        >>> chroma_fbq  = librosa.filters.chroma(22050, 4096, n_chroma=24)

        >>> # Equally weight all octaves
        >>> chroma_fb   = librosa.filters.chroma(22050, 4096, octwidth=None)

    :parameters:
      - sr        : int > 0 [scalar]
          audio sampling rate

      - n_fft     : int > 0 [scalar]
          number of FFT bins

      - n_chroma  : int > 0 [scalar]
          number of chroma bins

      - A440      : float > 0 [scalar]
          Reference frequency for A440

      - ctroct    : float > 0 [scalar]
          Center of the dominance window, in octaves (see ``octwidth``)

      - octwidth  : float > 0 or None [scalar]
          ``ctroct`` and ``octwidth`` specify a dominance window -
          a Gaussian weighting centered on ``ctroct`` (in octs, A0 = 27.5Hz)
          and with a gaussian half-width of ``octwidth``.
          Set ``octwidth`` to ``None`` to use a flat weighting.

    :returns:
      - wts       : ndarray [shape=(n_chroma, 1 + n_fft / 2)]
          Chroma filter matrix
    """

    # Get the FFT bins, not counting the DC component
    frequencies = np.linspace(0, sr, n_fft, endpoint=False)[1:]

    # Pass the reference pitch by keyword so it can never be mistaken for a
    # different positional parameter of hz_to_octs.
    frqbins = n_chroma * librosa.hz_to_octs(frequencies, A440=A440)

    # make up a value for the 0 Hz bin = 1.5 octaves below bin 1
    # (so chroma is 50% rotated from bin 1, and bin width is broad)
    frqbins = np.concatenate(([frqbins[0] - 1.5 * n_chroma], frqbins))

    # Width of each FFT bin measured in chroma bins, floored at 1.0; the last
    # bin has no successor and gets width 1.
    binwidthbins = np.concatenate((np.maximum(frqbins[1:] - frqbins[:-1],
                                              1.0), [1]))

    # D[c, k] = distance (in chroma bins) of FFT bin k from chroma class c
    D = np.subtract.outer(frqbins, np.arange(0, n_chroma, dtype='d')).T

    n_chroma2 = np.round(float(n_chroma) / 2)

    # Project into range -n_chroma/2 .. n_chroma/2
    # add on fixed offset of 10*n_chroma to ensure all values passed to
    # rem are +ve
    D = np.remainder(D + n_chroma2 + 10 * n_chroma, n_chroma) - n_chroma2

    # Gaussian bumps - 2*D to make them narrower
    wts = np.exp(-0.5 * (2 * D / np.tile(binwidthbins, (n_chroma, 1)))**2)

    # normalize each column
    wts = librosa.util.normalize(wts, norm=2, axis=0)

    # Maybe apply scaling for fft bins
    if octwidth is not None:
        wts *= np.tile(
            np.exp(-0.5 * (((frqbins / n_chroma - ctroct) / octwidth)**2)),
            (n_chroma, 1))

    # remove aliasing columns, copy to ensure row-contiguity
    return np.ascontiguousarray(wts[:, :int(1 + n_fft / 2)])
Ejemplo n.º 8
0
def chroma(sr, n_fft, n_chroma=12, A440=440.0, ctroct=5.0, octwidth=2):
    """Create a Filterbank matrix to convert STFT to chroma

    :usage:
        >>> # Build a simple chroma filter bank
        >>> chroma_fb   = librosa.filters.chroma(22050, 4096)

        >>> # Use quarter-tones instead of semitones
        >>> chroma_fbq  = librosa.filters.chroma(22050, 4096, n_chroma=24)

        >>> # Equally weight all octaves
        >>> chroma_fb   = librosa.filters.chroma(22050, 4096, octwidth=None)

    :parameters:
      - sr        : int > 0
          audio sampling rate

      - n_fft     : int > 0
          number of FFT bins

      - n_chroma  : int > 0
          number of chroma bins

      - A440      : float
          Reference frequency for A440

      - ctroct    : float > 0
          Center of the dominance window, in octaves (see ``octwidth``)

      - octwidth  : float or None
          ``ctroct`` and ``octwidth`` specify a dominance window -
          a Gaussian weighting centered on ``ctroct`` (in octs, A0 = 27.5Hz)
          and with a gaussian half-width of ``octwidth``.
          Set ``octwidth`` to ``None`` to use a flat weighting.

    :returns:
      - wts       : ndarray, shape=(n_chroma, 1 + n_fft / 2)
          Chroma filter matrix
    """

    # Get the FFT bins, not counting the DC component
    frequencies = np.linspace(0, sr, n_fft, endpoint=False)[1:]

    # Pass the reference pitch by keyword so it can never be mistaken for a
    # different positional parameter of hz_to_octs.
    frqbins = n_chroma * librosa.hz_to_octs(frequencies, A440=A440)

    # make up a value for the 0 Hz bin = 1.5 octaves below bin 1
    # (so chroma is 50% rotated from bin 1, and bin width is broad)
    frqbins = np.concatenate(([frqbins[0] - 1.5 * n_chroma], frqbins))

    # Width of each FFT bin measured in chroma bins, floored at 1.0; the last
    # bin has no successor and gets width 1.
    binwidthbins = np.concatenate((np.maximum(frqbins[1:] - frqbins[:-1],
                                              1.0), [1]))

    # D[c, k] = distance (in chroma bins) of FFT bin k from chroma class c
    D = np.subtract.outer(frqbins, np.arange(0, n_chroma, dtype='d')).T

    n_chroma2 = np.round(n_chroma / 2.0)

    # Project into range -n_chroma/2 .. n_chroma/2
    # add on fixed offset of 10*n_chroma to ensure all values passed to
    # rem are +ve
    D = np.remainder(D + n_chroma2 + 10*n_chroma, n_chroma) - n_chroma2

    # Gaussian bumps - 2*D to make them narrower
    wts = np.exp(-0.5 * (2*D / np.tile(binwidthbins, (n_chroma, 1)))**2)

    # normalize each column
    wts = librosa.util.normalize(wts, norm=2, axis=0)

    # Maybe apply scaling for fft bins
    if octwidth is not None:
        wts *= np.tile(
            np.exp(-0.5 * (((frqbins/n_chroma - ctroct)/octwidth)**2)),
            (n_chroma, 1))

    # remove aliasing columns, copy to ensure row-contiguity
    # (integer slice bound: a float bound raises TypeError on Python 3)
    return np.ascontiguousarray(wts[:, :int(1 + n_fft // 2)])
Ejemplo n.º 9
0
    def __test(a440):
        """Check octaves 1-4 are recovered when the reference pitch is ``a440``."""
        # Rescale the nominal pitches so they stay aligned with the reference.
        scale = float(a440) / 440.0
        hz = np.asarray([55, 110, 220, 440]) * scale
        expected = [1, 2, 3, 4]

        actual = librosa.hz_to_octs(hz, A440=a440)
        assert np.allclose(expected, actual)
Ejemplo n.º 10
0
def test_hz_to_octs(infile):
    """Compare ``hz_to_octs`` output against the fixture stored in ``infile``."""
    fixture = load(infile)
    # "f" holds the input frequencies, "result" the reference output.
    actual = librosa.hz_to_octs(fixture["f"])
    expected = fixture["result"]

    assert np.allclose(actual, expected)
    def __test(a440):
        """Check octaves 1-4 are recovered when the reference pitch is ``a440``."""
        # Rescale the nominal pitches so they stay aligned with the reference.
        scale = float(a440) / 440.0
        hz = np.asarray([55, 110, 220, 440]) * scale
        expected = [1, 2, 3, 4]

        actual = librosa.hz_to_octs(hz, A440=a440)
        assert np.allclose(expected, actual)
Ejemplo n.º 12
0
    fps, endtime, height, width = ffmpeg.get_video_infos(
        os.path.join(dataset, video_names[i]))
    util.makedirs(os.path.join('./tmp/video2image', '%03d' % i))
    ffmpeg.video2image(
        os.path.join(dataset, video_names[i]),
        os.path.join('./tmp/video2image', '%03d' % i, '%05d.jpg'))

print('Generating voice...')
sinmusic, musicinfos = notation.notations2music(notations, mode='sin')

# Mixing buffer shaped like the reference rendering; tones are summed into it.
music = np.zeros_like(sinmusic)
for i in range(len(musicinfos['time'])):
    for j in range(len(musicinfos['freq'][i])):
        target_freq = musicinfos['freq'][i][j]
        if target_freq == 0:
            # Entries with frequency 0 contribute nothing.
            continue

        # Pick the seed voice closest to the target on the octave (log) scale;
        # on ties the first minimum wins.
        diff = np.abs(
            librosa.hz_to_octs(seed_freqs) - librosa.hz_to_octs(target_freq))
        index = np.argwhere(diff == np.min(diff))[0][0]

        # Retune the chosen seed to the target frequency, then post-process it.
        _tone = sound.freq_correct(seed_voices[index],
                                   srcfreq=seed_freqs[index],
                                   dstfreq=target_freq,
                                   alpha=1.0)
        _tone = sound.highlight_bass(_tone, target_freq, seed_freqs[index])

        # Event time converted to a sample offset (44100 samples per unit of
        # musicinfos['time']); the tone is added starting at that offset.
        onset = int(musicinfos['time'][i] * 44100)
        music[onset:onset + len(_tone)] += _tone

# Disabled in the original: blending in the sine reference
# (music = music + sinmusic * 0.1).
music = dsp.bpf(music, 44100, 20, 5000)
# Squash through a sigmoid on a 32768 scale, then re-centre and rescale.
squashed = arrop.sigmoid(music / 32768)
music = (squashed - 0.5) * 65536
Ejemplo n.º 13
0
def chroma(sr, n_fft, n_chroma=12, A440=440.0, ctroct=5.0, octwidth=None):
    """Create a Filterbank matrix to convert STFT to chroma

    :usage:
        >>> # Build a simple chroma filter bank
        >>> chroma_fb   = librosa.filters.chroma(22050, 4096)

        >>> # Use quarter-tones instead of semitones
        >>> chroma_fbq  = librosa.filters.chroma(22050, 4096, n_chroma=24)

        >>> # Down-weight the high and low frequencies
        >>> chroma_fb   = librosa.filters.chroma(22050, 4096, ctroct=5, octwidth=2)

    :parameters:
      - sr        : int
          audio sampling rate
      - n_fft     : int
          FFT window size
      - n_chroma  : int
          number of chroma bins
      - A440      : float
          Reference frequency for A440
      - ctroct    : float
      - octwidth  : float
          These parameters specify a dominance window - Gaussian
          weighting centered on ctroct (in octs, re A0 = 27.5Hz) and
          with a gaussian half-width of octwidth.
          Defaults to halfwidth = inf, i.e. flat.

    :returns:
      - wts       : ndarray, shape=(n_chroma, 1 + n_fft / 2)
          Chroma filter matrix

    """

    # Get the FFT bins, not counting the DC component
    frequencies = np.linspace(0, sr, n_fft, endpoint=False)[1:]

    # Pass the reference pitch by keyword so it can never be mistaken for a
    # different positional parameter of hz_to_octs.
    fftfrqbins = n_chroma * librosa.hz_to_octs(frequencies, A440=A440)

    # make up a value for the 0 Hz bin = 1.5 octaves below bin 1
    # (so chroma is 50% rotated from bin 1, and bin width is broad)
    fftfrqbins = np.concatenate(([fftfrqbins[0] - 1.5 * n_chroma],
                                 fftfrqbins))

    # Width of each FFT bin measured in chroma bins, floored at 1.0; the last
    # bin has no successor and gets width 1.
    binwidthbins = np.concatenate(
        (np.maximum(fftfrqbins[1:] - fftfrqbins[:-1], 1.0), [1]))

    # D[c, k] = distance (in chroma bins) of FFT bin k from chroma class c
    D = np.tile(fftfrqbins, (n_chroma, 1))  \
        - np.tile(np.arange(0, n_chroma, dtype='d')[:, np.newaxis],
                  (1, n_fft))

    n_chroma2 = round(n_chroma / 2.0)

    # Project into range -n_chroma/2 .. n_chroma/2
    # add on fixed offset of 10*n_chroma to ensure all values passed to
    # rem are +ve
    D = np.remainder(D + n_chroma2 + 10*n_chroma, n_chroma) - n_chroma2

    # Gaussian bumps - 2*D to make them narrower
    wts = np.exp(-0.5 * (2*D / np.tile(binwidthbins, (n_chroma, 1)))**2)

    # normalize each column
    wts /= np.tile(np.sqrt(np.sum(wts**2, 0)), (n_chroma, 1))

    # Maybe apply scaling for fft bins
    if octwidth is not None:
        wts *= np.tile(
            np.exp(-0.5 * (((fftfrqbins/n_chroma - ctroct)/octwidth)**2)),
            (n_chroma, 1))

    # remove aliasing columns
    # (integer slice bound: a float bound raises TypeError on Python 3)
    return wts[:, :int(1 + n_fft // 2)]