def freq_correct(src, dst, fs=44100, fc=3000, mode='normal', win_length=1024, alpha=1):
    """Pitch-shift ``src`` so its base frequency moves towards that of ``dst``.

    The base frequency of each signal is estimated with ``basefreq`` and
    converted to octaves with ``librosa.hz_to_octs``.  The octave difference
    is turned into semitones (``* 12``), scaled by ``alpha`` and applied to
    ``src`` with ``librosa.effects.pitch_shift``.

    Parameters
    ----------
    src : np.ndarray
        Signal to be corrected.
    dst : np.ndarray
        Reference signal whose base frequency is the target.
    fs : int
        Sampling rate, forwarded to ``basefreq`` and to the pitch shifter.
    fc : number
        Forwarded unchanged to ``basefreq`` (presumably a cut-off frequency
        for the base-frequency search -- confirm against ``basefreq``).
    mode : str
        ``'normal'`` applies a single offset to the whole signal.
        ``'track'`` estimates and applies one offset per consecutive window
        of ``win_length`` samples; samples past the last full window are
        left at zero.  Any other value yields an all-zero array shaped like
        ``src``.
    win_length : int
        Window size in samples, only used when ``mode == 'track'``.
    alpha : number
        Scale factor applied to the semitone offset.

    Returns
    -------
    np.ndarray
        The corrected signal.  This is a deliberate best-effort routine: if
        anything raises while estimating or shifting, ``src`` is returned
        unchanged.
    """

    def semitone_offset(a, b):
        # Octave distance between the two base frequencies, in semitones.
        a_oct = librosa.hz_to_octs(basefreq(a, fs, fc))
        b_oct = librosa.hz_to_octs(basefreq(b, fs, fc))
        return (b_oct - a_oct) * 12 * alpha

    out = np.zeros_like(src)
    try:
        if mode == 'normal':
            # Use the caller's sampling rate rather than a fixed 44100, and
            # pass it by keyword so the call also works on librosa versions
            # where ``sr`` is keyword-only.
            out = librosa.effects.pitch_shift(
                src, sr=fs, n_steps=semitone_offset(src, dst))
        elif mode == 'track':
            n_windows = min(len(src), len(dst)) // win_length
            for i in range(n_windows):
                win = slice(i * win_length, (i + 1) * win_length)
                out[win] = librosa.effects.pitch_shift(
                    src[win], sr=fs,
                    n_steps=semitone_offset(src[win], dst[win]))
        return out
    except Exception:
        # Best effort: fall back to the uncorrected input on any failure.
        return src
def __init__(self, frame_size, fmax, fps, oct_width, center_note, log_eta,
             sample_rate=44100, fold=None):
    """Store the configuration and build the chroma filterbank.

    The filterbank comes from ``librosa.filters.chroma``; it is transposed,
    its last row is dropped, and every row is then scaled by a smoothed
    mask that suppresses FFT bins at or above ``fmax``.

    :param frame_size: FFT size handed to ``librosa.filters.chroma`` as
        ``n_fft``
    :param fmax: frequency bound; bins whose frequency is not below it are
        masked out
    :param fps: stored on the instance, not used in this method
    :param oct_width: forwarded as ``octwidth`` to the chroma filter
    :param center_note: note name converted to octaves and forwarded as
        ``ctroct``; ``None`` is forwarded as ``ctroct=None``
    :param log_eta: stored on the instance, not used in this method
    :param sample_rate: audio sampling rate
    :param fold: accepted but not used in this method
    """
    self.fps = fps
    self.fmax = fmax
    self.sample_rate = sample_rate
    self.oct_width = oct_width
    self.center_note = center_note
    self.frame_size = frame_size
    self.log_eta = log_eta

    # parameters are based on Cho and Bello, 2014.
    import librosa
    ctroct = (librosa.hz_to_octs(librosa.note_to_hz(center_note))
              if center_note is not None else None)

    self.filterbank = librosa.filters.chroma(sr=sample_rate,
                                             n_fft=frame_size,
                                             octwidth=oct_width,
                                             ctroct=ctroct).T[:-1]

    # mask out everything above fmax
    from bottleneck import move_mean
    # Floor division keeps the slice bound an int; ``frame_size / 2`` is a
    # float on Python 3 and cannot be used as a slice index.
    m = np.fft.fftfreq(frame_size, 1. / sample_rate)[:frame_size // 2] < fmax
    # Moving average (window of 10) turns the hard cut-off into a ramp.
    mask_smooth = move_mean(m, window=10, min_count=1)
    self.filterbank *= mask_smooth[:, np.newaxis]
def test_hz_to_octs(tuning, bins_per_octave):
    """Check ``hz_to_octs`` on A1..A4 detuned by ``tuning`` bins.

    The reference pitches 55, 110, 220 and 440 Hz are scaled by
    ``2 ** (tuning / bins_per_octave)``; with the same tuning handed to
    ``hz_to_octs`` the result must be octaves 1 to 4.
    """
    expected_octs = [1, 2, 3, 4]

    detune = 2.0 ** (tuning / bins_per_octave)
    detuned_hz = np.asarray([55, 110, 220, 440]) * detune

    computed_octs = librosa.hz_to_octs(
        detuned_hz,
        tuning=tuning,
        bins_per_octave=bins_per_octave,
    )

    assert np.allclose(expected_octs, computed_octs)
def test_hz_to_octs_dep(a440):
    """Check the deprecated ``A440`` argument of ``hz_to_octs``.

    The call must still map the rescaled A1..A4 pitches to octaves 1 to 4,
    and it must emit a warning whose message mentions the deprecation.
    """
    with warnings.catch_warnings(record=True) as out:
        # Without this, the default filters (or the once-per-location
        # registry, when the test is parametrized) can hide the warning and
        # leave ``out`` empty.
        warnings.simplefilter("always")

        freq = np.asarray([55, 110, 220, 440]) * (float(a440) / 440.0)
        octs = [1, 2, 3, 4]
        oct_out = librosa.hz_to_octs(freq, A440=a440)

    assert np.allclose(octs, oct_out)

    # And that it says the right thing (roughly)
    assert out, "hz_to_octs(A440=...) did not emit any warning"
    assert any('deprecated' in str(w.message).lower() for w in out)
def main():
    """Debug driver: inspect the syllables of track B, then build B from A.

    Clears the temp files, synthesises signal A with ``dsp.sin``, runs
    ``process_audio`` on the B track, prints and plots the base frequency
    of every B syllable, and finally calls ``make_B_by_A``.
    """
    util.clean_tempfiles()

    # Signal A is synthetic (presumably a 1 kHz tone at 44.1 kHz lasting
    # 0.1 s -- confirm against dsp.sin), scaled by 10000.
    a_audio = dsp.sin(1000, 44100, 0.1) * 10000

    b_audio, b_syllables, b_features, b_peak_indexes, b_bias = process_audio(
        './music/小星星.wav',
        './tmp/audio1',
        0.07,
        'time',
        crop_time=0.1,
        hpss='',
    )

    # One figure per syllable, with its base frequency in Hz and in octaves.
    for syl in b_syllables:
        base_hz = dsp.basefreq(syl, fs=44100, fc=3000)
        print(base_hz, librosa.hz_to_octs(base_hz))

        freq_axis, spectrum = dsp.showfreq(syl, fs=44100, fc=3000)
        plt.plot(freq_axis, spectrum)
        plt.show()

    b_bundle = [b_audio, b_syllables, b_features, b_peak_indexes, b_bias]
    make_B_by_A(a_audio, b_bundle, fc=[400, 8000])
def __test_to_octs(infile):
    """Compare ``hz_to_octs`` against a stored reference fixture.

    ``infile`` is read with ``load``; its ``'f'`` entry is the input and its
    ``'result'`` entry is the expected output.
    """
    fixture = load(infile)
    computed = librosa.hz_to_octs(fixture['f'])
    expected = fixture['result']
    assert np.allclose(computed, expected)
def chroma(sr, n_fft, n_chroma=12, A440=440.0, ctroct=5.0, octwidth=2):
    """Create a Filterbank matrix to convert STFT to chroma

    :usage:
        >>> # Build a simple chroma filter bank
        >>> chroma_fb = librosa.filters.chroma(22050, 4096)

        >>> # Use quarter-tones instead of semitones
        >>> chroma_fbq = librosa.filters.chroma(22050, 4096, n_chroma=24)

        >>> # Equally weight all octaves
        >>> chroma_fb = librosa.filters.chroma(22050, 4096, octwidth=None)

    :parameters:
      - sr        : int > 0 [scalar]
          audio sampling rate

      - n_fft     : int > 0 [scalar]
          number of FFT bins

      - n_chroma  : int > 0 [scalar]
          number of chroma bins

      - A440      : float > 0 [scalar]
          Reference frequency for A440

      - ctroct    : float > 0 [scalar]

      - octwidth  : float > 0 or None [scalar]
          ``ctroct`` and ``octwidth`` specify a dominance window - a Gaussian
          weighting centered on ``ctroct`` (in octs, A0 = 27.5Hz) and with a
          gaussian half-width of ``octwidth``.
          Set ``octwidth`` to ``None`` to use a flat weighting.

    :returns:
      - wts       : ndarray [shape=(n_chroma, 1 + n_fft / 2)]
          Chroma filter matrix
    """
    # Get the FFT bins, not counting the DC component
    frequencies = np.linspace(0, sr, n_fft, endpoint=False)[1:]

    # Position of every FFT bin, measured in chroma bins
    frqbins = n_chroma * librosa.hz_to_octs(frequencies, A440)

    # make up a value for the 0 Hz bin = 1.5 octaves below bin 1
    # (so chroma is 50% rotated from bin 1, and bin width is broad)
    frqbins = np.concatenate(([frqbins[0] - 1.5 * n_chroma], frqbins))

    binwidthbins = np.concatenate(
        (np.maximum(frqbins[1:] - frqbins[:-1], 1.0), [1]))

    # D[c, k]: distance of FFT bin k from chroma bin c
    D = np.subtract.outer(frqbins, np.arange(0, n_chroma, dtype='d')).T

    n_chroma2 = np.round(float(n_chroma) / 2)

    # Project into range -n_chroma/2 .. n_chroma/2
    # add on fixed offset of 10*n_chroma to ensure all values passed to
    # rem are +ve
    D = np.remainder(D + n_chroma2 + 10 * n_chroma, n_chroma) - n_chroma2

    # Gaussian bumps - 2*D to make them narrower.
    # binwidthbins broadcasts along the chroma axis, so no tiled copy is
    # needed.
    wts = np.exp(-0.5 * (2 * D / binwidthbins) ** 2)

    # normalize each column
    wts = librosa.util.normalize(wts, norm=2, axis=0)

    # Maybe apply scaling for fft bins
    if octwidth is not None:
        wts *= np.exp(-0.5 * (((frqbins / n_chroma - ctroct) / octwidth) ** 2))

    # remove aliasing columns, copy to ensure row-contiguity
    return np.ascontiguousarray(wts[:, :1 + n_fft // 2])
def chroma(sr, n_fft, n_chroma=12, A440=440.0, ctroct=5.0, octwidth=2):
    """Create a Filterbank matrix to convert STFT to chroma

    :usage:
        >>> # Build a simple chroma filter bank
        >>> chroma_fb = librosa.filters.chroma(22050, 4096)

        >>> # Use quarter-tones instead of semitones
        >>> chroma_fbq = librosa.filters.chroma(22050, 4096, n_chroma=24)

        >>> # Equally weight all octaves
        >>> chroma_fb = librosa.filters.chroma(22050, 4096, octwidth=None)

    :parameters:
      - sr        : int > 0
          audio sampling rate

      - n_fft     : int > 0
          number of FFT bins

      - n_chroma  : int > 0
          number of chroma bins

      - A440      : float
          Reference frequency for A440

      - ctroct    : float > 0

      - octwidth  : float or None
          ``ctroct`` and ``octwidth`` specify a dominance window - a Gaussian
          weighting centered on ``ctroct`` (in octs, A0 = 27.5Hz) and with a
          gaussian half-width of ``octwidth``.
          Set ``octwidth`` to ``None`` to use a flat weighting.

    :returns:
      - wts       : ndarray, shape=(n_chroma, 1 + n_fft / 2)
          Chroma filter matrix
    """
    # Get the FFT bins, not counting the DC component
    frequencies = np.linspace(0, sr, n_fft, endpoint=False)[1:]

    # Position of every FFT bin, measured in chroma bins
    frqbins = n_chroma * librosa.hz_to_octs(frequencies, A440)

    # make up a value for the 0 Hz bin = 1.5 octaves below bin 1
    # (so chroma is 50% rotated from bin 1, and bin width is broad)
    frqbins = np.concatenate(([frqbins[0] - 1.5 * n_chroma], frqbins))

    binwidthbins = np.concatenate(
        (np.maximum(frqbins[1:] - frqbins[:-1], 1.0), [1]))

    # D[c, k]: distance of FFT bin k from chroma bin c
    D = np.subtract.outer(frqbins, np.arange(0, n_chroma, dtype='d')).T

    n_chroma2 = np.round(n_chroma / 2.0)

    # Project into range -n_chroma/2 .. n_chroma/2
    # add on fixed offset of 10*n_chroma to ensure all values passed to
    # rem are +ve
    D = np.remainder(D + n_chroma2 + 10 * n_chroma, n_chroma) - n_chroma2

    # Gaussian bumps - 2*D to make them narrower.
    # binwidthbins broadcasts along the chroma axis, so no tiled copy is
    # needed.
    wts = np.exp(-0.5 * (2 * D / binwidthbins) ** 2)

    # normalize each column
    wts = librosa.util.normalize(wts, norm=2, axis=0)

    # Maybe apply scaling for fft bins
    if octwidth is not None:
        wts *= np.exp(-0.5 * (((frqbins / n_chroma - ctroct) / octwidth) ** 2))

    # remove aliasing columns, copy to ensure row-contiguity.
    # Floor division keeps the slice bound an int; ``1 + n_fft/2`` is a float
    # on Python 3 and is rejected as a slice index.
    return np.ascontiguousarray(wts[:, :1 + n_fft // 2])
def __test(a440):
    """Check ``hz_to_octs`` with a custom ``A440`` reference frequency.

    The pitches 55, 110, 220 and 440 Hz are rescaled by ``a440 / 440`` and
    must map to octaves 1 to 4 when the same reference is passed along.
    """
    expected_octs = [1, 2, 3, 4]

    rescale = float(a440) / 440.0
    rescaled_hz = np.asarray([55, 110, 220, 440]) * rescale

    computed_octs = librosa.hz_to_octs(rescaled_hz, A440=a440)

    assert np.allclose(expected_octs, computed_octs)
def test_hz_to_octs(infile):
    """Compare ``hz_to_octs`` against a stored reference fixture.

    ``infile`` is read with ``load``; its ``"f"`` entry is the input and its
    ``"result"`` entry is the expected output.
    """
    fixture = load(infile)
    computed = librosa.hz_to_octs(fixture["f"])
    expected = fixture["result"]
    assert np.allclose(computed, expected)
fps, endtime, height, width = ffmpeg.get_video_infos( os.path.join(dataset, video_names[i])) util.makedirs(os.path.join('./tmp/video2image', '%03d' % i)) ffmpeg.video2image( os.path.join(dataset, video_names[i]), os.path.join('./tmp/video2image', '%03d' % i, '%05d.jpg')) print('Generating voice...') sinmusic, musicinfos = notation.notations2music(notations, mode='sin') music = np.zeros_like(sinmusic) for i in range(len(musicinfos['time'])): for j in range(len(musicinfos['freq'][i])): if musicinfos['freq'][i][j] != 0: diff = np.abs( librosa.hz_to_octs(seed_freqs) - librosa.hz_to_octs(musicinfos['freq'][i][j])) index = np.argwhere(diff == np.min(diff))[0][0] _tone = seed_voices[index] _tone = sound.freq_correct(_tone, srcfreq=seed_freqs[index], dstfreq=musicinfos['freq'][i][j], alpha=1.0) _tone = sound.highlight_bass(_tone, musicinfos['freq'][i][j], seed_freqs[index]) music[int(musicinfos['time'][i] * 44100):int(musicinfos['time'][i] * 44100) + len(_tone)] += _tone # music = music+sinmusic*0.1 music = dsp.bpf(music, 44100, 20, 5000) music = (arrop.sigmoid(music / 32768) - 0.5) * 65536
def chroma(sr, n_fft, n_chroma=12, A440=440.0, ctroct=5.0, octwidth=None):
    """Create a Filterbank matrix to convert STFT to chroma

    :usage:
        >>> # Build a simple chroma filter bank
        >>> chroma_fb = librosa.filters.chroma(22050, 4096)

        >>> # Use quarter-tones instead of semitones
        >>> chroma_fbq = librosa.filters.chroma(22050, 4096, n_chroma=24)

        >>> # Down-weight the high and low frequencies
        >>> chroma_fb = librosa.filters.chroma(22050, 4096, ctroct=5, octwidth=2)

    :parameters:
      - sr        : int
          audio sampling rate

      - n_fft     : int
          FFT window size

      - n_chroma  : int
          number of chroma bins

      - A440      : float
          Reference frequency for A440

      - ctroct    : float

      - octwidth  : float
          These parameters specify a dominance window - Gaussian
          weighting centered on ctroct (in octs, re A0 = 27.5Hz) and
          with a gaussian half-width of octwidth.
          Defaults to halfwidth = inf, i.e. flat.

    :returns:
      - wts       : ndarray, shape=(n_chroma, 1 + n_fft / 2)
          Chroma filter matrix
    """
    # Get the FFT bins, not counting the DC component
    frequencies = np.linspace(0, sr, n_fft, endpoint=False)[1:]

    # Position of every FFT bin, measured in chroma bins
    fftfrqbins = n_chroma * librosa.hz_to_octs(frequencies, A440)

    # make up a value for the 0 Hz bin = 1.5 octaves below bin 1
    # (so chroma is 50% rotated from bin 1, and bin width is broad)
    fftfrqbins = np.concatenate(
        ([fftfrqbins[0] - 1.5 * n_chroma], fftfrqbins))

    binwidthbins = np.concatenate(
        (np.maximum(fftfrqbins[1:] - fftfrqbins[:-1], 1.0), [1]))

    # D[c, k]: distance of FFT bin k from chroma bin c.  Broadcasting a row
    # against a column gives the same matrix as the two tiled operands.
    chroma_idx = np.arange(0, n_chroma, dtype='d')
    D = fftfrqbins[np.newaxis, :] - chroma_idx[:, np.newaxis]

    n_chroma2 = round(n_chroma / 2.0)

    # Project into range -n_chroma/2 .. n_chroma/2
    # add on fixed offset of 10*n_chroma to ensure all values passed to
    # rem are +ve
    D = np.remainder(D + n_chroma2 + 10 * n_chroma, n_chroma) - n_chroma2

    # Gaussian bumps - 2*D to make them narrower
    wts = np.exp(-0.5 * (2 * D / binwidthbins) ** 2)

    # normalize each column
    wts /= np.sqrt(np.sum(wts ** 2, 0))

    # Maybe apply scaling for fft bins
    if octwidth is not None:
        wts *= np.exp(
            -0.5 * (((fftfrqbins / n_chroma - ctroct) / octwidth) ** 2))

    # remove aliasing columns.
    # Floor division keeps the slice bound an int; ``1 + n_fft/2`` is a float
    # on Python 3 and is rejected as a slice index.
    return wts[:, :1 + n_fft // 2]