def high_frequency_weight(x, H, W, window_type):
    """
    Bin-index-weighted sum of spectral magnitudes for each frame of x.

    x is framed with common.frame(x, H, W), every frame is multiplied by the
    window returned by signal.get_window(window_type, W) and transformed with
    a real FFT along axis 0. The magnitude of each bin is multiplied by its
    bin index (0, 1, 2, ...) and the products are summed along axis 0.

    Assumes common.frame returns one frame of length W per column -- TODO
    confirm against common.frame.
    """
    window = signal.get_window(window_type, W)
    windowed_frames = common.frame(x, H, W) * window[:, None]
    # NOTE(review): the divisor is np.sum(W), i.e. the window length W itself,
    # not the sum of the window samples w. Kept as is; confirm it is intended.
    spectra = np.fft.rfft(windowed_frames, axis=0) / np.sum(W)
    magnitudes = np.abs(spectra)
    # Column vector of bin indices so it broadcasts over the frame axis.
    bin_index = np.arange(magnitudes.shape[0])[:, None]
    return np.sum(magnitudes * bin_index, axis=0)
def spectral_diff(x, H, W, window_type):
    """
    Sum of the positive magnitude changes between consecutive frames of x.

    x is framed with common.frame(x, H, W), windowed with
    signal.get_window(window_type, W) and transformed with a real FFT along
    axis 0. For every bin the magnitude of the previous column is subtracted
    from the current one, negative differences are set to 0, and what remains
    is summed along axis 0.

    The result has one entry fewer than there are columns, because the first
    column has no predecessor.
    """
    window = signal.get_window(window_type, W)
    windowed_frames = common.frame(x, H, W) * window[:, None]
    # NOTE(review): the divisor is np.sum(W), i.e. the window length W itself,
    # not the sum of the window samples. Kept as is; confirm it is intended.
    magnitudes = np.abs(np.fft.rfft(windowed_frames, axis=0) / np.sum(W))
    # Difference along axis 1: column k+1 minus column k.
    increase = np.diff(magnitudes, axis=1)
    # Keep only increases in magnitude.
    increase[increase < 0] = 0
    return np.sum(increase, axis=0)
def spectral_flux(x, H, W, window_type='hann'):
    """
    Sum of absolute differences between adjacent DFT bin magnitudes, per frame.

    x is framed with common.frame(x, H, W) and every frame is multiplied by a
    window before a full (two-sided) DFT is taken along axis 0. For each
    column, the absolute differences between the magnitudes of neighbouring
    bins are summed. A more jagged magnitude spectrum gives a larger value.

    For W >= 4 the window comes from signal.get_window(window_type, W); for
    shorter frames a rectangular window (all ones) is used instead.

    Returns the result of the sum along axis 0 (one value per column of the
    framed signal).
    """
    if W >= 4:
        w = signal.get_window(window_type, W)
    else:
        w = np.ones(W)
    # NOTE(review): this divides by np.sum(W), i.e. by the window length W,
    # not by the sum of the window samples. Kept as is; confirm it is intended.
    w /= np.sum(W)
    X = np.fft.fft(common.frame(x, H, W) * w[:, None], axis=0)
    # np.diff along axis 0 compares neighbouring bins within one frame.
    return np.sum(np.abs(np.diff(np.abs(X), axis=0)), axis=0)
def spectral_flux(x, H, W, window_type='hann'):
    """
    Frame-up x and take the DFT of each windowed frame.
    Compute the magnitude spectrum for each frame.
    Form the sum of the absolute values of the differences between the
    adjacent bins. This is the spectral flux.
    Intuitively, a more jagged spectrum will give a greater value of spectral
    flux.

    x is framed with common.frame(x, H, W) and windowed with
    signal.get_window(window_type, W). The DFT is the full (two-sided)
    transform taken along axis 0, and the result is the sum along axis 0
    (one value per column of the framed signal).
    """
    w = signal.get_window(window_type, W)
    # NOTE(review): this divides by np.sum(W), i.e. by the window length W,
    # not by the sum of the window samples. Kept as is; confirm it is intended.
    w /= np.sum(W)
    X = np.fft.fft(common.frame(x, H, W) * w[:, None], axis=0)
    # np.diff along axis 0 compares neighbouring bins within one frame.
    return np.sum(np.abs(np.diff(np.abs(X), axis=0)), axis=0)
def filtered_local_max(x, H, W, a):
    """
    Pick at most one peak per frame of x and return the picked indices.

    x is framed with common.frame(x, H, W). In each frame the local maxima are
    found with local_max; the largest of them is accepted if its value is
    greater than a times the mean value of the frame. Frames without any local
    maximum contribute nothing.

    Returns the sorted indices into x of all accepted peaks. A peak that is
    accepted in several overlapping frames is reported once.

    Assumes local_max returns a 1-D sequence of indices into the frame -- TODO
    confirm against local_max.
    """
    x_f = common.frame(x, H, W)
    # Marker array: 1 at every accepted peak position, 0 elsewhere.
    res = np.zeros(len(x))
    # h is the offset of the frame within x, c the samples of that frame.
    for h, c in zip(np.arange(0, len(x) - W, H), x_f.T):
        max_n = local_max(c)
        if len(max_n) == 0:
            # no local maxima
            continue
        # Index (within the frame) of the largest local maximum. With a single
        # local maximum this is simply that maximum.
        c_max = c[max_n]
        best = max_n[np.argsort(c_max)[-1]]
        # The threshold is relative to the frame mean, not to the other maxima.
        if c[best] > a * np.mean(c):
            res[best + h] = 1
    return np.where(res > 0)[0]
# Mix all signals in sigs into one buffer, each starting at its attack time.
file_lengths=np.array([len(s) for s in sigs])
# Largest sum of any attack time with any signal length; this is an upper bound
# on the length needed to hold every signal at its own attack time.
output_length=np.max(np.add.outer(attack_times,file_lengths))
x=np.zeros(output_length)
for at,sig in zip(attack_times,sigs):
    x[at:at+len(sig)]+=sig
# Add Gaussian noise scaled by 1e-8 to every sample
# (presumably to avoid exactly-zero stretches -- TODO confirm).
x+=np.random.standard_normal(len(x))*1e-8
# generate some interesting analysis times
n_rates=100
# Random rates (standard normal scaled by 2), linearly interpolated to one
# rate per output sample.
rates=np.random.standard_normal(n_rates)*2
interp_rates=np.interp(
    np.arange(output_length),
    np.linspace(0,output_length,n_rates),
    rates)
# Running sum of the rates gives a position for every output sample.
interp_pos=np.cumsum(interp_rates)
# Mean position over frames of size H taken with hop H, rounded to integers.
analysis_times=np.round(np.mean(common.frame(interp_pos,H,H),axis=0)).astype('int')
# av.adjust yields a (time, flag) pair for every analysis time.
adj_analysis_times=[av.adjust(at) for at in analysis_times]
# Positions in the list whose flag is truthy.
reset_times = np.where(np.array([b for t,b in adj_analysis_times]))[0]
adj_analysis_times=np.array([t for t,b in adj_analysis_times])
# Build x/y arrays for plotting: every adjusted time appears twice in x
# (sorted), and y is filled from an alternating +1/-1 sequence.
plot_adj_atimes_x=np.sort(np.concatenate((adj_analysis_times,adj_analysis_times)))
ones_osc=np.power(-1,np.arange(len(adj_analysis_times)))
plot_adj_atimes_y=np.zeros(len(plot_adj_atimes_x))
# Odd positions take the current value, the following even position repeats it.
plot_adj_atimes_y[1::2]=ones_osc
plot_adj_atimes_y[2::2]=ones_osc[:-1]
plot_adj_atimes_y[0]=-1
print(plot_adj_atimes_x[:10])
print(plot_adj_atimes_y[:10])
def var_fun(f,n): return np.ones_like(n)*V#rab_pitch.default_variance(n)*V def weight_fun(f,n): return np.power(1/(n+1),1) x=np.fromfile(filename,dtype='float64') C=rab_pitch.rab_pitch( [440*np.power(2,(n-69)/12)/sr for n in p_range], #variance=var_fun, weight=weight_fun, N_frame=N, N_window=W) #C._plot_tables() #plt.show() x=common.frame(x,H,N) X=C(x) # find index of maximum in each column X_argmax=np.argmax(X,axis=0) ax=plt.imshow(np.log(X),origin='lower',aspect='auto') ax.axes.scatter(np.arange(X.shape[1]),X_argmax) ax.axes.set_yticks([_ for _ in range(len(p_range))]) ax.axes.set_yticklabels(p_range) plt.show()
# to make the loudest samples equal to 1. The factors are interpolated between # linearly. Because this might cause clipping (imagine a segment of the sound is # quiet, but the maximum and a value close to it are close in time, a descending # ramp to the next factor might push the adjacent value above 1, causing # clipping), you should run the output through a limiter afterwards. from os import environ import numpy as np import common # Load a f32 format file (mono) INFILE = environ["INFILE"] OUTFILE = environ["OUTFILE"] # W is number of samples over which to determine the normalization factor W = float(environ["W"]) x = np.fromfile(INFILE, dtype='float32') X = common.frame(x, W // 2, W) # determine factors factors = np.max(np.abs(X), axis=0) # If maximum was 0, no scaling applied factors[factors == 0] = 1 factors = 1 / factors # times for the factors, so we can interpolate between them # we just put the factor at the middle of the window factor_i = common.hop_indicies(x, W // 2, W) + W // 2 len_y = (np.max(common.hop_indicies(x, W // 2, W)) + W).astype('int') factor_interp = np.interp(np.arange(len_y), factor_i, factors) y = x[:len_y] * factor_interp y.astype("float32").tofile(OUTFILE)
def local_rms(x, H, W):
    """
    Root-mean-square value of each frame of x.

    x is framed with common.frame(x, H, W); the frames are squared, averaged
    along axis 0 and the square root of each average is returned.
    """
    frames = common.frame(x, H, W)
    mean_square = np.mean(frames**2, axis=0)
    return np.sqrt(mean_square)
ge = common.get_env
# Input file; "/tmp/guit.f64" is passed as the second argument
# (presumably the default when FILENAME is unset -- TODO confirm).
filename = ge("FILENAME", "/tmp/guit.f64", None)
# Pitch range analysed, inclusive at both ends.
min_pitch = 36
max_pitch = 96
sr = 16000
x = np.fromfile(filename, dtype='float64')
# One analysis frequency per pitch: 2*pi*440*2^((n-69)/12)/sr, so pitch
# number 69 corresponds to 440 at the rate sr. Windows are Hann windows.
C = cqt.cqt(lambda N: signal.get_window("hann", N),
            2048,
            ws=[
                2 * np.pi * 440 * np.power(2, (n - 69) / 12) / sr
                for n in range(min_pitch, max_pitch + 1)
            ])
# Frames of 2048 samples with a hop of 256 samples.
x = common.frame(x, 256, 2048)
X = C(x)
X = np.abs(X)
# Weight by adding octave equivalence
# Row `row` of W accumulates the rows row, row+12, row+24, ... of X.
W = np.zeros_like(X)
for row in range(W.shape[0]):
    n = row
    cnt = 0
    while n < W.shape[0]:
        W[row, :] += X[n, :]
        n += 12
        cnt += 1
    #if cnt > 0:
    #    W[row,:]/=cnt
# Scale X so that its largest entry is 1.
X /= np.max(X)
# Signal length, window length and hop size.
N = 10000
W = 32
H = 8
# generate a signal
x = np.random.standard_normal(N, )
# extend so the windowing gives a full number of overlaps for the beginning and
# end when analysing at the output rate
# W zeros are added on both sides, followed by 1..H further zeros so that the
# total length is a multiple of H.
x_ext = np.concatenate(
    (np.zeros(W), x, np.zeros(W), np.zeros(H - (2 * W + len(x)) % H)))
print((len(x_ext) - W) / H)
# generate the analysis and synthesis window
w = signal.get_window('blackmanharris', W)
# compute the output scaling
win_div = 1. / common.ola_shorten(np.power(w, 2), H)
# Windowed frames, their real FFT along axis 0, and the inverse transform.
x_ext_fr = common.frame(x_ext, H, W) * w[:, None]
X_ext = np.fft.rfft(x_ext_fr, axis=0)
y_from_X_fr = np.fft.irfft(X_ext, axis=0)
# build a signal that is a mix of the fourier transformed and the raw signal
y_from_X_and_x_fr = np.zeros_like(y_from_X_fr)
# Shuffle the frame indices and split them into two halves: the first half is
# filled from the inverse-transformed frames, the second from the windowed
# input frames.
idcs = np.arange((len(x_ext) - W) // H)
np.random.shuffle(idcs)
idcs_X = idcs[:len(idcs) // 2]
idcs_x = idcs[len(idcs) // 2:]
y_from_X_and_x_fr[:, idcs_X] = y_from_X_fr[:, idcs_X]
y_from_X_and_x_fr[:, idcs_x] = x_ext_fr[:, idcs_x]
# Overlap-add the inverse-transformed frames, multiplied by the window again,
# at hop H.
y_from_X = np.zeros_like(x_ext)
for n, h in enumerate(np.arange(0, len(x_ext) - W, H)):
    y_from_X[h:h + W] += y_from_X_fr[:, n] * w
import cqt
import numpy as np
import matplotlib.pyplot as plt
from scipy import signal
import common

# Input: raw float64 samples at the rate sr.
filename = "/tmp/sines.f64"
sr = 16000

# Frame size and hop used for the analysis.
frame_size = 1024
hop_size = 256

x = np.fromfile(filename, dtype='float64')

# One analysis frequency per pitch number 60..72:
# 2*pi*440*2^((n-69)/12)/sr, so pitch number 69 corresponds to 440.
analysis_ws = [
    2 * np.pi * 440 * np.power(2, (n - 69) / 12) / sr for n in range(60, 73)
]
C = cqt.cqt(lambda N: signal.get_window("hann", N), frame_size, ws=analysis_ws)

# Frame the signal and transform every frame.
x = common.frame(x, hop_size, frame_size)
X = C(x)

# Show the magnitude in decibels (20*log10|X|).
magnitude_db = 20 * np.log10(np.abs(X))
plt.imshow(magnitude_db, origin='lower', aspect='auto')
plt.show()