def lpf(image, sigma, mode=2):
    """Low-pass filter a 2-D array with a Gaussian transfer function.

    Parameters
    ----------
    image : ndarray, shape (Mx, My)
        Input data; a row or column vector (Mx == 1 or My == 1) takes the
        1-D code path.
    sigma : float
        Width forwarded to ``matlab_style_gauss2D``.
    mode : {1, 2}, optional
        1 filters in the (shifted) FFT domain, 2 (default) filters in the
        DCT domain using one quadrant of a double-size Gaussian.

    Returns
    -------
    ndarray
        Real-valued filtered array.

    Raises
    ------
    ValueError
        If ``mode`` is neither 1 nor 2 (previously an UnboundLocalError).
    """
    if mode not in (1, 2):
        raise ValueError("mode must be 1 (FFT) or 2 (DCT), got %r" % (mode,))

    (Mx, My) = image.shape
    is_vector = Mx == 1 or My == 1

    if mode == 1:
        kernel = matlab_style_gauss2D(image.shape, sigma)
        kernel /= numpy.max(kernel)
        if is_vector:
            fft = numpy.fft.fftshift(numpy.fft.fft(image))
            fft *= kernel
            # ifftshift (not fftshift) undoes the centring for odd lengths too.
            result = numpy.real(numpy.fft.ifft(numpy.fft.ifftshift(fft)))
        else:
            fft = numpy.fft.fftshift(numpy.fft.fft2(image))
            fft *= kernel
            result = numpy.real(numpy.fft.ifft2(numpy.fft.ifftshift(fft)))
    else:
        # Build a Gaussian on a grid twice the image size and keep the
        # lower-right quadrant, so the peak sits on the DC coefficient.
        new_dim = 2 * array(image.shape)
        kernel = matlab_style_gauss2D((new_dim[0], new_dim[1]), sigma * 2)
        kernel /= numpy.max(kernel)
        kernel = kernel[Mx:, My:]
        image = image.astype(numpy.double)
        if is_vector:
            dct = fftpack.dct(image, type=1)
            dct *= kernel
            result = numpy.real(fftpack.idct(dct, type=1))
        else:
            dct = fftpack.dct(fftpack.dct(image.T, type=2, norm='ortho').T,
                              type=2, norm='ortho')
            dct *= kernel
            result = numpy.real(
                fftpack.idct(fftpack.idct(dct.T, type=2, norm='ortho').T,
                             type=2, norm='ortho'))
    return result
def doDCT (filename):
    """Return a 30-bin, L2-normalised DCT magnitude descriptor of an image.

    The image is read as grayscale, resized to 200x200, transformed with an
    orthonormal 2-D DCT and read out through ``zigzagRead_half``. The
    absolute coefficients are summed into 30 slots: the first 10 slots are
    ``slotLen`` wide, the next 10 are ``2*slotLen`` wide and the last 10 are
    ``4*slotLen`` wide.

    Parameters
    ----------
    filename : str
        Path handed to ``cv2.imread``.

    Returns
    -------
    ndarray, shape (1, 30)
        Slot sums divided by their Euclidean norm (left as zeros when the
        norm is zero).
    """
    DCTSlot = 30
    img = cv2.imread(filename, 0)
    img = cv2.resize(img, (200, 200))
    imf = np.float32(img)
    dct_ = dct(dct(imf.T, norm='ortho').T, norm='ortho')

    # read as dct, return as a list
    list_ = np.abs(zigzagRead_half(dct_))

    # first 10 slots x, next 10 slots 2x, the third 10 slots 4x
    # Floor division keeps the slot indices integral on Python 2 and 3.
    slotLen = len(list_) // 70 + 1

    dct_result = np.zeros((DCTSlot, 1))
    for i in range(len(list_)):
        pos = i + 1
        if pos < 10 * slotLen:
            slot = pos // slotLen
        elif pos < 30 * slotLen:
            slot = 10 + (pos - 10 * slotLen) // (2 * slotLen)
        else:
            slot = 20 + (pos - 30 * slotLen) // (4 * slotLen)
        dct_result[slot] += list_[i]

    # normalization (skipped for an all-zero descriptor to avoid NaNs)
    base = np.linalg.norm(dct_result)
    if base > 0:
        dct_result /= base
    return np.transpose(dct_result)
def fourier_interp(sig_objs):
    """Interpolate per-q spectra onto a denser grid and write them to disk.

    For every row index ``iw`` the columns 2..8 of each object's
    ``aspec[iw]`` line are summed, the resulting sequence over the objects
    is transformed with an orthonormal DCT-II, zero-padded to 9x its length
    and transformed back with an orthonormal DCT-III. The result is written
    to ``aspecinterp.dat`` twice: once with negated, descending q labels and
    once with ascending q labels, a blank line after every q.

    Parameters
    ----------
    sig_objs : sequence
        Objects exposing ``aspec``, a sequence of whitespace-separated
        numeric strings. Must be non-empty.

    Returns
    -------
    None
    """
    nws = len(sig_objs[0].aspec)
    nqs = len(sig_objs)
    pads = 8 * nqs
    aspec = np.zeros([nws, nqs + pads])
    aux = np.zeros(nqs)

    for iw in range(nws):
        for i, sig in enumerate(sig_objs):
            # list() so the slice below also works on Python 3 map objects.
            tmp = list(map(float, sig.aspec[iw].split()))
            # trace over valence manifold:
            aux[i] = sum(tmp[2:9])
        # now have fourier coefficients; interpolate back on to dense grid
        aux[:] = dct(aux[:], 2, norm='ortho')
        # dct type 3 is the inverse of the orthonormal type 2
        auxd = np.pad(aux, (0, pads), 'constant')
        aspec[iw, :] = dct(auxd[:], 3, norm='ortho')

    # First column of each line, taken from the last object as before.
    labels = [sig_objs[-1].aspec[iw].split()[0] for iw in range(nws)]
    n_dense = nqs + pads
    passes = (
        [(-iq, iq) for iq in reversed(range(n_dense))],
        [(iq, iq) for iq in range(n_dense)],
    )
    # Context manager guarantees the file is closed even if a write fails.
    with open('aspecinterp.dat', 'w') as f:
        for q_pass in passes:
            for q_label, iq in q_pass:
                for iw in range(nws):
                    f.write('%s %s %s\n' % (q_label, labels[iw], aspec[iw][iq]))
                f.write('\n')
def energy_99 ():
    """Find, per image, how many zig-zag DCT coefficients hold 99% of the norm.

    Up to the first 100 ``*.jpg`` files of a hard-coded directory are read
    as grayscale, resized to 200x200 and transformed with an orthonormal
    2-D DCT. The coefficients are flattened by ``zigzagRead`` and the
    smallest prefix length ``j`` whose Euclidean norm exceeds 99% of the
    total norm is recorded.

    Returns
    -------
    list of int
        One prefix length per image for which such a prefix exists.
    """
    # directory for read files.
    directory = "/Users/cyan/Desktop/color_hist_py/cropImage_large/"
    files = list(glob.glob(os.path.join(directory, '*.jpg')))

    result_idx = []
    # Slicing instead of range(100) avoids an IndexError on small folders.
    for i, path in enumerate(files[:100]):
        print(i)
        img = cv2.imread(path, 0)
        img = cv2.resize(img, (200, 200))
        imf = np.float32(img)
        dct_ = dct(dct(imf.T, norm='ortho').T, norm='ortho')

        # read as dct, return as a list
        list_ = zigzagRead(dct_)
        total_energy = np.linalg.norm(list_)
        print("total enery" + str(total_energy))
        print(np.linalg.norm(list_[:1]))

        # Running norm of every prefix in one pass (was a norm per prefix).
        # assumes zigzagRead returns a flat sequence -- TODO confirm
        prefix_norm = np.sqrt(np.cumsum(np.square(np.asarray(list_, dtype=float))))
        above = np.nonzero(prefix_norm > 0.99 * total_energy)[0]
        if above.size:
            j = int(above[0]) + 1
            print("J :" + str(j))
            result_idx.append(j)
    return result_idx
def A_dct2(x, n, omega=None):
    """
    Take the 2-dimensional type II DCT of the flattened (n,n) image
    contained in vector x. Works across columns.

    Parameters
    ----------
    x : ndarray, shape (n*n,) or (n*n, n_col)
        flattened image vector(s); not modified
    n : int
        image column/row size
    omega : ndarray or None, optional
        support of the output (ie, indices at which output vector is
        sampled along axis 0); ``None`` returns every coefficient

    Returns
    -------
    y = dct2(x.reshape(n,n))[omega]
    """
    col_shapes = x.shape[1:]
    # reshape() returns a view/copy instead of rewriting the caller's shape.
    image = x.reshape((n, n) + col_shapes)
    y = fftpack.dct(image, type=2, axis=0, norm='ortho')
    y = fftpack.dct(y, type=2, axis=1, norm='ortho')
    y = np.reshape(y, (n * n,) + col_shapes)
    # An ndarray has no unambiguous truth value, so test against None.
    if omega is not None:
        return np.take(y, omega, axis=0)
    return y
def dct_distance(rep_one, rep_two, norm=True, num_coefficients=3):
    """Mean Euclidean distance between per-band DCT coefficient vectors.

    Each column (band) of both representations is transformed with an
    orthonormal DCT. With ``norm`` true the first coefficient is dropped;
    then at most ``num_coefficients`` coefficients are kept and both vectors
    are trimmed to a common length before measuring their distance.

    Inputs that are not ndarrays are converted through ``to_array()``.
    """
    if not isinstance(rep_one, np.ndarray):
        rep_one = rep_one.to_array()
    if not isinstance(rep_two, np.ndarray):
        rep_two = rep_two.to_array()
    assert (rep_one.shape[1] == rep_two.shape[1])
    num_bands = rep_one.shape[1]

    def _leading(coeffs):
        # Optionally skip the first coefficient, then keep the leading ones.
        if norm:
            coeffs = coeffs[1:]
        return coeffs[0:num_coefficients]

    distance = 0
    for band in range(num_bands):
        try:
            source = dct(rep_one[:, band], norm='ortho')
        except ValueError:
            print(rep_one)
            raise
        source = _leading(source)
        target = _leading(dct(rep_two[:, band], norm='ortho'))
        # Trim the longer vector when either side came up short.
        if len(target) < num_coefficients:
            source = source[:len(target)]
        if len(source) < num_coefficients:
            target = target[:len(source)]
        distance += euclidean(source, target)
    return distance / num_bands
def dctFromPixel(luminance_image, x, y):
    """Return the flattened 2-D DCT of the neighbourhood around (x, y).

    The neighbourhood is whatever ``find_neighbors`` returns for the given
    image and coordinates; the orthonormal DCT is applied along both axes
    and the coefficients are returned as a flat Python list.
    """
    patch = find_neighbors(luminance_image, x, y)
    # Transform along the first axis, then along the second.
    first_axis_pass = dct(patch.T, norm='ortho').T
    coefficients = dct(first_axis_pass, norm='ortho')
    return np.reshape(coefficients, -1).tolist()
def A_dct2(x, n, omega):
    """
    Sample the orthonormal 2-D type II DCT of a flattened (n,n) image.

    Parameters
    ----------
    x : ndarray, shape (n*n,)
        flattened image vector
    n : int
        image column/row size
    omega : ndarray
        flat indices at which the coefficient array is sampled

    Returns
    -------
    y = dct2(x.reshape(n,n))[omega]
    """
    image = x.reshape(n, n)
    coefficients = image
    for axis in (0, 1):
        coefficients = fftpack.dct(coefficients, type=2, axis=axis, norm='ortho')
    return coefficients.flat[omega]
def extract_dct_features(time_windows, class_attr=None, n_comps=48):
    """Build a feature matrix of leading DCT magnitudes per time window.

    For every window the ``x``, ``y`` and ``z`` columns plus their
    ``mag(x, y, z)`` are transformed with ``fftpack.dct``; the absolute
    values of the first ``n_comps`` coefficients of each are concatenated
    into one row.

    Returns the feature matrix alone when ``class_attr`` is None, otherwise
    a ``(matrix, labels)`` pair where each label is the first value of the
    window's ``class_attr`` column.
    """
    want_labels = class_attr is not None
    feature_rows = []
    labels = [] if want_labels else None

    for window in time_windows:
        x = window['x'].values
        y = window['y'].values
        z = window['z'].values
        channels = (x, y, z, mag(x, y, z))
        spectra = [np.abs(fftpack.dct(channel)) for channel in channels]

        row = np.array([])
        for spectrum in spectra:
            row = np.concatenate((row, spectrum[:n_comps]))
        feature_rows.append(row)

        if want_labels:
            labels.append(window[class_attr].iloc[0])

    features = np.array(feature_rows)
    if want_labels:
        return features, labels
    return features
def process_cube(img, weight, quality):
    """Quantise a stack of BGR frames in the DCT domain and reconstruct it.

    The input is copied, every ``img[:, :, :, i]`` slice is converted
    BGR -> LAB, a scaled DCT is applied along axes 0 and 1, each of the
    three channels (axis 2) is divided by its quantisation table, rounded,
    multiplied back, inverse transformed, clipped to [0, 255] and converted
    LAB -> BGR.

    Parameters
    ----------
    img : ndarray, 4-D
        Indexed as ``[:, :, channel, i]`` with three channels.
    weight : array_like
        Its maximum, times ``quality``, selects the quantisation table.
    quality : int
        Number of available quality levels; the selected level is clamped
        to ``[0, quality - 1]``.

    Returns
    -------
    ndarray of uint8, same shape as ``img``.
    """
    # TODO: check to make sure that size of img, and q_tables are consistent
    img = img.copy()

    # np.round returns a float, which is not a valid array index; clamp and
    # convert to a plain int before indexing the tables.
    this_quality = np.round(np.max(weight) * quality)
    this_quality = int(min(max(this_quality, 0), quality - 1))

    for i in range(img.shape[3]):
        img[:, :, :, i] = cv2.cvtColor(img[:, :, :, i], cv2.COLOR_BGR2LAB)
    img = np.float32(img)

    img_dct = dct(dct(img, axis=0) / 4, axis=1) / 4

    Q_luma = luminance_tables[:, :, :, this_quality].astype(np.float32)
    Q_chroma = chrominance_tables[:, :, :, this_quality].astype(np.float32)

    # Channel 0 uses the luminance table, channels 1 and 2 the chrominance one.
    img_dct[:, :, 0, :] /= Q_luma
    img_dct[:, :, 1, :] /= Q_chroma
    img_dct[:, :, 2, :] /= Q_chroma
    img_dct = np.round(img_dct)
    img_dct[:, :, 0, :] *= Q_luma
    img_dct[:, :, 1, :] *= Q_chroma
    img_dct[:, :, 2, :] *= Q_chroma

    img_processed = idct(idct(img_dct, axis=0) / 4, axis=1) / 4
    img_processed = np.clip(img_processed, 0, 255)
    img_processed = np.uint8(img_processed)
    for i in range(img.shape[3]):
        img_processed[:, :, :, i] = cv2.cvtColor(img_processed[:, :, :, i],
                                                 cv2.COLOR_LAB2BGR)
    return img_processed
def fastChebScalar(self, fj, fk):
    """Fast Chebyshev scalar product.

    Uses a type 2 DCT for ``quad == "GL"`` and a type 1 DCT for
    ``quad == "GC"``; for any other value ``fk`` is returned untouched.
    """
    n_points = fj.shape[0]
    if self.quad == "GL":
        return dct(fj, 2, axis=0)*np.pi/(2*n_points)
    if self.quad == "GC":
        return dct(fj, 1, axis=0)*np.pi/(2*(n_points-1))
    return fk
def discrete_cosine(reference, target):
    """Transform two 2-D arrays with ``fftpack.dct`` and average each row.

    Returns a 4-tuple: the reference transform, the target transform, and
    the mean over axis 1 of each transform, in that order.
    """
    reference_transform = fftpack.dct(reference)
    target_transform = fftpack.dct(target)
    curves = [spectrum.mean(axis=1)
              for spectrum in (reference_transform, target_transform)]
    return reference_transform, target_transform, curves[0], curves[1]
def dct2(arr):
    """
    2-D DCT built from two passes of the 1-D ``dct``.

    The input is converted to float64, transformed along axis 0 and then
    along axis 1 (default type and normalisation of ``dct``).

    @params { np.ndarray } arr
    @return { np.ndarray }
    """
    as_double = np.float64(arr)
    first_pass = dct(as_double, axis=0)
    return dct(first_pass, axis=1)
def laplacian_pca_TV(res, x, f0, lam, gam, iter = 10): ''' TV version of Laplacian embedding :param res: resolution of the grid :param x: numpy array of data in rows :param f0: initial embedding matrix :param lam: sparsity parameter :param gam: fidelity parameter :param iter: number of iterations to carry out :return: returns embedding matrix ''' # f0 is an initial projection n = res ** 2 num_data = x.shape[0] D = sparse_discrete_diff(res) M = 1/(lam*laplacian_eigenvalues(res).reshape(n)+gam) f = f0 y = x .dot(f) z = shrink(y .dot(D.T), lam) for i in range(iter): # Update z z_old = z z = shrink(y .dot (D.T), lam) # Update f f_old = f u, s, v = la.svd(x.T .dot (y), full_matrices=False) f = u .dot(v) # Update y y_old = y q = lam * z .dot (D) + gam * x .dot(f) # print('norm of y before is %f' % np.sum(q ** 2)) y = fftpack.dct(q.reshape((num_data, res, res)), norm='ortho') # Images unraveled as rows y = fftpack.dct(np.swapaxes(y,1,2), norm='ortho') # Swap and apply dct on the other side # print('norm of y after is %f' % np.sum(y ** 2)) y = np.apply_along_axis(lambda v: M * v, 1, y.reshape((num_data, n))) y = fftpack.idct(y.reshape((num_data, res, res)), norm='ortho') y = fftpack.idct(np.swapaxes(y,1,2), norm='ortho') y = y.reshape((num_data, n)) zres = np.sqrt(np.sum((z - z_old) ** 2)) znorm = np.sqrt(np.sum((z)**2)) yres = np.sqrt(np.sum((y - y_old) ** 2)) ynorm = np.sqrt(np.sum((y)**2)) fres = np.sqrt(np.sum((f - f_old) ** 2)) value = np.sum(abs(z)) + 0.5*lam*np.sum((z-y .dot(D.T))**2) + 0.5*gam*np.sum((y- x .dot(f) )**2) print('Iter %d Val %f Z norm %f Z res %f Ynorm %f Y res %f F res %f' % (i, value, znorm, zres, ynorm, yres, fres)) return f
def ifct(self, fk, cj):
    """Inverse fast Chebyshev transform.

    ``quad == "GL"`` uses a type 3 DCT, ``quad == "GC"`` a type 1 DCT with
    an alternating correction from the last coefficient. For any other
    ``quad`` the passed-in ``cj`` is returned unchanged.
    """
    quad = self.quad
    if quad != "GL" and quad != "GC":
        return cj

    dct_type = 3 if quad == "GL" else 1
    cj = 0.5*dct(fk, dct_type, axis=0)
    cj += 0.5*fk[0]
    if quad == "GC":
        # Last coefficient contributes with alternating sign.
        cj[::2] += 0.5*fk[-1]
        cj[1::2] -= 0.5*fk[-1]
    return cj
def dct2(y):
    """Orthonormal 2-D DCT of a 2-D array, returned as float.

    Rows are transformed first, the row result is stored as float, and the
    columns of that intermediate are transformed second.
    """
    M, N = y.shape[0], y.shape[1]
    # Row pass over the last axis, stored as float like the column buffer.
    row_pass = dct(y, axis=1, norm='ortho').astype(float).reshape(M, N)
    # Column pass over the first axis.
    return dct(row_pass, axis=0, norm='ortho')
def At_dct2(y, n, omega):
    """
    Scatter ``y`` into an (n,n) zero grid at flat indices ``omega`` and take
    its orthonormal 2-dimensional type III DCT, returned flattened.

    Described by the original author as the adjoint of ``A_dct2``.
    """
    grid = np.zeros((n, n), 'd')
    grid.flat[omega] = y
    transformed = grid
    for axis in (0, 1):
        transformed = fftpack.dct(transformed, type=3, axis=axis, norm='ortho')
    return transformed.flatten()
def multiwavelet_from_rgb(rgb):
    """Concatenate DCT and Daubechies-4 wavelet features of two channels.

    Channels 0 and 1 of ``rgb`` (named ``r`` and ``g`` here) are converted
    to float64. For each, the orthonormal DCT along the last axis is
    flattened and the ``db4`` 2-D wavelet decomposition is passed through
    ``_unpack``. The four vectors are stacked in the order
    DCT(r), DCT(g), wavelet(r), wavelet(g).

    Parameters
    ----------
    rgb : ndarray, indexed as ``[:, :, channel]``

    Returns
    -------
    ndarray, 1-D
    """
    from scipy.fftpack import dct
    from pywt import wavedec2

    # np.float was removed from NumPy (1.24); np.float64 is the same dtype.
    r = rgb[:, :, 0].astype(np.float64)
    g = rgb[:, :, 1].astype(np.float64)

    dctr = dct(r, norm='ortho').ravel()
    dctg = dct(g, norm='ortho').ravel()
    daubr = _unpack(wavedec2(r, 'db4'))
    daubg = _unpack(wavedec2(g, 'db4'))
    return np.hstack([dctr, dctg, daubr, daubg])
def calculate_DCTII_2D(matrix, size=32):
    """
    Calculates the 2D transform of the DCT II algorithm.

    The input is reshaped to a square ``(size, size)`` array and the
    unnormalised ("plain") DCT-II is applied along both axes.
    See: http://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II

    Parameters
    ----------
    matrix : array_like with ``size * size`` elements
    size : int, optional
        Side length of the square; defaults to 32, the previously
        hard-coded value.

    Returns
    -------
    ndarray, shape (size, size)
    """
    a = numpy.reshape(numpy.array(matrix), (size, size))
    return fftpack.dct(fftpack.dct(a.T).T)
def fct(self, fj, cj):
    """Fast Chebyshev transform.

    ``quad == "GL"``: type 2 DCT divided by N, first coefficient halved.
    ``quad == "GC"``: type 1 DCT divided by N-1, first and last halved.
    Any other ``quad`` returns the passed-in ``cj`` unchanged.
    """
    n_points = fj.shape[0]
    quad = self.quad
    if quad == "GL":
        cj = dct(fj, 2, axis=0)
        cj /= n_points
        halved = (0,)
    elif quad == "GC":
        cj = dct(fj, 1, axis=0)/(n_points-1)
        halved = (0, -1)
    else:
        return cj
    for index in halved:
        cj[index] /= 2
    return cj
def fastChebScalar(self, fj, fk):
    """Fast Chebyshev scalar product.

    With ``fast_transform`` set, a DCT (type 2 for "GL", type 1 for "GC")
    produces a new array. Otherwise the product ``V . (fj * weights)`` is
    written into ``fk`` in place, initialising the object first when
    ``points`` is None.
    """
    if not self.fast_transform:
        if self.points is None:
            self.init(fj.shape[0])
        fk[:] = np.dot(self.V, fj*self.weights)
        return fk

    n_points = fj.shape[0]
    if self.quad == "GL":
        fk = dct(fj, 2, axis=0)*np.pi/(2*n_points)
    elif self.quad == "GC":
        fk = dct(fj, 1, axis=0)*np.pi/(2*(n_points-1))
    return fk
def __init__(self, audiofile, mel_bands = 40, weight=2):
    """Compute a 26-value summary from the processed spectrum and its delta.

    After the parent initialiser runs (with ``skip_frames=3``), the power
    spectrum, mel mapping and gradient steps are executed. The first 13
    rows of the orthonormal type 2 DCT (axis 0) of ``self.processed`` and
    of ``self.delta`` are stored as ``self.mfcc`` and ``self.delta``; their
    means over axis 1 replace ``self.processed`` (positions 0-12 and 13-25).
    """
    super(MFCC, self).__init__(audiofile, skip_frames=3)
    self.weight = float(weight)
    self.mel_bands = mel_bands

    self.powerspectrum()
    self.map_to_mel()
    self.calc_gradient()

    n_keep = 13
    self.mfcc = dct(self.processed, type=2, norm="ortho", axis=0)[:n_keep]
    self.delta = dct(self.delta, type=2, norm="ortho", axis=0)[:n_keep]

    summary = np.zeros(2 * n_keep)
    summary[:n_keep] = np.mean(self.mfcc, axis = 1)
    summary[n_keep:] = np.mean(self.delta, axis = 1)
    self.processed = summary
def razafindradina_embed(grayscale_container_path, grayscale_watermark_path, watermarked_image_path, alpha):
    """
    Razafindradina embedding method implementation.

    Both images are opened as 8-bit grayscale. The triangular Schur factor
    of the watermark, scaled by ``alpha``, is added to the orthonormal 2-D
    DCT of the container; the inverse DCT is clamped to [0, 255], converted
    to uint8 and saved to ``watermarked_image_path``.

    Both images must be square and of equal size. Returns None.
    23-July-2015
    """
    container = numpy.asarray(Image.open(grayscale_container_path).convert("L"))
    watermark = numpy.asarray(Image.open(grayscale_watermark_path).convert("L"))

    container_rows, container_cols = container.shape[0], container.shape[1]
    assert (
        (container_rows == container_cols)
        and (container_rows == watermark.shape[0])
        and (container_cols == watermark.shape[1])
    ), "GrayscaleContainer and GrayscaleWatermark sizes do not match or not square"

    # Forward DCT of the container, columns first and rows second.
    coefficients = dct(container, axis=0, norm="ortho")
    coefficients = dct(coefficients, axis=1, norm="ortho")

    # Schur decomposition of the watermark; only the first factor is used.
    triangular, _unitary = schur_decomposition(watermark)

    # Blend the triangular factor into the coefficients.
    coefficients += triangular * alpha

    # Back to the pixel domain, clamped to the 8-bit range.
    pixels = idct(coefficients, axis=0, norm="ortho")
    pixels = idct(pixels, axis=1, norm="ortho")
    pixels = numpy.clip(pixels, 0, 255)

    watermarked_image = Image.fromarray(numpy.uint8(pixels))
    # Write image to file
    watermarked_image.save(watermarked_image_path)
def idct2(b,c,d):
    """Apply ``dct`` to the first ``c`` rows and then the first ``d`` columns.

    The work buffers are fixed at 64x64. They are zero-filled so that rows
    and columns outside ``c``/``d`` are deterministic zeros instead of
    uninitialised memory.

    NOTE(review): despite the name, this calls the forward ``dct`` and not
    an inverse transform -- confirm whether ``idct`` was intended.

    Parameters
    ----------
    b : ndarray
        Input coefficients; rows must be assignable into a length-64 row.
    c : int
        Number of rows to transform.
    d : int
        Number of columns to transform.

    Returns
    -------
    ndarray, shape (64, 64), float
    """
    M = c
    N = d
    a = empty([64,64],float)
    y = empty([64,64],float)
    # empty() leaves arbitrary memory behind; clear it before partial fills.
    a.fill(0.0)
    y.fill(0.0)

    for i in range(M):
        a[i,:] = dct(b[i,:])
    for j in range(N):
        y[:,j] = dct(a[:,j])
    return y
def _frame_hash(self, im, hasher):
    """Feed a frame's size bucket and low-order DCT coefficients to ``hasher``.

    The image is converted to mode 'F', resized via ``_resize_to_width``,
    offset by -128, cropped by ``self._edge_width`` on every side and
    transformed with an orthonormal 2-D DCT. The hasher first receives the
    resized height divided by ``self._height_split`` (formatted with
    ``%d``), then the prepared coefficients of the top-left
    ``_dct_core_width`` square in row-major order.
    """
    im_small = _resize_to_width(im.convert('F'), self._standard_width)
    mat = numpy.asarray(im_small, dtype=numpy.float32) - 128

    margin = self._edge_width
    last_row = mat.shape[0] - margin
    last_col = mat.shape[1] - margin
    mat_core = mat[margin:last_row, margin:last_col]

    row_pass = fftpack.dct(mat_core, norm='ortho')
    mat_dct = fftpack.dct(row_pass.T, norm='ortho').T

    _, height_small = im_small.size
    hasher.update('%d' % (height_small / self._height_split))

    for ii in range(0, self._dct_core_width):
        row = mat_dct[ii]
        for jj in range(0, self._dct_core_width):
            hasher.update(self._prepare_coeff(row[jj]))
def temporal_dc_variation_feature_extraction(frames):
    '''
    computes dt_dc_measure 1

    Estimates block motion between consecutive frames, takes the 2-D DCT of
    motion-compensated block differences, computes one standard deviation
    per frame pair and prints the mean absolute change between consecutive
    standard deviations. Written for Python 2 (``xrange``, print
    statements). There is no return statement; the result is only printed.
    '''
    mbsize = 16  # macroblock side length
    row = frames.shape[0]
    col = frames.shape[1]
    # One 2-component vector per macroblock per frame pair.
    motion_vects = zeros(shape=(2,row*col/mbsize**2,frames.shape[2]-1))
    for x in xrange(0,frames.shape[2]-1):#xrange is inclusive at beginning, exclusive at end, end 1 early since x+1
        imgP = frames[:,:,x+1]
        imgI = frames[:,:,x]
        # NOTE(review): `mblock` is not defined in this function (only
        # `mbsize` is) -- presumably a module-level name; verify.
        motion_vects[:,:,x], temp = motionEstNTSS(imgP,imgI,mblock,7)
    #motion_vects.dump(open("./pythonvects", "wb"))
    dct_motion_comp_diff = zeros(shape=(row,col,frames.shape[2]-1))
    for x in xrange(0,frames.shape[2]-1):
        mbCount = 0
        for i in xrange(0,row-mbsize+1,mbsize):
            for j in xrange(0,col-mbsize+1,mbsize):
                # 2-D DCT of (block of frame x+1) minus (displaced block of
                # frame x). NOTE(review): the slices end at mbsize-1, so
                # they span 15 rows/columns, and .clip(min=0) binds to the
                # second operand only -- confirm both are intended.
                dct_motion_comp_diff[i:i+mbsize-1,j:j+mbsize-1,x] = dct(dct(((frames[i:i+mbsize-1,j:j+mbsize-1,x+1].astype(np.float))- frames[i+motion_vects[0,mbCount,x]:i+mbsize-1+motion_vects[0,mbCount,x], j+motion_vects[1,mbCount,x]:j+mbsize-1+motion_vects[1,mbCount,x],x].astype(np.float).clip(min=0)).astype(np.float),norm="ortho").transpose(),norm="ortho").transpose();
                mbCount = mbCount + 1
    # Debug dump of the coefficient volume; the file object is never
    # explicitly closed.
    dct_motion_comp_diff.dump(open("./pythonvects", "wb"))
    std_dc = zeros(shape=(frames.shape[2]-1))
    for i in xrange(0,frames.shape[2]-1):
        temp = im2colDistinct(dct_motion_comp_diff[:,:,i],(16,16));
        std_dc[i] = np.std(temp)
    dt_dc_temp = zeros(shape=(std_dc.shape[0]-1))#this will be 1 smaller than std_dc
    for i in xrange(0,len(std_dc) - 1):
        # absolute change between consecutive standard deviations
        dt_dc_temp[i] = abs(std_dc[i+1]-std_dc[i])
    print 'dt_dc_temp'
    print dt_dc_temp.shape
    print dt_dc_temp
    dt_dc_measure1 = np.mean(dt_dc_temp)
    print 'dt_dc_measure1'
    print dt_dc_measure1
def transform(data):
    """Apply ``dct`` to every row, then to every column of the row results.

    Returns a list with one transformed column per entry, i.e. entry ``j``
    holds the DCT over the ``j``-th coefficient of every row.
    """
    columns = [[] for _ in range(len(data[0]))]
    for row in data:
        for position, coefficient in enumerate(dct(row)):
            columns[position].append(coefficient)
    return [dct(column) for column in columns]
def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
         nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True,
         winfunc=lambda x:numpy.ones((x,))):
    """Compute MFCC features from an audio signal.

    The filterbank energies from ``fbank`` are log-compressed, transformed
    with an orthonormal type 2 DCT along axis 1, truncated to ``numcep``
    coefficients and passed through ``lifter``.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied.
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    band_energies, energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,preemph,winfunc)
    log_energies = numpy.log(band_energies)
    cepstra = dct(log_energies, type=2, axis=1, norm='ortho')
    feat = lifter(cepstra[:,:numcep], ceplifter)
    if appendEnergy:
        # replace first cepstral coefficient with log of frame energy
        feat[:,0] = numpy.log(energy)
    return feat
def _process(self, data):
    """Yield a slice of DCT coefficients of the log-magnitude of ``data``.

    The DCT runs along axis 1 of ``safe_log(abs(data))``; ``_n_coeffs``
    columns starting at ``_exclude`` are kept, copied and wrapped in an
    ``ArrayWithUnits`` whose first dimension comes from the magnitude array.
    """
    magnitudes = np.abs(data)
    cepstrum = dct(safe_log(magnitudes), axis=1)
    first = self._exclude
    last = self._exclude + self._n_coeffs
    bfcc = cepstrum[:, first:last]
    dimensions = [magnitudes.dimensions[0], IdentityDimension()]
    yield ArrayWithUnits(bfcc.copy(), dimensions)
def mfcc(s, fs):
    """Compute mel-frequency cepstral coefficients frame by frame.

    Frames of 256 samples are taken every 100 samples, Hamming-windowed,
    transformed with an FFT, weighted by the filterbank returned from
    ``get_filterbanks`` and log/DCT-compressed.

    Parameters
    ----------
    s : ndarray, 1-D
        Signal samples.
    fs : number
        Sample rate, forwarded to ``get_filterbanks``.

    Returns
    -------
    ndarray, shape (30, l)
        One column per frame position.

    Notes
    -----
    NOTE(review): the loop stops at ``l - 1``, so the last column stays
    zero, and the original ``np.delete(c, 0, 0)`` discarded its result, so
    the 0th coefficient was never removed. Both behaviours are preserved
    here to keep the output unchanged -- confirm what callers expect.
    """
    # Constants
    N = 256   # frame length
    M = 100   # hop between frames
    P = 30    # number of filterbank channels
    l = int(math.ceil((s.size - N + 1) / M))

    # Allocate c array
    c = np.zeros((P, l))

    # Frame-independent pieces, computed once instead of once per frame.
    w = np.hamming(N)
    m = get_filterbanks(P, N, fs)
    n2 = N // 2  # integer slice bound on both Python 2 and 3

    for x in range(0, l - 1):
        # Frame
        start = x * M
        frame = s[start:start + N]
        # Window + FFT
        frameFFT = np.fft.fft(frame * w)
        # Mel-Frequency Wrapping
        ms = np.dot(m, abs(np.power(frameFFT[0:n2 + 1], 2)))
        # Last step, compute mel-frequency cepstrum coefficients
        c[:, x] = fft.dct(np.log(ms.clip(min=0.00001)))
    return c
def mfsc_to_sp(mfsc, alpha=0.45, N=2048):
    """Convert ``mfsc`` to a spectral envelope.

    An orthonormal DCT of ``mfsc`` gives the mel cepstrum, which
    ``pysptk.conversion.mc2sp`` turns into the envelope using ``alpha``
    and ``N``.
    """
    mel_cepstrum = sfft.dct(mfsc, norm='ortho')
    spectral_envelope = pysptk.conversion.mc2sp(mel_cepstrum, alpha, N)
    return spectral_envelope
def a_dct(l, m):
    """Return the orthonormal DCT-II of ``l`` with all but ``m`` entries zeroed.

    The ``m`` coefficients of largest absolute value are kept; ties keep
    the earlier index.
    """
    coefficients = dct(l, type=2, norm = 'ortho')
    by_magnitude = sorted(range(len(coefficients)),
                          key=lambda index: abs(coefficients[index]),
                          reverse=True)
    discarded = by_magnitude[m:]
    coefficients[discarded] = 0
    return coefficients
def mfcc(sig, sr, nbins=40):
    """Return the DCT of the log of ``pwrspec(sig, sr, nbins)``."""
    # calculate power spectrum of the signal
    power = pwrspec(sig, sr, nbins)
    # DCT of log of power spectrum
    log_power = np.log(power)
    return dct(log_power)
def Atdct(y):
    """Scatter ``y`` into a zero vector and take its orthonormal DCT-II.

    The vector has length ``m`` and ``y`` is placed at positions
    ``pix_idx``; both names come from the enclosing module.
    """
    embedded = np.zeros(m)
    embedded[pix_idx] = y
    return dct(embedded, type=2, norm='ortho')
def dct2(block):
    """Orthonormal 2-D DCT: first along axis 0, then along the last axis."""
    first_axis_pass = dct(block.T, norm='ortho').T
    return dct(first_axis_pass, norm='ortho')
def dct2d(a):
    """Unnormalised 2-D DCT: ``fftpack.dct`` along axis 0, then axis 1."""
    result = a
    for axis in (0, 1):
        result = fftpack.dct(result, axis=axis)
    return result
### setting the range tstart=-2*N; tend=3*N # extended range to see periodicity t=linspace(tstart,tend,500) # continuous range nrange=arange(N) #discrete range nextended=arange(tstart,tend) # extended discrete range to see periodicity dctrange=arange(0+.25,N/2+.25,.5) # shifted range to compare the DCT and DFT ### Analysis # Console output of DCT types 1-4 and DFT # Note: with scaling, I'm looking for factors that conserve # signal energy across domains. print 'type 1: ',dct(sequence,type=1) print 'type 1 (scaled):',dct(sequence,type=1)*2/(1*N) print 'type 2: ',dct(sequence,type=2) print 'type 2 (scaled):',dct(sequence,type=2)*2/(1*N) print 'type 3: ',dct(sequence,type=3) print 'type 3 (scaled):',dct(sequence,type=3)*2/(1*N) print 'type 4: ',array(map(abs,DCTx(nrange))) print 'dft: ',array(map(abs,(DFTx(nrange)))) ### Plotting #close('all') myfig=figure(1,figsize=(16,8), dpi=120) clf() subplots_adjust(wspace=.15,hspace=0.03)
def dct_2d(image):
    """Orthonormal 2-D DCT of ``image`` (axis 0 pass, then last-axis pass)."""
    transposed_pass = fftpack.dct(image.T, norm='ortho')
    return fftpack.dct(transposed_pass.T, norm='ortho')
def dct2d(x, inverse=False):
    """Orthonormal 2-D DCT of ``x``.

    Uses DCT type 2 by default and type 3 when ``inverse`` is true. The
    transform is applied along the last axis, the array is transposed, and
    the same transform and transpose are applied once more.
    """
    dct_type = 3 if inverse else 2
    result = x
    for _ in range(2):
        result = dct(result, type=dct_type, norm='ortho').transpose()
    return result
def dct2d(data):
    """Orthonormal 2-D DCT of ``data`` (last-axis pass, then axis 0 pass)."""
    last_axis_pass = fftpack.dct(data, norm='ortho')
    both_axes = fftpack.dct(last_axis_pass.T, norm='ortho')
    return both_axes.T
def DCT_all_blocks(img,w):
    """Orthonormal DCT II of every window that ``view_as_windows(img, w)`` yields.

    The windows are stacked into a ``(-1, w, w)`` array and transformed
    along axes 1 and 2; the result has the same shape.
    """
    windows = view_as_windows(img, w).reshape(-1, w, w)
    coefficients = windows
    for axis in (1, 2):
        coefficients = dct(coefficients, axis=axis, norm='ortho')
    return coefficients
def dct2(x):
    """Orthonormal 2-D DCT of ``x`` (last-axis pass, then axis 0 pass)."""
    last_axis_pass = dct(x, norm='ortho')
    return dct(last_axis_pass.T, norm='ortho').T
def get_2D_dct(img):
    """ Get the orthonormal 2D Cosine Transform of Image """
    axis0_pass = fftpack.dct(img.T, norm='ortho').T
    coefficients = fftpack.dct(axis0_pass, norm='ortho')
    return coefficients
pad_signal = np.concatenate((signal, zeros)) #split into frames indices = np.tile(np.arange(0, frame_length), (frames_num, 1)) + np.tile( np.arange(0, frames_num * frame_step, frame_step), (frame_length, 1)).T indices = np.array(indices, dtype=np.int32) frames = pad_signal[indices] frames *= np.hamming(frame_length) #FFT and abs complex_spectrum = np.fft.rfft(frames, NFFT).T absolute_complex_spectrum = np.abs(complex_spectrum) #create triangular filter and plot it fb = get_filter_banks(filters_num, NFFT, fs, low_freq, high_freq) plt.figure(3) plt.title('fb') for i in range(filters_num): plt.plot(fb[i]) #signal inner product with triangular filter feat = np.dot(np.transpose(absolute_complex_spectrum), np.transpose(fb)) feat = np.where(feat == 0, np.finfo(float).eps, feat) feat = np.log(feat) #Apply DCT feat = dct(feat, norm='ortho')[:, :filters_num] print(feat.shape) final = 'train_%d.npy' % (k + 30) np.save(final, feat)
# Demo fragment exercising scipy.fftpack: FFT, inverse FFT, frequency bins
# of a noisy sine, and DCT/IDCT of a short vector.
# NOTE(review): `np` and `fft` are imported outside this fragment.

# Forward FFT of a 4-sample vector.
x = np.array([1.0,2.0,-1.0,1.5])
y = fft(x)
print(y)

# Round trip: inverse FFT of the forward FFT.
from scipy.fftpack import ifft
x = np.array([1.0,2.0,-1.0,1.5])
y = fft(x)
yinv = ifft(y)
print(yinv)

# Noisy sine sampled every 0.02 over [0, 20) with period 5.
from scipy import fftpack
time_step = 0.02
period = 5.
time_vec = np.arange(0,20,time_step)
sig = np.sin(2*np.pi/period*time_vec) + 0.5*np.random.randn(time_vec.size)
print(sig.size)
# Frequency of every FFT bin for this sample spacing, then the FFT itself.
sample_freq = fftpack.fftfreq(sig.size,d = time_step)
sig_fft = fftpack.fft(sig)
print(sig_fft)

# DCT and inverse DCT (default type and normalisation) of the same vector.
from scipy.fftpack import dct
from scipy.fftpack import idct
mydict =dct(np.array([4.,3.,5.,10.,5.,3.]))
print(mydict)
d = idct(np.array([4.,3.,5.,10.,5.,3.0]))
print(d)
def dct2(a):
    """Orthonormal 2-D DCT of ``a`` (axis 0 pass, then last-axis pass)."""
    axis0_pass = dct(a.T, norm='ortho').T
    both_axes = dct(axis0_pass, norm='ortho')
    return both_axes
def transform(self, x):
    """Return ``dct(x)`` using the default arguments of ``dct``."""
    coefficients = dct(x)
    return coefficients
for i in range(total_frames): for j in range(framesize): if ((i * overlap + j) < num_samples): frames[i][j] = data[i * overlap + j] else: frames[i][j] = 0 saw_filter_a = signal.waveforms.sawtooth(range(len(frames)), width=[0.5]) saw_filter_b = signal.waveforms.sawtooth(range(len(frames)), width=[0.6]) for i in range(total_frames): dft_matrix[i] = np.fft.fft(frames[i]) dft_matrix[i] = signal.filtfilt(saw_filter_a, saw_filter_b, dft_matrix[i]) dft_matrix[i] = dct(dft_matrix[i]) abs_dft_matrix[i] = abs(dft_matrix[i]) * abs(dft_matrix[i]) / max( abs(dft_matrix[i])) abs_dft_matrix[i] = np.log10(abs_dft_matrix[i]) f = open(file_name[:-4] + ".logFBE", "w+") f.writelines(str(abs_dft_matrix)) t = range(len(abs_dft_matrix)) plt.plot(t, abs_dft_matrix) plt.ylabel("Frequency") plt.xlabel("Frame number") except Exception as e: print("Exception thrown as: " + str(e)) pass
def create_mfcc(file_name, start_point):
    """Compute mean-normalised, liftered MFCCs for a 3-second excerpt.

    The wav file is read from a fixed directory and cut to three seconds
    starting at ``start_point``. The excerpt is pre-emphasised, framed,
    Hamming-windowed and turned into a power spectrum. A triangular mel
    filterbank gives dB energies, whose orthonormal DCT-II (coefficients 1
    to ``num_ceps``) is liftered and mean-normalised per coefficient.

    Relies on module-level settings: ``pre_emphasis``, ``frame_size``,
    ``frame_stride``, ``NFFT``, ``nfilt``, ``num_ceps`` and ``cep_lifter``.

    Returns an array with one row per frame.
    """
    file_path = 'F:/Projects/speech/speeches mail/' + str(file_name)
    sample_rate, signal = scipy.io.wavfile.read(file_path)
    signal = signal[start_point:int(start_point + 3 * sample_rate)]  # framing to 3 seconds

    # Pre-emphasis: first sample unchanged, then first-order difference.
    emphasized_signal = numpy.append(signal[0], signal[1:] - pre_emphasis * signal[:-1])
    signal_length = len(emphasized_signal)

    # Convert from seconds to samples
    frame_length = int(round(frame_size * sample_rate))
    frame_step = int(round(frame_stride * sample_rate))

    # Make sure that we have at least 1 frame
    num_frames = int(
        numpy.ceil(
            float(numpy.abs(signal_length - frame_length)) / frame_step))

    # Pad so every frame has the same number of samples without truncating
    # any sample of the original signal.
    pad_signal_length = num_frames * frame_step + frame_length
    padding = numpy.zeros((pad_signal_length - signal_length))
    pad_signal = numpy.append(emphasized_signal, padding)

    # Row r of `indices` lists the sample positions of frame r.
    within_frame = numpy.tile(numpy.arange(0, frame_length), (num_frames, 1))
    frame_starts = numpy.tile(
        numpy.arange(0, num_frames * frame_step, frame_step),
        (frame_length, 1)).T
    indices = within_frame + frame_starts
    frames = pad_signal[indices.astype(numpy.int32, copy=False)]
    frames *= numpy.hamming(frame_length)  # hamming window

    mag_frames = numpy.absolute(numpy.fft.rfft(frames, NFFT))  # Magnitude of the FFT
    pow_frames = ((1.0 / NFFT) * ((mag_frames)**2))  # Power Spectrum

    # Filter edges equally spaced on the mel scale, mapped to FFT bins.
    low_freq_mel = 0
    high_freq_mel = (2595 * numpy.log10(1 + (sample_rate / 2) / 700))  # Convert Hz to Mel
    mel_points = numpy.linspace(low_freq_mel, high_freq_mel, nfilt + 2)
    hz_points = (700 * (10**(mel_points / 2595) - 1))  # Convert Mel to Hz
    edges = numpy.floor((NFFT + 1) * hz_points / sample_rate)

    fbank = numpy.zeros((nfilt, int(numpy.floor(NFFT / 2 + 1))))
    for m in range(1, nfilt + 1):
        left = int(edges[m - 1])
        center = int(edges[m])
        right = int(edges[m + 1])
        # Rising edge of the triangle.
        for k in range(left, center):
            fbank[m - 1, k] = (k - edges[m - 1]) / (edges[m] - edges[m - 1])
        # Falling edge of the triangle.
        for k in range(center, right):
            fbank[m - 1, k] = (edges[m + 1] - k) / (edges[m + 1] - edges[m])

    filter_banks = numpy.dot(pow_frames, fbank.T)
    filter_banks = numpy.where(filter_banks == 0,
                               numpy.finfo(float).eps,
                               filter_banks)  # Numerical Stability
    filter_banks = 20 * numpy.log10(filter_banks)  # dB

    cepstra = dct(filter_banks, type=2, axis=1, norm='ortho')
    mfcc = cepstra[:, 1:(num_ceps + 1)]  # Keep 2-13
    (nframes, ncoeff) = mfcc.shape

    # Sinusoidal lifter over the coefficient index.
    n = numpy.arange(ncoeff)
    lift = 1 + (cep_lifter / 2) * numpy.sin(numpy.pi * n / cep_lifter)
    mfcc *= lift

    # Mean normalisation per coefficient.
    mfcc -= (numpy.mean(mfcc, axis=0) + 1e-8)
    return mfcc
def tl_nmf(Y, K, Phi=None, W=None, H=None, regul=None, max_iter=300,
           n_iter_tl=5, tol=1e-4, verbose=False, rng=None):
    '''Runs Transform learning NMF

    Alternates an NMF update of (W, H) on the current spectrogram
    ``(Phi Y) ** 2`` with a transform-learning update of ``Phi``, and stops
    when the relative change of the objective falls below ``tol`` or after
    ``max_iter`` iterations.

    Parameters
    ----------
    Y : array, shape (M, N)
        Frames matrix

    K : int
        Rank of the learned feature matrices.

    Phi : array, shape (M, M) | 'random' | 'dct' | None, optional
        Initial Transform. Should be orthogonal. If 'random', start from a
        random orthogonal matrix. If 'dct', start from the DCT coefficients.
        Random by default

    W : array, shape (M, K) | None, optional
        Initial dictionary.

    H : array, shape (K, N) | None, optional
        Initial activations.

    regul : float | None, optional
        Level of regularization. By default, ``1e6 * K / M`` is used.

    max_iter : int, optional
        Maximal number of iterations for the algorithm

    n_iter_tl : int, optional
        Number of iteration of Transform learning between NMF steps

    tol : float, optional
        tolerance for the stopping criterion. Iterations stop when two
        consecutive iterations of the algorithm have a relative objective
        change lower than tol.

    verbose : boolean, optional
        Whether or not to print information about the current state

    rng : RandomState, optional
        random seed of the algorithm

    Returns
    -------
    Phi : array, shape (M, M)
        The estimated transform matrix

    W : array, shape (M, K)
        The estimated dictionary

    H : array, shape (K, N)
        The estimated activations

    Phi_init : array, shape (M, M)
        Initial Phi

    infos_list : dict
        Contains various metrics monitoring convergence. Same as printed by
        Verbose.
    '''
    # Fixed here, so the 'smooth' branch in the loop below is never taken.
    regul_type = 'sparse'
    M, N = Y.shape

    rng = check_random_state(rng)
    # Initialization
    if regul is None:
        regul = 1e6 * float(K) / M

    # NOTE(review): a string other than 'random' or 'dct' leaves Phi as a
    # string and np.dot below will fail -- confirm callers never pass one.
    if type(Phi) is not np.ndarray:
        if Phi is None:
            Phi = 'random'
        if Phi == 'random':
            Phi = unitary_projection(rng.randn(M, M))
        elif Phi == 'dct':
            Phi = fftpack.dct(np.eye(M), 3, norm='ortho')
    # The random draws happen in the order Phi, W, H.
    if W is None:
        W = np.abs(rng.randn(M, K)) + 1.
        W = W / np.sum(W, axis=0)
    if H is None:
        H = np.abs(rng.randn(K, N)) + 1.

    X = np.dot(Phi, Y)
    V = X ** 2  # Initial spectrogram
    V_hat = np.dot(W, H)  # Initial factorization
    obj = is_div(V, V_hat) + regul * penalty(H, regul_type)  # Objective
    Phi_init = Phi.copy()
    # Monitoring
    obj_list = []
    eps_list = []
    tl_obj_list = []
    nmf_obj_list = []
    d_phi_list = []
    d_phi_i_list = []
    # Verbose
    if verbose:
        print('Running TL-NMF with %s regularization on a %d x %d '
              'problem with K = %d' % (regul_type, M, N, K))
        print(' | '.join([name.center(8) for name in
                         ["iter", "obj", "eps", "NMF", "TL", "d_phi",
                          "d_phi_i"]]))
    for n in range(max_iter):
        # NMF
        if regul_type == 'smooth':
            W, H = update_nmf_smooth(V, W, H, V_hat, regul)
        else:
            W, H = update_nmf_sparse(V, W, H, V_hat, regul)
        # Transform Learning
        V_hat = np.dot(W, H)
        # Objective after the NMF step, before the transform step.
        obj1 = is_div(V, V_hat) + regul * penalty(H, regul_type)
        Phi_old = Phi.copy()
        Phi, X = fast_transform_learning(Phi, X, V_hat, n_iter_tl)
        V = X ** 2
        # Monitoring
        old_obj = obj.copy()
        obj = is_div(V, V_hat) + regul * penalty(H, regul_type)
        # eps: relative decrease over the whole iteration;
        # eps1 / eps2: absolute decrease from the NMF / transform step.
        eps = (old_obj - obj) / (np.abs(obj) + np.abs(old_obj))
        eps1 = old_obj - obj1
        eps2 = obj1 - obj
        delta_phi = np.mean(np.abs(Phi - Phi_old))
        delta_phi_init = np.mean(np.abs(Phi - Phi_init))
        obj_list.append(obj)
        eps_list.append(eps)
        tl_obj_list.append(eps2)
        nmf_obj_list.append(eps1)
        d_phi_list.append(delta_phi)
        d_phi_i_list.append(delta_phi_init)
        # Termination
        if np.abs(eps) < tol:
            break
        if verbose:
            print(' | '.join([("%d" % (n+1)).rjust(8),
                              ("%.2e" % obj).rjust(8),
                              ("%.2e" % eps).rjust(8),
                              ("%.2e" % eps1).rjust(8),
                              ("%.2e" % eps2).rjust(8),
                              ("%.2e" % delta_phi).rjust(8),
                              ("%.2e" % delta_phi_init).rjust(8)]))
    infos = dict(obj_list=obj_list, eps_list=eps_list, tl_obj_list=tl_obj_list,
                 nmf_obj_list=nmf_obj_list, d_phi_list=d_phi_list,
                 d_phi_i_list=d_phi_i_list)
    return Phi, W, H, Phi_init, infos
def Adct(x):
    """Return the orthonormal type-3 DCT of ``x`` indexed by ``pix_idx``.

    ``pix_idx`` is a module-level name defined outside this function.
    """
    return dct(x, type=3, norm='ortho')[pix_idx]
window = tk.Tk(className="bla") original = Image.fromarray(np.round(datal[:, :, ::-1] * 255).astype(np.uint8)) canvas = tk.Canvas(window, width=original.size[0], height=original.size[1]) canvas.pack() image_tk = ImageTk.PhotoImage(original) canvas.create_image(original.size[0] // 2, original.size[1] // 2, image=image_tk) db = datal[:, :, 0] dg = datal[:, :, 1] dr = datal[:, :, 2] dcb = fftpack.dct(fftpack.dct(db.T / w).T / h) dcg = fftpack.dct(fftpack.dct(dg.T / w).T / h) dcr = fftpack.dct(fftpack.dct(dr.T / w).T / h) sb = np.sign(dcb[dh:, dw:]) sg = np.sign(dcb[dh:, dw:]) sr = np.sign(dcb[dh:, dw:]) sb[sb == 0] = 1 sg[sg == 0] = 1 sr[sr == 0] = 1 allh = np.empty(( 3, h - dh, w - dw,
def DCT2(mat):
    """Return the quantised two-pass orthonormal DCT of ``mat``.

    The coefficients are divided element-wise by the module-level ``q_table``
    and rounded to the nearest integer value.  True division is used: floor
    division would truncate before ``np.round`` runs, making the rounding a
    no-op and biasing every coefficient downwards.

    The DCT is applied along the last axis, the result is transposed, and the
    DCT is applied along the last axis again, so the coefficients come back in
    transposed orientation relative to ``mat``.  That orientation is kept
    as-is.
    NOTE(review): confirm that ``q_table`` and any matching inverse expect this
    transposed layout.
    """
    coefficients = dct(dct(mat, norm='ortho').T, norm='ortho')
    return np.round(coefficients / q_table)
# Normalised reconstruction error of the direct recovery, in dB.
mse_bo = 10 * np.log10((lp.norm(x - x1)**2) / lp.norm(x)**2)
print('BSBL-BO exit on %d loop' % clf.count)
plt.figure()
plt.plot(x, linewidth=3)
plt.plot(x1, 'r-')
plt.title('MSE of BO (directly) is ' + str(mse_bo) + 'dB')
# Labels are given as a list: a set has no defined order, so the two labels
# could be attached to the wrong curves.
plt.legend(['Original', 'Recovered'])

#=========================== Second Method ==============================
# First recover the signal's coefficients in the DCT domain;
# Then recover the signal using the DCT coefficients and the DCT basis
#=========================================================================
# Each row of A is the orthonormal DCT of the corresponding row of Phi.
A = np.zeros([M, N], dtype='float')
for k in range(M):
    A[k, :] = sf.dct(Phi[k, :].astype('float'), norm='ortho')

# clf = bsbl.bo(verbose=1, learn_type=1, learn_lambda=2, prune_gamma=-1, epsilon=1e-8, max_iters=16)
rev_dct_coeff = clf.fit_transform(A, y, blk_start_loc=groupStartLoc)
# IDCT only accepts a 'row' vector!
x2 = sf.idct(rev_dct_coeff, norm='ortho')
# Normalised reconstruction error of the DCT-domain recovery, in dB.
mse_bo_dct = 10 * np.log10((lp.norm(x - x2)**2) / lp.norm(x)**2)
print('BSBL-BO exit on %d loop' % clf.count)
def extract_mfcc(self, draw=False):
    """Compute mean-normalised MFCCs and log mel filter-bank energies.

    Reads ``self.y`` and ``self.sampling_freq`` after calling
    ``self.pre_emphasis()``.  The signal is cut into 25 ms frames with a
    10 ms stride, Hamming-windowed, transformed with a 512-point FFT and
    passed through 40 triangular mel filters.

    Parameters
    ----------
    draw : bool, optional
        Currently unused; it was only forwarded to the disabled endpoint
        detection step.

    Returns
    -------
    mfcc : ndarray
        Liftered cepstral coefficients with the per-coefficient mean removed,
        one row per frame.
    filter_banks : ndarray
        Filter-bank energies in dB, one row per frame.

    ``(False, False)`` is returned when the signal does not yield at least
    one frame.
    """
    # Endpoint detection (disabled)
    # begin, end = self.endpoint_detection(draw=draw)
    # Pre-emphasis
    self.pre_emphasis()
    # y = self.y[begin:end]
    y = self.y

    # Framing
    sig_len = len(y)
    frame_size, frame_stride = 0.025, 0.01
    frame_len = int(round(frame_size * self.sampling_freq))
    frame_step = int(round(frame_stride * self.sampling_freq))
    num_frames = int(math.ceil(float(sig_len - frame_len) / frame_step))
    # A signal shorter than one frame gives a negative count; treat it like
    # the zero-frame case instead of failing further down.
    if num_frames <= 0:
        return False, False
    pad_sig_len = num_frames * frame_step + frame_len
    pad_sig = np.append(y, np.zeros(pad_sig_len - sig_len))
    # Row i holds the sample indices of frame i.
    frame_starts = np.arange(0, num_frames * frame_step, frame_step)
    indices = frame_starts[:, np.newaxis] + np.arange(frame_len)[np.newaxis, :]
    frames = pad_sig[indices]

    # Windowing
    frames *= np.hamming(frame_len)

    # Fourier transform and power spectrum
    NFFT = 512
    mag_frames = np.absolute(np.fft.rfft(frames, NFFT))
    pow_frames = (1.0 / NFFT) * (mag_frames ** 2)

    # Conversion to the mel scale
    low_freq_mel = 0
    nfilt = 40
    high_freq_mel = 2595 * np.log10(1 + (self.sampling_freq / 2.0) / 700.0)
    mel_points = np.linspace(low_freq_mel, high_freq_mel, nfilt + 2)
    hz_points = 700 * (10 ** (mel_points / 2595) - 1)
    bins = np.floor((NFFT + 1) * hz_points / self.sampling_freq)

    fbank = np.zeros((nfilt, NFFT // 2 + 1))
    for m in range(1, nfilt + 1):
        f_m_minus = int(bins[m - 1])  # left
        f_m = int(bins[m])  # center
        f_m_plus = int(bins[m + 1])  # right
        for k in range(f_m_minus, f_m):
            fbank[m - 1, k] = (k - bins[m - 1]) / (bins[m] - bins[m - 1])
        for k in range(f_m, f_m_plus):
            fbank[m - 1, k] = (bins[m + 1] - k) / (bins[m + 1] - bins[m])
    filter_banks = np.dot(pow_frames, fbank.T)
    # Numerical stability: avoid log10(0)
    filter_banks = np.where(filter_banks == 0, np.finfo(float).eps, filter_banks)
    filter_banks = 20 * np.log10(filter_banks)  # dB

    # The DCT yields nfilt coefficients per frame, so this slice is clipped
    # to the nfilt - 1 coefficients that follow the zeroth one.
    num_ceps = 98
    cep_lifter = 22
    mfcc = dct(filter_banks, type=2, axis=1, norm='ortho')[:, 1:(num_ceps + 1)]
    (nframes, ncoeff) = mfcc.shape
    n = np.arange(ncoeff)
    lift = 1 + (cep_lifter / 2.0) * np.sin(np.pi * n / cep_lifter)
    mfcc *= lift
    mfcc -= (np.mean(mfcc, axis=0) + 1e-8)
    return mfcc, filter_banks
def mfcc_calc(file_address,
              frame_size=frame_size,
              frame_stride=frame_stride,
              nfilt=nfilt,
              frame_limit=frame_limit):
    """Read a WAV file and return its MFCCs as a single flattened row.

    The signal is framed, Hamming-windowed, converted to a 512-point power
    spectrum and passed through ``nfilt`` triangular mel filters.  Cepstral
    coefficients 1..12 of the DCT of the dB filter-bank energies are
    liftered and mean-normalised, then written into a zero-initialised
    ``(frame_limit, 12)`` array (extra frames are dropped, missing frames
    stay zero).

    Parameters
    ----------
    file_address : path of the WAV file passed to ``scipy.io.wavfile.read``.
    frame_size : frame length in seconds.
    frame_stride : step between frame starts in seconds.
    nfilt : number of mel filters.
    frame_limit : number of frames kept in the output.

    Returns
    -------
    ndarray of shape ``(1, 12 * frame_limit)``: the transposed result
    array flattened into one row.
    """
    # sample_rate: samples per second; signal: the audio data
    sample_rate, signal = scipy.io.wavfile.read(file_address)

    # --- Framing ---------------------------------------------------------
    signal_length = len(signal)
    frame_length = int(round(frame_size * sample_rate))
    frame_step = int(round(frame_stride * sample_rate))
    num_steps = 1
    if signal_length > frame_length:
        num_steps = int(
            numpy.ceil(float(signal_length - frame_length) / frame_step))
    num_frames = num_steps + 1

    # Zero-pad the tail so the last frame is complete.
    pad_signal_length = num_steps * frame_step + frame_length
    pad_signal = numpy.append(
        signal, numpy.zeros((pad_signal_length - signal_length)))

    # Row i lists the sample positions belonging to frame i.
    within_frame = numpy.tile(numpy.arange(0, frame_length), (num_frames, 1))
    frame_starts = numpy.tile(
        numpy.arange(0, num_frames * frame_step, frame_step),
        (frame_length, 1)).T
    indices = within_frame + frame_starts
    frames = pad_signal[indices.astype(numpy.int32, copy=False)]

    # Hamming window ahead of the FFT
    frames *= numpy.hamming(frame_length)

    # --- Power spectrum --------------------------------------------------
    NFFT = 512
    mag_frames = numpy.absolute(numpy.fft.rfft(frames, NFFT))
    pow_frames = ((1.0 / NFFT) * ((mag_frames)**2))

    # --- Triangular mel filter bank --------------------------------------
    low_freq_mel = 0
    high_freq_mel = (2595 * numpy.log10(1 + (sample_rate / 2) / 700))  # Hz -> mel
    # nfilt + 2 points equally spaced on the mel scale (both ends included)
    mel_points = numpy.linspace(low_freq_mel, high_freq_mel, nfilt + 2)
    hz_points = (700 * (10**(mel_points / 2595) - 1))  # mel -> Hz
    bin_edges = numpy.floor((NFFT + 1) * hz_points / sample_rate)

    fbank = numpy.zeros((nfilt, int(numpy.floor(NFFT / 2 + 1))))
    for m in range(1, nfilt + 1):
        left = int(bin_edges[m - 1])
        center = int(bin_edges[m])
        right = int(bin_edges[m + 1])
        # rising slope
        for k in range(left, center):
            fbank[m - 1, k] = (k - bin_edges[m - 1]) / (bin_edges[m] - bin_edges[m - 1])
        # falling slope
        for k in range(center, right):
            fbank[m - 1, k] = (bin_edges[m + 1] - k) / (bin_edges[m + 1] - bin_edges[m])

    filter_banks = numpy.dot(pow_frames, fbank.T)
    # Replace exact zeros so the logarithm stays finite.
    filter_banks = numpy.where(filter_banks == 0,
                               numpy.finfo(float).eps, filter_banks)
    filter_banks = 20 * numpy.log10(filter_banks)  # dB

    # --- Cepstral coefficients -------------------------------------------
    num_ceps = 12
    # keep coefficients 1..num_ceps, dropping the zeroth
    cepstra = dct(filter_banks, type=2, axis=1,
                  norm='ortho')[:, 1:(num_ceps + 1)]
    cep_lifter = 23
    ncoeff = cepstra.shape[1]
    n = numpy.arange(ncoeff)
    lift = 1 + (cep_lifter / 2) * numpy.sin(numpy.pi * n / cep_lifter)
    cepstra *= lift
    # mean normalization
    cepstra -= (numpy.mean(cepstra, axis=0))

    # --- Fixed-size output -----------------------------------------------
    mfcc_result = numpy.zeros((frame_limit, num_ceps))
    kept = min(len(cepstra), frame_limit)
    mfcc_result[:kept, :] = cepstra[:kept, :]
    return mfcc_result.T.reshape(1, -1)
def mfcc_fft(complexSpectrum):
    """Compute, print and return the DCT of the log mel-filtered power spectrum.

    The power spectrum is ``|complexSpectrum|**2 / nFFt``; it is multiplied
    by ``melFilterBank(nFFt)``, the natural logarithm is taken and a type-2
    DCT is applied.  ``nFFt`` and ``melFilterBank`` are defined outside this
    function.

    The result is printed, as before, and is now also returned so callers
    can use it.
    """
    powerSpectrum = (abs(complexSpectrum) ** 2) / nFFt
    filteredSpectrum = numpy.dot(powerSpectrum, melFilterBank(nFFt))
    logSpectrum = numpy.log(filteredSpectrum)
    dctSpectrum = dct(logSpectrum, type=2)  # MFCC
    print(dctSpectrum)
    return dctSpectrum
def mfcc(audio, fs=16000, window_dt=0.025, dt=0.01, n_cepstra=13,
         n_filters=26, n_fft=512, minfreq=0, maxfreq=None, preemph=0.97,
         lift=22, energy=True, n_derivatives=0, deriv_spread=2):
    """Compute MFCC features from an audio signal.

    Parameters
    ----------
    audio : array_like
        The audio signal from which to compute features.
    fs : float, optional
        Sample rate of the signal. Default: 16000
    window_dt : float, optional
        Length of the analysis window in seconds. Default: 0.025
    dt : float, optional
        Step between successive windows in seconds. Default: 0.01
    n_cepstra : int, optional
        Number of cepstral coefficients kept. Default: 13
    n_filters : int, optional
        Number of filters in the filterbank. Default: 26
    n_fft : int, optional
        FFT size. Default: 512
    minfreq : int, optional
        Lowest band edge of the mel filters, in Hz. Default: 0
    maxfreq : int, optional
        Highest band edge of the mel filters, in Hz. Default: None
        (passed through to ``fbank`` unchanged).
    preemph : float, optional
        Pre-emphasis coefficient passed to ``fbank``. Default: 0.97
    lift : float, optional
        Lifter setting passed to ``lifter`` for the final cepstral
        coefficients. Default: 22
    energy : bool, optional
        If true, the zeroth cepstral coefficient is replaced with the log
        of the frame energy returned by ``fbank``. Default: True
    n_derivatives : int, optional
        Number of successive derivatives appended to the feature vector.
        Default: 0
    deriv_spread : int, optional
        Spread passed to ``derivative`` for each derivative. Default: 2

    Returns
    -------
    A 2-D numpy array holding one feature vector per row: the cepstral
    features first, followed by one block of columns per derivative order.
    """
    feat, energy_ = fbank(audio, fs, window_dt, dt, n_filters, n_fft,
                          minfreq, maxfreq, preemph)

    # log filterbank energies -> DCT -> keep the leading coefficients
    cepstra = dct(np.log(feat), type=2, axis=1, norm='ortho')
    feat = lifter(cepstra[:, :n_cepstra], lift)

    if energy:
        # replace first cepstral coefficient with log of frame energy
        feat[:, 0] = np.log(energy_)

    # Each derivative is taken of the previous block in the chain.
    blocks = [feat]
    for _ in range(n_derivatives):
        blocks.append(derivative(blocks[-1], deriv_spread))
    return np.hstack(blocks)
def compute(self):
    """Run the cosine transforms and hand the results to ``self.complete``.

    Reads ``self.extrapolation`` (x, y), ``self.data.x`` and
    ``self.background``.  Three curves are produced: ``gamma1``, the DCT of
    ``(I - background) * q**2`` normalised by its maximum; ``gamma3``, the
    cumulative trapezoid integral of ``gamma1`` divided by x for x <= 200;
    and the IDF, the DCT of ``-q**4 * (I - background)`` normalised by the
    same maximum.  They are wrapped in ``Data1D`` objects and passed to
    ``self.complete``.  On any exception inside the transform the error is
    logged and ``self.complete(transforms=None)`` is called.  The method
    returns early without completing whenever cancellation or quit is
    detected.
    """
    qs = self.extrapolation.x
    iqs = self.extrapolation.y
    q = self.data.x
    background = self.background

    n_points = len(qs)
    xs = np.pi*np.arange(n_points, dtype=np.float32)/(q[1]-q[0])/n_points

    self.ready(delay=0.0)
    self.update(msg="Fourier transform in progress.")
    self.ready(delay=0.0)

    if self.check_if_cancelled():
        return
    try:
        # ----- 1D Correlation Function -----
        gamma1 = dct((iqs-background)*qs**2)
        Q = gamma1.max()
        gamma1 /= Q

        if self.check_if_cancelled():
            return

        # ----- 3D Correlation Function -----
        # gamma3(R) = 1/R int_{0}^{R} gamma1(x) dx
        # Only x <= 200 is evaluated (as this is all that's plotted).
        mask = xs <= 200.0
        n = int(np.count_nonzero(mask))
        running_integral = cumtrapz(gamma1[:n], xs[:n])
        # Gamma_3(0) is defined as 1
        gamma3 = np.hstack((1.0, running_integral/xs[1:n]))

        if self.check_if_cancelled():
            return

        # ----- Interface Distribution function -----
        idf_integrand = -qs**4 * (iqs-background)
        idf = dct(idf_integrand)

        if self.check_if_cancelled():
            return

        # Manually calculate IDF(0.0), since scipy DCT tends to give us a
        # very large negative value.
        # IDF(x) = int_0^inf q^4 * I(q) * cos(q*x) * dq
        # => IDF(0) = int_0^inf q^4 * I(q) * dq
        idf[0] = trapz(idf_integrand, qs)
        idf /= Q  # Normalise using scattering invariant

    except Exception as e:
        import logging
        logging.getLogger(__name__).error(e)

        self.update(msg="Fourier transform failed.")
        self.complete(transforms=None)
        return

    if self.isquit():
        return
    self.update(msg="Fourier transform completed.")

    transforms = (
        Data1D(xs, gamma1),
        Data1D(xs[mask], gamma3),
        Data1D(xs, idf),
    )
    self.complete(transforms=transforms)
from scipy.fftpack import dct, idct
import math

if __name__ == '__main__':
    # Debug script: print the orthonormal DCT of the first syllable's lf0
    # contour, then stop.
    syllable_dict = Utility.load_obj(
        '/work/w2/decha/Data/GPR_speccom_data/Interspeech2017/syllable_dictionary_data_with_delta_deltadelta.pkl'
    )

    for syl in syllable_dict:
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print(syl)
        lf0 = syllable_dict[syl][0]

        # NOTE(review): W is not used below; the call is kept because its
        # side effects, if any, are not visible from here.
        W = PoGUtility.generate_W_for_DCT(len(lf0), len(lf0))

        lf0_dct = dct(lf0, norm='ortho')
        print(lf0_dct)

        # Exit after the first syllable.
        sys.exit()