def lpf(image, sigma, mode=2):
    """Low-pass filter a 2-D array with a Gaussian window in a transform domain.

    Parameters
    ----------
    image : ndarray, 2-D
        Input samples. A shape of ``(1, N)`` or ``(N, 1)`` selects the 1-D
        transforms, applied along the axis that holds the samples.
    sigma : float
        Width handed to ``matlab_style_gauss2D`` when the window is built.
    mode : int, optional
        ``1`` filters in the Fourier domain, ``2`` (default) filters in the
        DCT domain.

    Returns
    -------
    ndarray
        Real-valued filtered array.

    Raises
    ------
    ValueError
        If ``mode`` is neither 1 nor 2.
    """
    (Mx, My) = image.shape
    is_vector = Mx == 1 or My == 1
    # Axis that carries the samples when the input is a row or a column.
    axis = 1 if Mx == 1 else 0

    if mode == 1:
        kernel = matlab_style_gauss2D(image.shape, sigma)
        kernel /= numpy.max(kernel)  # unit gain at the window's peak

        if is_vector:
            spectrum = numpy.fft.fftshift(numpy.fft.fft(image, axis=axis))
            spectrum *= kernel
            # ifftshift (not a second fftshift) is the exact inverse of
            # fftshift; the two differ by one sample for odd lengths.
            return numpy.real(
                numpy.fft.ifft(numpy.fft.ifftshift(spectrum), axis=axis))

        spectrum = numpy.fft.fftshift(numpy.fft.fft2(image))
        spectrum *= kernel
        return numpy.real(numpy.fft.ifft2(numpy.fft.ifftshift(spectrum)))

    if mode == 2:
        # Build a window twice as large and keep its lower-right quadrant, so
        # the retained part starts at the window centre.
        kernel = matlab_style_gauss2D((2 * Mx, 2 * My), sigma * 2)
        kernel /= numpy.max(kernel)
        kernel = kernel[Mx:, My:]

        image = image.astype(numpy.double)
        if is_vector:
            n = image.shape[axis]
            coeffs = fftpack.dct(image, type=1, axis=axis)
            coeffs *= kernel
            # An unnormalised DCT-I round trip scales the data by 2 * (n - 1).
            return numpy.real(
                fftpack.idct(coeffs, type=1, axis=axis)) / (2.0 * (n - 1))

        coeffs = fftpack.dct(
            fftpack.dct(image.T, type=2, norm='ortho').T, type=2, norm='ortho')
        coeffs *= kernel
        return numpy.real(
            fftpack.idct(
                fftpack.idct(coeffs.T, type=2, norm='ortho').T,
                type=2, norm='ortho'))

    raise ValueError("mode must be 1 (FFT) or 2 (DCT), got %r" % (mode,))
def doDCT(filename):
    """Build a 30-bin, L2-normalised DCT-magnitude descriptor for an image.

    The file is read with ``cv2.imread(filename, 0)``, resized to 200x200
    and transformed with a 2-D orthonormal DCT. The absolute values of the
    coefficients returned by ``zigzagRead_half`` are summed into 30 bins of
    growing width: bins 0-9 span ``slotLen`` coefficients each, bins 10-19
    span ``2 * slotLen`` and bins 20-29 span ``4 * slotLen``.

    Parameters
    ----------
    filename : str
        Path passed to ``cv2.imread``.

    Returns
    -------
    ndarray, shape (1, 30)
        Bin sums divided by their Euclidean norm.
    """
    DCTSlot = 30
    img = cv2.imread(filename, 0)
    img = cv2.resize(img, (200, 200))
    imf = np.float32(img)
    dct_ = dct(dct(imf.T, norm='ortho').T, norm='ortho')

    # Coefficient magnitudes in the order produced by the zig-zag reader.
    list_ = np.abs(zigzagRead_half(dct_))

    # 10 * 1 + 10 * 2 + 10 * 4 = 70 base-width units cover the whole list.
    # Floor division keeps every bin index an int on Python 2 and 3 alike.
    slotLen = len(list_) // 70 + 1
    dct_result = np.zeros((DCTSlot, 1))

    for i in range(len(list_)):
        pos = i + 1
        if pos < 10 * slotLen:
            slot = pos // slotLen
        elif pos < 30 * slotLen:
            slot = 10 + (pos - 10 * slotLen) // (2 * slotLen)
        else:
            slot = 20 + (pos - 30 * slotLen) // (4 * slotLen)
        dct_result[slot] += list_[i]

    # Normalise to unit Euclidean length and return as a row vector.
    base = np.linalg.norm(dct_result)
    dct_result /= base
    return np.transpose(dct_result)
Exemple #3
0
def fourier_interp(sig_objs):
    """Interpolate per-q spectral sums onto a denser q grid and write them out.

    For every row index ``iw`` the whitespace-separated fields 2..8 of
    ``sig.aspec[iw]`` are summed for each object in ``sig_objs``. The
    resulting length-``nqs`` vector is transformed with an orthonormal
    DCT-II, zero-padded with ``8 * nqs`` extra coefficients and transformed
    back with an orthonormal DCT-III, giving ``9 * nqs`` interpolated values.

    The table is written to ``aspecinterp.dat`` in the working directory:
    first for q indices ``-(9*nqs-1) .. 0``, then for ``0 .. 9*nqs-1``. Each
    line holds ``q  label  value`` where ``label`` is the first field of the
    last object's row; a blank line follows every q block.

    Parameters
    ----------
    sig_objs : sequence
        Objects exposing ``aspec``, a sequence of whitespace-separated
        numeric strings with at least nine fields each.
    """
    nws = len(sig_objs[0].aspec)
    nqs = len(sig_objs)
    pads = 8 * nqs
    nq_dense = nqs + pads
    aspec = np.zeros([nws, nq_dense])
    aux = np.zeros(nqs)

    for iw in range(nws):
        for i, sig in enumerate(sig_objs):
            # A list (not a lazy map object) so it can be sliced on Python 3.
            fields = [float(tok) for tok in sig.aspec[iw].split()]
            # Trace over the valence manifold: fields 2..8.
            aux[i] = sum(fields[2:9])
        # Cosine coefficients, zero-padded, then evaluated on the dense grid.
        coeffs = dct(aux, 2, norm='ortho')
        padded = np.pad(coeffs, (0, pads), 'constant')
        aspec[iw, :] = dct(padded, 3, norm='ortho')

    # Row labels come from the last object, which is what the original's
    # leftover loop variable referred to.
    labels = [sig_objs[-1].aspec[iw].split()[0] for iw in range(nws)]

    with open('aspecinterp.dat', 'w') as f:
        # Mirrored half first (descending q), then the forward half.
        for sign, q_order in ((-1, range(nq_dense - 1, -1, -1)),
                              (1, range(nq_dense))):
            for iq in q_order:
                for iw in range(nws):
                    f.write('%s %s %s\n' % (sign * iq, labels[iw], aspec[iw][iq]))
                f.write('\n')
def energy_99(directory="/Users/cyan/Desktop/color_hist_py/cropImage_large/",
              max_files=100, energy_fraction=0.99):
    """Find how many zig-zag DCT coefficients hold 99% of each image's energy.

    Every ``*.jpg`` in ``directory`` (at most ``max_files`` of them) is read
    as greyscale, resized to 200x200 and transformed with a 2-D orthonormal
    DCT. The coefficients are ordered by ``zigzagRead`` and the smallest
    prefix length ``j`` whose Euclidean norm exceeds ``energy_fraction``
    times the total norm is recorded.

    Parameters
    ----------
    directory : str, optional
        Folder searched for ``*.jpg`` files.
    max_files : int, optional
        Upper bound on the number of files processed. Fewer files are
        handled gracefully instead of raising ``IndexError``.
    energy_fraction : float, optional
        Fraction of the total norm the prefix has to exceed.

    Returns
    -------
    list of int
        One prefix length per image for which the threshold was reached.
    """
    files = glob.glob(os.path.join(directory, '*.jpg'))

    result_idx = []
    for i, path in enumerate(files[:max_files]):
        print(i)
        img = cv2.imread(path, 0)
        img = cv2.resize(img, (200, 200))
        imf = np.float32(img)
        dct_ = dct(dct(imf.T, norm='ortho').T, norm='ortho')

        # Coefficients in zig-zag order.
        list_ = zigzagRead(dct_)
        total_energy = np.linalg.norm(list_)
        print("total enery" + str(total_energy))
        print(np.linalg.norm(list_[:1]))

        # Running norm of every prefix in one pass instead of recomputing
        # np.linalg.norm(list_[:j]) for each j.
        coeffs = np.asarray(list_, dtype=np.float64).ravel()
        running_norm = np.sqrt(np.cumsum(coeffs ** 2))
        above = np.flatnonzero(running_norm > energy_fraction * total_energy)
        if above.size:
            j = int(above[0]) + 1  # prefix length is 1-based
            print("J :" + str(j))
            result_idx.append(j)

    return result_idx
def A_dct2(x, n, omega=None):
    """
    Take the 2-dimensional type II DCT of the flattened (n,n) image
    contained in vector x. Works across columns.

    The input array is left untouched: it is viewed as ``(n, n, ...)`` via
    ``reshape`` rather than by reassigning ``x.shape``.

    Parameters
    ----------

    x : ndarray, shape (n*n,) or (n*n, n_col)
      flattened image vector(s); trailing axes are carried through
    n : int
      image column/row size
    omega : ndarray or None
      support of the output (ie, indices at which output vector is sampled);
      ``None`` returns every coefficient

    Returns
    -------

    y = dct2(x.reshape(n,n))[omega], with the leading axis flattened
    """
    col_shapes = x.shape[1:]
    image = x.reshape((n, n) + col_shapes)
    y = fftpack.dct(image, type=2, axis=0, norm='ortho')
    y = fftpack.dct(y, type=2, axis=1, norm='ortho')
    # np.reshape copies when needed, so a discontiguous y is fine here.
    y = np.reshape(y, (n * n,) + col_shapes)
    # Explicit None test: truth-testing an index array is ambiguous.
    if omega is not None:
        return np.take(y, omega, axis=0)
    return y
def dct_distance(rep_one, rep_two, norm=True, num_coefficients=3):
    """Mean Euclidean distance between leading DCT coefficients of two tracks.

    Both inputs are 2-D arrays (or objects offering ``to_array()``) with one
    column per band. For every band the orthonormal DCT of each column is
    taken, the first coefficient is dropped when ``norm`` is true, and the
    next ``num_coefficients`` values are compared. If one side has fewer
    coefficients than requested, the other side is cut to the same length.

    Returns the per-band distances averaged over the number of bands.
    """
    first = rep_one if isinstance(rep_one, np.ndarray) else rep_one.to_array()
    second = rep_two if isinstance(rep_two, np.ndarray) else rep_two.to_array()
    assert (first.shape[1] == second.shape[1])

    num_bands = first.shape[1]
    skip = 1 if norm else 0
    total = 0
    for band in range(num_bands):
        try:
            source = dct(first[:, band], norm='ortho')
        except ValueError:
            # Show the offending input before propagating the error.
            print(first)
            raise
        source = source[skip:][0:num_coefficients]
        target = dct(second[:, band], norm='ortho')[skip:][0:num_coefficients]

        # Bring both coefficient vectors to a common length.
        if len(target) < num_coefficients:
            source = source[:len(target)]
        if len(source) < num_coefficients:
            target = target[:len(source)]
        total += euclidean(source, target)
    return total / num_bands
def dctFromPixel(luminance_image, x, y):
    """Return the 2-D orthonormal DCT of the patch around (x, y) as a flat list.

    The patch itself is whatever ``find_neighbors`` returns for the given
    image and coordinates.
    """
    patch = find_neighbors(luminance_image, x, y)
    # Transform along one axis, transpose back, then along the other.
    first_pass = dct(patch.T, norm='ortho')
    coefficients = dct(first_pass.T, norm='ortho')
    return np.reshape(coefficients, -1).tolist()
def A_dct2(x, n, omega):
    """
    Take the 2-dimensional type II DCT of the flattened (n,n) image
    contained in vector x.

    The input array is not modified: it is viewed as ``(n, n)`` through
    ``reshape`` instead of reassigning ``x.shape``, so the caller's array
    keeps its shape even if the transform raises or ``x`` was passed in 2-D.

    Parameters
    ----------

    x : ndarray, shape (n*n,)
      flattened image vector
    n : int
      image column/row size
    omega : ndarray
      support of the output (ie, indices at which output vector is sampled)

    Returns
    -------

    y = dct2(x.reshape(n,n))[omega]
    """
    image = x.reshape((n, n))
    y = fftpack.dct(image, type=2, axis=0, norm='ortho')
    y = fftpack.dct(y, type=2, axis=1, norm='ortho')
    return y.flat[omega]
def extract_dct_features(time_windows, class_attr=None, n_comps=48):
    """Turn each time window into a vector of leading DCT magnitudes.

    For every window the ``x``, ``y`` and ``z`` columns and their combined
    signal ``mag(x, y, z)`` are transformed with ``fftpack.dct``; the first
    ``n_comps`` absolute coefficients of each are concatenated in that order.

    Returns the feature matrix alone when ``class_attr`` is ``None``;
    otherwise returns ``(matrix, labels)`` where each label is the first
    value of the window's ``class_attr`` column.
    """
    want_labels = class_attr is not None
    feature_rows = []
    labels = [] if want_labels else None

    for tw in time_windows:
        x = tw['x'].values
        y = tw['y'].values
        z = tw['z'].values
        channels = (x, y, z, mag(x, y, z))

        pieces = [np.abs(fftpack.dct(channel))[:n_comps] for channel in channels]
        # The empty float array seeds the same dtype promotion as the
        # step-by-step concatenation onto np.array([]).
        feature_rows.append(np.concatenate([np.array([])] + pieces))

        if want_labels:
            labels.append(tw[class_attr].iloc[0])

    X_matrix = np.array(feature_rows)
    if labels is None:
        return X_matrix
    return X_matrix, labels
Exemple #10
0
def process_cube(img, weight, quality):
    """Quantise a block of frames in the DCT domain and return the result.

    The block is converted BGR -> LAB slice by slice, transformed with a DCT
    along axes 0 and 1, divided by the quantisation tables selected by the
    quality index, rounded, multiplied back, inverse-transformed, clipped to
    0..255 and converted LAB -> BGR.

    Parameters
    ----------
    img : ndarray, 4-D
        Axis 2 holds the three colour channels; every ``img[:, :, :, i]`` is
        passed to ``cv2.cvtColor``. The input is copied, not modified.
    weight : array_like
        Its maximum scales ``quality`` to choose the table index.
    quality : int
        Number of quality levels; the index is clamped to ``0 .. quality - 1``.

    Returns
    -------
    ndarray of uint8
        Processed block with the same shape as ``img``.
    """
    # TODO: check to make sure that size of img, and q_tables are consistent
    img = img.copy()

    this_quality = np.round(np.max(weight) * quality)
    if this_quality < 0:
        this_quality = 0
    if this_quality > quality - 1:
        this_quality = quality - 1
    # np.round yields a float; array indices have to be integers.
    this_quality = int(this_quality)

    for i in range(img.shape[3]):
        img[:, :, :, i] = cv2.cvtColor(img[:, :, :, i], cv2.COLOR_BGR2LAB)
    img = np.float32(img)

    # Unnormalised DCT along the two leading axes. NOTE(review): the 1/4
    # factors make the dct/idct round trip unit-gain only when those axes
    # have length 8 -- presumably 8x8 blocks; confirm.
    img_dct = dct(dct(img, axis=0) / 4, axis=1) / 4

    Q_luma = luminance_tables[:, :, :, this_quality].astype(np.float32)
    Q_chroma = chrominance_tables[:, :, :, this_quality].astype(np.float32)

    # Quantise: divide, round, multiply back. Channel 0 uses the luminance
    # table, channels 1 and 2 the chrominance table.
    img_dct[:, :, 0, :] /= Q_luma
    img_dct[:, :, 1, :] /= Q_chroma
    img_dct[:, :, 2, :] /= Q_chroma

    img_dct = np.round(img_dct)

    img_dct[:, :, 0, :] *= Q_luma
    img_dct[:, :, 1, :] *= Q_chroma
    img_dct[:, :, 2, :] *= Q_chroma

    img_processed = idct(idct(img_dct, axis=0) / 4, axis=1) / 4

    img_processed = np.clip(img_processed, 0, 255)
    img_processed = np.uint8(img_processed)

    for i in range(img_processed.shape[3]):
        img_processed[:, :, :, i] = cv2.cvtColor(
            img_processed[:, :, :, i], cv2.COLOR_LAB2BGR)

    return img_processed
 def fastChebScalar(self, fj, fk):
     """Fast Chebyshev scalar product.

     Returns a scaled DCT of ``fj`` along axis 0: type II for quadrature
     ``"GL"``, type I for ``"GC"``. For any other ``self.quad`` the ``fk``
     argument is handed back unchanged.
     """
     npts = fj.shape[0]
     quad = self.quad
     if quad == "GL":
         return dct(fj, 2, axis=0)*np.pi/(2*npts)
     if quad == "GC":
         return dct(fj, 1, axis=0)*np.pi/(2*(npts-1))
     return fk
def discrete_cosine(reference, target):
    """Return the DCTs of both inputs and the row-wise means of those DCTs.

    The transforms use the ``fftpack.dct`` defaults; the curves are the mean
    of each transform over axis 1.

    Returns ``(reference_transform, target_transform, reference_curve,
    target_curve)``.
    """
    transforms = [fftpack.dct(data) for data in (reference, target)]
    curves = [spectrum.mean(axis=1) for spectrum in transforms]
    return transforms[0], transforms[1], curves[0], curves[1]
def dct2(arr):
    """
    2-D DCT built from two 1-D ``dct`` passes (default type and scaling).

    @params { np.ndarray } arr  input, converted to float64 first
    @return { np.ndarray }      transform along axis 0, then along axis 1
    """
    samples = np.float64(arr)
    along_rows = dct(samples, axis=0)
    return dct(along_rows, axis=1)
def laplacian_pca_TV(res, x, f0, lam, gam, iter = 10):
    '''
    TV version of Laplacian embedding.

    Runs ``iter`` rounds of three alternating updates -- a shrinkage step
    for z, an SVD-based step for the embedding f and a DCT-domain step for
    y -- and prints diagnostics after every round.

    :param res: resolution of the grid (rows of x.dot(f) are handled as res x res images)
    :param x: numpy array of data in rows
    :param f0: initial embedding matrix
    :param lam: sparsity parameter
    :param gam: fidelity parameter
    :param iter: number of iterations to carry out
    :return: returns embedding matrix
    '''
    # f0 is an initial projection
    n = res ** 2
    num_data = x.shape[0]

    D = sparse_discrete_diff(res)
    # Element-wise multiplier applied to the DCT coefficients in the y update.
    M = 1/(lam*laplacian_eigenvalues(res).reshape(n)+gam)

    f = f0
    y = x .dot(f)
    # Initial z; it is recomputed from the same y at the top of the first round.
    z = shrink(y .dot(D.T), lam)

    for i in range(iter):

        # Update z (keep the previous iterate to report the change below)
        z_old = z
        z = shrink(y .dot (D.T), lam)

        # Update f: product of the two outer SVD factors of x.T.dot(y).
        # NOTE(review): assumes la.svd returns the third factor already
        # transposed, as numpy/scipy linalg do -- confirm what `la` is.
        f_old = f
        u, s, v = la.svd(x.T .dot (y), full_matrices=False)
        f = u .dot(v)

        # Update y: 2-D orthonormal DCT of every row of q (viewed as a
        # res x res image), scaled element-wise by M, then inverted.
        y_old = y
        q = lam * z .dot (D) + gam * x .dot(f)
        y = fftpack.dct(q.reshape((num_data, res, res)), norm='ortho') # Images unraveled as rows
        y = fftpack.dct(np.swapaxes(y,1,2), norm='ortho') # Swap and apply dct on the other side
        # NOTE(review): the coefficients are in axis-swapped layout when M is
        # applied; this only matters if M is not symmetric under that swap.
        y = np.apply_along_axis(lambda v: M * v, 1, y.reshape((num_data, n)))
        y = fftpack.idct(y.reshape((num_data, res, res)), norm='ortho')
        y = fftpack.idct(np.swapaxes(y,1,2), norm='ortho')
        y = y.reshape((num_data, n))

        # Diagnostics: Frobenius norms of the iterates and of their changes.
        zres = np.sqrt(np.sum((z - z_old) ** 2))
        znorm = np.sqrt(np.sum((z)**2))
        yres = np.sqrt(np.sum((y - y_old) ** 2))
        ynorm = np.sqrt(np.sum((y)**2))
        fres = np.sqrt(np.sum((f - f_old) ** 2))

        # Printed objective: L1 norm of z plus the two weighted quadratic penalties.
        value = np.sum(abs(z)) + 0.5*lam*np.sum((z-y .dot(D.T))**2) + 0.5*gam*np.sum((y- x .dot(f) )**2)
        print('Iter %d Val %f Z norm %f Z res %f Ynorm %f Y res %f F res %f' % (i, value, znorm, zres, ynorm, yres, fres))

    return f
 def ifct(self, fk, cj):
     """Inverse fast Chebyshev transform.

     Builds the result from a DCT of ``fk`` along axis 0 (type III for
     ``"GL"``, type I for ``"GC"``) plus correction terms from the first
     and, for ``"GC"``, last coefficient. Any other ``self.quad`` returns
     the ``cj`` argument unchanged.
     """
     quad = self.quad
     if quad == "GL":
         cj = 0.5*dct(fk, 3, axis=0)
         cj += 0.5*fk[0]
         return cj
     if quad == "GC":
         cj = 0.5*dct(fk, 1, axis=0)
         cj += 0.5*fk[0]
         # The last coefficient enters with alternating sign.
         half_last = 0.5*fk[-1]
         cj[::2] += half_last
         cj[1::2] -= half_last
         return cj
     return cj
Exemple #16
0
def dct2(y):
    """Orthonormal 2-D DCT of a 2-D array: a row pass, then a column pass.

    Both passes write into float buffers, so the result is float regardless
    of the input dtype.
    """
    n_rows, n_cols = y.shape[0], y.shape[1]
    row_pass = empty([n_rows, n_cols], float)
    result = empty([n_rows, n_cols], float)

    for r in range(n_rows):
        row_pass[r, :] = dct(y[r, :], norm='ortho')
    for c in range(n_cols):
        result[:, c] = dct(row_pass[:, c], norm='ortho')

    return result
def At_dct2(y, n, omega):
    """
    Take the 2-dimensional type III DCT of the flattened (n,n) matrix
    contained in vector y: the values of y are scattered to the flat
    positions ``omega`` of a zero (n,n) array, which is then transformed
    along both axes and returned flattened. This is the adjoint operator
    to the A_dct2 operator.
    """
    coefficients = np.zeros((n, n), 'd')
    coefficients.flat[omega] = y
    first_pass = fftpack.dct(coefficients, type=3, axis=0, norm='ortho')
    second_pass = fftpack.dct(first_pass, type=3, axis=1, norm='ortho')
    return second_pass.flatten()
Exemple #18
0
def multiwavelet_from_rgb(rgb):
    """Concatenate DCT and db4-wavelet features of the first two channels.

    Channels 0 and 1 of ``rgb`` are converted to float. Each contributes its
    row-wise orthonormal DCT (flattened) and the output of ``_unpack`` on
    its ``wavedec2(..., 'db4')`` decomposition.

    Returns a 1-D array laid out as ``[dct_0, dct_1, wavelet_0, wavelet_1]``.
    """
    from scipy.fftpack import dct
    from pywt import wavedec2

    # The builtin float replaces the np.float alias, which newer NumPy
    # releases no longer provide; both mean float64.
    r = rgb[:, :, 0].astype(float)
    g = rgb[:, :, 1].astype(float)

    dctr = dct(r, norm='ortho').ravel()
    dctg = dct(g, norm='ortho').ravel()
    daubr = _unpack(wavedec2(r, 'db4'))
    daubg = _unpack(wavedec2(g, 'db4'))
    return np.hstack([dctr, dctg, daubr, daubg])
def calculate_DCTII_2D(matrix):
    """
    Calculates the 2D transform of the DCT II algorithm.

    The input is reshaped to a fixed 32 x 32 grid, so it must hold exactly
    1024 values.

    See:
        http://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II

    The plain (unnormalised) variant is used on purpose.
    """
    grid = numpy.array(matrix).reshape((32, 32))
    first_pass = fftpack.dct(grid.T)
    return fftpack.dct(first_pass.T)
 def fct(self, fj, cj):
     """Fast Chebyshev transform.

     Returns a scaled DCT of ``fj`` along axis 0: type II divided by N for
     ``"GL"``, type I divided by N-1 for ``"GC"``, with the first (and for
     ``"GC"`` also the last) entry halved. Any other ``self.quad`` returns
     the ``cj`` argument unchanged.
     """
     npts = fj.shape[0]
     if self.quad == "GL":
         coeffs = dct(fj, 2, axis=0)
         coeffs /= npts
         coeffs[0] /= 2
         return coeffs
     if self.quad == "GC":
         coeffs = dct(fj, 1, axis=0)/(npts-1)
         coeffs[0] /= 2
         coeffs[-1] /= 2
         return coeffs
     return cj
 def fastChebScalar(self, fj, fk):
     """Fast Chebyshev scalar product.

     With ``self.fast_transform`` set, returns a scaled DCT of ``fj`` along
     axis 0 (type II for ``"GL"``, type I for ``"GC"``; otherwise ``fk`` is
     returned as passed). Without it, ``fk`` is filled in place with
     ``V . (fj * weights)``, calling ``self.init`` first when ``self.points``
     is ``None``.
     """
     if not self.fast_transform:
         if self.points is None:
             self.init(fj.shape[0])
         fk[:] = np.dot(self.V, fj*self.weights)
         return fk

     npts = fj.shape[0]
     if self.quad == "GL":
         return dct(fj, 2, axis=0)*np.pi/(2*npts)
     if self.quad == "GC":
         return dct(fj, 1, axis=0)*np.pi/(2*(npts-1))
     return fk
Exemple #22
0
 def __init__(self, audiofile, mel_bands = 40, weight=2):
     """Compute a 26-value summary: 13 mean MFCCs followed by 13 mean deltas.

     After the base-class setup (with ``skip_frames=3``) the power-spectrum,
     mel-mapping and gradient steps are run. NOTE(review): those helpers
     presumably populate ``self.processed`` and ``self.delta`` -- confirm.
     The first 13 rows of the orthonormal DCT-II (axis 0) of each are kept
     in ``self.mfcc`` / ``self.delta``, and their means over axis 1 replace
     ``self.processed``.
     """
     super(MFCC, self).__init__(audiofile, skip_frames=3)
     self.weight = float(weight)
     self.mel_bands = mel_bands

     self.powerspectrum()
     self.map_to_mel()
     self.calc_gradient()

     kept = 13
     self.mfcc = dct(self.processed, type=2, norm="ortho", axis=0)[:kept]
     self.delta = dct(self.delta, type=2, norm="ortho", axis=0)[:kept]

     summary = np.zeros(26)
     summary[:kept] = np.mean(self.mfcc, axis = 1)
     summary[kept:] = np.mean(self.delta, axis = 1)
     self.processed = summary
def razafindradina_embed(grayscale_container_path, grayscale_watermark_path, watermarked_image_path, alpha):
    """
    Razafindradina embedding method implementation.

    Loads both images as 8-bit greyscale, adds ``alpha`` times the first
    output of ``schur_decomposition(watermark)`` to the container's 2-D
    orthonormal DCT coefficients, inverts the DCT, limits the result to
    0..255 and saves it as an 8-bit image at ``watermarked_image_path``.

    Both images must be square and of equal size.

    23-July-2015
    """
    container = numpy.asarray(Image.open(grayscale_container_path).convert("L"))
    watermark = numpy.asarray(Image.open(grayscale_watermark_path).convert("L"))

    container_rows, container_cols = container.shape[0], container.shape[1]
    assert (
        (container_rows == container_cols)
        and (container_rows == watermark.shape[0])
        and (container_cols == watermark.shape[1])
    ), "GrayscaleContainer and GrayscaleWatermark sizes do not match or not square"

    # Forward DCT of the container, one axis at a time.
    coefficients = dct(dct(container, axis=0, norm="ortho"), axis=1, norm="ortho")

    # NOTE(review): presumably the triangular and unitary Schur factors; only
    # the first is used.
    schur_first, _schur_second = schur_decomposition(watermark)

    # Blend the watermark factor into the coefficients with strength alpha.
    coefficients += schur_first * alpha

    # Back to the pixel domain, limited to the 8-bit range.
    pixels = idct(idct(coefficients, axis=0, norm="ortho"), axis=1, norm="ortho")
    numpy.clip(pixels, 0, 255, out=pixels)

    Image.fromarray(numpy.uint8(pixels)).save(watermarked_image_path)

    return
Exemple #24
0
def idct2(b,c,d):
    """Two-pass ``dct`` over the first ``c`` rows and ``d`` columns of ``b``.

    Both work buffers are fixed at 64 x 64 and allocated uninitialised, so
    the result is fully defined only when ``c == d == 64``.

    NOTE(review): despite the name, both passes call the forward ``dct``
    with default type and scaling -- confirm whether ``idct`` was intended.
    """
    n_rows = c
    n_cols = d
    row_pass = empty([64,64],float)
    result = empty([64,64],float)

    for r in range(n_rows):
        row_pass[r,:] = dct(b[r,:])
    for col in range(n_cols):
        result[:,col] = dct(row_pass[:,col])

    return result
Exemple #25
0
    def _frame_hash(self, im, hasher):
        """Feed a DCT-based fingerprint of one frame into ``hasher``.

        The frame is converted to mode 'F', resized with
        ``_resize_to_width``, offset by -128 and trimmed by
        ``self._edge_width`` on every side. The hasher first receives the
        resized height divided by ``self._height_split`` (formatted with
        ``%d``), then the prepared coefficients from the top-left
        ``_dct_core_width`` square of the 2-D orthonormal DCT, row by row.
        """
        im_small = _resize_to_width(im.convert('F'), self._standard_width)
        mat = numpy.asarray(im_small, dtype=numpy.float32) - 128

        margin = self._edge_width
        n_rows, n_cols = mat.shape[0], mat.shape[1]
        core = mat[margin:(n_rows-margin), margin:(n_cols-margin)]

        first_pass = fftpack.dct(core, norm='ortho')
        coefficients = fftpack.dct(first_pass.T, norm='ortho').T

        _, height_small = im_small.size
        hasher.update('%d' % (height_small / self._height_split))

        side = self._dct_core_width
        for ii in range(0, side):
            coefficient_row = coefficients[ii]
            for jj in range(0, side):
                hasher.update(self._prepare_coeff(coefficient_row[jj]))
def temporal_dc_variation_feature_extraction(frames):
    '''
    Computes dt_dc_measure1: the mean absolute change, between consecutive
    frame pairs, of the standard deviation of block-wise DCT coefficients of
    the motion-compensated frame difference.

    frames is indexed as frames[row, col, frame_index].

    The measure is printed, not returned (the function returns None), and
    the intermediate DCT array is dumped to "./pythonvects".

    NOTE(review): Python 2 only as written (print statements, xrange,
    integer "/" in the zeros() shape); np.float is also absent from newer
    NumPy releases.
    '''
    mbsize = 16
    row = frames.shape[0]
    col = frames.shape[1]
        
    # One 2-component motion vector per macroblock and per frame pair.
    motion_vects = zeros(shape=(2,row*col/mbsize**2,frames.shape[2]-1))
    for x in xrange(0,frames.shape[2]-1):# stops one frame early because frame x+1 is read
        imgP = frames[:,:,x+1]
        imgI = frames[:,:,x]
        # NOTE(review): `mblock` is not defined in this function; it has to
        # come from an enclosing scope -- possibly `mbsize` was meant.
        motion_vects[:,:,x], temp = motionEstNTSS(imgP,imgI,mblock,7)
    
    dct_motion_comp_diff = zeros(shape=(row,col,frames.shape[2]-1))
    for x in xrange(0,frames.shape[2]-1):
        mbCount = 0
        for i in xrange(0,row-mbsize+1,mbsize):
            for j in xrange(0,col-mbsize+1,mbsize):
                # 2-D orthonormal DCT of (block of frame x+1) minus (block of
                # frame x displaced by the motion vector, clipped at 0).
                # NOTE(review): the slices end at i+mbsize-1 / j+mbsize-1 and
                # so cover mbsize-1 = 15 samples per side; this looks like an
                # inclusive-range port -- confirm whether 16 was intended.
                # NOTE(review): motion_vects is a float array, so the slice
                # bounds built from it are floats.
                dct_motion_comp_diff[i:i+mbsize-1,j:j+mbsize-1,x] = dct(dct(((frames[i:i+mbsize-1,j:j+mbsize-1,x+1].astype(np.float))-
                            frames[i+motion_vects[0,mbCount,x]:i+mbsize-1+motion_vects[0,mbCount,x],
                            j+motion_vects[1,mbCount,x]:j+mbsize-1+motion_vects[1,mbCount,x],x].astype(np.float).clip(min=0)).astype(np.float),norm="ortho").transpose(),norm="ortho").transpose();
                mbCount = mbCount + 1
    # NOTE(review): the file object opened here is never closed explicitly.
    dct_motion_comp_diff.dump(open("./pythonvects", "wb"))
    # Standard deviation of the rearranged 16x16 blocks for every frame pair.
    std_dc = zeros(shape=(frames.shape[2]-1))
    for i in xrange(0,frames.shape[2]-1):
        temp = im2colDistinct(dct_motion_comp_diff[:,:,i],(16,16));
        std_dc[i] = np.std(temp)
    
    # Absolute first difference of std_dc.
    dt_dc_temp = zeros(shape=(std_dc.shape[0]-1))# one element shorter than std_dc
    for i in xrange(0,len(std_dc) - 1):
        dt_dc_temp[i] = abs(std_dc[i+1]-std_dc[i])
    print 'dt_dc_temp'
    print dt_dc_temp.shape
    print dt_dc_temp
    
    dt_dc_measure1 = np.mean(dt_dc_temp)    
    print 'dt_dc_measure1'
    print dt_dc_measure1
Exemple #27
0
def transform(data):
    """2-D DCT of a list of rows, returned as one array per input column.

    Each row is transformed with ``dct`` (default type and scaling); the
    coefficients are regrouped by position and every group is transformed
    again. Entry ``j`` of the result therefore runs along the original row
    axis.
    """
    # One bucket per column of the first row.
    columns = [[] for _ in range(len(data[0]))]

    for row_index in range(len(data)):
        row_coefficients = dct(data[row_index])
        for position, coefficient in enumerate(row_coefficients):
            columns[position].append(coefficient)

    return [dct(column) for column in columns]
def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
         nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True,
         winfunc=lambda x:numpy.ones((x,))):
    """Compute MFCC features from an audio signal.

    The filterbank energies from ``fbank`` are log-compressed, transformed
    with an orthonormal DCT-II along axis 1, truncated to ``numcep``
    coefficients and passed through ``lifter``.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied.
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    band_energies, frame_energy = fbank(signal, samplerate, winlen, winstep,
                                        nfilt, nfft, lowfreq, highfreq,
                                        preemph, winfunc)
    log_energies = numpy.log(band_energies)
    cepstra = dct(log_energies, type=2, axis=1, norm='ortho')
    cepstra = lifter(cepstra[:, :numcep], ceplifter)
    if appendEnergy:
        # The first cepstral coefficient is replaced by the log frame energy.
        cepstra[:, 0] = numpy.log(frame_energy)
    return cepstra
Exemple #29
0
    def _process(self, data):
        """Yield cepstral coefficients computed from the magnitude of ``data``.

        The DCT (axis 1) of ``safe_log(|data|)`` is sliced to ``_n_coeffs``
        columns starting at ``_exclude``. The copy is wrapped in an
        ``ArrayWithUnits`` that keeps the first dimension of the magnitude
        array and uses an ``IdentityDimension`` for the second.
        """
        magnitudes = np.abs(data)
        first = self._exclude
        cepstrum = dct(safe_log(magnitudes), axis=1)
        bfcc = cepstrum[:, first: first + self._n_coeffs]

        yield ArrayWithUnits(
            bfcc.copy(), [magnitudes.dimensions[0], IdentityDimension()])
Exemple #30
0
def mfcc(s, fs):
    """Compute mel-frequency cepstral coefficients for every full frame of s.

    The signal is cut into frames of 256 samples with a hop of 100 samples.
    Each frame is Hamming-windowed, its power spectrum (bins 0..128) is
    projected through ``get_filterbanks(30, 256, fs)``, and the DCT of the
    log energies (floored at 1e-5) fills one column of the result.

    Parameters
    ----------
    s : ndarray, 1-D
        Input samples.
    fs : number
        Sampling rate, forwarded to ``get_filterbanks``.

    Returns
    -------
    ndarray, shape (30, n_frames)
        One column per full frame; ``n_frames`` is 0 when ``s`` is shorter
        than one frame. Unlike before, the last frame is now computed
        instead of being left as a column of zeros.
    """
    # Constants
    N = 256  # frame length in samples
    M = 100  # hop between frame starts
    P = 30   # rows of the filterbank, i.e. coefficients per frame

    # Number of frames that fit entirely inside the signal.
    n_frames = max(0, (s.size - N) // M + 1)
    c = np.zeros((P, n_frames))
    if n_frames == 0:
        return c

    # Loop-invariant pieces, built once.
    window = np.hamming(N)
    filterbank = get_filterbanks(P, N, fs)
    half = N // 2

    for x in range(n_frames):
        start = x * M
        windowed = s[start:start + N] * window
        spectrum = np.fft.fft(windowed)
        # Mel-frequency wrapping of the one-sided power spectrum.
        power = abs(np.power(spectrum[0:half + 1], 2))
        mel_energy = np.dot(filterbank, power)
        c[:, x] = fft.dct(np.log(mel_energy.clip(min=0.00001)))

    # NOTE(review): the original called np.delete(c, 0, 0) here and threw the
    # result away, so row 0 (the 0th-order coefficient) was always returned.
    # That is kept so the output shape stays (P, n_frames); drop row 0 at the
    # call site if it should be excluded.
    return c
Exemple #31
0
def mfsc_to_sp(mfsc, alpha=0.45, N=2048):
    """Convert ``mfsc`` to a spectral envelope.

    The input is turned into a mel cepstrum with an orthonormal DCT, which
    ``pysptk.conversion.mc2sp`` converts using ``alpha`` and ``N``.
    """
    mel_cepstrum = sfft.dct(mfsc, norm='ortho')
    return pysptk.conversion.mc2sp(mel_cepstrum, alpha, N)
Exemple #32
0
def a_dct(l, m):
    """Orthonormal DCT-II of ``l`` with all but the ``m`` largest-magnitude
    coefficients set to zero.

    Ties keep their original order, so the earlier coefficient wins.
    """
    coefficients = dct(l, type=2, norm = 'ortho')
    # Indices from largest to smallest magnitude (stable for equal values).
    by_magnitude = sorted(range(len(coefficients)),
                          key=lambda k: abs(coefficients[k]), reverse=True)
    coefficients[by_magnitude[m:]] = 0
    return coefficients
Exemple #33
0
def mfcc(sig, sr, nbins=40):
    """Return the DCT of the log of ``pwrspec(sig, sr, nbins)``."""
    power = pwrspec(sig, sr, nbins)
    log_power = np.log(power)
    return dct(log_power)
Exemple #34
0
 def Atdct(y):
     """Scatter ``y`` into a zero vector of length ``m`` at ``pix_idx`` and
     return its orthonormal DCT-II.

     ``m`` and ``pix_idx`` come from the enclosing scope.
     """
     scattered = np.zeros(m)
     scattered[pix_idx] = y
     return dct(scattered, type=2, norm='ortho')
Exemple #35
0
def dct2(block):
    """Orthonormal 2-D DCT: transform along axis 0, then along axis 1."""
    along_first_axis = dct(block.T, norm='ortho').T
    return dct(along_first_axis, norm='ortho')
Exemple #36
0
def dct2d(a):
    """Unnormalised 2-D DCT: ``fftpack.dct`` along axis 0, then axis 1."""
    along_first_axis = fftpack.dct(a, axis=0)
    return fftpack.dct(along_first_axis, axis=1)

### Setting the ranges
# NOTE(review): N, sequence, DCTx and DFTx are defined outside this excerpt.
# This section uses Python 2 print statements.
tstart=-2*N; tend=3*N # extended range (from -2N to 3N) to see periodicity
t=linspace(tstart,tend,500) # continuous range: 500 points across the extended range
nrange=arange(N) # discrete range 0..N-1
nextended=arange(tstart,tend) # extended discrete range to see periodicity
dctrange=arange(0+.25,N/2+.25,.5) # range offset by 0.25 with step 0.5, used to compare the DCT and DFT



### Analysis
# Console output of DCT types 1-4 and the DFT.
# Types 1-3 come from dct(); type 4 and the DFT are printed as the absolute
# values of DCTx / DFTx evaluated over nrange.
# Note: the "scaled" lines multiply by 2/N, looking for factors that
# conserve signal energy across domains.
print 'type 1:         ',dct(sequence,type=1)
print 'type 1 (scaled):',dct(sequence,type=1)*2/(1*N)
print 'type 2:         ',dct(sequence,type=2)
print 'type 2 (scaled):',dct(sequence,type=2)*2/(1*N)
print 'type 3:         ',dct(sequence,type=3)
print 'type 3 (scaled):',dct(sequence,type=3)*2/(1*N)
print 'type 4:         ',array(map(abs,DCTx(nrange)))
print 'dft:            ',array(map(abs,(DFTx(nrange))))



### Plotting
# Figure 1 is created at 16x8 inches / 120 dpi, cleared, and its subplot
# spacing tightened.
#close('all')
myfig=figure(1,figsize=(16,8), dpi=120)
clf()
subplots_adjust(wspace=.15,hspace=0.03)
Exemple #38
0
def dct_2d(image):
    """Orthonormal 2-D DCT of ``image`` (axis 0 first, then axis 1)."""
    along_first_axis = fftpack.dct(image.T, norm='ortho').T
    return fftpack.dct(along_first_axis, norm='ortho')
def dct2d(x, inverse=False):
    """Orthonormal 2-D DCT of ``x``.

    Uses DCT type II by default and type III (its inverse under 'ortho'
    scaling) when ``inverse`` is true.
    """
    kind = 3 if inverse else 2
    first_pass = dct(x, type=kind, norm='ortho')
    second_pass = dct(first_pass.transpose(), type=kind, norm='ortho')
    return second_pass.transpose()
def dct2d(data):
    """Orthonormal 2-D DCT: last axis first, then the other axis."""
    along_last_axis = fftpack.dct(data, norm='ortho')
    along_other_axis = fftpack.dct(along_last_axis.T, norm='ortho')
    return along_other_axis.T
Exemple #41
0
def DCT_all_blocks(img,w):
    '''
    Computes the DCT II of all the w x w overlapping blocks in img.

    The windows produced by view_as_windows are stacked along axis 0, and an
    orthonormal DCT is applied along axes 1 and 2 of that stack.
    '''
    blocks = view_as_windows(img, w).reshape(-1, w, w)
    along_rows = dct(blocks, axis=1, norm='ortho')
    return dct(along_rows, axis=2, norm='ortho')
Exemple #42
0
def dct2(x):
    """Orthonormal 2-D DCT of ``x`` (last axis first, then the other)."""
    along_last_axis = dct(x, norm='ortho')
    along_other_axis = dct(along_last_axis.T, norm='ortho')
    return along_other_axis.T
Exemple #43
0
def get_2D_dct(img):
    """Return the orthonormal 2-D cosine transform of ``img``."""
    along_first_axis = fftpack.dct(img.T, norm='ortho').T
    return fftpack.dct(along_first_axis, norm='ortho')
    pad_signal = np.concatenate((signal, zeros))
    #split into frames
    indices = np.tile(np.arange(0, frame_length), (frames_num, 1)) + np.tile(
        np.arange(0, frames_num * frame_step, frame_step), (frame_length, 1)).T
    indices = np.array(indices, dtype=np.int32)
    frames = pad_signal[indices]
    frames *= np.hamming(frame_length)

    #FFT and abs
    complex_spectrum = np.fft.rfft(frames, NFFT).T
    absolute_complex_spectrum = np.abs(complex_spectrum)

    #create triangular filter and plot it
    fb = get_filter_banks(filters_num, NFFT, fs, low_freq, high_freq)
    plt.figure(3)
    plt.title('fb')
    for i in range(filters_num):
        plt.plot(fb[i])

    #signal inner product with triangular filter
    feat = np.dot(np.transpose(absolute_complex_spectrum), np.transpose(fb))
    feat = np.where(feat == 0, np.finfo(float).eps, feat)
    feat = np.log(feat)

    #Apply DCT
    feat = dct(feat, norm='ortho')[:, :filters_num]

    print(feat.shape)
    final = 'train_%d.npy' % (k + 30)
    np.save(final, feat)
Exemple #45
0
# --- FFT of a short sequence ---
# NOTE(review): `np` and `fft` have to be imported before this point; the
# imports are not part of this excerpt.
x = np.array([1.0,2.0,-1.0,1.5])
y = fft(x) 
print(y)

from scipy.fftpack import ifft

# --- Round trip: the inverse FFT of the FFT is printed for comparison ---
x = np.array([1.0,2.0,-1.0,1.5])
y = fft(x)
yinv = ifft(y)
print(yinv)

from scipy import fftpack
# --- Noisy sine: period 5, sampled every 0.02 over [0, 20) ---
time_step = 0.02
period = 5.
time_vec = np.arange(0,20,time_step)
# Sine plus Gaussian noise scaled by 0.5; the output differs on every run.
sig = np.sin(2*np.pi/period*time_vec) + 0.5*np.random.randn(time_vec.size)
print(sig.size)

# Frequency of every FFT bin, and the FFT of the noisy signal itself.
sample_freq = fftpack.fftfreq(sig.size,d = time_step)
sig_fft = fftpack.fft(sig)
print(sig_fft)

from scipy.fftpack import dct
from scipy.fftpack import idct

# --- DCT and inverse DCT (default type and scaling) of two sequences ---
# Despite its name, `mydict` holds the DCT coefficients, not a dictionary.
mydict =dct(np.array([4.,3.,5.,10.,5.,3.]))
print(mydict)
d = idct(np.array([4.,3.,5.,10.,5.,3.0]))
print(d)
Exemple #46
0
def dct2(a):
    """Orthonormal 2-D DCT of ``a`` (axis 0 first, then axis 1)."""
    along_first_axis = dct(a.T, norm='ortho').T
    return dct(along_first_axis, norm='ortho')
Exemple #47
0
 def transform(self, x):
     """Return the DCT of ``x`` using the default type and scaling."""
     coefficients = dct(x)
     return coefficients
Exemple #48
0
        for i in range(total_frames):
            for j in range(framesize):
                if ((i * overlap + j) < num_samples):
                    frames[i][j] = data[i * overlap + j]
                else:
                    frames[i][j] = 0

        saw_filter_a = signal.waveforms.sawtooth(range(len(frames)),
                                                 width=[0.5])
        saw_filter_b = signal.waveforms.sawtooth(range(len(frames)),
                                                 width=[0.6])
        for i in range(total_frames):
            dft_matrix[i] = np.fft.fft(frames[i])
            dft_matrix[i] = signal.filtfilt(saw_filter_a, saw_filter_b,
                                            dft_matrix[i])
            dft_matrix[i] = dct(dft_matrix[i])
            abs_dft_matrix[i] = abs(dft_matrix[i]) * abs(dft_matrix[i]) / max(
                abs(dft_matrix[i]))
            abs_dft_matrix[i] = np.log10(abs_dft_matrix[i])

        f = open(file_name[:-4] + ".logFBE", "w+")
        f.writelines(str(abs_dft_matrix))

        t = range(len(abs_dft_matrix))
        plt.plot(t, abs_dft_matrix)
        plt.ylabel("Frequency")
        plt.xlabel("Frame number")

    except Exception as e:
        print("Exception thrown as: " + str(e))
        pass
Exemple #49
0
def create_mfcc(file_name, start_point):
    """Compute mean-normalised, liftered MFCCs for a 3-second excerpt.

    The WAV file is read from the fixed speech folder, cut to three seconds
    starting at sample ``start_point``, pre-emphasised, framed, Hamming
    windowed and turned into a power spectrum. A triangular mel filterbank
    is applied, the energies are converted to dB, and an orthonormal DCT-II
    yields the cepstra. Coefficients 1..``num_ceps`` are kept, liftered and
    mean-normalised per coefficient.

    Relies on the module-level settings ``pre_emphasis``, ``frame_size``,
    ``frame_stride``, ``NFFT``, ``nfilt``, ``num_ceps`` and ``cep_lifter``.

    Returns an array with one row per frame and one column per coefficient.
    """
    file_path = 'F:/Projects/speech/speeches mail/' + str(file_name)
    sample_rate, signal = scipy.io.wavfile.read(file_path)

    # Keep three seconds starting at start_point.
    stop_point = int(start_point + 3 * sample_rate)
    signal = signal[start_point:stop_point]

    # Pre-emphasis: first sample as is, then s[n] - pre_emphasis * s[n-1].
    emphasized_signal = numpy.append(signal[0],
                                     signal[1:] - pre_emphasis * signal[:-1])

    # Frame geometry, converted from seconds to samples.
    frame_length = int(round(frame_size * sample_rate))
    frame_step = int(round(frame_stride * sample_rate))
    signal_length = len(emphasized_signal)
    num_frames = int(
        numpy.ceil(
            float(numpy.abs(signal_length - frame_length)) / frame_step))

    # Zero-pad so every frame has the same number of samples.
    pad_signal_length = num_frames * frame_step + frame_length
    padding = numpy.zeros((pad_signal_length - signal_length))
    pad_signal = numpy.append(emphasized_signal, padding)

    # Index matrix: row r holds the sample positions of frame r.
    offsets_in_frame = numpy.tile(numpy.arange(0, frame_length),
                                  (num_frames, 1))
    frame_starts = numpy.tile(
        numpy.arange(0, num_frames * frame_step, frame_step),
        (frame_length, 1)).T
    indices = offsets_in_frame + frame_starts
    frames = pad_signal[indices.astype(numpy.int32, copy=False)]
    frames *= numpy.hamming(frame_length)

    # Power spectrum of every frame.
    mag_frames = numpy.absolute(numpy.fft.rfft(frames, NFFT))
    pow_frames = ((1.0 / NFFT) * ((mag_frames)**2))

    # Filter edges: equally spaced on the mel scale, mapped back to FFT bins.
    low_freq_mel = 0
    high_freq_mel = (2595 * numpy.log10(1 + (sample_rate / 2) / 700))
    mel_points = numpy.linspace(low_freq_mel, high_freq_mel, nfilt + 2)
    hz_points = (700 * (10**(mel_points / 2595) - 1))
    bin_edges = numpy.floor((NFFT + 1) * hz_points / sample_rate)

    # Triangular filters: rising slope up to the centre, falling slope after.
    fbank = numpy.zeros((nfilt, int(numpy.floor(NFFT / 2 + 1))))
    for row in range(nfilt):
        left = bin_edges[row]
        center = bin_edges[row + 1]
        right = bin_edges[row + 2]
        first_bin = int(left)
        center_bin = int(center)
        last_bin = int(right)
        for k in range(first_bin, center_bin):
            fbank[row, k] = (k - left) / (center - left)
        for k in range(center_bin, last_bin):
            fbank[row, k] = (right - k) / (right - center)

    filter_banks = numpy.dot(pow_frames, fbank.T)
    # Replace exact zeros so the logarithm stays finite.
    filter_banks = numpy.where(filter_banks == 0,
                               numpy.finfo(float).eps,
                               filter_banks)
    filter_banks = 20 * numpy.log10(filter_banks)  # dB

    # Cepstra: drop coefficient 0, keep the next num_ceps.
    cepstra = dct(filter_banks, type=2, axis=1,
                  norm='ortho')[:, 1:(num_ceps + 1)]

    # Sinusoidal liftering, one weight per coefficient.
    (nframes, ncoeff) = cepstra.shape
    n = numpy.arange(ncoeff)
    lift = 1 + (cep_lifter / 2) * numpy.sin(numpy.pi * n / cep_lifter)
    cepstra *= lift

    # Mean normalisation over frames.
    filter_banks -= (numpy.mean(filter_banks, axis=0) + 1e-8)
    cepstra -= (numpy.mean(cepstra, axis=0) + 1e-8)

    return cepstra
Exemple #50
0
def tl_nmf(Y, K, Phi=None, W=None, H=None, regul=None, max_iter=300,
           n_iter_tl=5, tol=1e-4, verbose=False, rng=None):
    '''Run transform-learning NMF (TL-NMF).

    Alternates NMF updates of ``W`` and ``H`` on the spectrogram
    ``(Phi Y) ** 2`` with transform-learning updates of ``Phi``.

    Parameters
    ----------
    Y : array, shape (M, N)
        Frames matrix

    K : int
        Rank of the learned feature matrices.

    Phi : array, shape (M, M) | 'random' | 'dct' | None, optional
        Initial transform. Should be orthogonal. If 'random', start from
        ``unitary_projection`` applied to a random Gaussian matrix. If 'dct',
        start from ``fftpack.dct(np.eye(M), 3, norm='ortho')``.
        Random by default.

    W : array, shape (M, K) | None, optional
        Initial dictionary. If None, drawn at random with columns summing
        to one.

    H : array, shape (K, N) | None, optional
        Initial activations. If None, drawn at random.

    regul : float | None, optional
        Level of regularization. By default the heuristic
        ``1e6 * K / M`` is used.

    max_iter : int, optional
        Maximal number of iterations for the algorithm

    n_iter_tl : int, optional
        Number of iterations of transform learning between NMF steps

    tol : float, optional
        Tolerance for the stopping criterion. Iterations stop when two
        consecutive iterations of the algorithm have a relative objective
        change lower than tol.

    verbose : boolean, optional
        Whether or not to print information about the current state

    rng : RandomState, optional
        Passed through ``check_random_state`` to obtain the random generator
        used for the initializations.

    Returns
    -------
    Phi : array, shape (M, M)
        The estimated transform matrix

    W : array, shape (M, K)
        The estimated dictionary

    H : array, shape (K, N)
        The estimated activations

    Phi_init : array, shape (M, M)
        Initial Phi

    infos : dict
        Per-iteration lists monitoring convergence, under the keys
        ``obj_list``, ``eps_list``, ``tl_obj_list``, ``nmf_obj_list``,
        ``d_phi_list`` and ``d_phi_i_list``. Same quantities as printed when
        ``verbose`` is set.

    Raises
    ------
    ValueError
        If ``Phi`` is a string other than 'random' or 'dct'.
    '''
    regul_type = 'sparse'
    M, N = Y.shape

    # Reject an unknown initializer name before doing any work; otherwise the
    # string would reach np.dot below and fail with an unrelated error.
    if isinstance(Phi, str) and Phi not in ('random', 'dct'):
        raise ValueError("Phi must be an array, 'random', 'dct' or None; "
                         "got %r" % (Phi,))

    rng = check_random_state(rng)
    # Initialization
    if regul is None:
        regul = 1e6 * float(K) / M
    if not isinstance(Phi, np.ndarray):
        if Phi is None:
            Phi = 'random'
        if Phi == 'random':
            Phi = unitary_projection(rng.randn(M, M))
        elif Phi == 'dct':
            Phi = fftpack.dct(np.eye(M), 3, norm='ortho')
    if W is None:
        W = np.abs(rng.randn(M, K)) + 1.
        W = W / np.sum(W, axis=0)
    if H is None:
        H = np.abs(rng.randn(K, N)) + 1.

    X = np.dot(Phi, Y)
    V = X ** 2  # Initial spectrogram
    V_hat = np.dot(W, H)  # Initial factorization

    obj = is_div(V, V_hat) + regul * penalty(H, regul_type)  # Objective
    Phi_init = Phi.copy()

    # Monitoring
    obj_list = []
    eps_list = []
    tl_obj_list = []
    nmf_obj_list = []
    d_phi_list = []
    d_phi_i_list = []
    # Verbose
    if verbose:
        print('Running TL-NMF with %s regularization on a %d x %d '
              'problem with K = %d' % (regul_type, M, N, K))
        print(' | '.join([name.center(8) for name in
                         ["iter", "obj", "eps", "NMF", "TL", "d_phi",
                          "d_phi_i"]]))
    for n in range(max_iter):
        # NMF
        if regul_type == 'smooth':
            W, H = update_nmf_smooth(V, W, H, V_hat, regul)
        else:
            W, H = update_nmf_sparse(V, W, H, V_hat, regul)
        # Transform Learning
        V_hat = np.dot(W, H)
        obj1 = is_div(V, V_hat) + regul * penalty(H, regul_type)
        Phi_old = Phi.copy()
        Phi, X = fast_transform_learning(Phi, X, V_hat, n_iter_tl)
        V = X ** 2
        # Monitoring
        # `obj` is rebound (never mutated) below, so keeping a reference is
        # enough and also works when the objective is a plain Python float.
        old_obj = obj
        obj = is_div(V, V_hat) + regul * penalty(H, regul_type)
        eps = (old_obj - obj) / (np.abs(obj) + np.abs(old_obj))
        eps1 = old_obj - obj1  # objective decrease due to the NMF step
        eps2 = obj1 - obj  # objective decrease due to the transform step
        delta_phi = np.mean(np.abs(Phi - Phi_old))
        delta_phi_init = np.mean(np.abs(Phi - Phi_init))

        obj_list.append(obj)
        eps_list.append(eps)
        tl_obj_list.append(eps2)
        nmf_obj_list.append(eps1)
        d_phi_list.append(delta_phi)
        d_phi_i_list.append(delta_phi_init)
        # Termination
        if np.abs(eps) < tol:
            break
        if verbose:
            print(' | '.join([("%d" % (n+1)).rjust(8),
                              ("%.2e" % obj).rjust(8),
                              ("%.2e" % eps).rjust(8),
                              ("%.2e" % eps1).rjust(8),
                              ("%.2e" % eps2).rjust(8),
                              ("%.2e" % delta_phi).rjust(8),
                              ("%.2e" % delta_phi_init).rjust(8)]))
    infos = dict(obj_list=obj_list, eps_list=eps_list, tl_obj_list=tl_obj_list,
                 nmf_obj_list=nmf_obj_list, d_phi_list=d_phi_list,
                 d_phi_i_list=d_phi_i_list)
    return Phi, W, H, Phi_init, infos
Exemple #51
0
 def Adct(x):
     """Return the orthonormal type-3 DCT of ``x`` sampled at ``pix_idx``.

     ``pix_idx`` is taken from the enclosing scope and is used directly as an
     index into the transformed array.
     """
     return dct(x, type=3, norm='ortho')[pix_idx]
Exemple #52
0
# Show the source image in a Tk canvas sized to the image.
window = tk.Tk(className="bla")
# The last axis of `datal` is reversed before display and the values are
# scaled by 255 (presumably BGR floats in [0, 1] -- TODO confirm).
original = Image.fromarray(np.round(datal[:, :, ::-1] * 255).astype(np.uint8))

canvas = tk.Canvas(window, width=original.size[0], height=original.size[1])
canvas.pack()
# Keep a module-level reference to the PhotoImage while the canvas uses it.
image_tk = ImageTk.PhotoImage(original)

canvas.create_image(original.size[0] // 2,
                    original.size[1] // 2,
                    image=image_tk)

# Split the three channels (named b, g, r by index 0, 1, 2).
db = datal[:, :, 0]
dg = datal[:, :, 1]
dr = datal[:, :, 2]

# Separable 2-D DCT of each channel, scaled by 1/w and 1/h (w, h are defined
# elsewhere in the script).
dcb = fftpack.dct(fftpack.dct(db.T / w).T / h)
dcg = fftpack.dct(fftpack.dct(dg.T / w).T / h)
dcr = fftpack.dct(fftpack.dct(dr.T / w).T / h)

# Signs of the coefficients from row dh / column dw onwards, per channel.
# Each channel uses its own coefficients; the green and red signs were
# previously taken from the blue channel by mistake.
sb = np.sign(dcb[dh:, dw:])
sg = np.sign(dcg[dh:, dw:])
sr = np.sign(dcr[dh:, dw:])

# Treat exact zeros as positive so every entry is +1 or -1.
sb[sb == 0] = 1
sg[sg == 0] = 1
sr[sr == 0] = 1

allh = np.empty((
    3,
    h - dh,
    w - dw,
Exemple #53
0
def DCT2(mat):
    """Apply two orthonormal DCT passes to ``mat`` and quantise by ``q_table``.

    The first pass transforms along the last axis of ``mat``; the result is
    transposed and transformed along its last axis again. The output is
    floor-divided element-wise by the module-level ``q_table`` and rounded.

    NOTE(review): there is no second transpose, so the result is laid out as
    the transpose of the usual 2-D DCT -- confirm the matching inverse
    expects that.
    NOTE(review): ``//`` already yields whole numbers, so ``np.round`` looks
    like a no-op here -- confirm floor (not nearest) quantisation is intended.
    """
    first_pass = dct(mat, norm='ortho')
    second_pass = dct(first_pass.T, norm='ortho')
    quantised = second_pass // q_table
    return np.round(quantised)
# Relative reconstruction error (in dB) of the direct recovery x1.
mse_bo = 10 * np.log10((lp.norm(x - x1)**2) / lp.norm(x)**2)
print('BSBL-BO exit on %d loop' % clf.count)

plt.figure()
plt.plot(x, linewidth=3)
plt.plot(x1, 'r-')
plt.title('MSE of BO (directly) is ' + str(mse_bo) + 'dB')
# Use a list, not a set: a set has no defined order, so the labels could be
# attached to the wrong curves.
plt.legend(['Original', 'Recovered'])

#=========================== Second Method ==============================
# First recover the signal's coefficients in the DCT domain;
# Then recover the signal using the DCT coefficients and the DCT basis
#=========================================================================
# Row-wise orthonormal DCT of the first M rows of Phi, computed in a single
# call along axis 1 instead of a Python loop over rows.
A = sf.dct(Phi[:M, :].astype('float'), norm='ortho', axis=1)
#
clf = bsbl.bo(verbose=1,
              learn_type=1,
              learn_lambda=2,
              prune_gamma=-1,
              epsilon=1e-8,
              max_iters=16)
rev_dct_coeff = clf.fit_transform(A, y, blk_start_loc=groupStartLoc)
# IDCT only accepts a 'row' vector!
x2 = sf.idct(rev_dct_coeff, norm='ortho')
#
# Relative reconstruction error (in dB) of the DCT-domain recovery x2.
mse_bo_dct = 10 * np.log10((lp.norm(x - x2)**2) / lp.norm(x)**2)
print('BSBL-BO exit on %d loop' % clf.count)
  def extract_mfcc(self, draw=False):
    """Compute lifted, mean-normalised MFCCs and log mel filter-bank energies.

    Calls ``self.pre_emphasis()``, then frames ``self.y`` into 25 ms windows
    with a 10 ms step (in samples, from ``self.sampling_freq``), applies a
    Hamming window, a 512-point power spectrum, a 40-filter triangular mel
    filter bank and an orthonormal DCT-II.

    Parameters
    ----------
    draw : bool, optional
        Currently unused; only referenced by the disabled endpoint-detection
        call.

    Returns
    -------
    (mfcc, filter_banks)
        ``mfcc`` holds DCT coefficients 1 onwards (at most 98), lifted and
        with the per-coefficient mean removed; ``filter_banks`` holds the
        log filter-bank energies, one row per frame.
        ``(False, False)`` is returned when the signal is too short to
        produce a single frame.
    """
    # Endpoint detection (disabled)
    # begin, end = self.endpoint_detection(draw=draw)

    # Pre-emphasis
    self.pre_emphasis()
    # y = self.y[begin:end]
    y = self.y

    # Framing
    sig_len = len(y)
    frame_size, frame_stride = 0.025, 0.01
    frame_len = int(round(frame_size * self.sampling_freq))
    frame_step = int(round(frame_stride * self.sampling_freq))
    num_frames = int(math.ceil(float(sig_len - frame_len) / frame_step))
    # `<= 0` rather than `is 0`: identity comparison on ints is unreliable,
    # and a signal shorter than one frame yields a negative count.
    if num_frames <= 0:
      return False, False

    # Zero-pad so that every frame is full length.
    pad_sig_len = num_frames * frame_step + frame_len
    pad_sig = np.append(y, np.zeros(pad_sig_len - sig_len))

    # indices[i, j] = i * frame_step + j, i.e. sample j of frame i.
    frame_starts = np.arange(0, num_frames * frame_step, frame_step)
    indices = frame_starts[:, np.newaxis] + np.arange(frame_len)[np.newaxis, :]
    frames = pad_sig[indices]

    # Windowing
    frames *= np.hamming(frame_len)

    # Fourier transform and power spectrum
    NFFT = 512
    mag_frames = np.absolute(np.fft.rfft(frames, NFFT))
    pow_frames = (1.0/NFFT) * (mag_frames**2)

    # Convert to the mel scale
    low_freq_mel = 0
    nfilt = 40
    high_freq_mel = 2595 * np.log10(1 + (self.sampling_freq/2)/700)
    mel_points = np.linspace(low_freq_mel, high_freq_mel, nfilt+2)
    hz_points = 700 * (10**(mel_points/2595) - 1)

    # FFT bin index of each filter edge (kept as floats for the slopes below).
    bin_edges = np.floor((NFFT+1) * hz_points / self.sampling_freq)

    fbank = np.zeros((nfilt, int(np.floor(NFFT/2 + 1))))

    for m in range(1, nfilt + 1):
        f_m_minus = int(bin_edges[m - 1])   # left
        f_m = int(bin_edges[m])             # center
        f_m_plus = int(bin_edges[m + 1])    # right
        # Rising slope of the triangle.
        for k in range(f_m_minus, f_m):
            fbank[m - 1, k] = (k - bin_edges[m - 1]) / (bin_edges[m] - bin_edges[m - 1])
        # Falling slope of the triangle.
        for k in range(f_m, f_m_plus):
            fbank[m - 1, k] = (bin_edges[m + 1] - k) / (bin_edges[m + 1] - bin_edges[m])
    filter_banks = np.dot(pow_frames, fbank.T)
    filter_banks = np.where(filter_banks == 0, np.finfo(float).eps, filter_banks)  # Numerical Stability
    filter_banks = 20 * np.log10(filter_banks)  # dB

    num_ceps = 98
    cep_lifter = 22

    # Drop coefficient 0 and keep up to num_ceps of the remaining ones.
    mfcc = dct(filter_banks, type=2, axis=1, norm='ortho')[:, 1 : (num_ceps + 1)]
    n = np.arange(mfcc.shape[1])

    # Sinusoidal liftering followed by per-coefficient mean normalisation.
    lift = 1 + (cep_lifter / 2) * np.sin(np.pi * n / cep_lifter)
    mfcc *= lift
    mfcc -= (np.mean(mfcc, axis=0) + 1e-8)

    return mfcc, filter_banks
Exemple #56
0
def mfcc_calc(file_address,
              frame_size=frame_size,
              frame_stride=frame_stride,
              nfilt=nfilt,
              frame_limit=frame_limit):
    """Read a WAV file and return its MFCCs as one flattened row.

    The signal is split into overlapping Hamming-windowed frames, turned
    into a 512-point power spectrum, passed through ``nfilt`` triangular mel
    filters, converted to dB and reduced with an orthonormal DCT-II to 12
    cepstral coefficients (coefficient 0 is dropped). The coefficients are
    liftered, mean-normalised per coefficient, then zero-padded or truncated
    to ``frame_limit`` frames.

    Parameters
    ----------
    file_address : path passed to ``scipy.io.wavfile.read``.
    frame_size : frame length, multiplied by the sample rate to get samples.
    frame_stride : step between frames, multiplied by the sample rate.
    nfilt : number of mel filters.
    frame_limit : number of frames kept in the result.

    Returns
    -------
    numpy array of shape (1, 12 * frame_limit): the transposed
    (coefficient-major) result matrix flattened into a single row.
    """
    # sample_rate: samples per second; signal: the audio data
    sample_rate, signal = scipy.io.wavfile.read(file_address)

    # ---- Framing -------------------------------------------------------
    signal_length = len(signal)
    frame_length = int(round(frame_size * sample_rate))
    frame_step = int(round(frame_stride * sample_rate))

    num_steps = 1
    if signal_length > frame_length:
        num_steps = int(
            numpy.ceil(float(signal_length - frame_length) / frame_step))
    num_frames = num_steps + 1

    # Zero-pad the tail so the last frame is complete.
    pad_signal_length = num_steps * frame_step + frame_length
    pad_signal = numpy.append(
        signal, numpy.zeros((pad_signal_length - signal_length)))

    # indices[i, j] is the sample position of element j of frame i.
    within_frame = numpy.arange(0, frame_length)
    frame_starts = numpy.arange(0, num_frames * frame_step, frame_step)
    indices = within_frame[numpy.newaxis, :] + frame_starts[:, numpy.newaxis]
    frames = pad_signal[indices.astype(numpy.int32, copy=False)]

    # Hamming window ahead of the FFT.
    frames *= numpy.hamming(frame_length)

    # ---- Power spectrum ------------------------------------------------
    NFFT = 512
    magnitudes = numpy.absolute(numpy.fft.rfft(frames, NFFT))
    pow_frames = ((1.0 / NFFT) * ((magnitudes)**2))

    # ---- Triangular mel filter bank -------------------------------------
    low_freq_mel = 0
    high_freq_mel = (2595 * numpy.log10(1 + (sample_rate / 2) / 700))
    # nfilt + 2 points equally spaced in mel, including both band edges.
    mel_points = numpy.linspace(low_freq_mel, high_freq_mel, nfilt + 2)
    hz_points = (700 * (10**(mel_points / 2595) - 1))
    edges = numpy.floor((NFFT + 1) * hz_points / sample_rate)

    fbank = numpy.zeros((nfilt, int(numpy.floor(NFFT / 2 + 1))))
    triples = zip(edges[:-2], edges[1:-1], edges[2:])
    for row, (left, center, right) in enumerate(triples):
        # Rising edge of the triangle.
        for k in range(int(left), int(center)):
            fbank[row, k] = (k - left) / (center - left)
        # Falling edge of the triangle.
        for k in range(int(center), int(right)):
            fbank[row, k] = (right - k) / (right - center)

    filter_banks = numpy.dot(pow_frames, fbank.T)
    # Replace exact zeros so the logarithm stays finite.
    filter_banks = numpy.where(filter_banks == 0,
                               numpy.finfo(float).eps,
                               filter_banks)
    filter_banks = 20 * numpy.log10(filter_banks)  # dB

    # ---- Cepstral coefficients -----------------------------------------
    num_ceps = 12
    cepstra = dct(filter_banks, type=2, axis=1, norm='ortho')
    mfcc = cepstra[:, 1:(num_ceps + 1)]  # keep coefficients 2-13

    cep_lifter = 23
    n = numpy.arange(mfcc.shape[1])
    mfcc *= 1 + (cep_lifter / 2) * numpy.sin(numpy.pi * n / cep_lifter)

    # Mean normalisation per coefficient.
    mfcc -= (numpy.mean(mfcc, axis=0))

    # ---- Fixed-size output ---------------------------------------------
    mfcc_result = numpy.zeros((frame_limit, num_ceps))
    kept = min(len(mfcc), frame_limit)
    mfcc_result[:kept, :] = mfcc[:kept, :]

    return mfcc_result.T.reshape(1, -1)
Exemple #57
0
def mfcc_fft(complexSpectrum):
    """Print the type-2 DCT of the log mel-filtered power spectrum.

    The power spectrum is ``abs(complexSpectrum) ** 2 / nFFt``; it is
    projected with ``melFilterBank(nFFt)``, passed through the natural log
    and a type-2 DCT. The result is printed; nothing is returned.
    """
    power = (abs(complexSpectrum) ** 2) / nFFt
    mel_energies = numpy.dot(power, melFilterBank(nFFt))
    cepstrum = dct(numpy.log(mel_energies), type=2)  # MFCC
    print(cepstrum)
def mfcc(audio, fs=16000, window_dt=0.025, dt=0.01, n_cepstra=13,
         n_filters=26, n_fft=512, minfreq=0, maxfreq=None, preemph=0.97,
         lift=22, energy=True, n_derivatives=0, deriv_spread=2):
    """Compute MFCC features from an audio signal.

    Parameters
    ----------
    audio : array_like
        The audio signal from which to compute features; forwarded to
        ``fbank``.
    fs : float, optional
        The sample rate of the signal. Default: 16000
    window_dt : float, optional
        The length of the analysis window in seconds.
        Default: 0.025 (25 milliseconds)
    dt : float, optional
        The step between successive windows in seconds.
        Default: 0.01 (10 milliseconds)
    n_cepstra : int, optional
        The number of cepstral coefficients to keep. Default: 13
    n_filters : int, optional
        The number of filters in the filterbank. Default: 26
    n_fft : int, optional
        The FFT size. Default: 512
    minfreq : int, optional
        Lowest band edge of the mel filters, in Hz. Default: 0
    maxfreq : int, optional
        Highest band edge of the mel filters, in Hz. Default: None
        (presumably resolved to fs / 2 inside ``fbank`` -- TODO confirm)
    preemph : float, optional
        Pre-emphasis coefficient forwarded to ``fbank``. Default: 0.97
    lift : float, optional
        Lifter coefficient passed to ``lifter`` for the final cepstral
        coefficients. Default: 22
    energy : bool, optional
        If true, the zeroth cepstral coefficient is replaced with the log of
        the frame energy returned by ``fbank``. Default: True
    n_derivatives : int, optional
        The number of successive derivatives appended to the feature
        vector. Default: 0
    deriv_spread : int, optional
        The spread passed to ``derivative`` for each derivative. Default: 2

    Returns
    -------
    A numpy array formed by horizontally stacking the cepstral features and
    each of the ``n_derivatives`` derivative arrays. Each row holds one
    feature vector.
    """
    band_energies, frame_energy = fbank(
        audio, fs, window_dt, dt, n_filters, n_fft, minfreq, maxfreq, preemph)

    # Log filter-bank energies -> orthonormal DCT-II -> first n_cepstra.
    cepstra = dct(np.log(band_energies), type=2, axis=1, norm='ortho')
    cepstra = lifter(cepstra[:, :n_cepstra], lift)
    if energy:
        # replace first cepstral coefficient with log of frame energy
        cepstra[:, 0] = np.log(frame_energy)

    # Each derivative is taken of the previous block, starting from the
    # cepstra themselves.
    blocks = [cepstra]
    for _ in range(n_derivatives):
        blocks.append(derivative(blocks[-1], deriv_spread))
    return np.hstack(blocks)
Exemple #59
0
    def compute(self):
        """Run the cosine transforms and hand the results to ``complete``.

        Builds the real-space axis from the extrapolated q values, then
        computes the 1D correlation function, the 3D correlation function
        (up to x = 200) and the interface distribution function. The work
        stops early, returning None, whenever ``check_if_cancelled()`` or
        ``isquit()`` reports true. On any exception the error is logged and
        ``complete(transforms=None)`` is called instead.
        """
        q_ext = self.extrapolation.x
        iq_ext = self.extrapolation.y
        q_data = self.data.x
        background = self.background

        n_points = len(q_ext)
        xs = np.pi*np.arange(n_points, dtype=np.float32)/(q_data[1]-q_data[0])/n_points

        self.ready(delay=0.0)
        self.update(msg="Fourier transform in progress.")
        self.ready(delay=0.0)

        if self.check_if_cancelled():
            return
        try:
            # ----- 1D Correlation Function -----
            gamma1 = dct((iq_ext-background)*q_ext**2)
            Q = gamma1.max()
            gamma1 /= Q

            if self.check_if_cancelled():
                return

            # ----- 3D Correlation Function -----
            # gamma3(R) = 1/R int_{0}^{R} gamma1(x) dx
            # Integrated cumulatively with the trapezium rule, and only up to
            # x=200 (as this is all that's plotted).
            mask = xs <= 200.0
            n = len(xs[mask])
            running_integral = cumtrapz(gamma1[:n], xs[:n])
            gamma3 = np.hstack((1.0, running_integral/xs[1:n]))  # Gamma_3(0) is defined as 1

            if self.check_if_cancelled():
                return

            # ----- Interface Distribution function -----
            idf_integrand = -q_ext**4 * (iq_ext-background)
            idf = dct(idf_integrand)

            if self.check_if_cancelled():
                return

            # Manually calculate IDF(0.0), since scipy DCT tends to give us a
            # very large negative value.
            # IDF(x) = int_0^inf q^4 * I(q) * cos(q*x) * dq
            # => IDF(0) = int_0^inf q^4 * I(q) * dq
            idf[0] = trapz(idf_integrand, q_ext)
            idf /= Q  # Normalise using scattering invariant

        except Exception as e:
            import logging
            logging.getLogger(__name__).error(e)

            self.update(msg="Fourier transform failed.")
            self.complete(transforms=None)
            return
        if self.isquit():
            return
        self.update(msg="Fourier transform completed.")

        transform1 = Data1D(xs, gamma1)
        transform3 = Data1D(xs[mask], gamma3)
        idf_data = Data1D(xs, idf)

        self.complete(transforms=(transform1, transform3, idf_data))
from scipy.fftpack import dct, idct

import math

if __name__ == '__main__':
    # Debug script: load the syllable dictionary, print the orthonormal DCT
    # of the first entry's lf0 track, then exit.

    syllable_dict = Utility.load_obj(
        '/work/w2/decha/Data/GPR_speccom_data/Interspeech2017/syllable_dictionary_data_with_delta_deltadelta.pkl'
    )

    for syl in syllable_dict:

        # print(...) with a single argument behaves the same on Python 2 and 3.
        print(syl)

        lf0 = syllable_dict[syl][0]

        # Hand-computed k = 0 cosine sum (the cosine argument is always 0, so
        # this is just the sum of lf0). Not used afterwards.
        y = 0.0
        for n, f in enumerate(lf0):
            y = y + f * math.cos(math.pi * 0 * (2.0 * n + 1) /
                                 (2.0 * len(lf0)))

        # Not used afterwards.
        W = PoGUtility.generate_W_for_DCT(len(lf0), len(lf0))

        lf0_dct = dct(lf0, norm='ortho')

        print(lf0_dct)

        # Stop after the first syllable.
        sys.exit()