Example #1
 def getDisplacements2D(self, Z=None, window=False):
     """
     Use phase correlation to find the relative displacement between
     each time step
     """
     if Z is None:
         Z = self.getNbPixelsPerFrame() // self.getNbPixelsPerSlice() // 2
     shape = np.asarray(self.get2DShape())
     if window:
         ham = np.hamming(shape[1])*np.atleast_2d(np.hamming(shape[0])).T
     else:
         ham = 1.0
     displs = np.zeros((self.getNbFrames(),2))
     a = rfft2(self.get2DSlice(T=0, Z=Z)*ham)
     for t in range(1,self.getNbFrames()):
         b = rfft2(self.get2DSlice(T=t, Z=Z)*ham)
         #calculate the normalized cross-power spectrum
         #R = numexpr.evaluate(
         #    'a*complex(real(b), -imag(b))/abs(a*complex(real(b), -imag(b)))'
         #    )
         R = a*b.conj()
         Ra = np.abs(a*b.conj())
         R[Ra>0] /= Ra[Ra>0]
         r = irfft2(R)
         #Get the periodic position of the peak
         l = r.argmax()
         displs[t] = np.unravel_index(l, r.shape)
         #prepare next step
         a = b
     return np.where(displs<shape/2, displs, displs-shape)
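
For reference, a self-contained sketch of the same phase-correlation step on two plain NumPy arrays; the standalone function and its name are illustrative, not part of the class above:

import numpy as np
from numpy.fft import rfft2, irfft2

def phase_correlation_shift(img_a, img_b):
    """Estimate the cyclic (dy, dx) shift taking img_a onto img_b."""
    A = rfft2(img_a)
    B = rfft2(img_b)
    R = A * B.conj()
    mag = np.abs(R)
    R[mag > 0] /= mag[mag > 0]      # normalized cross-power spectrum
    r = irfft2(R, s=img_a.shape)    # correlation surface, peak at the shift
    peak = np.array(np.unravel_index(r.argmax(), r.shape))
    shape = np.array(img_a.shape)
    return np.where(peak < shape / 2, peak, peak - shape)  # wrap to signed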
Example #2
 def applyWindow(self, window="hanning", ww=0, cf=0):
     '''
     Apply window function to frequency domain data
     cf: the frequency the window is centered over [Hz]
     ww: the window width [Hz], if ww equals 0 the window covers the full range
     '''
     self.info("Applying %s window ..." % window)
     if window == "hanning":
         if ww == 0:
             w = np.hanning(self.numfreq)
         else:
             pos = int((cf - self.lowF) / self.deltaF)
             halfwidth = int(ww / (2.0 * self.deltaF))
             w = np.zeros(self.numfreq)
             w[pos - halfwidth:pos + halfwidth] = np.hanning(2 * halfwidth)
     elif window == "hamming":
         if ww == 0:
             w = np.hamming(self.numfreq)
         else:
             pos = int((cf - self.lowF) / self.deltaF)
             halfwidth = int(ww / (2.0 * self.deltaF))
             w = np.zeros(self.numfreq)
             w[pos - halfwidth:pos + halfwidth] = np.hamming(2 * halfwidth)
     elif window == "blackman":
         if ww == 0:
             w = np.blackman(self.numfreq)
         else:
             pos = int((cf - self.lowF) / self.deltaF)
             halfwidth = int(ww / (2.0 * self.deltaF))
             w = np.zeros(self.numfreq)
             w[pos - halfwidth:pos + halfwidth] = np.blackman(2 * halfwidth)
     else:
         raise ValueError("Unsupported window type: %s" % window)
     self.data = self.data * w
     self.done()
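
The three branches above differ only in the window constructor, so a table-driven variant avoids the triplication. This is a sketch with a hypothetical helper name, not the original class API:

import numpy as np

WINDOW_FUNCS = {"hanning": np.hanning, "hamming": np.hamming,
                "blackman": np.blackman}

def build_window(window, numfreq, lowF, deltaF, ww=0, cf=0):
    """Full-range window, or one centered on cf with width ww (both in Hz)."""
    winfunc = WINDOW_FUNCS[window]
    if ww == 0:
        return winfunc(numfreq)
    pos = int((cf - lowF) / deltaF)
    halfwidth = int(ww / (2.0 * deltaF))
    w = np.zeros(numfreq)
    w[pos - halfwidth:pos + halfwidth] = winfunc(2 * halfwidth)
    return w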
Example #3
def lcn_mauch(X, kernel=None, rho=0):
    """Apply a version of local contrast normalization (LCN), inspired by
    Mauch, Dixon (2009), "Approximate Note Transcription...".

    Parameters
    ----------
    X : np.ndarray, ndim=2
        Input representation.
    kernel : np.ndarray
        Convolution kernel (should be roughly low-pass).
    rho : scalar
        Exponent applied to the local scale estimate for heuristic range
        control.

    Returns
    -------
    Z : np.ndarray
        The processed output.
    """
    if kernel is None:
        dim0, dim1 = 15, 37
        dim0_weights = np.hamming(dim0 * 2 + 1)[:dim0]
        dim1_weights = np.hamming(dim1)
        kernel = dim0_weights[:, np.newaxis] * dim1_weights[np.newaxis, :]

    kernel /= kernel.sum()
    Xh = convolve2d(X, kernel, mode='same', boundary='symm')
    V = hwr(X - Xh)
    S = np.sqrt(
        convolve2d(np.power(V, 2.0), kernel, mode='same', boundary='symm'))
    S2 = np.zeros(S.shape) + S.mean()
    S2[S > S.mean()] = S[S > S.mean()]
    if S2.sum() == 0.0:
        S2 += 1.0
    return V / S2**rho
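
lcn_mauch depends on two names not shown in the snippet: convolve2d (presumably scipy.signal's) and hwr, presumably half-wave rectification. A plausible stand-in and a usage sketch under those assumptions:

import numpy as np
from scipy.signal import convolve2d

def hwr(x):
    """Assumed helper: half-wave rectification, clamping negatives to zero."""
    return np.maximum(x, 0.0)

X = np.abs(np.random.randn(100, 48))  # spectrogram-like input (illustrative)
Z = lcn_mauch(X, rho=1.0)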
Example #4
def show_raw(item):
    global audio_x, audio_y, freq, curItem, filt
    curItem = item
    if l_rms is not None:
        p1.removeItem(l_rms)
        p2.removeItem(l_rms2)
    if l_fdlp is not None:
        p1.removeItem(l_fdlp)
        p2.removeItem(l_fdlp2)
    if l_tae is not None:
        p1.removeItem(l_tae)
        p2.removeItem(l_tae2)
    fn = item.text()
    w = ef.load_audio(fn)
    freq = w[2]
    p1.clear()
    p2.clear()
    x_r = np.arange(len(w[0]))/float(w[2])
    audio_x = x_r
    audio_y = w[0]/max(abs(w[0]))
    if filt is not None:
        ind = int(filt * len(audio_y)/freq)
        if ind > 1024:
            filter = np.concatenate((np.zeros(ind-1024), np.hamming(2048), np.zeros(len(audio_y)-ind-1024)))
        else:
            filter = np.append(np.hamming(2*ind), np.zeros(len(audio_y)-2*ind))
        audio_y = np.real(np.fft.ifft(np.fft.fft(audio_y)*filter))
        audio_y = audio_y/max(abs(audio_y))
    p1.plot(audio_x,audio_y,pen=(1,4))
    lr.setBounds([x_r[0],x_r[-1]])
    lr.setRegion([x_r[0],x_r[-1]])
    p1.addItem(lr)
    p2.plot(audio_x,np.abs(audio_y),pen=(1,4))
Example #5
def parse_ICA_results(ICA, buffer_window): #time
	signals = {}
	signals["id"] = "ICA"
	signals["bufferWindow"] = buffer_window

	# ** for 3 channels with ICA**
	one = np.squeeze(np.asarray(ICA[:, 0])).tolist()
	two = np.squeeze(np.asarray(ICA[:, 1])).tolist()
	three = np.squeeze(np.asarray(ICA[:, 2])).tolist()
	
	one = (np.hamming(len(one)) * one)
	two = (np.hamming(len(two)) * two)
	three = (np.hamming(len(three)) * three)

	one = np.fft.irfft(one).astype(float).tolist()
	two = np.fft.irfft(two).astype(float).tolist()
	three = np.fft.irfft(three).astype(float).tolist()

	power_ratio = [0, 0, 0]
	power_ratio[0] = np.sum(one)/np.amax(one)
	power_ratio[1] = np.sum(two)/np.amax(two)
	power_ratio[2] = np.sum(three)/np.amax(three)

	if np.argmax(power_ratio) == 0:
		signals["array"] = one
	elif np.argmax(power_ratio) == 1:
		signals["array"] = two
	else:
		signals["array"] = three

	print(power_ratio)
	print(signals)
	return signals
Example #6
def process_patch(X):
    win = np.outer(
        np.hamming(X.shape[0]), np.hamming(X.shape[1])
    )
    if np.any(np.iscomplex(X)):
        return np.abs(np.fft.fftn(X*win))**0.5
    else:
        return np.abs(np.fft.rfftn(X*win))**0.5
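
A quick usage sketch (illustrative values): the separable 2-D Hamming taper suppresses edge discontinuities before the transform, and real input takes the rfftn branch, which keeps only the non-redundant half-spectrum.

import numpy as np

patch = np.random.rand(32, 32)   # real-valued patch
feat = process_patch(patch)      # rfftn branch for real input
print(feat.shape)                # (32, 17) for a 32x32 real patch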
Example #7
def average_energy(audio, fs=44100, n=1024):

    Ew = np.sum(np.hamming(n)**2)
    result = np.empty(len(audio)//n)
    for i in range(0,len(audio)//n):
        result[i] = (np.sum(np.absolute(np.hamming(n)*audio[i*n:(i+1)*n]))/(float(n)*Ew))
    t = np.arange(len(result)) * (float(n)/fs)      
    return result, t
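
Illustrative call: each output sample is the mean absolute amplitude of one n-sample block, normalized by the Hamming window energy Ew.

import numpy as np

fs = 44100
audio = np.random.randn(5 * fs)
e, t = average_energy(audio, fs=fs, n=1024)
print(e.shape, t[1] - t[0])   # (215,) blocks, ~23 ms per block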
Example #8
 def getDispl2DImage(self, t0=0, t1=1, Z=0):
     ham = np.hamming(self.get2DShape()[1])*np.atleast_2d(np.hamming(self.get2DShape()[0])).T
     a = rfft2(self.get2DSlice(T=t0, Z=Z)*ham)
     b = rfft2(self.get2DSlice(T=t1, Z=Z)*ham)
     R = numexpr.evaluate(
         'a*complex(real(b), -imag(b))/abs(a*complex(real(b), -imag(b)))'
         )
     return irfft2(R)
Example #9
def align(frames, template):
    """
    Warp each slice of the 3D array frames to align it to *template*.

    """
    if frames.shape[:2] != template.shape:
        raise ValueError('Template must be same shape as one slice of frame array')

    # Calculate xs and ys to sample from one frame
    xs, ys = np.meshgrid(np.arange(frames.shape[1]), np.arange(frames.shape[0]))

    # Calculate window to use in FFT convolve
    w = np.outer(np.hamming(template.shape[0]), np.hamming(template.shape[1]))

    # Calculate a normalisation for the cross-correlation
    ccnorm = 1.0 / fftconvolve(w, w)

    # Set border of normalisation to zero to avoid spurious edge matches. Border
    # is set so that there must be a minimum of half-frame overlap
    ccnorm[:(template.shape[0]>>1),:] = 0
    ccnorm[-(template.shape[0]>>1):,:] = 0
    ccnorm[:,:(template.shape[1]>>1)] = 0
    ccnorm[:,-(template.shape[1]>>1):] = 0

    # Normalise template
    tmpl_min = template.min()
    norm_template = template - tmpl_min
    tmpl_max = norm_template.max()
    norm_template /= tmpl_max

    warped_ims = []
    for frame_idx in range(frames.shape[2]):
        logging.info('Aligning frame {0}/{1}'.format(frame_idx+1, frames.shape[2]))
        frame = frames[:,:,frame_idx]

        # Normalise frame
        norm_frame = frame - tmpl_min
        norm_frame /= tmpl_max

        # Convolve template and frame
        conv_im = fftconvolve(norm_template*w, np.fliplr(np.flipud(norm_frame*w)))
        conv_im *= ccnorm

        # Find maximum location
        max_loc = np.unravel_index(conv_im.argmax(), conv_im.shape)

        # Convert location to shift
        dy = max_loc[0] - template.shape[0] + 1
        dx = max_loc[1] - template.shape[1] + 1
        logging.info('Offset computed to be ({0},{1})'.format(dx, dy))

        # Warp image
        warped_ims.append(dtcwt.sampling.sample(frame, xs-dx, ys-dy, method='bilinear'))

    return np.dstack(warped_ims)
Example #10
def apply_hamming(frames, inv=False):
    """
    Computes either the hamming window or its inverse and applies
    it to a sequence of frames.

    :param frames: Frames with dimension num_frames x num_elements_per_frame
    :param inv: Indicates if the window should be inverted.
    :return:
    """
    M = frames.shape[1]
    win = np.hamming(M)**(-1) if inv else np.hamming(M)
    return frames * win
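
Since np.hamming never reaches zero (its end taps are 0.08), the inverse window is finite and the forward/inverse pair round-trips exactly; a quick check (illustrative):

import numpy as np

frames = np.random.randn(4, 256)
restored = apply_hamming(apply_hamming(frames), inv=True)
assert np.allclose(frames, restored)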
Example #11
def get_image_data(filename):
    im = pygame.image.load(filename)
    sz = im.get_size()
    im = pygame.transform.scale(im, (sz[0]//SCALE_FACTOR, sz[1]//SCALE_FACTOR))
    im2 = im.convert(8)
    a = pygame.surfarray.array2d(im2)
    hw1 = numpy.hamming(a.shape[0])
    hw2 = numpy.hamming(a.shape[1])
    a = a.transpose()
    a = a*hw1
    a = a.transpose()
    a = a*hw2
    return a
Example #12
def notSoRandomWalk(shape, std=1, trendFilterLength=32, lpfLength=16):
	"""bandpass filter a random walk so that the low-frequency trend /
	drift is eliminated and the high-frequency noise is attenuated"""
	walk = randwalk(shape, std=std)
	filt = np.hamming(trendFilterLength)
	filt /= np.sum(filt)
	whichAxis = len(walk.shape) > 1 # 0 iff 1d, else 1
	# subtract baseline drift, roughly
	trend = filters.convolve1d(walk, weights=filt, axis=whichAxis, mode='reflect')
	walk -= trend
	# low-pass filter to attenuate high-frequency noise
	lpf = np.hamming(lpfLength)
	lpf /= np.sum(lpf)
	walk = filters.convolve1d(walk, weights=lpf, axis=whichAxis, mode='reflect')
	return walk
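
The snippet assumes two names defined elsewhere: filters (plausibly scipy.ndimage, which provides the convolve1d used here) and randwalk. A minimal stand-in under that reading:

import numpy as np
from scipy import ndimage as filters   # provides convolve1d as used above

def randwalk(shape, std=1):
    """Assumed helper: cumulative sum of N(0, std) steps along axis 0."""
    steps = np.random.randn(*np.atleast_1d(shape)) * std
    return np.cumsum(steps, axis=0)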
Example #13
def cfrequency(data, fs, smoothie, fk):
    """
    Central frequency of a signal.

    Computes the central frequency of the given data which can be windowed or
    not. The central frequency is a measure of the frequency where the
    power is concentrated. It corresponds to the second moment of the power
    spectral density function.

    The central frequency is returned.

    :type data: :class:`~numpy.ndarray`
    :param data: Data to estimate central frequency from.
    :param fs: Sampling frequency in Hz.
    :param smoothie: Factor for smoothing the result.
    :param fk: Coefficients for calculating time derivatives
        (calculated via central difference).
    :return: **cfreq[, dcfreq]** - Central frequency, Time derivative of center
        frequency (windowed only).
    """
    nfft = util.nextpow2(data.shape[1])
    freq = np.linspace(0, fs, nfft + 1)
    freqaxis = freq[0:nfft // 2]
    cfreq = np.zeros(data.shape[0])
    if np.size(data.shape) > 1:
        i = 0
        for row in data:
            Px_wm = welch(row, np.hamming(len(row)), util.nextpow2(len(row)))
            Px = Px_wm[0:len(Px_wm) // 2]
            cfreq[i] = np.sqrt(np.sum(freqaxis ** 2 * Px) / (sum(Px)))
            i = i + 1
        cfreq = util.smooth(cfreq, smoothie)
        #cfreq_add = \
        #        np.append(np.append([cfreq[0]] * (np.size(fk) // 2), cfreq),
        #        [cfreq[np.size(cfreq) - 1]] * (np.size(fk) // 2))
        # faster alternative
        cfreq_add = np.hstack(
            ([cfreq[0]] * (np.size(fk) // 2), cfreq,
             [cfreq[np.size(cfreq) - 1]] * (np.size(fk) // 2)))
        dcfreq = signal.lfilter(fk, 1, cfreq_add)
        #dcfreq = dcfreq[np.size(fk) // 2:(np.size(dcfreq) - np.size(fk) // 2)]
        # correct start and end values of time derivative
        dcfreq = dcfreq[np.size(fk) - 1:np.size(dcfreq)]
        return cfreq, dcfreq
    else:
        Px_wm = welch(data, np.hamming(len(data)), util.nextpow2(len(data)))
        Px = Px_wm[0:len(Px_wm) // 2]
        cfreq = np.sqrt(np.sum(freqaxis ** 2 * Px) / (sum(Px)))
        return cfreq
Example #14
    def __init__(self, winSecs, soundSecs, sampleRate):
        """

        :param winSecs:
        :param soundSecs:
        :param sampleRate:
        """
        self.sampleRate = sampleRate
        self.winSecs = winSecs
        self.winSamples = int(round(sampleRate*winSecs))
        self.soundSecs = soundSecs
        self.soundSamples = int(round(sampleRate*soundSecs))
        self.startWindow = numpy.hamming(self.winSamples*2)[0:self.winSamples]
        self.endWindow = numpy.hamming(self.winSamples*2)[self.winSamples:]
        self.finalWinStart = self.soundSamples-self.winSamples
Example #15
	def make_wave(self):
		# inverts the spectrogram and returns a wave
		#
		# return: Wave

		res = []
		for t, spectrum in sorted(self.spec_map.items()):
			wave = spectrum.make_wave()
			n = len(wave)

			window = 1 / np.hamming(n)
			wave.ys *= window

			i = wave.find_index(t)
			start = i - (n // 2)
			end = start + n
			res.append((start, end, wave))

		starts, ends, waves = zip(*res)
		low = min(starts)
		high = max(ends)

		ys = np.zeros(high - low, float)
		for start, end, wave in res:
			ys[start:end] = wave.ys

		return Wave(ys, framerate=wave.framerate)
Example #16
def smooth(params, win, mode = _SMOOTH):

    """
    Hamming-window smoothing (optionally returning the trend or the
    detrended signal)
    """
    if win >= len(params)-1:
        win = len(params)-1
    if win % 2 != 0:
        win += 1

    # reflect-pad the signal so the convolution is full-length
    s = np.r_[params[win-1:0:-1], params, params[-1:-win:-1]]
    w = np.hamming(win)

    y = np.convolve(w/w.sum(), s, mode='valid')

    if mode == _DETREND:
        yy = y[(win//2-1):-(win//2)]
        return params - yy
    elif mode == _TREND:
        return y[(win//2-1):-(win//2)]
    else:
        return y[(win//2-1):-(win//2)]
Example #17
def makeimg(wav):
	global callpath
	global imgpath

	fs, frames = wavfile.read(os.path.join(callpath, wav))
	
	pylab.ion()

	pylab.figure(1)

	# generate specgram
	pylab.specgram(
		frames,
		NFFT=256, 
		Fs=22050, 
		detrend=pylab.detrend_none,
		window=numpy.hamming(256),
		noverlap=192,
		cmap=pylab.get_cmap('Greys'))
	
	x_width = len(frames)/fs
	
	pylab.ylim([0,11025])
	pylab.xlim([0,round(x_width,3)-0.006])
	
	img_path = os.path.join(imgpath, wav.replace(".wav",".png"))

	pylab.savefig(img_path)
	
	return img_path
Example #18
def iff_filter(sig, scale, plot_show = 0):
    
    order = int(max(sig.size*scale, 90))
    #order = 80
    # Extend signal on both sides for removing boundary effect in convolution
    sig_extend = np.ones(sig.size+int(order/2)*2)
    sig_extend[int(order/2):(sig.size+int(order/2))] = sig
    sig_extend[0:int(order/2)] = sig[(sig.size-int(order/2)):sig.size]
    sig_extend[(sig.size+int(order/2)):sig_extend.size] = sig[0:int(order/2)]
    
    # convolve with hamming window and normalize
    smooth_sig = np.convolve(sig_extend,np.hamming(order),'same')
    smooth_sig = smooth_sig[int(order/2):(sig.size+int(order/2))]
    smooth_sig = np.amax(sig)/np.amax(smooth_sig)*smooth_sig

    # Plot signal for debug
    if plot_show == 1:
        fig, ax = plt.subplots(ncols=2)
        ax[0].plot(sig)
        ax[0].plot(smooth_sig,'-r')
        ax[1].loglog(np.abs(rfft(sig)))
        ax[1].loglog(np.abs(rfft(smooth_sig)),'-r')
        plt.show()
        
    return smooth_sig
Example #19
def window (v, func='hanning', params=None):
    """ applies a windowing function to the 3D volume v (inplace, as reference) """
    
    N = v.shape[0]
    D = v.ndim
    if any( [ d != N for d in list(v.shape) ] ) or D != 3:
        raise Exception("Error: Volume is not Cube.")
    
    def apply_separable_window(v, w):
        v *= n.reshape(w,(-1,1,1))
        v *= n.reshape(w,(1,-1,1))
        v *= n.reshape(w,(1,1,-1))
    
    if func=="hanning":
        w = n.hanning(N)
        apply_seperable_window(v,w)
    elif func=='hamming':
        w = n.hamming(N)
        apply_seperable_window(v,w)
    elif func=='gaussian':
        raise Exception('Unimplimented')
    elif func=='circle':
        c = gencoords(N,3)
        if params==None:
            r = N/2 -1
        else:
            r = params[0]*(N/2*1)
        v *= (n.sum(c**2,1)  < ( r ** 2 ) ).reshape((N,N,N))
    elif func=='box':
        v[:,0,0] = 0.0
        v[0,:,0] = 0.0
        v[0,0,:] = 0.0
    else:
        raise Exception("Error: Window Type Not Supported")
Example #20
def compute_pitch_hps(x, Fs, dF=None, Fmin=30., Fmax=900., H=5):
    # default value for dF frequency resolution
    if dF is None:
        dF = Fs / x.size
    
    # Hamming window apodization
    x = x.copy()
    x *= np.hamming(x.size)

    # number of points in FFT to reach the resolution wanted by the user
    n_fft = np.ceil(Fs / dF)

    # DFT computation
    X = np.abs(np.fft.fft(x, n=int(n_fft)))
    
    # limiting frequency R_max computation
    R = np.floor(1 + n_fft / 2. / H)

    # computing the indices for min and max frequency
    N_min = np.ceil(Fmin / Fs * n_fft)
    N_max = np.floor(Fmax / Fs * n_fft)
    N_max = min(N_max, R)
    
    # harmonic product spectrum computation
    indices = (np.arange(int(N_max))[:, np.newaxis] * np.arange(1, H+1)).astype(int)
    P = np.prod(X[indices.ravel()].reshape(int(N_max), H), axis=1)
    ix = np.argmax(P * ((np.arange(P.size) >= N_min) & (np.arange(P.size) <= N_max)))
    return dF * ix
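
A sanity check (illustrative): the harmonic product spectrum should land near the fundamental of a synthetic harmonic tone.

import numpy as np

Fs = 8000.0
t = np.arange(2048) / Fs
x = sum(np.sin(2 * np.pi * 220.0 * k * t) / k for k in range(1, 5))
print(compute_pitch_hps(x, Fs))   # expect a value close to 220 Hz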
Example #21
    def make_wave(self):
        """Inverts the spectrogram and returns a Wave.

        returns: Wave
        """
        res = []
        for t, spectrum in sorted(self.spec_map.items()):
            wave = spectrum.make_wave()
            n = len(wave)
            
            window = 1 / np.hamming(n)
            wave.window(window)

            i = wave.find_index(t)
            start = i - n // 2
            end = start + n
            res.append((start, end, wave))

        starts, ends, waves = zip(*res)
        low = min(starts)
        high = max(ends)

        ys = np.zeros(high-low, float)
        for start, end, wave in res:
            ys[start:end] = wave.ys

        # ts = np.arange(len(ys)) / self.framerate
        return Wave(ys, framerate=wave.framerate)
Example #22
    def make_spectrogram(self, seg_length, win_flag=True):
        """Computes the spectrogram of the wave.

        seg_length: number of samples in each segment
        win_flag: boolean, whether to apply hamming window to each segment

        returns: Spectrogram
        """
        if win_flag:
            window = np.hamming(seg_length)
        i, j = 0, seg_length
        step = seg_length // 2

        # map from time to Spectrum
        spec_map = {}

        while j < len(self.ys):
            segment = self.slice(i, j)
            if win_flag:
                segment.window(window)

            # the nominal time for this segment is the midpoint
            t = (segment.start + segment.end) / 2
            spec_map[t] = segment.make_spectrum()

            i += step
            j += step

        return Spectrogram(spec_map, seg_length)
Example #23
def timeStretchAudio(inputAudio, outputAudio, outputDuration, writeOutput=1):

	originalWav = Sndfile(inputAudio, 'r')
	x = originalWav.read_frames(originalWav.nframes)
	fs = originalWav.samplerate
	nChannel = originalWav.channels
	print(fs)
	if nChannel >1:
		x = x[0]


	w = np.hamming(801)
	N = 2048
	t = -90
	minSineDur = .005
	maxnSines = 150
	freqDevOffset = 20
	freqDevSlope = 0.02
	Ns = 512
	H = Ns//4
	tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
	inputDur = float(len(tfreq)*H/fs)
	#timeScale = np.array([0.1,0.1, inputDur, inputDur*2])
	timeScale = np.array([0,0, .4,outputDuration])

	ytfreq, ytmag = trans.sineTimeScaling(tfreq, tmag, timeScale)
	y = SM.sineModelSynth(ytfreq, ytmag, np.array([]), Ns, H, fs)
	
	if writeOutput ==1:
		outputWav = Sndfile(outputAudio, 'w', originalWav.format, originalWav.channels, originalWav.samplerate)
		outputWav.write_frames(y)
		outputWav.close()
	else:
		return y, fs, nChannel
Example #24
def smooth(input_data, nth_octave = 6, window_type='hamming'):
    """ Smooth input data over 1/n octave """

    f_min = 30
    f_max = 20e3

    number_of_octaves = math.log(f_max / f_min, 2)

    # ideally, this should be computed from the display resolution
    number_of_points = 4048
    points_per_octave = number_of_points / number_of_octaves

    log_data = _distribute_over_log(input_data, f_min, f_max, 
                                                 number_of_points)

    window_length = int(points_per_octave / nth_octave)

    if window_type == 'hamming':
        window = np.hamming(window_length)
    elif window_type == 'bartlett':
        window = np.bartlett(window_length)
    elif window_type == 'blackman':
        window = np.blackman(window_length)
    elif window_type == 'hanning':
        window = np.hanning(window_length)
    else:
        raise ValueError("unknown window type '%s'" % window_type)

    output = np.convolve(window / window.sum(), log_data, mode='same')
    return output
Example #25
 def calibrate_adc_snapshot(self, raw_data):
     """
     Calibrates a raw ADC count timedomain snapshot.
     Returns ADC samples in V, ADC spectrum in dBm,
     input spectrum in dBm and input spectrum of
     n_chans in dBm.
     """
     ret = {}
     ret['adc_mixed'] = numpy.array(raw_data)
     if self.config['flip_spectrum']:
         ret['adc_mixed'][::2] *= -1
     ret['adc_v'] = ret['adc_mixed']*self.config['adc_v_scale_factor']
     ret['input_v'] = ret['adc_v']*self.get_input_scale_factor()
     n_accs = len(raw_data)//self.config['n_chans']//2
     window = numpy.hamming(self.config['n_chans']*2)
     spectrum = numpy.zeros(self.config['n_chans'])
     ret['n_accs'] = n_accs
     # if n_accs < 1:
     #     n_accs = 1
     for acc in range(n_accs):
         # print "for acc ", acc
         spectrum +=\
             numpy.abs((numpy.fft.rfft(
                 ret['adc_v'][self.config['n_chans']*2*acc:\
                              self.config['n_chans']*2*(acc+1)]*window)[0:self.config['n_chans']]))
     ret['adc_spectrum_dbm'] =\
         20*numpy.log10(spectrum/n_accs/self.config['n_chans']*6.14)
     ret['input_spectrum_dbm'] = ret[
         'adc_spectrum_dbm']-(self.config['system_bandpass'])
     if self.config['antenna_bandpass_calfile'] != 'none':
         ret['input_spectrum_dbuv'] = dbm_to_dbuv(ret[
             'input_spectrum_dbm']) + self.config['antenna_factor']
     return ret
Example #26
def chromagram(x,fs,length=[],minFreq=27.5,octaves=8,bins=12,thresh=0,window=[],step=[],k=[],verbose=False):
	# Setup variables
	if not length:
		length = int(np.ceil(fs/50))
	if not isinstance(window,np.ndarray):
		window = np.hamming(length)
	if window.size != length:
		raise Exception('Window lengths do not match!')
	if not step:
		step = int(np.floor(length/2))
	nsteps = int(np.floor((x.shape[0]-length)/step) + 1)
	c = np.zeros((nsteps,bins))

	# Create kernel
	if len(k) == 0:
		k = kernel(minFreq,octaves,fs,bins=bins,thresh=thresh)

	for ind in range(nsteps):
		if verbose:
			print(ind,'/',nsteps)
		selection = x[ind*step:ind*step+length]
		c[ind,] += chroma(selection,k,bins=bins)
	return c

# def chromagramviz(c):
# 	plt.pcolor(np.fliplr(c.transpose()))
# 	plt.show()
Example #27
	def make_spectrogram(self, seg_length, win_flag=True):
		# computes the spectrogram of the wave
		#
		# seg_length: number of samples in each segment
		# win_flag: boolean, whether to apply hamming window to each segment
		#
		# return: Spectrogram

		if win_flag:
			window = np.hamming(seg_length)		# sequence of multipliers that are the same length as the wave segment

		i = 0
		j = seg_length
		step = seg_length // 2

		# map from time to spectrum   
		spec_map = {}

		while j < len(self.ys):
			segment = self.slice(i, j)
			
			if win_flag:
				segment.ys *= window		# apply window function to the wave segment 

			t = (segment.start + segment.end) / 2		# the nominal time for this segment is the midpoint
			spectrum = segment.make_spectrum()
			spec_map[t] = spectrum

			i += step
			j += step

		return Spectrogram(spec_map, seg_length)
Example #28
def getPerio(ts, freq=None, sampFreq=1., tapeWindow=None):
    ''' Get the periodogram of ts using a tapering window of length tapeWindow '''
    nt = ts.shape[0]

    # If no tapeWindow given then do not tape
    if tapeWindow is None:
        tapeWindow = nt
    nTapes = int(nt / tapeWindow)
    window = np.hamming(tapeWindow)

    # Get frequencies if not given
    if freq is None:
        freq = getFreqPow2(tapeWindow, sampFreq=sampFreq)
    nfft = freq.shape[0]

    # Get periodogram averages over nTapes windows
    perio = np.zeros((nfft,))
    perioSTD = np.zeros((nfft,))
    for tape in np.arange(nTapes):
        tsTape = ts[tape*tapeWindow:(tape+1)*tapeWindow].copy()  # copy: avoid mutating ts
        tsTape -= tsTape.mean(0)
        tsWindowed = tsTape * window
        # Fourier transform and shift zero frequency to center
        fts = np.fft.fft(tsWindowed, nfft, 0)
        fts = np.fft.fftshift(fts)
        # Get periodogram
        perio += np.abs(fts)**2 / np.sum(np.abs(fts)**2)
        perioSTD += (np.abs(fts)**2 / np.sum(np.abs(fts)**2))**2
    perio /= nTapes
    perioSTD = np.sqrt(perioSTD / nTapes)

    return (freq, perio, perioSTD)
Example #29
def mfcc(s, fs):
	#Constants
	N = 256
	M = 100
	P = 30
	l = int(math.ceil((s.size-N+1)/M))

	#Allocate c array
	c = np.zeros((P,l))

	for x in range(0,l-1):
		#Frame
		start = x * M
		frame = s[start:start+N]

		#Window
		w = np.hamming(N)
		windFrame = frame * w

		#FFT
		frameFFT = np.fft.fft(windFrame)

		#Mel-Frequency Wrapping
		m = get_filterbanks(P,N,fs)
		n2 = math.floor(N/2)
		ms = np.dot(m , abs(np.power(frameFFT[0:n2+1],2)))
		#Last step, compute mel-frequency cepstrum coefficients
		c[:,x] = fft.dct(np.log(ms.clip(min=0.00001)))
	c = np.delete(c, 0, 0)    # exclude 0'th order cepstral coefficient
	
	return c
Example #30
def framesig(sig, frame_len, frame_step, winfunc=lambda x:numpy.ones((1, x))):
    """Frame a signal into overlapping frames.

    :param sig: the audio signal to frame.
    :param frame_len: length of each frame measured in samples.
    :param frame_step: number of samples after the start of the previous frame that the next frame should begin.
    :param winfunc: the analysis window to apply to each frame. By default no window is applied.    
    :returns: an array of frames. Size is NUMFRAMES by frame_len.
    """
    slen = len(sig)
    frame_len = int(round(frame_len))
    frame_step = int(round(frame_step))
    if slen <= frame_len: 
        numframes = 1
    else:
        numframes = 1 + int(math.ceil((1.0 * slen - frame_len) / frame_step))
        
    padlen = int((numframes - 1) * frame_step + frame_len)
    
    zeros = numpy.zeros((padlen - slen,))
    padsignal = numpy.concatenate((sig, zeros))
    
    indices = numpy.tile(numpy.arange(0, frame_len), (numframes, 1)) + numpy.tile(numpy.arange(0, numframes * frame_step, frame_step), (frame_len, 1)).T
    indices = numpy.array(indices, dtype=numpy.int32)
    frames = padsignal[indices]
    win = numpy.tile(winfunc(frame_len), (numframes, 1))
    return frames * win
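
Shape check (illustrative), using the default windowless winfunc:

import math
import numpy

sig = numpy.arange(1000, dtype=float)
frames = framesig(sig, frame_len=256, frame_step=128)
print(frames.shape)   # (7, 256): 1 + ceil((1000 - 256) / 128) frames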
Example #31
                continue
            break
    return pixels_gandalf_random


fft_plot_filter = dsp.ExpFilter(np.tile(1e-1, config['N_FFT_BINS']),
                                alpha_decay=0.5,
                                alpha_rise=0.99)
mel_gain = dsp.ExpFilter(np.tile(1e-1, config['N_FFT_BINS']),
                         alpha_decay=0.01,
                         alpha_rise=0.99)
mel_smoothing = dsp.ExpFilter(np.tile(1e-1, config['N_FFT_BINS']),
                              alpha_decay=0.5,
                              alpha_rise=0.99)
# volume = dsp.ExpFilter(config['MIN_VOLUME_THRESHOLD'], alpha_decay=0.02, alpha_rise=0.02)
fft_window = np.hamming(
    int(config['MIC_RATE'] / config['FPS']) * config['N_ROLLING_HISTORY'])
prev_fps_update = time.time()


def microphone_update(audio_samples):
    global y_roll, prev_rms, prev_exp, prev_fps_update, n_frame
    # Scale samples to the range [-1, 1]
    y = audio_samples / 32767.0
    # Construct a rolling window of audio samples
    y_roll[:-1] = y_roll[1:]
    y_roll[-1, :] = np.copy(y)
    y_data = np.concatenate(y_roll, axis=0).astype(np.float32)

    vol = np.max(np.abs(y_data))
    if vol < config['MIN_VOLUME_THRESHOLD']:
        if config['TURN_OFF_ON_SILENCE']:
Example #32
def makeMask(matrixSize,
             shape='circle',
             radius=1.0,
             center=(0.0, 0.0),
             range=[-1, 1],
             fringeWidth=0.2):
    """
    Returns a matrix to be used as an alpha mask (circle,gauss,ramp)

    :Parameters:
            matrixSize: integer
                the size of the resulting matrix on both dimensions (e.g 256)
            shape:  'circle','gauss','ramp' (linear gradient from center),
                    'raisedCosine' (the edges are blurred by a raised cosine)
                shape of the mask
            radius:  float
                scale factor to be applied to the mask (circle with radius of
                [1,1] will extend just to the edge of the matrix). Radius can
                asymmetric, e.g. [1.0,2.0] will be wider than it is tall.
            center:  2x1 tuple or list (default=[0.0,0.0])
                the centre of the mask in the matrix ([1,1] is top-right corner,
                [-1,-1] is bottom-left)
            fringeWidth: float (0-1)
                The proportion of the raisedCosine that is being blurred.
            range: 2x1 tuple or list (default=[-1,1])
                The minimum and maximum value in the mask matrix
            """
    rad = makeRadialMatrix(matrixSize, center, radius)
    if shape == 'ramp':
        outArray = 1 - rad
    elif shape == 'circle':
        #outArray=numpy.ones(matrixSize,'f')
        outArray = numpy.where(numpy.greater(rad, 1.0), 0.0, 1.0)
    elif shape == 'gauss':
        outArray = makeGauss(rad, mean=0.0, sd=0.33333)
    elif shape == 'raisedCosine':
        hamming_len = 1000  # This affects the 'granularity' of the raised cos
        fringe_proportion = fringeWidth  # This one affects the proportion of the
        # stimulus diameter that is devoted to the
        # raised cosine.

        rad = makeRadialMatrix(matrixSize, center, radius)
        outArray = numpy.zeros_like(rad)
        outArray[numpy.where(rad < 1)] = 1
        raised_cos_idx = numpy.where(
            [numpy.logical_and(rad <= 1, rad >= 1 - fringe_proportion)])[1:]

        # Make a raised_cos (half a hamming window):
        raised_cos = numpy.hamming(hamming_len)[:hamming_len // 2]
        raised_cos -= numpy.min(raised_cos)
        raised_cos /= numpy.max(raised_cos)

        # Measure the distance from the edge - this is your index into the hamming window:
        d_from_edge = numpy.abs((1 - fringe_proportion) - rad[raised_cos_idx])
        d_from_edge /= numpy.max(d_from_edge)
        d_from_edge *= numpy.round(hamming_len / 2)

        # This is the indices into the hamming (larger for small distances from the edge!):
        portion_idx = (-1 * d_from_edge).astype(int)

        # Apply the raised cos to this portion:
        outArray[raised_cos_idx] = raised_cos[portion_idx]

        #Sometimes there are some remaining artifacts from this process, get rid of them:
        artifact_idx = numpy.where(numpy.logical_and(outArray == 0,
                                                     rad < 0.99))
        outArray[artifact_idx] = 1
        artifact_idx = numpy.where(numpy.logical_and(outArray == 1,
                                                     rad > 0.99))
        outArray[artifact_idx] = 0

    else:

        raise ValueError('Unknown value for shape argument %s' % shape)
    mag = range[1] - range[0]
    offset = range[0]
    return outArray * mag + offset
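
Illustrative call, assuming makeRadialMatrix and makeGauss from the same module; the raised-cosine mask spans the default [-1, 1] range:

mask = makeMask(256, shape='raisedCosine', fringeWidth=0.2)
print(mask.shape, mask.min(), mask.max())   # expected: (256, 256) -1.0 1.0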
Example #33
# Window functions (rectangular, Hamming, Hanning)
import matplotlib.pyplot as plt
import numpy as np


N = 32
nn = [i for i in range(N)]
plt.figure(figsize=(16, 12))
plt.subplot(3, 1, 1)
plt.stem(np.ones(N))
plt.title('Rectangle window')

# w = 0.54 - 0.46 * np.cos(np.multiply(nn, 2 * np.pi) / (N - 1))
w = np.hamming(N)
plt.subplot(3, 1, 2)
plt.stem(w)
plt.title('Hamming window')

# w = 0.5 * (1 - np.cos(np.multiply(nn, 2 * np.pi) / (N - 1)))
w = np.hanning(N)
plt.subplot(3, 1, 3)
plt.stem(w)
plt.title('Hanning window')
plt.savefig('images/window.png')
plt.show()
plt.close()
Example #34
        i = (t - t0) / fl_n2
        #      s = wav[center - cuttime/2*fs : center + cuttime/2*fs]
        s = wav[t:t + fl_n]
        #      import pdb;pdb.set_trace(); #for debug
        #      plot(s); mypltshow('tmp/y0i{}t{}-{}.eps'.format(i,t,t+fl_n));
        mp = np.sum(s**2) / fl_n  #mean power
        mP.append(mp)
        if mp < args.mp_th:
            continue
        # apply a pre-emphasis filter
        p = 0.97  # pre-emphasis coefficient
        s = preEmphasis(s, p)
        #      plot(s); mypltshow('tmp/y1i{}t{}-{}.eps'.format(i,t,t+fl_n));
        # apply a Hamming window
        if args.HW == 1:
            hammingWindow = np.hamming(len(s))
            s = s * hammingWindow
#      s = s * hammingWindow
# compute the LPC coefficients
#    lpcOrder = 32
        lpcOrder = args.k  # order of the LPC analysis
        r = autocorr(s, lpcOrder + 1)
        if r[0] != 0:
            # import pdb;pdb.set_trace(); #for debug
            a, e = LevinsonDurbin(r, lpcOrder)
            if np.linalg.norm(
                    a[1:-1] > 0.05):  #20210502 for removing log|p|=-578....
                #        if np.any(a[1:-1]!=0):
                A.append(a)
                #print "*** result ***"
                E.append(e)
Example #35
        indexFile = '%s_%s_%d_%05d_%05d.txt' \
                    % (indexChoice[0], restartState, S*10, firstYear, lastYearRng[k])
        os.system('mkdir %s %s/perio 2> /dev/null' % (dstDir, dstDir))
        print('Reading index file %s...' % indexFile)
        observable = np.loadtxt('%s/%s' % (indicesPath, indexFile))
    ntFull = observable.shape[0]
    obsName += '_%s' % indexChoice[0]

    # Get time steps array
    time = np.arange(spinup, ntFull)
    nt = ntFull - spinup
    observable = observable[spinup:]

    # Get periodogram
    print('Getting periodogram...')
    window = np.hamming(nt)
    # Get nearest larger power of 2
    if np.log2(nt) != int(np.log2(nt)):
        nfft = 2**(int(np.log2(nt)) + 1)
    else:
        nfft = nt
    # Get frequencies and shift zero frequency to center
    freq = np.fft.fftfreq(nfft, d=1./sampFreq)
    freq = np.fft.fftshift(freq)
    freqYear = freq * daysPerYear

    # Apply window and remove sample mean
    tsWindowed = observable * window
    tsWindowed -= tsWindowed.mean()
    # Fourier transform and shift zero frequency to center
    fts = np.fft.fft(tsWindowed, nfft, 0)
Example #36
def calc_fbank(url, config):
    """Calculate Fbank feature of a audio file.

    Parameters
    ----------
    url : ``str``
        Path to the audio file.

    Returns
    -------
    fbank : ``np.ndarray``
        Fbank feature of this audio.
    """
    sample_rate, signal = read(url)
    pre_emphasis = 0.97  #config.Audio_emphasis
    frame_size = 0.025  #config.Audio_frame_size
    frame_stride = 0.01  #config.Audio_frame_stride
    NFFT = 512  #config.Audio_NFFT
    nfilt = config.Audio_n_filt

    emphasized_signal = np.append(signal[0],
                                  signal[1:] - pre_emphasis * signal[:-1])
    # convert from seconds to samples
    frame_length, frame_step = frame_size * sample_rate, frame_stride * sample_rate
    signal_length = len(emphasized_signal)
    frame_length = int(round(frame_length))
    frame_step = int(round(frame_step))
    # Make sure that we have at least 1 frame
    num_frames = int(
        np.ceil(float(np.abs(signal_length - frame_length)) / frame_step))

    pad_signal_length = num_frames * frame_step + frame_length
    z = np.zeros((pad_signal_length - signal_length))

    # Pad Signal to make sure that all frames have equal number of samples
    # without truncating any samples from the original signal
    pad_signal = np.append(emphasized_signal, z)

    indices = np.tile(np.arange(0, frame_length), (num_frames, 1)) + \
              np.tile(np.arange(0, num_frames * frame_step, frame_step), (frame_length, 1)).T
    frames = pad_signal[indices.astype(np.int32, copy=False)]
    frames *= np.hamming(frame_length)
    mag_frames = np.absolute(np.fft.rfft(frames, NFFT))  # Magnitude of the FFT
    pow_frames = ((1.0 / NFFT) * ((mag_frames)**2))  # Power Spectrum

    low_freq_mel = 0
    high_freq_mel = (2595 * np.log10(1 + (sample_rate / 2) / 700)
                     )  # Convert Hz to Mel
    mel_points = np.linspace(low_freq_mel, high_freq_mel,
                             nfilt + 2)  # Equally spaced in Mel scale
    hz_points = (700 * (10**(mel_points / 2595) - 1))  # Convert Mel to Hz
    bin = np.floor((NFFT + 1) * hz_points / sample_rate)

    fbank = np.zeros((nfilt, int(np.floor(NFFT / 2 + 1))))
    for m in range(1, nfilt + 1):
        f_m_minus = int(bin[m - 1])  # left
        f_m = int(bin[m])  # center
        f_m_plus = int(bin[m + 1])  # right

        for k in range(f_m_minus, f_m):
            fbank[m - 1, k] = (k - bin[m - 1]) / (bin[m] - bin[m - 1])
        for k in range(f_m, f_m_plus):
            fbank[m - 1, k] = (bin[m + 1] - k) / (bin[m + 1] - bin[m])
    filter_banks = np.dot(pow_frames, fbank.T)
    filter_banks = np.where(filter_banks == 0,
                            np.finfo(float).eps,
                            filter_banks)  # Numerical Stability
    filter_banks = 20 * np.log10(filter_banks)
    filter_banks -= (np.mean(filter_banks, axis=0) + 1e-8)
    filter_banks = cmvn(filter_banks)
    return filter_banks
Example #37
def dab_run(snr_list, file_name="dab_out", mode='dab'):

    output_file_folder = os.path.join("data_eval", mode)

    # removing previous enhancements
    for file in os.listdir(os.path.join("data_eval", "dnn1_out")):
        file_path = os.path.join("data_eval", "dnn1_out", file)
        os.remove(file_path)

    dnn1_inputs, dnn1_outputs = dnn1.predict_folder(
        os.path.join("data_eval", "dnn1_in"),
        os.path.join("data_eval", "dnn1_out"))

    names = [
        f for f in sorted(os.listdir(os.path.join("data_eval", "dnn1_out")))
        if f.startswith("enh")
    ]
    dnn1_outputs = []
    for (cnt, na) in enumerate(names):
        # Load feature.
        file_path = os.path.join("data_eval", "dnn1_out", na)
        (a, _) = pp.read_audio(file_path)
        enh_complex = pp.calc_sp(a, 'complex')
        dnn1_outputs.append(enh_complex)

    # s2nrs = dnn2.predict("data_eval/dnn1_in", "data_eval/dnn1_out")

    # snr = np.array([5.62, 1.405, 0.703, 0.281])
    # snr = np.array([5.62, 2.81, 1.875, 1.406])
    s2nrs = snr_list * 1
    for i in range(len(snr_list)):
        s2nrs[i] = 1 / (1 + 1 / snr_list[i])

    ch_rw_outputs = []
    # calculate channel weights
    if mode == 'dab':
        new_weights = channel_weights(s2nrs)
        print(new_weights)
        # multiply enhanced audio for the corresponding weight
        for i, p in zip(dnn1_outputs, new_weights):
            ch_rw_outputs.append(p * i)

    # cancel reweighting if db mode
    if mode == 'db':
        new_weights = s2nrs
        print(new_weights)
        ch_rw_outputs = dnn1_outputs

    # execute mvdr
    final = mvdr(dnn1_inputs, ch_rw_outputs)

    (init,
     _) = pp.read_audio(os.path.join('data_eval', 'test_speech', file_name))
    init_sp = pp.calc_sp(init, mode='complex')

    visualize(dnn1_colors(np.abs(init_sp)), dnn1_colors(np.abs(final)),
              "source amplitude", "final amplitude")

    # Recover and save enhanced wav
    pp.create_folder(output_file_folder)
    s = recover_wav_complex(final, conf1.n_overlap, np.hamming)
    s *= np.sqrt((np.hamming(
        conf1.n_window)**2).sum())  # Scaler for compensate the amplitude
    audio_path = os.path.join(output_file_folder, file_name)
    pp.write_audio(audio_path, s, conf1.sample_rate)

    print('%s done' % mode)
Example #38
def PlotHamming(data):
    hamm = np.hamming(len(data))
    y = hamm * data
    y = abs(fft(y, n=16384))
    y = y[:y.size // 2]
    return y
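
The snippet assumes an fft in scope; NumPy's works (illustrative usage):

import numpy as np
from numpy.fft import fft

data = np.sin(2 * np.pi * 50 * np.arange(4096) / 1000.0)
spectrum = PlotHamming(data)   # one-sided magnitude, 8192 bins after zero-padding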
Example #39
def spectrogram_image(mediafile, dpi=72, outdir=None, outfile=None):
    # TODO: Add some of the constants below as parameters
    """ Create spectrogram image from audio data.
        Return path to created image file.
    """
    import matplotlib
    matplotlib.use('Agg')

    import matplotlib.pyplot as plt
    import scipy.io.wavfile
    import numpy as np
    import pylab

    # Output file path
    outfile = outfile or ""
    if outdir and outfile and os.sep in outfile:
        raise ValueError(
            "Do not specify paths in both output directory '%s' and filename '%s'"
            % (outdir, outfile))

    if os.sep not in outfile:
        if not outfile:
            outfile = os.path.splitext(os.path.basename(mediafile))[0] + ".jpg"
        if not outdir:
            outdir = os.path.dirname(mediafile)
        outfile = os.path.join(outdir, outfile)

    with closing(open(os.devnull, "wb")) as black_hole:
        # Read audio data
        with transcode.to_wav(mediafile) as wavfile:
            sys.stdout, saved_stdout = black_hole, sys.stdout
            try:
                sample_rate, waveform = scipy.io.wavfile.read(wavfile)
            finally:
                sys.stdout = saved_stdout

        # Limit data to a 2 second window from the middle, else the FFT needs ages
        data_window = sample_rate * 2  # secs
        waveform = [
            i[0] for i in waveform[(len(waveform) - data_window) //
                                   2:(len(waveform) + data_window) // 2]
        ]
        # TODO: combine / add the channels to mono

        # Calculate FFT inputs
        nstep = int(sample_rate * 0.001)  # 1ms step
        nfft = nwin = int(sample_rate * 0.005) & ~1  # 5ms window
        window = np.hamming(nwin)

        # Create spectrogram
        pylab.nipy_spectral()
        for khz in (5, 10, 16, 18, 20):
            pylab.text(data_window / sample_rate * .99,
                       khz * 1000 + 75,
                       "%d kHz" % khz,
                       ha="right")
            pylab.axhline(khz * 1000)
        pylab.axis("off")
        pylab.specgram(waveform, NFFT=nfft, Fs=sample_rate, window=window)

        # Write to image
        try:
            pylab.savefig(outfile + ".png",
                          format='png',
                          facecolor="#000000",
                          edgecolor="#000000",
                          dpi=dpi,
                          transparent=True,
                          bbox_inches="tight")

            cmd = [
                config.CMD_IM_CONVERT, "-trim", "-quality", "85",
                outfile + ".png", outfile
            ]
            subprocess.check_call(cmd,
                                  stdout=black_hole,
                                  stderr=subprocess.STDOUT)
        finally:
            if os.path.exists(outfile + ".png"):
                os.remove(outfile + ".png")

    return outfile
Example #40
def file_feature_extraction(file,
                            win=0.032,
                            step=0.016,
                            amplitudeFilter=False,
                            diffFilter=False):
    # read in digital signal from audio file
    audioInfo = read(file)
    fs = audioInfo[0]  # fs = frames/second = rate
    signal = audioInfo[1]  # signal = data

    # Converting stereo signal to MONO signal
    if signal.ndim > 1 and signal.shape[1] > 1:
        signal = np.float_(np.sum(signal, axis=1)) / 2

    # short-term feature extraction
    numberOfSamples = len(signal)
    duration = np.float_(numberOfSamples) / fs  # in seconds

    # convert window length and step from seconds to samples
    windowLength = int(np.round(win * fs))
    stepInSamples = int(np.round(step * fs))

    # compute the total number of frames
    numOfFrames = int(
        np.floor((numberOfSamples - windowLength) / stepInSamples) + 1)

    # number of features to be computed:
    numbOfFeatures = 21
    Features = np.zeros((numOfFrames, numbOfFeatures))

    # Frequency-domain audio features
    # MFCC
    Ham = np.hamming(windowLength)
    mfccParams = feature_mfccs_init.feature_mfccs_init(windowLength, fs)

    Win = int(windowLength)
    nFFT = Win // 2

    curPos = 1

    ampl_vals = []
    diff_vals = []

    for i in range(0, numOfFrames):  # for each frame
        # get current frame
        frame = signal[curPos - 1:curPos + windowLength - 1]
        if i == 0:
            frameprev = frame.copy()

        ampl_val = np.max(frame)  # - np.min(frame)
        ampl_vals.append(ampl_val)

        diff_val = np.subtract(frameprev, frame)
        diff_vals.append(np.mean(diff_val))

        frameprev = frame.copy()
        frame = frame * Ham
        frameFFT = getDFT.getDFT(frame, fs)

        X = np.abs(np.fft.fft(frame))
        X = X[0:nFFT]  # normalize fft
        X = X / len(X)

        if i == 0:
            Xprev = X.copy()

        if np.sum(np.abs(frame)) > np.spacing(1):
            MFCCs = feature_mfccs.feature_mfccs(frameFFT, mfccParams)
            Features[i][0:13] = MFCCs
        else:
            Features[i, :] = np.zeros(numbOfFeatures)
        Features[i][13] = stEnergy(frame)
        Features[i][14] = stZCR(frame)
        Features[i][15] = stEnergyEntropy(frame)
        [Features[i][16], Features[i][17]] = stSpectralCentroidAndSpread(X, fs)
        Features[i][18] = stSpectralEntropy(X)
        Features[i][19] = stSpectralRollOff(X, 0.90, fs)

        curPos = curPos + stepInSamples
        frameFFTPrev = frameFFT
        Xprev = X.copy()

    ampl_threshold = np.percentile(ampl_vals, 93)
    diff_threshold = np.percentile(diff_vals, 80)

    for i in range(0, numOfFrames):
        if amplitudeFilter and ampl_vals[i] < ampl_threshold:
            Features[i][20] = 1.0
        elif diffFilter and diff_vals[i] > diff_threshold:
            Features[i][20] = 1.0
        else:
            Features[i][20] = 0.0

    return Features
Example #41
def processWithPV(waveIn, pars, BPF, doPitchShift=False):

    sr = pars['main_pars']['sr']
    inSamples = pars['main_pars']['inSamples']

    if not doPitchShift:

        # static stretch
        if BPF.size == 2:
            rateVec = 1/BPF[0,1]   # >1: dilation (stretching), <1: compression
            numSeg = 1
        else:
            sampleVec = np.rint(BPF[:,0]*sr)   # in samples
            rateVec = BPF[:,1]
            numSeg = BPF.shape[0]

    else:

        bins_per_octave = 12

        # static pitch shift
        if BPF.size == 2:
            rateVec = 2.0 ** (-float(BPF[0,1]/100.0) / bins_per_octave)  # in libRosa, pitch shifting factors are in semitones. In CLEESE, in cents.
            numSeg = 1
        else:
            sampleVec = np.rint(BPF[:,0]*sr)   # in samples
            rateVec = 2.0 ** (BPF[:,1] / float(100*bins_per_octave))

            numSeg = BPF.shape[0]


    winLen = pars['ana_pars']['anaWinLen']        # in seconds
    n_fft = int(2**np.ceil(np.log2(winLen*sr)))   # next pow 2
    overlapFactor = pars['ana_pars']['oversampling']
    synHop = n_fft//overlapFactor


    if numSeg==1:

        anaHop = int(round(synHop*rateVec))
        numFrames = int(round(inSamples/anaHop)) + overlapFactor
        anaHopVec = np.ones(numFrames, dtype=int) * anaHop

    else:

        pos = 0
        currHop = int(round(synHop/rateVec[0]))
        anaHopVec = currHop
        allRatesVec = rateVec[0]

        while pos <= inSamples:
            pos += currHop
            sampledBPF = np.interp(pos,sampleVec,rateVec)
            currHop = int(round(synHop/sampledBPF))
            anaHopVec = np.append(anaHopVec,currHop)
            allRatesVec = np.append(allRatesVec,sampledBPF)

    win = np.hamming(n_fft)

    stftMat = stft(waveIn,win,n_fft,anaHopVec)

    stftMat = np.squeeze(stftMat)
    stftMat = stftMat[0:n_fft//2+1,:]

    phase_locking = 1

    stft_stretch = phaseVocoder_varHop(stftMat, anaHopVec=anaHopVec, synHop=synHop, phase_locking=phase_locking)

    if not doPitchShift:

        waveOut = istft(stft_stretch,win,n_fft,synHop)

    else:
        if numSeg==1:

            waveOut = istft(stft_stretch,win,n_fft,synHop)
            n_samples = int(np.ceil(len(waveOut) * rateVec))
            waveOut = sig.resample_poly(waveOut,n_samples,len(waveOut))

        else:

            waveOut = istft_resamp(stft_stretch,win,n_fft,synHop,allRatesVec,inSamples)

    return waveOut
Example #42
 def hamming(self):
     self.ws *= np.hamming(len(self.ws))
Example #43
pre_emphasis = 0.97
emphasized_signal = np.append(signal[0],
                              signal[1:] - pre_emphasis * signal[:-1])
frame_size = 0.025
frame_stride = 0.01
frame_length, frame_step = frame_size * sample_rate, frame_stride * sample_rate
signal_length = len(emphasized_signal)
frame_length = int(round(frame_length))
frame_step = int(round(frame_step))
num_frames = int(
    np.ceil(float(np.abs(signal_length - frame_length)) / frame_step))
pad_signal_length = num_frames * frame_step + frame_length
z = np.zeros((pad_signal_length - signal_length))
pad_signal = np.append(emphasized_signal, z)
indices = np.tile(np.arange(0, frame_length), (num_frames, 1)) + np.tile(
    np.arange(0, num_frames * frame_step, frame_step), (frame_length, 1)).T
frames = pad_signal[indices.astype(np.int32, copy=False)]
frames *= np.hamming(frame_length)
m = frames.max(axis=1)
silent_frames = np.array(np.where(frames.max(axis=1) < 500))
frames_final = np.delete(frames, silent_frames[:, :], axis=0)
signal_final2 = python_speech_features.sigproc.deframesig(
    frames_final, len(emphasized_signal), frame_length, frame_step)
plt.plot(signal_final2)

import numpy as np
import scipy.io.wavfile
from matplotlib import cm
import matplotlib.pyplot as plt
from scipy.fftpack import dct
import python_speech_features
from scipy import io
sample_rate, signal = scipy.io.wavfile.read(
Example #44
from scipy.signal import hamming, triang, blackmanharris
from scipy.fftpack import fft, ifft
import math
import sys, os, functools, time
sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '../../../software/models/'))

import dftModel as DFT
import utilFunctions as UF

(fs, x) = UF.wavread('../../../sounds/oboe-A4.wav')
N = 512
M = 511
t = -60
w = np.hamming(M)
start = int(.8 * fs)
hN = N // 2
hM = (M + 1) // 2

x1 = x[start:start + M]
mX, pX = DFT.dftAnal(x1, w, N)
ploc = UF.peakDetection(mX, hN, t)
pmag = mX[ploc]
freqaxis = fs * np.arange(N / 2) / float(N)

plt.figure(1, figsize=(9.5, 5.5))
plt.subplot(2, 1, 1)
plt.plot(freqaxis, mX, 'r', lw=1.5)
plt.axis([300, 2500, -70, max(mX)])
plt.plot(fs * ploc / N,
Example #45
# Required to use Matplotlib inside Tkinter
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk

size_frame = 4096  # frame size
SR = 16000  # sampling rate
size_shift = 16000 / 100  # shift size = 0.01 s (10 msec)

# load the audio file
x, _ = librosa.load('aiueo.wav', sr=SR)

# file length in seconds
duration = len(x) / SR

# Hamming window
hamming_window = np.hamming(size_frame)

# list to store the spectrogram
spectrogram = []

# compute the spectrum frame by frame
for i in np.arange(0, len(x) - size_frame, size_shift):

    # get the data for this frame
    idx = int(i)  # arange yields floats, so convert the index to int
    x_frame = x[idx:idx + size_frame]

    # spectrum
    fft_spec = np.fft.rfft(x_frame * hamming_window)
    fft_log_abs_spec = np.log(np.abs(fft_spec))
    spectrogram.append(fft_log_abs_spec)
Example #46
 def hamming(self):
     """Apply a Hamming window to the wave.
     """
     self.ys *= np.hamming(len(self.ys))
Example #47
    def run(self, cam):
        print('start')

        self.frame_out = self.frame_in
        self.gray = cv2.equalizeHist(
            cv2.cvtColor(self.frame_in, cv2.COLOR_BGR2GRAY))
        col = (100, 255, 100)

        detected = list(
            self.face_cascade.detectMultiScale(self.gray,
                                               scaleFactor=1.3,
                                               minNeighbors=4,
                                               minSize=(50, 50),
                                               flags=cv2.CASCADE_SCALE_IMAGE))
        if len(detected) > 0:
            detected.sort(key=lambda a: a[-1] * a[-2])
            #self.data_buffer, self.times, self.trained = [], [self.times[-1]], False
            if self.shift(detected[-1]) > 10:
                self.face_rect = detected[-1]
        forehead1 = self.get_subface_coord(0.5, 0.18, 0.25, 0.15)
        self.draw_rect(self.face_rect, col=(255, 0, 0))
        x, y, w, h = self.face_rect
        cv2.putText(self.frame_out, "Face", (x, y), cv2.FONT_HERSHEY_PLAIN,
                    1.5, col)
        self.draw_rect(forehead1)
        x, y, w, h = forehead1
        cv2.putText(self.frame_out, "Forehead", (x, y), cv2.FONT_HERSHEY_PLAIN,
                    1.5, col)
        print('haha')
        if set(self.face_rect) == set([1, 1, 2, 2]):
            print('break')
            return
        vals = self.get_subface_means(forehead1)
        #print vals
        self.data_buffer.append(vals)
        #print self.data_buffer
        L = len(self.data_buffer)
        self.times.append(time.time() - self.t0)
        if L > self.buffer_size:
            self.data_buffer = self.data_buffer[-self.buffer_size:]
            self.times = self.times[-self.buffer_size:]
            L = self.buffer_size

        processed = np.array(self.data_buffer)

        self.samples = processed
        print(L)
        #print self.times
        if L > 10:
            self.output_dim = processed.shape[0]

            self.fps = float(L) / (self.times[-1] - self.times[0])
            even_times = np.linspace(self.times[0], self.times[-1], L)
            print('processed\n', len(processed), '\nself\n', len(self.times))
            interpolated = np.interp(even_times, self.times, processed)
            interpolated = np.hamming(L) * interpolated
            interpolated = interpolated - np.mean(interpolated)
            raw = np.fft.rfft(interpolated)
            phase = np.angle(raw)
            self.fft = np.abs(raw)
            self.freqs = float(self.fps) / L * np.arange(L / 2 + 1)

            freqs = 60. * self.freqs
            idx = np.where((freqs > 50) & (freqs < 180))

            pruned = self.fft[idx]
            phase = phase[idx]

            pfreq = freqs[idx]
            self.freqs = pfreq
            self.fft = pruned
            idx2 = np.argmax(pruned)

            t = (np.sin(phase[idx2]) + 1.) / 2.
            t = 0.9 * t + 0.1
            alpha = t
            beta = 1 - t

            self.bpm = self.freqs[idx2]
            self.idx += 1

            x, y, w, h = self.get_subface_coord(0.5, 0.18, 0.25, 0.15)
            r = alpha * self.frame_in[y:y + h, x:x + w, 0]
            g = alpha * \
                self.frame_in[y:y + h, x:x + w, 1] + \
                beta * self.gray[y:y + h, x:x + w]
            b = alpha * self.frame_in[y:y + h, x:x + w, 2]
            self.frame_out[y:y + h, x:x + w] = cv2.merge([r, g, b])
            x1, y1, w1, h1 = self.face_rect
            self.slices = [np.copy(self.frame_out[y1:y1 + h1, x1:x1 + w1, 1])]
            col = (100, 255, 100)
            gap = (self.buffer_size - L) / self.fps
            # self.bpms.append(bpm)
            # self.ttimes.append(time.time())
            if gap:
                text = "(estimate: %0.1f bpm, wait %0.0f s)" % (self.bpm, gap)
            else:
                text = "(estimate: %0.1f bpm)" % (self.bpm)
            tsize = 1
            cv2.putText(self.frame_out, text, (int(x - w / 2), int(y)),
                        cv2.FONT_HERSHEY_PLAIN, tsize, col)
Example #48
    # compute the total number of frames
    # for the short-time Fourier transform
    num_frames = (num_samples - frame_size) \
               // frame_shift + 1

    # compute the spectrogram
    spectrogram = np.zeros((num_frames, fft_size))
    for frame_idx in range(num_frames):
        # extract one frame of the waveform
        start_index = frame_idx * frame_shift
        frame = waveform[start_index : \
                         start_index + frame_size].copy()

        # compute the log-amplitude spectrum
        frame = frame * np.hamming(frame_size)
        spectrum = np.fft.fft(frame, n=fft_size)
        log_absolute = np.log(np.abs(spectrum) + 1E-7)
        spectrogram[frame_idx, :] = log_absolute

    # create the plotting area
    plt.figure(figsize=(10, 10))

    # split the plotting area vertically in two
    # and plot the time waveform in the upper half
    plt.subplot(2, 1, 1)
    time_axis = np.arange(num_samples) / sample_frequency
    plt.plot(time_axis, waveform, color='k')

    # set the y-axis limit from the maximum of the waveform
    ymax = np.max(np.abs(waveform)) * 1.05
Example #49
        results.append(d2**2 + d1**2 - w_real * d1 * d2)
        freqs.append(f * sample_rate)
    return freqs, results


if __name__ == '__main__':
    # quick test
    import numpy as np
    import pylab

    # generating test signals
    SAMPLE_RATE = 44100
    WINDOW_SIZE = 1024
    t = np.linspace(0, 1, SAMPLE_RATE)[:WINDOW_SIZE]
    sine_wave = np.sin(2 * np.pi * 440 * t) + np.sin(2 * np.pi * 1020 * t)
    sine_wave = sine_wave * np.hamming(WINDOW_SIZE)
    sine_wave2 = np.sin(2 * np.pi * 880 * t) + np.sin(2 * np.pi * 1500 * t)
    sine_wave2 = sine_wave2 * np.hamming(WINDOW_SIZE)

    # applying Goertzel on those signals, and plotting results
    freqs, results = goertzel(sine_wave, SAMPLE_RATE, (400, 500), (1000, 1100))

    pylab.subplot(2, 2, 1)
    pylab.title('(1) Sine wave 440Hz + 1020Hz')
    pylab.plot(t, sine_wave)

    pylab.subplot(2, 2, 3)
    pylab.title(
        '(1) Goertzel Algo, frequency ranges : [400, 500] and [1000, 1100]')
    pylab.plot(freqs, np.array(results)[:, 2], 'o')
    pylab.ylim([0, 100000])
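
The goertzel() function called above is only partially shown (its closing lines appear at the top of this example). A minimal sketch consistent with that tail is given below. Note a discrepancy in the original: the visible tail appends only the power term, yet the plotting code indexes results[:, 2], so this sketch returns the conventional (real, imag, power) triple per bin.

import numpy as np

def goertzel(samples, sample_rate, *freq_ranges):
    # Scan every DFT bin inside each requested frequency range.
    N = len(samples)
    freqs, results = [], []
    for f_start, f_end in freq_ranges:
        k_start = int(np.floor(f_start / sample_rate * N))
        k_end = int(np.ceil(f_end / sample_rate * N))
        for k in range(k_start, k_end + 1):
            f = float(k) / N
            w = 2.0 * np.pi * f
            w_real, w_imag = 2.0 * np.cos(w), np.sin(w)
            d1 = d2 = 0.0
            for x in samples:               # the Goertzel recurrence
                y = x + w_real * d1 - d2
                d2, d1 = d1, y
            results.append((0.5 * w_real * d1 - d2, w_imag * d1,
                            d2**2 + d1**2 - w_real * d1 * d2))
            freqs.append(f * sample_rate)
    return freqs, results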
Example #50
0
#!/usr/bin/python3
# -*- coding:utf-8 -*-

import time
import ADS1256
import DAC8532
import RPi.GPIO as GPIO
from pylab import *
import matplotlib.pyplot as plt
import numpy as np

N = 128
analyzeSignal = zeros(N, dtype=complex)
C = 3 * 10**8
f_vco = 24.35 * 10**9
windowFunction = np.hamming(N)

try:
    ADC = ADS1256.ADS1256()
    DAC = DAC8532.DAC8532()
    ADC.ADS1256_init()

    DAC.DAC8532_Out_Voltage(DAC8532.channel_A, 1)
    DAC.DAC8532_Out_Voltage(DAC8532.channel_B, 1)

    while True:
        #starttime = datetime.datetime.now()
        for i in range(0, N):
            ADC_Value = ADC.ADS1256_GetIQ()
            I = ADC_Value[0] * 5.0 / 0x7fffff
            Q = ADC_Value[1] * 5.0 / 0x7fffff
Example #51
0
import numpy as np
import time, os, sys

sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '../../../software/models/'))

import stft as STFT
import utilFunctions as UF
import matplotlib.pyplot as plt
from scipy.signal import hamming

(fs, x) = UF.wavread('../../../sounds/piano.wav')
w = np.hamming(1024)
N = 1024
H = 512
mX, pX = STFT.stftAnal(x, fs, w, N, H)
y = STFT.stftSynth(mX, pX, w.size, H)

plt.figure(1, figsize=(9.5, 7))
plt.subplot(411)
plt.plot(np.arange(x.size) / float(fs), x, 'b')
plt.title('x (piano.wav)')
plt.axis([0, x.size / float(fs), min(x), max(x)])

plt.subplot(412)
numFrames = int(mX[:, 0].size)
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(mX[0, :].size) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX))
plt.title('mX, M=1024, N=1024, H=512')
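
For readers without the sms-tools modules on their path, a rough scipy equivalent of the analysis step is sketched below; the scaling conventions differ from stftAnal, so treat it as a sketch rather than a drop-in replacement.

import numpy as np
from scipy.signal import stft

fs = 44100
x = np.random.randn(fs)  # stand-in for the piano recording
f, t, Zxx = stft(x, fs=fs, window=np.hamming(1024), nperseg=1024, noverlap=512)
mX = 20 * np.log10(np.abs(Zxx) + np.finfo(float).eps)  # magnitude in dB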
Example #52
0
def spectral_analysis(dx,Ain,res_factor=10.0,tapering=None,overlap=None,wsize=None,alpha=3.0,detrend=False,normalise=False,integration=False):
    """
     Spectral_Analysis
     @summary: This function performs a spatial spectral analysis with different options on a time series of SLA profiles.
     @param dx {type:numeric} : sampling distance
     @param Ain {type:numeric} : 2D table of sla data with time along 2nd axis (NXxNT with NX the spatial length and NT the time length)
     @keyword tapering {type:string|bool|nd.array} : apply tapering to the data. <br \>
                    If this keyword is of type bool : apply a hamming window. <br \>
                    If this keyword is a string : apply a hamming ('hamm'), hann ('hann'), kaiser-bessel ('kaiser'), blackman ('blackman') or no ('none') tapering function. <br \>
                    If this keyword is an nd.array object : apply this array as the taper.
     @keyword overlap {type:float} : overlap coefficient of the windows (0.75 means 75% overlap).
     @keyword wsize {type:numeric} : size of the sub-segments.
     @keyword normalise {type:bool,default:False} : If True, normalise the spectrum by its overall energy content.
     @keyword detrend {type:bool,default:False} : If True, removes a linear trend to the segmented signal (if tapered) or to the whole signal (if not tapered).
     @keyword integration {type:bool,default:False} : If True, integrate the spectrum between 2 frequencies. 
     @return: a spectrum structure with Energy Spectral Density ('esd'), Power Spectral Density ('psd'), frequency ('fq'), wavelength ('p') and tapering parameters.
    
     @author: Renaud DUSSURGET (RD) - LER/PAC, Ifremer
     @change: Created by RD, December 2012
    """
    
    A=Ain.copy()

    #Check dimensions
    sh = A.shape
    ndims = len(sh)
    N = sh[0] #Time series are found along the last dimension
    
    #If vector, add one dimension
    if ndims == 1 :
        A = A.reshape((N,1))
        sh = A.shape
        ndims = len(sh)
    
    nr = sh[1] #Number of repeats  
    nt = nr
    
#    gain=1.0 #Scaling gain... (used for tapering issues)
    
#    #Get the overall energy
#    spec=get_spec(dx, A[:,0])
#    F=spec['fq']   
#    Eref = ((A[:,0]-A[:,0].mean())**2).sum() #Get the reference energy level
#    ESDref=spec['esd']
#    SFactor=Eref/spec['esd'].sum()
#    ESDref*=SFactor
#    PSDref=spec['psd']*SFactor
#    print 'Check parseval theorem : SUM|Y(f)|²={0}, SUM|y(t)|²={1}'.format(spec['esd'].sum(),((A[:,0]-A[:,0].mean())**2).sum())
    
    #Apply tapering if asked
    ########################
    if tapering is not None:
        
        #Set tapering defaults
        overlap=0.50 if overlap is None else overlap
        wsize = int(0.5 * N) if wsize is None else int(wsize)

        #Get time splitting (tapering) parameters
        #########################################
        a = np.float32(wsize)
        b = np.float32(overlap) 
        c = np.float32(N) 
        nn = int(np.floor((c - (a * b)) / (a - (a * b))))  #This is the number of segments
        print('Number of windows :{0}\nTotal windowed points : {1} ({2} missing)\nTotal points : {3}'.format(nn, nn * wsize, N - nn * wsize, N))
        
        ix = (np.arange(nn) * ((1.0 - b) * a)).astype(int) #These are the starting points of each segment

        #Moving window
        ##############
        dum = np.zeros((wsize, nn, nr),dtype=np.float64)
        for j in np.arange(nr):
            for i in np.arange(nn): #looping through time to get splitted time series 
                dum[:,i,j] = detrend_fun(np.arange(wsize),A[ix[i] : ix[i] + wsize,j]) if detrend else A[ix[i] : ix[i] + wsize,j]
        
        #Set up tapering window
        #######################
        beta=np.pi*alpha
        hamm = np.hamming(wsize)
        hann = np.hanning(wsize)
        kbess = np.kaiser(wsize,beta)
        blackman = np.blackman(wsize)
        notaper = np.ones(wsize) #overpass tapering option
        gain=1.0
        
        if isinstance(tapering, bool):
            which = 'hamm'
        elif isinstance(tapering, str):
            # Accept both the short names from the docstring ('hamm', 'hann', ...)
            # and the long forms ('HAMMING', 'HANNING', ...)
            if tapering.upper().startswith('HAMM'):
                which = 'hamm'
                gain = np.sum(hamm) / wsize  #0.530416666667
            elif tapering.upper().startswith('HANN'):
                which = 'hann'
                gain = np.sum(hann) / wsize  #0.489583333333
            elif tapering.upper().startswith('KAISER'):
                which = 'kbess'
                gain = np.sum(kbess) / wsize  #0.394170357504
            elif tapering.upper() == 'NONE':
                which = 'notaper'
                gain = 1.0
            elif tapering.upper().startswith('BLACKMAN'):
                which = 'blackman'
                gain = np.sum(blackman) / wsize
            else:
                raise Exception('Unknown taper {0}'.format(tapering))
        elif isinstance(tapering, np.ndarray):
            pass
        else:
            raise Exception('Bad value for tapering keyword')
        if isinstance(tapering, np.ndarray):
            window = tapering
        else:
            # Look the window up by name instead of the fragile exec() call
            window = {'hamm': hamm, 'hann': hann, 'kbess': kbess,
                      'blackman': blackman, 'notaper': notaper}[which]
        window = np.repeat(window,nn*nr).reshape((wsize,nn,nr))
    
        #Apply tapering on segmented data
        A=dum.copy()*window
        A=A.reshape(wsize,nr*nn) #Reshape matrix
        nr=nn*nr
    else :
        if detrend :
            for i in np.arange(nr): A[:,i] = detrend_fun(np.arange(N),A[:,i])
        gain=1.0
    
    #Run transform
    ###############
    for i in np.arange(nr):
        spec=get_spec(dx, A[:,i],integration=integration,gain=gain,res_factor=res_factor)
        if i == 0:
            esd = spec['esd']
            psd = spec['psd']
            fq = spec['fq']
        else : 
            esd = np.append(esd,spec['esd'])
            psd = np.append(psd,spec['psd'])
    
#    factor=((A[:,0]-A[:,0].mean())**2).sum()/spec['esd'].sum()
    
    #Average spectrum
    #################
    nf=len(fq)
    p=1./fq
    esd=esd.reshape(nr,nf)
    psd=psd.reshape(nr,nf)
    esd=(np.sum(esd,axis=0)/nr)#/gain
    psd=(np.sum(psd,axis=0)/nr)#/gain
    
    psd = psd * (gain**0.5)
#    print gain, np.sqrt(gain), gain **2, gain*0.5, gain/2.
#    esd=(np.sum(esd,axis=0))#/gain
#    psd=(np.sum(psd,axis=0))#/gain


    #Normalise by energy content    
    Scaling_Factor=len(fq)/esd.sum()
    if normalise :
        esd*=Scaling_Factor
        psd*=Scaling_Factor
    
    if tapering is not None : return {'params':{'tapering':tapering is not None,'which':which,'wsize':int(wsize),'nwind':int(nn),'overlap':int(100.*overlap),'gain':gain},'psd':psd,'esd':esd,'fq':fq,'p':p}
    else : return {'params':{'tapering':tapering is not None},'psd':psd,'esd':esd,'fq':fq,'p':p}
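
The gain values hard-coded in the comments above are simply the coherent gain sum(w)/len(w) of each taper; they can be recomputed for any segment size rather than trusted as magic numbers. A quick check, with an illustrative wsize:

import numpy as np

wsize = 1024
for name, w in [('hamming', np.hamming(wsize)),
                ('hanning', np.hanning(wsize)),
                ('kaiser (beta=3*pi)', np.kaiser(wsize, 3.0 * np.pi)),
                ('blackman', np.blackman(wsize))]:
    print(name, w.sum() / wsize)  # coherent gain of each window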
Example #53
0
def denoise_wav(src_wav_file, dest_wav_file, global_mean, global_var, use_gpu,
                gpu_id, truncate_minutes, mode, model_select, stage_select):
    """Apply speech enhancement to audio in WAV file.

    Parameters
    ----------
    src_wav_file : str
        Path to WAV to denoise.

    dest_wav_file : str
        Output path for denoised WAV.

    global_mean : ndarray, (n_feats,)
        Global mean for LPS features. Used for CMVN.

    global_var : ndarray, (n_feats,)
        Global variances for LPS features. Used for CMVN.

    use_gpu : bool, optional
        If True and GPU is available, perform all processing on GPU.
        (Default: True)

    gpu_id : int, optional
         Id of GPU on which to do computation.
         (Default: 0)

    truncate_minutes: float
        Maximum duration in minutes to process at a time. The enhancement
        will be done on chunks of audio no greater than ``truncate_minutes``
        minutes duration.
    """
    # Read noisy audio WAV file. As scipy.io.wavefile.read is FAR faster than
    # librosa.load, we use the former.
    rate, wav_data = wav_io.read(src_wav_file)

    if mode == 1:
        print(
            "###Selecting the estimated ideal-ratio-masks in mode 1 (more conservative).###"
        )
    elif mode == 2:
        print(
            "###Selecting the estimated log-power-spec features in mode 2 (more agressive).###"
        )
    elif mode == 3:
        print(
            "###Selecting both estimated IRM and LPS outputs with equal weights in mode 3 (trade-off).###"
        )

    print("Using the pre-trained {} speech enhancement model.".format(
        model_select))

    # Apply peak-normalization.
    wav_data = utils.peak_normalization(wav_data)

    # Perform denoising in chunks of size chunk_length samples.
    chunk_length = int(truncate_minutes * rate * 60)
    total_chunks = int(math.ceil(wav_data.size / chunk_length))
    data_se = []  # Will hold enhanced audio data for each chunk.
    for i in range(1, total_chunks + 1):
        tmp_dir = tempfile.mkdtemp()
        try:
            # Get samples for this chunk.
            bi = (i - 1) * chunk_length  # Index of first sample of this chunk.
            ei = bi + chunk_length  # Index of last sample of this chunk + 1.
            temp = wav_data[bi:ei]
            print('Processing file: %s, segment: %d/%d.' %
                  (src_wav_file, i, total_chunks))

            # Skip denoising if chunk is too short.
            if temp.shape[0] < WL2:
                data_se.append(temp)
                continue

            # Determine paths to the temporary files to be created.
            noisy_normed_lps_fn = os.path.join(tmp_dir, 'noisy_normed_lps.htk')
            noisy_normed_lps_scp_fn = os.path.join(tmp_dir,
                                                   'noisy_normed_lps.scp')
            outputs_fn = os.path.join(tmp_dir, 'irm.mat')

            # Extract LPS features from waveform.
            noisy_htkdata = utils.wav2logspec(temp, window=np.hamming(WL))

            # Do MVN before decoding.
            normed_noisy = (noisy_htkdata - global_mean) / global_var

            # Write features to HTK binary format making sure to also
            # create a script file.
            #utils.write_htk(
            #     noisy_normed_lps_fn, normed_noisy, samp_period=SR,
            #    parm_kind=9)

            if model_select.lower() == '400h':
                utils.write_htk(noisy_normed_lps_fn,
                                normed_noisy,
                                samp_period=SR,
                                parm_kind=9)
            elif model_select.lower() == '1000h':
                utils.write_htk(
                    noisy_normed_lps_fn,
                    noisy_htkdata,
                    samp_period=SR,
                    parm_kind=9
                )  ### The 1000h model already integrates MVN inside itself.

            cntk_len = noisy_htkdata.shape[0] - 1
            with open(noisy_normed_lps_scp_fn, 'w') as f:
                f.write('irm=%s[0,%d]\n' % (noisy_normed_lps_fn, cntk_len))

            # Apply CNTK model to determine ideal ratio mask (IRM), which will
            # be output to the temp directory as irm.mat. In order to avoid a
            # memory leak, must do this in a separate process which we then
            # kill.
            #def decode_model(features_file, irm_mat_dir, feature_dim, use_gpu=True,
            #                gpu_id=0, mode=1, model_select='400h', stage_select=3):

            p = Process(target=decode_model,
                        args=(noisy_normed_lps_scp_fn, tmp_dir, NFREQS,
                              use_gpu, gpu_id, mode, model_select,
                              stage_select))
            p.start()
            p.join()
            if p.exception:
                e, tb = p.exception
                raise type(e)(tb)

            # Read in IRM and directly mask the original LPS features.
            irm = sio.loadmat(outputs_fn)['IRM']
            lps = sio.loadmat(outputs_fn)['LPS']

            if mode == 1:
                recovered_lps = noisy_htkdata + np.log(irm)
            elif mode == 2:
                recovered_lps = (lps * global_var) + global_mean
            elif mode == 3:
                recovered_lps = 0.5 * (noisy_htkdata + np.log(irm)) + 0.5 * (
                    (lps * global_var) + global_mean)

            # Reconstruct audio.
            wave_recon = utils.logspec2wav(recovered_lps,
                                           temp,
                                           window=np.hamming(WL),
                                           n_per_seg=WL,
                                           noverlap=WL2)
            data_se.append(wave_recon)
        finally:
            shutil.rmtree(tmp_dir)
    data_se = [x.astype(np.int16, copy=False) for x in data_se]
    data_se = np.concatenate(data_se)
    wav_io.write(dest_wav_file, SR, data_se)
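
A hypothetical invocation of denoise_wav is shown below; it assumes global_mean and global_var were estimated over the training corpus, and every path and setting is a placeholder, not taken from the original project.

# Illustrative only: paths, statistics and settings are placeholders.
denoise_wav('noisy.wav', 'enhanced.wav', global_mean, global_var,
            use_gpu=True, gpu_id=0, truncate_minutes=10,
            mode=3, model_select='400h', stage_select=3)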
Example #54
0
    def run(self):

        frame, face_frame, ROI1, ROI2, status, mask = self.fd.face_detect(
            self.frame_in)

        self.frame_out = frame
        self.frame_ROI = face_frame

        g1 = self.extractColor(ROI1)
        g2 = self.extractColor(ROI2)
        #g3 = self.extractColor(ROI3)

        L = len(self.data_buffer)

        #calculate average green value of 2 ROIs
        #r = (r1+r2)/2
        g = (g1 + g2) / 2
        #b = (b1+b2)/2

        if (
                abs(g - np.mean(self.data_buffer)) > 10 and L > 99
        ):  #remove sudden changes: if the average green value jumps by more than 10, reuse the last buffered value
            g = self.data_buffer[-1]

        self.times.append(time.time() - self.t0)
        self.data_buffer.append(g)

        #only process in a fixed-size buffer
        if L > self.buffer_size:
            self.data_buffer = self.data_buffer[-self.buffer_size:]
            self.times = self.times[-self.buffer_size:]
            self.bpms = self.bpms[-self.buffer_size // 2:]
            L = self.buffer_size

        processed = np.array(self.data_buffer)

        # start calculating once the buffer is full
        if L == self.buffer_size:

            self.fps = float(L) / (
                self.times[-1] - self.times[0]
            )  #calculate HR using a true fps of processor of the computer, not the fps the camera provide
            even_times = np.linspace(self.times[0], self.times[-1], L)

            processed = signal.detrend(
                processed
            )  #detrend the signal to avoid interference of light change
            interpolated = np.interp(even_times, self.times,
                                     processed)  #interpolation by 1
            interpolated = np.hamming(
                L
            ) * interpolated  #window the signal to reduce spectral leakage
            #norm = (interpolated - np.mean(interpolated))/np.std(interpolated)#normalization
            norm = interpolated / np.linalg.norm(interpolated)
            raw = np.fft.rfft(
                norm *
                30)  #real fft of the normalized signal scaled by 30

            self.freqs = float(self.fps) / L * np.arange(L / 2 + 1)
            freqs = 60. * self.freqs

            # idx_remove = np.where((freqs < 50) & (freqs > 180))
            # raw[idx_remove] = 0

            self.fft = np.abs(raw)**2  #get amplitude spectrum

            idx = np.where(
                (freqs > 50) & (freqs < 180)
            )  #the range of frequency that HR is supposed to be within
            pruned = self.fft[idx]
            pfreq = freqs[idx]

            self.freqs = pfreq
            self.fft = pruned

            idx2 = np.argmax(pruned)  #max in the range can be HR

            self.bpm = self.freqs[idx2]
            self.bpms.append(self.bpm)

            processed = self.butter_bandpass_filter(processed,
                                                    0.8,
                                                    3,
                                                    self.fps,
                                                    order=3)
            #ifft = np.fft.irfft(raw)
        self.samples = processed  #store the band-pass-filtered signal for plotting
        #TODO: find peaks to draw HR-like signal.

        if (mask.shape[0] != 10):
            out = np.zeros_like(face_frame)
            mask = mask.astype(bool)
            out[mask] = face_frame[mask]
            if (processed[-1] > np.mean(processed)):
                out[mask, 2] = 180 + processed[-1] * 10
            face_frame[mask] = out[mask]
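
The butter_bandpass_filter method used above is not shown in this excerpt; a common definition, given here as an assumption rather than the original code, is a Butterworth band-pass applied with scipy:

import numpy as np
from scipy import signal

def butter_bandpass_filter(data, lowcut, highcut, fs, order=3):
    # Assumed implementation: design a Butterworth band-pass in normalized
    # frequency and filter the trace once (no zero-phase correction).
    nyq = 0.5 * fs
    b, a = signal.butter(order, [lowcut / nyq, highcut / nyq], btype='band')
    return signal.lfilter(b, a, data)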
Example #55
0
import numpy
from scipy.fftpack import dct


def mfcc(sample_rate, signal):
    pre_emphasis = 0.97
    signal = signal[0:int(1 * sample_rate)]
    emphasized_signal = numpy.append(signal[0],
                                     signal[1:] - pre_emphasis * signal[:-1])
    frame_size = 0.025
    frame_stride = 0.01

    frame_length, frame_step = frame_size * sample_rate, frame_stride * sample_rate  # CONVERTING TO SAMPLES
    signal_length = len(emphasized_signal)
    frame_length = int(round(frame_length))
    frame_step = int(round(frame_step))
    num_frames = int(
        numpy.ceil(
            float(numpy.abs(signal_length - frame_length)) / frame_step))

    pad_signal_length = num_frames * frame_step + frame_length
    z = numpy.zeros((pad_signal_length - signal_length))
    pad_signal = numpy.append(emphasized_signal, z)

    indices = numpy.tile(numpy.arange(
        0, frame_length), (num_frames, 1)) + numpy.tile(
            numpy.arange(0, num_frames * frame_step, frame_step),
            (frame_length, 1)).T
    frames = pad_signal[indices.astype(numpy.int32, copy=False)]

    frames *= numpy.hamming(frame_length)
    NFFT = 512
    mag_frames = numpy.absolute(numpy.fft.rfft(frames, NFFT))
    pow_frames = ((1.0 / NFFT) * ((mag_frames)**2))

    nfilt = 40
    low_freq_mel = 0
    high_freq_mel = (2595 * numpy.log10(1 + (sample_rate / 2) / 700))
    mel_points = numpy.linspace(low_freq_mel, high_freq_mel, nfilt + 2)
    hz_points = (700 * (10**(mel_points / 2595) - 1))
    bin = numpy.floor((NFFT + 1) * hz_points / sample_rate)

    fbank = numpy.zeros((nfilt, int(numpy.floor(NFFT / 2 + 1))))
    for m in range(1, nfilt + 1):
        f_m_minus = int(bin[m - 1])
        f_m = int(bin[m])
        f_m_plus = int(bin[m + 1])

        for k in range(f_m_minus, f_m):
            fbank[m - 1, k] = (k - bin[m - 1]) / (bin[m] - bin[m - 1])
        for k in range(f_m, f_m_plus):
            fbank[m - 1, k] = (bin[m + 1] - k) / (bin[m + 1] - bin[m])
    filter_banks = numpy.dot(pow_frames, fbank.T)
    filter_banks = numpy.where(filter_banks == 0,
                               numpy.finfo(float).eps, filter_banks)
    filter_banks = 20 * numpy.log10(filter_banks)
    num_ceps = 12
    mfcc = dct(filter_banks, type=2, axis=1, norm='ortho')[:, 1:(num_ceps + 1)]
    cep_lifter = 22
    (nframes, ncoeff) = mfcc.shape
    n = numpy.arange(ncoeff)
    # print numpy.shape(mfcc)
    lift = 1 + (cep_lifter / 2) * numpy.sin(numpy.pi * n / cep_lifter)
    mfcc *= lift
    mfcc -= (numpy.mean(mfcc, axis=0) + 1e-8)
    return mfcc
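
A quick smoke test for mfcc(); the tone and sample rate below are arbitrary:

import numpy
rate = 16000
tone = numpy.sin(2 * numpy.pi * 440 * numpy.arange(rate) / rate)
coeffs = mfcc(rate, tone)
print(coeffs.shape)  # (number of frames, 12 cepstral coefficients), e.g. (98, 12)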
Example #56
0
import os
import csv
import sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import parselmouth
import librosa
import seaborn as sns
import scipy.io.wavfile as wav

DEFAULT_INPUT_DIR = 'raw_wav'
DEFAULT_OUTPUT_DIR = 'raw_csv'
WINFUNC = np.hamming


def get_non_silence_idx_range_from_pitch(wav_name: str,
                                         pitch_csv_path: str) -> [int, int]:
    corresponding_pitch_csv = f'pitch-{wav_name.replace(".wav", ".csv")}'
    df = pd.read_csv(f'{pitch_csv_path}/{corresponding_pitch_csv}')

    candidate = []
    start_idx = 0
    end_idx = df.shape[0] + 1
    count = 0
    is_counting = False
    i = 0
    for i in range(len(df['F0'])):
        is_counting = df['F0'][i] != 0
        if is_counting:
            if count == 0:
Example #57
0
                    noverlap=None,
                    fs=sample_rate,
                    nperseg=window_length)
f2, t2, Zxx2 = stft(signal[7000:],
                    window='boxcar',
                    noverlap=None,
                    fs=sample_rate,
                    nperseg=window_length)
rect_signal = []
hamm_signal = []
# Loop and calculate the signal multiplied by the respective window
for i in range(7000, len(signal), window_length):
    if len(signal[i:i + window_length]) != window_length:
        break
    hamm_signal.append(
        (signal[i:i + window_length] * np.hamming(window_length)))
    rect_signal.append((signal[i:i + window_length] *
                        scipy.signal.windows.boxcar(window_length)))

fig, axs = plt.subplots(3, 2)
axs[0, 0].plot(time, signal)
axs[0, 0].set_title('Original waveform')
axs[0, 0].set_ylabel('Amplitude')
axs[0, 0].set_xlabel('Time [sec]')
axs[1, 0].plot(rect_signal)
axs[1, 0].set_title('Signal multiplied by 160-point rectangular window')
axs[1, 0].set_ylabel('Filtered amplitude')
axs[2, 0].pcolormesh(t2,
                     f2,
                     np.abs(Zxx2),
                     vmin=0,
Example #58
0
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Data parameters
N = 36000  # number of samples
dt = 0.01  # sampling interval
fc1 = 0.5  # cutoff frequency 1 [Hz]
fc2 = 35.0  # cutoff frequency 2 [Hz]
A1, A2 = 20, 5
t = np.arange(0, N * dt, dt)  # time axis
freq = np.linspace(0, 1.0 / dt, N)  # frequency axis
hamm = np.hamming(N)  # Hamming window

# Load the CSV
df = pd.read_csv(
    "C:/github/sample/python/numpy/fft/strong-motion/2011-03-11-14-46-30-miyazaki-oketanimachi/ex2/data.csv",
    encoding="UTF-8",
    skiprows=6)

# Extract only the acceleration data from the UD (3rd) column

f = df["UD"]
# Fast Fourier transform (convert to the frequency domain)
F = np.fft.fft(f * hamm)

# Normalize and double the AC components
F = F / (N / 2)

# Restore the DC component to its original scale
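
The listing stops at the DC step; the usual continuation in this style of tutorial (an assumption here, matching the comment above) halves the DC bin back and plots the single-sided amplitude spectrum:

F[0] = F[0] / 2          # undo the doubling for the DC bin (assumed continuation)
amp = np.abs(F)          # amplitude spectrum
plt.plot(freq[:N // 2], amp[:N // 2])
plt.show()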
Example #59
0
def FFT_AMP(data):
        data= data - data.mean()
        data=np.hamming(len(data))*data
        data=np.fft.fft(data)
        data=np.abs(data)
        return data
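
Example use of FFT_AMP: the dominant bin of a pure tone should land at the tone's frequency; the numbers below are illustrative.

import numpy as np

fs, n = 1000, 1000
tone = np.sin(2 * np.pi * 50 * np.arange(n) / fs)
amp = FFT_AMP(tone)
print(np.argmax(amp[:n // 2]) * fs / n)  # ~50 Hz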
Example #60
0
    def noise_filter(self, x, fs=16000):
        # Compute parameters
        window_length = 20 * fs // 1000  # frame size in samples
        PERC = 50  # window overlap as a percentage of the frame
        cover_window = window_length * PERC // 100  # overlapping samples
        uncover_window = window_length - cover_window  # non-overlapping samples
        # Set default parameters
        Thres = 3
        Expnt = 2.0
        beta = 0.002
        G = 0.9
        # Initialize the Hamming window
        win = np.hamming(window_length)
        # normalization gain for overlap+add with 50% overlap
        winGain = uncover_window / sum(win)

        # Noise magnitude calculations - assuming that the first 5 frames is noise/silence
        nFFT = 2 * 2**(self._nextpow2(window_length))
        noise_mean = np.zeros(nFFT)

        j = 0
        for k in range(1, 6):
            noise_mean = noise_mean + abs(
                np.fft.fft(win * x[j:j + window_length], nFFT))
            j = j + window_length
        noise_mu = noise_mean / 5

        # --- allocate memory and initialize various variables
        k = 1
        img = 1j
        x_old = np.zeros(cover_window)
        Nframes = len(x) // uncover_window - 1
        xfinal = np.zeros(Nframes * uncover_window)

        # =========================    Start Processing   ===============================
        for n in range(0, Nframes):
            # Windowing
            insign = win * x[k - 1:k + window_length - 1]
            # compute fourier transform of a frame
            spec = np.fft.fft(insign, nFFT)
            # compute the magnitude
            sig = abs(spec)

            # save the noisy phase information
            theta = np.angle(spec)
            SNRseg = 10 * np.log10(
                np.linalg.norm(sig, 2)**2 / np.linalg.norm(noise_mu, 2)**2)

            if Expnt == 1.0:  # magnitude spectrum
                alpha = self._berouti(SNRseg, 3)
            else:  # power spectrum
                alpha = self._berouti(SNRseg, 4)

            sub_speech = sig**Expnt - alpha * noise_mu**Expnt
            # when the clean-signal power drops below the noise power
            diffw = sub_speech - beta * noise_mu**Expnt
            # beta negative components

            z = [i for i, frame in enumerate(diffw) if frame < 0]

            if len(z) > 0:
                sub_speech[z] = beta * noise_mu[z]**Expnt
            if SNRseg < Thres:  # Update noise spectrum
                noise_temp = G * noise_mu**Expnt + (
                    1 - G) * sig**Expnt  # smooth the noise power spectrum
                noise_mu = noise_temp**(1 / Expnt)  # updated noise magnitude spectrum
            # flipud flips the array top-to-bottom (mirrored about its horizontal
            # midline), restoring the conjugate-symmetric half of the spectrum
            sub_speech[nFFT // 2 + 1:nFFT] = np.flipud(sub_speech[1:nFFT // 2])
            x_phase = (sub_speech**
                       (1 / Expnt)) * (np.array([math.cos(x)
                                                 for x in theta]) + img *
                                       (np.array([math.sin(x)
                                                  for x in theta])))
            # take the IFFT

            xi = np.fft.ifft(x_phase).real
            # --- Overlap and add ---------------
            xfinal[k - 1:k + uncover_window - 1] = x_old + xi[0:cover_window]
            x_old = xi[0 + cover_window:window_length]
            k = k + uncover_window

        return np.array(xfinal * winGain, dtype=np.short)
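
The helpers _nextpow2 and _berouti are not shown in this excerpt. Conventionally _nextpow2 returns the smallest exponent p with 2**p >= n, and _berouti the SNR-dependent over-subtraction factor of Berouti et al.; both sketches below are assumptions, not the original code.

import numpy as np

def _nextpow2(n):
    # Smallest p such that 2**p >= n (assumed behaviour).
    return int(np.ceil(np.log2(n)))

def _berouti(snr, base):
    # Assumed Berouti over-subtraction: subtract harder at low segmental SNR,
    # clamped outside the [-5, 20] dB range.
    if -5.0 <= snr <= 20.0:
        return base - snr * 3.0 / 20.0
    return base + 1.0 if snr < -5.0 else base - 3.0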