Example #1
    def frame_wceps(self, frame_in):
        """Returns warped MFCCs."""

        # Calculate blackman-windowed cepstrum
        if self.window is None:
            self.window = scipy.blackman(frame_in.shape[0] + 1)[:-1]
            self.window /= np.sum(self.window)

        windowed_frame = self.window * frame_in.flatten()
        windowed_frame -= np.mean(windowed_frame)

        power_spec = np.fft.fft(windowed_frame)
        power_spec = np.real(power_spec) * np.real(power_spec) + np.imag(
            power_spec) * np.imag(power_spec)
        cepstrum = np.fft.ifft(np.log(power_spec + 0.00001))

        # Scale the first and the middle coefficients by 0.5
        cepstrum = np.real(cepstrum.reshape(1, cepstrum.shape[0]))
        cepstrum[:, 0] *= 0.5
        cepstrum[:, len(self.window) // 2] *= 0.5

        warped_cepstrum_fs = self.warper.warpSequence(
            [bk.FeatureSequence(cepstrum, True, True)],
            int(len(self.window) / 2), self.num_wceps, 0.42)[0]  # TODO ???

        power_spec = power_spec.reshape(1, power_spec.shape[0])
        try:
            warped_cepstrum_fsOpt = self.warper.minimizeEstimationBias(
                warped_cepstrum_fs, bk.FeatureSequence(power_spec, True, True),
                5, 0.42)
        except ValueError:
            warped_cepstrum_fsOpt = warped_cepstrum_fs

        # Return the bias-minimized coefficients (falls back to the plain warped
        # cepstrum when minimizeEstimationBias failed above)
        return warped_cepstrum_fsOpt.getMatrix().flatten()
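# Portability note (not from the original class): newer SciPy releases no longer
# re-export numpy's window functions at the top level, so scipy.blackman is gone
# there; scipy.signal.windows.blackman (or numpy.blackman) builds the same window.
# `frame_len` below is only a stand-in for frame_in.shape[0] from the method above.
from scipy.signal.windows import blackman
import numpy as np

frame_len = 512                            # illustrative frame length (assumption)
window = blackman(frame_len + 1)[:-1]      # same periodic-window trick as above
window /= np.sum(window)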
Example #2
def stft(x, fftsize, overlap, beta):
    # beta == 0 selects a Blackman analysis window, any other value a Hanning window
    hop = fftsize // overlap  # integer hop size, so it can be used as a range step
    if beta == 0:
        # the "+1 then drop the last sample" trick gives a periodic window
        # for better reconstruction
        w = sc.blackman(fftsize + 1)[:-1]
    else:
        w = sc.hanning(fftsize + 1)[:-1]
    X = np.array([
        np.fft.rfft(w * x[i:i + fftsize])
        for i in range(0, len(x) - fftsize, hop)
    ])
    # bins and tid are module-level frequency / time axes defined elsewhere in the file
    y = np.linspace(0, bins[-1], np.shape(X)[1])  # frequency axis of the frames
    x = np.linspace(0, tid[-1], np.shape(X)[0])   # time axis of the frames
    return X, x, y
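# Usage sketch (not from the original file): the module-level names `sc`, `bins`
# and `tid` are assumptions here -- `sc` is taken to be an older SciPy import that
# still exposes sc.blackman/sc.hanning, and `bins`/`tid` are the frequency and time
# axes the function reads at its end.
import numpy as np
import scipy as sc

fs = 8000
tid = np.arange(fs) / fs                       # 1 s time axis
sig = np.sin(2 * np.pi * 440.0 * tid)          # 440 Hz test tone
fftsize, overlap = 512, 4
bins = np.fft.rfftfreq(fftsize, 1.0 / fs)      # frequency axis for the rFFT bins
X, times, freqs = stft(sig, fftsize, overlap, beta=0)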
Example #3
def windowWeight(sig, window_size):
    """Multiply successive non-overlapping frames of `sig` by a Blackman window."""
    output = np.array([], dtype=np.float64)
    w = scipy.blackman(window_size)
    last_start = len(sig) - window_size
    for i in range(0, last_start, window_size):
        output = np.append(output, w * sig[i:i + window_size])
    # Window the final window_size samples as well; this tail overlaps the last full
    # frame whenever len(sig) is not a multiple of window_size.
    output = np.append(output, w * sig[len(sig) - window_size:len(sig)])
    return output
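# Usage sketch (not from the original file): window one second of a 440 Hz tone in
# non-overlapping 256-sample frames. Assumes an older SciPy where scipy.blackman is
# still available (numpy.blackman is the drop-in alternative).
import numpy as np
import scipy

fs = 8000
tone = np.sin(2 * np.pi * 440.0 * np.arange(fs) / fs)
weighted = windowWeight(tone, window_size=256)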
Example #4
    def extract_feats(self, frame_in):
        """Returns the mean power of the Blackman-windowed frame."""

        # Build the normalized Blackman analysis window on first use
        if self.window is None:
            self.window = scipy.blackman(frame_in.shape[0] + 1)[:-1]
            self.window /= np.sum(self.window)

        windowed_frame = self.window * frame_in.flatten()
        windowed_frame -= np.mean(windowed_frame)

        # Mean squared amplitude of the windowed, zero-mean frame
        result_frames = np.array([np.mean(windowed_frame ** 2)])
        return result_frames.reshape(-1, 1)
Example #5
def plot_spectrogram(waveform, sampling_rate, window_name, filename):
    """
    Display a spectrogram of the waveform.
    """

    window_duration = 40.0 * 1.0e-3  # window length in seconds
    window_shift = 5.0 * 1.0e-3  # hop between successive windows in seconds
    window_size = int(window_duration * sampling_rate)  # window length in samples
    window_overlap = int(
        (window_duration - window_shift) * sampling_rate)  # overlap of adjacent windows

    # Window function (the scipy.signal.windows calls assume scipy.signal is imported)
    if window_name == "hanning":
        window = scipy.hanning(window_size)  # Hanning window
    elif window_name == "hamming":
        window = scipy.hamming(window_size)  # Hamming window
    elif window_name == "gaussian":
        # scipy has no top-level gaussian(); the std below is an arbitrary choice
        window = scipy.signal.windows.gaussian(window_size, std=window_size / 6.0)
    elif window_name == "blackman":
        window = scipy.blackman(window_size)  # Blackman window
    elif window_name == "trianglar":
        window = scipy.signal.windows.triang(window_size)  # triangular window
    elif window_name == "rectanglar":
        # scipy has no rectang(); boxcar is the rectangular window
        window = scipy.signal.windows.boxcar(window_size)
    else:
        print("The window function name is wrong.")
        exit()

    sp, freqs, times, im = plt.specgram(
        waveform,
        NFFT=window_size,
        Fs=sampling_rate,
        window=window,
        noverlap=window_overlap
    )

    plt.title("Spectrogram [" + window_name + "] (" + filename + ")")
    plt.xlabel("Time[sec]")
    plt.ylabel("Frequency[Hz]")
    plt.xlim([0, times[-1]])
    plt.ylim([0, 5000])
    plt.savefig("graph/spectrogram/" + filename.split("/")[1].split(".")[0] +
                "_" + window_name + ".png")
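# Usage sketch (not part of the original script): read a mono WAV file and render
# its spectrogram with a Hanning window. The "wav/<name>.wav" path layout matches
# what the savefig call above expects, but the concrete file name is an assumption.
from scipy.io import wavfile

sampling_rate, waveform = wavfile.read("wav/sample.wav")
plot_spectrogram(waveform, sampling_rate, "hanning", "wav/sample.wav")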
Example #6
    def __init__(self,
                 originalFrameSizeMs,
                 frameShiftMs,
                 sampleRate,
                 melCoeffCount,
                 numReconstructionIterations=5,
                 extraContext=0,
                 cutoff=7900,
                 normFactor=1.0,
                 useLogMels=True,
                 name='GriffinLim'):
        """Initializes three ring buffers, one for spectra and two for audio output"""
        super(GriffinLimSynthesis, self).__init__(name=name)
        self.useLogMels = useLogMels

        # Make sure no integer accidents happen
        frameSizeMs = float(originalFrameSizeMs)
        frameShiftMs = float(frameShiftMs)
        sampleRate = float(sampleRate)

        # Frame size and shift
        self.frameShiftMs = frameShiftMs
        self.sampleRate = sampleRate
        self.fftSize = int((frameSizeMs / 1000.0) * self.sampleRate)
        self.frameShift = int((frameShiftMs / 1000.0) * self.sampleRate)

        # Set block length accounting for overlap
        self.contextWidth = int(frameSizeMs / frameShiftMs)
        self.blockLen = self.contextWidth * 2 + 1 + extraContext

        # Length for ring buffers
        self.inputBufferLength = int(self.blockLen * 2.5)
        self.outputBufferLength = int(self.fftSize +
                                      self.frameShift * self.blockLen * 2.5)

        # Buffers and positions
        self.inputBuffer = []
        self.outputBuffer = []
        self.windowBuffer = []
        self.inputBufferPos = 0
        self.outputBufferPosMs = 0
        self.framePos = 0
        self.rfc = 0
        self.startTime = time.time()

        # Processing parameters
        self.normFactor = normFactor

        self.fftWindow = scipy.blackman(self.fftSize)
        self.numReconstructionIterations = numReconstructionIterations

        filterOrd = int((sampleRate / 1000.0) * frameShiftMs / 32.0)
        self.filterNumerator, self.filterDenominator = signal.iirfilter(
            filterOrd,
            float(cutoff) / float((sampleRate / 2)),
            btype="lowpass")
        self.filterState = signal.lfiltic(self.filterNumerator,
                                          self.filterDenominator, np.array([]))

        specSize = int(int((frameSizeMs / 1000.0) * sampleRate) / 2 + 1)
        self.melFilter = mel.MelFilterBank(specSize, melCoeffCount, sampleRate)
Example #7
    def add_data(self, dataFrame, data_id=0):
        """Add a single frame of data, process it and potentially call callbacks.
           Buffer is allocated on first call."""
        dataFrame = dataFrame.flatten()

        # Allocate the buffers on the first call
        if len(self.inputBuffer) == 0:
            self.inputBuffer = np.zeros(
                (self.inputBufferLength, dataFrame.shape[0]))
            self.outputBuffer = np.zeros(self.outputBufferLength)
            self.windowBuffer = np.zeros(self.outputBufferLength)

        # Add the frame to the input ring buffer; inputBufferPos is the write pointer
        # and framePos counts the total number of frames received
        self.inputBuffer[self.inputBufferPos] = dataFrame
        self.inputBufferPos = (self.inputBufferPos + 1) % self.inputBufferLength
        self.framePos += 1

        # Calculate last index in the output buffer
        previousOutputBufferPos = int(
            (self.outputBufferPosMs / 1000.0) * self.sampleRate)

        # Compute new index in the output buffer
        self.outputBufferPosMs += self.frameShiftMs
        outputBufferPos = int(
            (self.outputBufferPosMs / 1000.0) * self.sampleRate)
        framesShifted = outputBufferPos - previousOutputBufferPos

        # Keep both indices in the range of the ring buffers
        previousOutputBufferPos = previousOutputBufferPos % self.outputBufferLength
        outputBufferPos = outputBufferPos % self.outputBufferLength

        # Force the index of the previous position to be smaller than the current index
        if previousOutputBufferPos >= outputBufferPos:
            previousOutputBufferPos = previousOutputBufferPos - self.outputBufferLength

        # Nothing more to do until we have one complete block
        if self.framePos < self.blockLen - self.contextWidth:
            # TODO TEST ME
            return (np.array([]))

        # Indices for one input block
        bufferIndices = list(
            range(self.inputBufferPos - self.blockLen + self.contextWidth,
                  self.inputBufferPos))
        bufferIndices = [
            x + self.inputBufferLength if x < 0 else x for x in bufferIndices
        ]  # Wrap negative indices back into the ring buffer's valid range

        # Process block
        reconstructedAudioFrames = self.reconstructWavFromSpectrogram(
            self.inputBuffer[bufferIndices],  # frames in the current block
            self.blockLen * self.frameShift   # block length in samples
        )

        # Zero out values in the buffer at the current buffer range
        newIndices = list(range(previousOutputBufferPos, outputBufferPos))
        newIndices = [
            x + self.outputBufferLength if x < 0 else x for x in newIndices
        ]
        self.outputBuffer[newIndices] = np.zeros(len(newIndices))
        self.windowBuffer[newIndices] = np.zeros(len(newIndices))

        # Overlap-add the reconstructed frame
        reconstructedIndices = list(
            range(outputBufferPos - reconstructedAudioFrames.shape[0],
                  outputBufferPos))
        reconstructedIndices = [
            x + self.outputBufferLength if x < 0 else x
            for x in reconstructedIndices
        ]  # Wrap negative indices back into the ring buffer's valid range

        overlapWindow = scipy.blackman(reconstructedAudioFrames.shape[0])
        self.outputBuffer[reconstructedIndices] += reconstructedAudioFrames  # (optionally * overlapWindow)
        # Accumulate the window weights so the overlapping segments can be normalized
        # (divided by the summed window) when the finalized frame is built below
        self.windowBuffer[reconstructedIndices] += overlapWindow

        # Build finalized frame
        returnIndices = list(
            range(
                outputBufferPos - reconstructedAudioFrames.shape[0],
                outputBufferPos - reconstructedAudioFrames.shape[0] +
                framesShifted))
        returnIndices = [
            x + self.outputBufferLength if x < 0 else x %
            self.outputBufferLength for x in returnIndices
        ]
        returnBuffer = self.outputBuffer[returnIndices]
        returnWindowBuffer = self.windowBuffer[returnIndices]
        for i in range(len(returnBuffer)):
            if returnWindowBuffer[i] != 0:
                returnBuffer[i] = returnBuffer[i] / returnWindowBuffer[i]

        # Low-pass filter (coefficients come from iirfilter(..., btype="lowpass") in __init__)
        returnBuffer, self.filterState = signal.lfilter(self.filterNumerator,
                                                        self.filterDenominator,
                                                        returnBuffer,
                                                        zi=self.filterState)

        # Convert to int16 audio and hand it to the output callback
        self.rfc += len(returnBuffer)
        # print("RS RET FRAMES " + str(self.rfc) + " @ " + str(time.time()))
        # print(self.rfc / (time.time() - self.startTime))

        self.output_data(
            np.int16(
                np.clip(returnBuffer / (self.normFactor * 1.01), -0.99, 0.99) *
                (2**15 - 1)))
Example #8
File: test.py, Project: ilyamirin/R_B_Scott
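# The original test.py defines its signal parameters before this point; the block
# below is an assumed, minimal set of module-level definitions supplying the names
# the fragment references (all values are illustrative, not the original ones).
import numpy as np
import scipy
import matplotlib.pyplot as plt

fs = 44100                          # sample rate in Hz (assumption)
duration = 1.0                      # seconds (assumption)
f, f1, f2 = 440.0, 554.37, 659.26   # three test tones, roughly an A-major triad (assumption)
volume = 0.3                        # keeps the summed signal inside [-1.0, 1.0]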
# generate samples, note conversion to float32 array
samples = (np.sin(2 * np.pi * np.arange(fs * duration) * f / fs)).astype(
    np.float32)
samples1 = (np.sin(2 * np.pi * np.arange(fs * duration) * f1 / fs)).astype(
    np.float32)
samples2 = (np.sin(2 * np.pi * np.arange(fs * duration) * f2 / fs)).astype(
    np.float32)

# for paFloat32 sample values must be in range [-1.0, 1.0]

data = volume * (samples + samples1 + samples2)

fig, (time_audiodata_ax, freq_range_ax) = plt.subplots(2, 1)

blk = scipy.blackman(len(data))  # Blackman window matching the signal length
time_audiodata_ax.plot(data)
freq_range_ax.set_title("freq")
fft = np.abs(np.fft.fft(data * blk, norm='ortho')) ** 2  # power spectrum |X|^2
fft = fft[:len(fft) // 2]
# fft = fft[:int(len(fft) / 2)]  # keep only first half
freq = np.fft.fftfreq(len(data), 1.0 / fs)
freq = freq[:len(freq) // 2]
# freq = freq[:int(len(freq) / 2)]  # keep only first half
# freqPeak = freq[np.where(fft == np.max(fft))[0][0]] + 1

freq_range_ax.plot(freq, fft)
freqPeak = freq[np.where(fft == np.max(fft))[0][0]] + 1
print(freqPeak)

plt.show()
Example #9
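# The snippet below is a fragment: it reads several module-level names defined
# earlier in the original script. The block here is an assumed, illustrative set so
# the code that follows has every name it needs (all values are guesses, not the
# original configuration; `sc` is taken to be an older SciPy that still exposes
# sc.hamming, sc.hanning, sc.bartlett, sc.blackman and sc.kaiser).
import numpy as np
import scipy as sc
import matplotlib.pyplot as plt
from numpy.fft import fft

M = 64                                 # window length
p = 1024 - M                           # zero-padding amount
beta = 6.0                             # Kaiser shape parameter
bins = np.linspace(0, 2 * np.pi, M + p, endpoint=False)  # frequency axis in rad/s
xaxis_max = np.pi                      # plot up to half the sampling rate


def rect(M):
    """Rectangular window used by the `rectangular` line below."""
    return np.ones(M)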
def db(x):
    return 20 * np.log10(x)


def pad(x, p):
    return np.pad(x, (0, p), 'constant', constant_values=0)


def k(M, beta):
    return pad(sc.kaiser(M, beta), p)


hamming = pad(sc.hamming(M), p)
hann = pad(sc.hanning(M), p)
bartlett = pad(sc.bartlett(M), p)
blackman = pad(sc.blackman(M), p)
kaiser = k(M, beta)
rectangular = pad(rect(M), p)

i = 0
while bins[i] < xaxis_max:
    inter = i
    i += 1

# dB plot of window as specified above

plt.figure(1)
plt.plot(bins[:inter], db(np.abs(fft(k(M, beta))[:inter])))  # dB magnitude of the zero-padded Kaiser window
plt.xlabel('Frequency [rad/s]', fontsize=13)
plt.ylabel('Amplitude [dB]', fontsize=13)
plt.axis([0, bins[inter - 1], -100, 0])
Example #10
import sounddevice as sd
import numpy as np
from numpy.fft import fft, fftfreq, fftshift
import scipy
import matplotlib.pyplot as plt
import datetime
from math import log2, pow

# rate = 44100
rate = 44100
duration = 1200

plt.ion()
fig, (time_audiodata_ax, freq_range_ax) = plt.subplots(2, 1)
plt.show()
blackman = scipy.blackman(rate)

A4 = 440
C0 = A4 * pow(2, -4.75)
name = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]


def pitch(freq):
    h = round(12 * log2(freq / C0))
    octave = h // 12
    n = h % 12
    return name[n] + str(octave)


with sd.Stream(channels=1, samplerate=rate) as s:
    playback_speed_multiplier = 1
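    # The example ends inside the stream block; below is a minimal sketch of a
    # read/FFT/pitch loop that could follow. The one-second read size, the reuse of
    # the module-level `blackman` window and the loop bound are assumptions, not the
    # original code.
    for _ in range(duration):                             # roughly `duration` one-second reads
        block, overflowed = s.read(rate)                  # one second of input audio
        spectrum = np.abs(fft(block[:, 0] * blackman))    # Blackman-windowed spectrum
        freqs = fftfreq(rate, 1.0 / rate)
        peak = freqs[np.argmax(spectrum[:rate // 2])]     # strongest bin below Nyquist
        if peak > 0:
            print(pitch(peak))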
Example #11
import numpy as np


def gaussian(length, var=None):
    """Gaussian window of the given length; by default the variance is chosen so the
    window falls to 0.5 at a quarter of its length."""
    if var is None:
        halfptr = length / 4.
        # solve exp(-halfptr**2 / (2 * var)) = 0.5 for the variance
        var = -halfptr**2 / (2 * np.log(0.5))
    ptr = np.arange((1 - length) / 2., (length + 1) / 2.)
    Y = np.exp(-ptr**2 / (2 * var))
    # Y = ((length + 1) / 2.) * (Y / sum(Y))
    return Y


if __name__ == '__main__':
    from pylab import *
    fftLen = 1024
    std = fftLen / 2

    Y1 = gaussian(fftLen)
    Y2 = hanning(fftLen)
    Y3 = hamming(fftLen)
    Y4 = blackman(fftLen)
    Y5 = bartlett(fftLen)
    #    Y6 = kaiser(fftLen)
    plot(Y1, label="gaussian")
    plot(Y2, label="hanning")
    plot(Y3, label="hamming")
    plot(Y4, label="blackman")
    plot(Y5, label="bartlett")
    #    plot(Y6, label="kaiser")
    ylim(0, 1)
    legend()
    show()
Example #12
def play1():
    global output1
    global output0

    sd.play(input1, FS)
    sd.wait()

    x0 = np.arange(0, LEN_0, 1)
    x1 = np.arange(0, LEN_1, 1)
    y0 = np.reshape(input0, LEN_0)
    y1 = np.reshape(input1, LEN_1)

    w = blackman(CHUNK)

    #  Chunk numbers
    n0 = LEN_0 // CHUNK
    n1 = LEN_1 // CHUNK

    zf0 = np.zeros((n0, HALF_CHUNK))
    zf1 = np.zeros((n1, HALF_CHUNK))
    output0 = np.zeros((n0, HALF_CHUNK))
    output1 = np.zeros((n1, HALF_CHUNK))

    #  Spectrum calculation for first record
    for i in range(0, n0):
        zf0[i] = 2.0 / CHUNK * abs(
            fft(y0[CHUNK * i:CHUNK * (i + 1)] * w)[0:HALF_CHUNK])
        output0[i] = medfilt(zf0[i], MEDFILT_W)
        max0 = np.amax(output0[i])
        for j in range(0, HALF_CHUNK):
            #  Spectrum filter and mask
            if (output0[i][j] <= LOGIC_LEVEL * max0) or (np.abs(output0[i][j])
                                                         < NOISE_LEVEL):
                output0[i][j] = 0
            else:
                output0[i][j] = 1
    #  Spectrum calculation for second record
    for i in range(0, n1):
        zf1[i] = 2.0 / CHUNK * abs(
            fft(y1[CHUNK * i:CHUNK * (i + 1)] * w)[0:HALF_CHUNK])
        output1[i] = medfilt(zf1[i], MEDFILT_W)
        max1 = np.amax(output1[i])
        for j in range(0, HALF_CHUNK):
            #  Spectrum filter and mask
            if (output1[i][j] <= LOGIC_LEVEL * max1) or (np.abs(output1[i][j])
                                                         < NOISE_LEVEL):
                output1[i][j] = 0
            else:
                output1[i][j] = 1

    #  Spectrum comparison
    n = n1 + 1  # Number of comparisons
    xd = np.arange(n)
    matches = np.zeros(n)
    recognition = np.zeros(n)
    output1 = np.concatenate([output1, np.zeros((n0, HALF_CHUNK))])
    match_output1 = np.zeros((n0, HALF_CHUNK))
    for i in range(0, n):
        window = output1[i:i + n0, :]
        for j in range(0, n0):
            for k in range(0, HALF_CHUNK):
                match_output1[j][k] = window[j][k] and output0[j][k]
        total_harm = np.count_nonzero(output0)
        match_harm = np.count_nonzero(match_output1)
        # Match function
        if total_harm > 0:
            matches[i] = 1 - (total_harm - match_harm) / total_harm
        else:
            matches[i] = 0
        if i > 0:
            if ((matches[i] - matches[i - 1]) <= 0) and (matches[i - 1] >
                                                         MATCH_LEVEL):
                recognition[i] = recognition[i - 1] = 1

    if np.count_nonzero(matches) == 0:
        l3['text'] = "No matches found"
    else:
        l3['text'] = np.count_nonzero(matches)

    #  Input/output plotting
    plt.figure()
    plt.title('Blue - first record, orange - second record, green - match')
    plt.plot(x0 / FS, y0, x1 / FS, y1)  # , xd * n1 / ((n - 1) * N), matches
    plt.grid(True)
    plt.fill_between(xd * n1 / ((n - 1) * N),
                     -1,
                     1,
                     where=recognition > 0,
                     color='green',
                     alpha=0.75)
    plt.xlabel('Time, s')
    plt.show()