Example #1
    def Playback(
        self
    ):  # similar to Go, but uses data from Load instead of collecting new data
        self.Status = True
        chunkSize = 8192
        windowSize = 3
        p = pyaudio.PyAudio()
        audioStream = p.open(format=pyaudio.paInt16,
                             channels=1,
                             rate=self.fs,
                             input=True,
                             frames_per_buffer=chunkSize)

        numSamples = len(self.Recording)
        self.Formants = np.zeros((100, 5), dtype=np.float32)
        self.FormantTime = np.zeros(100, dtype=np.float32)
        self.Pitch = np.zeros(100, dtype=np.float32)
        self.PitchTime = np.zeros(100, dtype=np.float32)
        PitchCount = 0
        FormantCount = 0

        ax = self.RawPlot.figure.add_subplot(111)

        f0ax = self.FundamentalFrequenncyPlot.figure.add_subplot(111)
        f0ax.tick_params(axis='x',
                         which='both',
                         bottom=False,
                         top=False,
                         labelbottom=False)
        f0ax.set_position([0.35, 0.05, 0.6, 0.93])
        tractAx = self.VocalTractPlot.figure.add_subplot(111)
        tractAx.tick_params(axis='x',
                            which='both',
                            bottom=False,
                            top=False,
                            labelbottom=False)
        tractAx.set_position([0.35, 0.05, 0.6, 0.93])
        tractAx.set_ylabel('Vocal Tract Length (cm)')
        tractAx.set_ylim((0, 25))
        tractAx.set_xlim((0, 0.8))

        formantAx = self.FormantPlot.figure.add_subplot(111)
        maxPitchLag = 3
        maxVocalLag = 3

        ds_rate = 3

        c = 34300  # speed of sound in cm/s

        Count = 0
        t = 0
        print('Beginning Playback')
        time = np.linspace(0, numSamples / self.fs, numSamples)
        try:
            start = ti.time()
            while t < numSamples - chunkSize and self.Status:
                t += chunkSize
                data = PyAudioTest.getChunk(chunkSize, audioStream, Random=0)  # read (and discard) a live chunk so the loop runs at real-time pace
                data = self.Recording[t - chunkSize:t]  # the samples actually used come from the loaded recording
                data_ds = data[0:chunkSize:ds_rate]

                # use yin implementation
                data_hamming = data * np.hamming(chunkSize)
                df = yin.differenceFunction(data_hamming, chunkSize,
                                            self.fs / 75)
                cmndf = yin.cumulativeMeanNormalizedDifferenceFunction(
                    df, len(df))
                f0 = yin.getPitch(cmndf,
                                  self.fs / 500,
                                  self.fs / 75,
                                  harmo_th=0.35)

                if f0:
                    # store pitch and time
                    self.Pitch[PitchCount] = 1.0 * self.fs / f0
                    self.PitchTime[PitchCount] = 1.0 * (
                        t - chunkSize / 2) / self.fs
                    PitchCount += 1
                    # add space if needed
                    if PitchCount >= len(self.PitchTime):
                        self.Pitch = np.concatenate(
                            (self.Pitch, np.zeros(200, dtype=np.float32)))
                        self.PitchTime = np.concatenate(
                            (self.PitchTime, np.zeros(200, dtype=np.float32)))

                    RecentPitches = []
                    pitchIDX = PitchCount - 1
                    while self.PitchTime[pitchIDX] >= 1.0 * (
                            t - chunkSize /
                            2) / self.fs - maxPitchLag and pitchIDX >= 0:
                        RecentPitches.append(self.Pitch[pitchIDX])
                        pitchIDX -= 1

                    meanPitch = np.mean(RecentPitches)
                    if len(RecentPitches) == 1:
                        stdPitch = 25
                    else:
                        stdPitch = np.std(RecentPitches)
                    f0ax.bar([0], [2.0 * stdPitch],
                             bottom=[meanPitch - stdPitch])
                    f0ax.set_ylabel('Fundamental Frequency (Hz)')
                    f0ax.set_ylim((0, 500))
                    f0ax.set_xlim((0, 0.8))

                    self.FundamentalFrequenncyPlot.draw()

                # use my terrible gaussian estimation formant finder
                formantAx.clear()
                formantAx.hold(True)
                if f0:
                    fBins, PSD = sp.signal.periodogram(data_ds,
                                                       self.fs / ds_rate)
                    PSD = 20 * np.log10(PSD)
                    try:
                        Formants = FormantFinder.findFormantsLPC(
                            data_ds, self.fs / ds_rate)

                        for f in range(len(Formants)):
                            formantAx.plot([Formants[f], Formants[f]],
                                           [-100, 75],
                                           color='red')

                        formantAx.plot(fBins, PSD)
                        formantAx.set_title('Power Spectrum - Formants')
                        formantAx.set_xlabel('Frequency (Hz)')
                        formantAx.set_ylabel('Power (dB)')
                        formantAx.set_ylim((-90, 90))
                        formantAx.set_xlim((0, 5000))
                        '''
                        formantAx.bar(range(len(Formants)), Formants)
                        formantAx.set_xlabel('Formant number')
                        formantAx.set_ylabel('Frequency (Hz)')
                        formantAx.set_title('Formants Frequencies')
                        formantAx.set_xlim((0, 4.8))
                        formantAx.set_ylim((0, 5000))
                        formantAx.set_xticks([0.4, 1.4, 2.4, 3.4, 4.4])
                        formantAx.set_xticklabels(['F1', 'F2', 'F3', 'F4', 'F5'])
                        '''
                        self.FormantPlot.draw()
                        formantAx.hold(False)

                        if len(Formants) >= 5:
                            self.Formants[FormantCount, 0:5] = Formants[0:5]
                        else:
                            self.Formants[FormantCount,
                                          0:len(Formants)] = Formants
                        self.FormantTime[FormantCount] = 1.0 * (
                            t - chunkSize / 2) / self.fs
                        FormantCount += 1
                        # add space if needed
                        if FormantCount >= len(self.FormantTime):
                            self.Formants = np.concatenate(
                                (self.Formants,
                                 np.zeros((200, 5), dtype=np.float32)))
                            self.FormantTime = np.concatenate(
                                (self.FormantTime,
                                 np.zeros(200, dtype=np.float32)))

                        RecentTractLength = []
                        tractIDX = FormantCount - 1
                        while self.FormantTime[tractIDX] >= 1.0 * (
                                t - chunkSize /
                                2) / self.fs - maxVocalLag and tractIDX >= 0:
                            RecentTractLength.append(
                                FormantFinder.getVocalTractLength(
                                    self.Formants[tractIDX, :],
                                    c,
                                    method='lammert'))
                            tractIDX -= 1

                        meanTractLength = np.median(RecentTractLength)
                        if len(RecentTractLength) == 1:
                            stdTractLength = 2
                        else:
                            stdTractLength = np.std(RecentTractLength)
                        #TractLength = FormantFinder.getVocalTractLength(Formants, c)
                        tractAx.bar([0], [2 * stdTractLength],
                                    bottom=[meanTractLength - stdTractLength])
                        #tractAx.bar([0], [TractLength])
                        tractAx.set_ylabel('Vocal Tract Length (cm)')
                        tractAx.set_ylim((0, 25))
                        tractAx.set_xlim((0, 0.8))
                        self.VocalTractPlot.draw()

                    except (RuntimeError):
                        Formants = np.zeros(3)

                else:
                    fBins = np.linspace(0, self.fs / 2, 10)
                    PSD = np.zeros(10)

                Count += 1
                if t > windowSize * self.fs and Count % 3 == 0:
                    ax.plot(time[t - windowSize * self.fs:t],
                            self.Recording[t - windowSize * self.fs:t])
                    plt.xlim(t / self.fs - windowSize, t / self.fs + 1)
                    ax.set_xlabel('Time (s)')
                    ax.set_ylabel('amplitude')
                    ax.set_title('Raw Waveform')
                    self.RawPlot.draw()

                QtCore.QCoreApplication.processEvents()

        except (KeyboardInterrupt, SystemExit):
            self.FormantPlot.draw()
            self.RawPlot.draw()
            self.FundamentalFrequenncyPlot.draw()
            self.Pitch = self.Pitch[0:PitchCount]
            self.PitchTime = self.PitchTime[0:PitchCount]
            self.Formants = self.Formants[0:FormantCount, :]
            self.FormantTime = self.FormantTime[0:FormantCount]
            print('Recording Completed')
            print('recorded time is')
            print(1.0 * t / self.fs)
            print('elapsed time is:')
            print(ti.time() - start)
            return True

        self.Pitch = self.Pitch[0:PitchCount]
        self.PitchTime = self.PitchTime[0:PitchCount]
        self.Formants = self.Formants[0:FormantCount, :]
        self.FormantTime = self.FormantTime[0:FormantCount]
        print('Recording Completed')
        print('recorded time is')
        print(1.0 * t / self.fs)
        print('elapsed time is:')
        print(ti.time() - start)
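# The loop above calls FormantFinder.getVocalTractLength(formants, c, method='lammert'),
# a helper that is not included in this snippet. A rough standalone sketch of such an
# estimator, assuming the classic quarter-wavelength tube model (formant n of a uniform
# tube closed at one end is roughly (2n - 1) * c / (4 * L)); the weighting used by the
# real 'lammert' method is not shown here, so this is only an illustration.
import numpy as np

def getVocalTractLength_sketch(formants, c, method='lammert'):
    formants = np.asarray(formants, dtype=np.float64)
    valid = formants > 0
    n = np.arange(1, len(formants) + 1)[valid]
    # each formant gives its own length estimate L_n = (2n - 1) * c / (4 * F_n); average them
    estimates = (2 * n - 1) * c / (4.0 * formants[valid])
    return float(np.mean(estimates))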
Example #2
def main():



    # CONSTANTS
    global min_peak_threshold
    min_peak_threshold = 3000
    global brightness
    brightness = 200
    global low_bright 
    low_bright = 75
    global chill_threshold 
    chill_threshold = .25
    global CHUNK 
    CHUNK = 2**11
    global RATE 
    RATE = 44100
    global basePath 
    basePath = r"http://192.168.1.135/api/xREOsUlYetInkIHuxDldgzqJYLZySU6xDIaobRsx/"
    global lightArray 
    lightArray = [6, 1, 3, 2, 4]

    # initialize audio stream
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True, frames_per_buffer=CHUNK)

    #initialize peak variables
    max_peak = 0
    max_peak_time = datetime.datetime.now()
    oldoldpeak = 0
    oldpeak = 0
    peak = 0
    lastTriggered = datetime.datetime.now()
    max_buffer_counter = 0
    while True:
        percent_string=""
        data = np.frombuffer(stream.read(CHUNK), dtype=np.int16)
        peak = np.average(np.abs(data))*2

        #max peak handling
        if (peak >= max_peak) or ((datetime.datetime.now() - max_peak_time) > datetime.timedelta(seconds=30)):
            if max_buffer_counter > 2:
                max_peak = peak
                max_peak_time = datetime.datetime.now()
                min_peak_threshold = max_peak/10
                max_buffer_counter = 0
                print("                                                     NEWMAX")
            else:
                max_buffer_counter += 1
        currentTrigger = datetime.datetime.now()

        #check for huge spike
        if (oldpeak + min_peak_threshold * 5) < peak or (oldoldpeak + min_peak_threshold * 5) < peak:
            if ((currentTrigger - lastTriggered) > datetime.timedelta(seconds=chill_threshold)):
                lastTriggered = currentTrigger
                pulseAll()

                percent_string=" 50%"
            else:
                print("Chill")

        #check for medium spike
        elif (oldpeak + min_peak_threshold * 3) < peak or (oldoldpeak + min_peak_threshold * 3) < peak:
            if ((currentTrigger - lastTriggered) > datetime.timedelta(seconds=chill_threshold)):
                lastTriggered = currentTrigger
                pulseEnds()
                percent_string=" 30%"
            else:
                print("Chill")

        #check for small spike
        elif (oldpeak + min_peak_threshold) < peak or (oldoldpeak+min_peak_threshold) < peak:
            if ((currentTrigger - lastTriggered) > datetime.timedelta(seconds=chill_threshold)):
                lastTriggered = currentTrigger
                pulseOne()
                percent_string=" 10%"
            else:
                print("Chill")

        #no spike
        else:
            percent_string=" 0%"

        # render waveform
        bars = "#" * int(200 * peak / 2**16)
        print("MAX:%05d Peak:%05d Delta:%s %s" % (max_peak, peak, percent_string, bars))

        oldoldpeak = oldpeak
        oldpeak = peak
        time.sleep(.1)

    
    stream.stop_stream()
    stream.close()
    p.terminate()
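# pulseAll(), pulseEnds() and pulseOne() are used above but defined elsewhere in the
# original script. A hypothetical sketch of what they might do, assuming basePath points
# at a Hue-style REST bridge and lightArray holds light IDs; the endpoint layout and
# payloads below are assumptions, not part of the original code.
import requests

def pulseAll():
    # briefly drive every light in lightArray to full brightness
    for light in lightArray:
        requests.put(basePath + "lights/%d/state" % light,
                     json={"on": True, "bri": brightness, "transitiontime": 1})

def pulseEnds():
    # pulse only the lights at the two ends of the array
    for light in (lightArray[0], lightArray[-1]):
        requests.put(basePath + "lights/%d/state" % light,
                     json={"on": True, "bri": brightness, "transitiontime": 1})

def pulseOne():
    # pulse a single light at the lower brightness level
    requests.put(basePath + "lights/%d/state" % lightArray[0],
                 json={"on": True, "bri": low_bright, "transitiontime": 1})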
Example #3
def Run(C, R, mic, Plot):
    CHUNK = 44100  # number of data points to read at a time 4096
    CHUNK = C
    # 4096 byte
    # the number of frames
    RATE = 44100  # 176400  # time resolution for reading device (Hz) 44100 samples/second
    RATE = R
    # sampling rate i.e the number of frames per second
    serSignal = 'S'
    KnockSignal = 'K'
    Input_Device_Index = 2
    Input_Device_Index = mic
    plot = Plot

    # Define the serial port
    ser_port = "COM8"  # on Windows an int can also be used: COM1 = 0, COM2 = 1, ...
    baud_rate = 9600
    count = 0
    flag = False
    signal = False

    mlab = Matlab(executable=r"D:\MATLAB\bin\matlab.exe")
    mlab.start()
    p = pyaudio.PyAudio()

    # while True:
    #     ser.write(serSignal.encode('utf-8'))
    #     if ser.readline().decode('utf-8') != "Spray":
    #         break

    stream = p.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=RATE,
                    input=True,
                    input_device_index=None,
                    frames_per_buffer=CHUNK)
    ser = serial.Serial(ser_port, baud_rate)
    print(ser.readline().decode("utf-8"))
    print("Input delay is %f" % stream.get_input_latency())
    while (True):
        for i in range(int(3)):  # only loop for a fixed number of iterations (3 here)
            #if(count>1):
            #    sleep(1)
            if (count == 1):
                ser.write(KnockSignal.encode(
                    "utf-8"))  # encode is used for string.encode()
                sleep(.32)  # **change here (0.1s per 5000samples)
                flag = True
                print("Must Knock Here")
            # The input device id "2"   => built-in microphone
            # info = p.get_host_api_info_by_index(0)
            # numdevices = info.get('deviceCount')
            # for i in range(0, numdevices):
            #     if (p.get_device_info_by_host_api_device_index(0, i).get('maxInputChannels')) > 0:
            #         pass
            #print('Input Device id', i, '-', p.get_device_info_by_host_api_device_index(0, i).get('name'))
            # get the default device info
            #print(p.get_default_input_device_info())

            # create a numpy array holding a single read of audio data

            #now = datetime.now()

            if flag == True:
                # if count ==1:
                #     sleep(.5)

                np.set_printoptions(threshold=sys.maxsize)

                data = np.frombuffer(stream.read(CHUNK), dtype=np.short)
                #print(stream)
                time = np.arange(0, CHUNK)

                #peak=np.average(np.abs(data))*21
                #bars="#"*int(50*peak/2**16)
                #print("%04d %s"%(i,data))
                #print("%s %s" % (data/32768,now ))

                #print("Input data is ", type(data))

                # Test Matlab data 1
                #res = mlab.run_func('jk.m', {'arg1': data})
                #print("Output data is ", type(res['result']))
                #data1 = res['result']  # The data in matlab is float64 (e.g for 64bit window) https://stackoverflow.com/questions/8855574/convert-ndarray-from-float64-to-integer
                #M_data1 = data1[0] / 32768
                #print("jk.m is",res)

                # data1 = np.array(res['result'], dtype=np.float64).astype(np.int64)
                # print(type(data1))

                #Write data to text file before matlab
                # with open("SignalTest1.txt", "wt") as file:
                #     file.write("%s" % (str(M_data1).lstrip('[').rstrip(']')))
                #     file.flush()
                #     file.close()
                #     # file.writelines("%s %04d %s\n"%(now,i,data))
                #     # close the stream gracefully

                # max_val =np.amax(data)
                # print(max_val)
                # if max_val >30000:

                #data/32768
                #print(M_data1)

                if count == 1:
                    print("Write")
                    with open("SignalTest.txt", "wt") as out_file:
                        out_file.writelines(
                            str(data))  #it can only write string

                if plot == True and count == 2:

                    past = stream.get_time()
                    np.set_printoptions(threshold=sys.maxsize)
                    data = np.frombuffer(stream.read(CHUNK), dtype=np.short)
                    present = stream.get_time()
                    delay = present - past
                    print("The delay is %f" % delay)

                    plt.title('AudioSample')
                    plt.plot(time, data)
                    plt.ylim(-40000, 40000)
                    plt.ylabel('Amplitude')
                    plt.xlabel('Sample Size')
                    #plt.pause(.0000000000000000000000000000000000000000000000000000000001)
                    #plt.clf()

                    #print(stream.get_time())

                    dataprocess = mlab.run_func(
                        'final_judge.m', {"arg1": data})  # ,{'arg1':data}
                    # print("The input data is ",M_data1)
                    print(np.amax(data))
                    print(dataprocess['result'])
                    d1 = dataprocess['result']

                    if d1 == 1:
                        ser.write(serSignal.encode(
                            "utf-8"))  # encode is used for string.encode()
                        # print(ser.write(serSignal.encode("utf-8")))
                        #print(ser.readline().decode("utf-8"))
                        #d1 = 2
                    plt.show()
                    flag = False

                    count = 0
            count += 1

        #ser.reset_output_buffer()
    mlab.stop()
    out_file.close()
    stream.stop_stream()
    stream.close()
    p.terminate()

    sys.exit(0)
Example #4
    def play_rec(self,
                 out_file_name,
                 recode_second,
                 device_name='ReSpeaker 4 Mic Array (UAC1.0)',
                 CHUNK=1024,
                 input_file_name='./test_out.wav',
                 need_data=False,
                 order_index=None,
                 order_ch=None):
        # file_name = '../_exp/Speaker_Sound/up_tsp_1num.wav'
        wf = wave.open(out_file_name, 'rb')
        sampling = wf.getframerate()
        if order_index is not None:
            index = order_index
            channels = order_ch
        else:
            index, channels = self.get_index(device_name)
        p = pyaudio.PyAudio()

        stream1 = p.open(
            format=pyaudio.paInt16,
            channels=channels,
            rate=sampling,
            frames_per_buffer=CHUNK,
            input=True,
            input_device_index=index,
        )

        stream2 = p.open(format=pyaudio.paInt16,
                         channels=1,
                         rate=sampling,
                         frames_per_buffer=CHUNK,
                         output=True)

        if sampling * recode_second < wf.getnframes():
            print('Error: record time is not enough; need at least',
                  wf.getnframes() / sampling, 'seconds')
            sys.exit()

        elif sampling * recode_second > wf.getnframes() * 2:
            print('Error: record time is too long')
            sys.exit()

        else:
            out_data = wf.readframes(CHUNK)
            in_data = stream1.read(CHUNK)
            recoding_data = [in_data]
            for i in range(0, int(sampling / CHUNK * recode_second)):
                input_data = stream1.read(CHUNK)
                recoding_data.append(input_data)
                if out_data != b'':
                    stream2.write(out_data)
                    out_data = wf.readframes(CHUNK)
            recoded_data = b''.join(recoding_data)
            # print(type(recoded_data))
            self.wave_save(recoded_data,
                           channels=channels,
                           sampling=sampling,
                           wave_file=input_file_name)

            stream1.stop_stream()
            stream2.stop_stream()
            stream1.close()
            stream2.close()
            p.terminate()
            if need_data:
                # print('use data return data', np.frombuffer(np.array(recoding_data), dtype='int16').shape)
                recoded_input_data = np.array(np.frombuffer(np.array(recoding_data), dtype='int16'))\
                    .reshape((channels, -1), order='F')
                return recoded_input_data, sampling
Example #5
#2019h1030124h

import pyaudio
import wave

# output file name
name = "recordedAud.mp3"
chunk = 1024
# sample format
FORMAT = pyaudio.paInt16
channels = 1
# 44100 samples per second
sample_rate = 44100
record_seconds = 10
# initialize PyAudio object
obj = pyaudio.PyAudio()
# open stream object as input & output
stream = obj.open(format=FORMAT,
                  channels=channels,
                  rate=sample_rate,
                  input=True,
                  output=True,
                  frames_per_buffer=chunk)
frames = []
print("Recording audio...")
for i in range(int(44100 / chunk * record_seconds)):
    data = stream.read(chunk)
    frames.append(data)
print("Finished recording.")

stream.stop_stream()
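# The example stops after stop_stream(); a minimal, assumed continuation that releases
# PortAudio and saves the captured frames. Note that the wave module writes a WAV
# container regardless of the ".mp3" extension chosen for `name` above.
stream.close()

wf = wave.open(name, "wb")
wf.setnchannels(channels)
wf.setsampwidth(obj.get_sample_size(FORMAT))
wf.setframerate(sample_rate)
wf.writeframes(b"".join(frames))
wf.close()

obj.terminate()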
Example #6
    def recognize(self, args, userin, user_full_name, user_prefix):

        with noalsaerr():
            p = pyaudio.PyAudio()  # Create a PyAudio session
        # Create a stream
        stream = p.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            output=True,
            frames_per_buffer=CHUNK)

        try:
            data = stream.read(
                CHUNK)  # Get first data frame from the microphone
            # Loop over the frames of the audio / data chunks
            while data != '':
                rms = audioop.rms(
                    data, 2)  # Calculate Root Mean Square of current chunk
                if rms >= THRESHOLD:  # If Root Mean Square value is greater than THRESHOLD constant
                    self.decoder_pipeline.init_request(
                        "recognize",
                        "audio/x-raw, layout=(string)interleaved, rate=(int)16000, format=(string)S16LE, channels=(int)1"
                    )
                    self.decoder_pipeline.process_data(data)
                    silence_counter = 0  # Define silence counter
                    # While silence counter value less than SILENCE_DETECTION constant
                    while silence_counter < SILENCE_DETECTION:
                        data = stream.read(
                            CHUNK)  # Read a new chunk from the stream
                        if LISTENING:
                            stream.write(data, CHUNK)
                        self.decoder_pipeline.process_data(data)

                        rms = audioop.rms(
                            data, 2
                        )  # Calculate Root Mean Square of current chunk again
                        if rms < THRESHOLD:  # If Root Mean Square value is less than THRESHOLD constant
                            silence_counter += 1  # Then increase silence counter
                        else:  # Else
                            silence_counter = 0  # Assign zero value to silence counter

                    stream.stop_stream()
                    self.decoder_pipeline.end_request()
                    while not self.finished:
                        time.sleep(0.1)
                    stream.start_stream()
                    words = self.words
                    words = [x for x in words if x != '<#s>']
                    com = ' '.join(words)
                    her = VirtualAssistant(args, userin, user_full_name, user_prefix)
                    t = Thread(target=her.command, args=(com,))
                    t.start()
                    self.reset()

                data = stream.read(CHUNK)  # Read a new chunk from the stream
                if LISTENING:
                    stream.write(data, CHUNK)

        except KeyboardInterrupt:
            stream.stop_stream()
            stream.close()
            p.terminate()
            self.loop.quit()
            raise KeyboardInterrupt
Example #7
 def __init__(self):
     self.p = pyaudio.PyAudio()
     self.stream = self.p.open(
         format=pyaudio.paInt16, channels=1, rate=44100, input=True, frames_per_buffer=1024)
     self.bitlist = [0]
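 # A hypothetical helper (not in the original snippet) showing one way the stream opened
 # above could feed self.bitlist: read a chunk, threshold its RMS level, append 0 or 1.
 # The chunk size and threshold values are assumptions.
 def read_bit(self, chunk=1024, threshold=500):
     import audioop
     data = self.stream.read(chunk)
     bit = 1 if audioop.rms(data, 2) > threshold else 0
     self.bitlist.append(bit)
     return bit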
Example #8
    def start(self,
              detected_callback=play_audio_file,
              interrupt_check=lambda: False,
              sleep_time=0.03):
        """
        Start the voice detector. For every `sleep_time` second it checks the
        audio buffer for triggering keywords. If detected, then call
        corresponding function in `detected_callback`, which can be a single
        function (single model) or a list of callback functions (multiple
        models). Every loop it also calls `interrupt_check` -- if it returns
        True, then breaks from the loop and return.

        :param detected_callback: a function or list of functions. The number of
                                  items must match the number of models in
                                  `decoder_model`.
        :param interrupt_check: a function that returns True if the main loop
                                needs to stop.
        :param float sleep_time: how much time in second every loop waits.
        :return: None
        """
        self.audio = pyaudio.PyAudio()
        self.stream_in = self.audio.open(
            input=True,
            output=False,
            format=self.audio.get_format_from_width(
                self.detector.BitsPerSample() / 8),
            channels=self.detector.NumChannels(),
            rate=self.detector.SampleRate(),
            frames_per_buffer=2048,
            stream_callback=audio_callback)

        if interrupt_check():
            logger.debug("detect voice return")
            return

        tc = type(detected_callback)
        if tc is not list:
            detected_callback = [detected_callback]
        if len(detected_callback) == 1 and self.num_hotwords > 1:
            detected_callback *= self.num_hotwords

        assert self.num_hotwords == len(detected_callback), \
            "Error: hotwords in your models (%d) do not match the number of " \
            "callbacks (%d)" % (self.num_hotwords, len(detected_callback))

        logger.debug("detecting...")

        while True:
            if interrupt_check():
                logger.debug("detect voice break")
                break
            data = self.ring_buffer.get()
            if len(data) == 0:
                time.sleep(sleep_time)
                continue

            ans = self.detector.RunDetection(data)
            if ans == -1:
                logger.warning(
                    "Error initializing streams or reading audio data")
            elif ans > 0:
                message = "Keyword " + str(ans) + " detected at time: "
                message += time.strftime("%Y-%m-%d %H:%M:%S",
                                         time.localtime(time.time()))
                logger.info(message)
                callback = detected_callback[ans - 1]
                if callback is not None:
                    callback()

        logger.debug("finished.")
Example #9
def listen():
    # Variables
    status = Status.WAIT
    # Buffer of the most recent 16 bits (including the Hamming-code bits) used for SYN detection
    recent_bin_data = np.zeros(16, dtype=np.int8)

    # # Demodulated binary data
    # bin_data = np.empty(0).astype(np.int8)

    # Binary data of the message body
    input_bin_data = np.empty(0).astype(np.int8)

    # PyAudio initialization
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=common.SR,
                    frames_per_buffer=CHUNK,
                    input=True)

    # Accept input from the microphone
    while stream.is_active():
        raw_data = stream.read(CHUNK)
        data = np.frombuffer(raw_data, dtype=np.int16) / np.iinfo(np.int16).max

        signal = demodulation(data)

        recent_bin_data = np.roll(recent_bin_data, -1)
        recent_bin_data[-1] = signal
        print("chunk", recent_bin_data)

        # Waiting state
        if status == Status.WAIT:
            status_text.set("wait")
            # Check whether the most recent data matches the SYN code
            if check_syn(recent_bin_data):
                # If it is the SYN code, switch to the input-accepting state
                status = Status.READY

        # Input-accepting state
        elif status == Status.READY:
            status_text.set("ready")
            input_bin_data = np.r_[input_bin_data, signal]

            print("input bin data", input_bin_data)

            # If the signal could not be received, fall out of this state
            if np.all(recent_bin_data == -1):
                button.configure(state=tk.NORMAL)
                status_text.set("error")
                break

            # Check whether this is the SYN code
            if len(input_bin_data) % 16 == 0 and check_syn(recent_bin_data):
                # The trailing 16 bits are the SYN code (with its Hamming bits), so drop them from the input data
                input_bin_data = input_bin_data[:-16]

                # Success handling
                correct_data = correct_hamming_code(input_bin_data)
                message = decode(correct_data)
                end(message)
                break

    # PyAudio shutdown
    stream.stop_stream()
    stream.close()
    p.terminate()
Example #10
    def __init__(self, language_code='en-US', last_contexts=None):
        """Initialize all params and load data"""
        """ Constants and params """
        self.CHUNK = 4096
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.RATE = 16000
        self.USE_AUDIO_SERVER = rospy.get_param('/dialogflow_client/use_audio_server', False)
        self.PLAY_AUDIO = rospy.get_param('/dialogflow_client/play_audio', True)
        self.DEBUG = rospy.get_param('/dialogflow_client/debug', False)

        # Register Ctrl-C sigint
        signal.signal(signal.SIGINT, self._signal_handler)

        """ Dialogflow setup """
        # Get hints/clues
        rp = rospkg.RosPack()
        file_dir = rp.get_path('dialogflow_ros') + '/config/context.yaml'
        with open(file_dir, 'r') as f:
            try:
                self.phrase_hints = load(f)
            except YAMLError:
                rospy.logwarn("DF_CLIENT: Unable to open phrase hints yaml file!")
                self.phrase_hints = []

        # Dialogflow params
        project_id = rospy.get_param('/dialogflow_client/project_id', 'my-project-id')
        session_id = str(uuid4())  # Random
        self._language_code = language_code
        self.last_contexts = last_contexts if last_contexts else []
        # DF Audio Setup
        audio_encoding = AudioEncoding.AUDIO_ENCODING_LINEAR_16
        # Possible models: video, phone_call, command_and_search, default
        self._audio_config = InputAudioConfig(audio_encoding=audio_encoding,
                                              language_code=self._language_code,
                                              sample_rate_hertz=self.RATE,
                                              phrase_hints=self.phrase_hints,
                                              model='command_and_search')
        self._output_audio_config = OutputAudioConfig(
                audio_encoding=OutputAudioEncoding.OUTPUT_AUDIO_ENCODING_LINEAR_16
        )
        # Create a session
        self._session_cli = dialogflow_v2beta1.SessionsClient()
        self._session = self._session_cli.session_path(project_id, session_id)
        rospy.logdebug("DF_CLIENT: Session Path: {}".format(self._session))

        """ ROS Setup """
        results_topic = rospy.get_param('/dialogflow_client/results_topic',
                                        '/dialogflow_client/results')
        requests_topic = rospy.get_param('/dialogflow_client/requests_topic',
                                         '/dialogflow_client/requests')
        text_req_topic = requests_topic + '/string_msg'
        text_event_topic = requests_topic + '/string_event'
        msg_req_topic = requests_topic + '/df_msg'
        event_req_topic = requests_topic + '/df_event'
        self._results_pub = rospy.Publisher(results_topic, DialogflowResult,
                                            queue_size=10)
        rospy.Subscriber(text_req_topic, String, self._text_request_cb)
        rospy.Subscriber(text_event_topic, String, self._text_event_cb)
        rospy.Subscriber(msg_req_topic, DialogflowRequest, self._msg_request_cb)
        rospy.Subscriber(event_req_topic, DialogflowEvent, self._event_request_cb)

        """ Audio setup """
        # Mic stream input setup
        self.audio = pyaudio.PyAudio()
        self._server_name = rospy.get_param('/dialogflow_client/server_name',
                                            '127.0.0.1')
        self._port = rospy.get_param('/dialogflow_client/port', 4444)

        if self.PLAY_AUDIO:
            self._create_audio_output()

        rospy.logdebug("DF_CLIENT: Last Contexts: {}".format(self.last_contexts))
        rospy.loginfo("DF_CLIENT: Ready!")
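    # _create_audio_output() is called in __init__ above but not shown in this snippet.
    # A minimal sketch of what it might open, reusing the constants defined above; the
    # buffer size and mono/16-bit format here are assumptions, not the original code.
    def _create_audio_output(self):
        self._audio_output = self.audio.open(format=pyaudio.paInt16,
                                             channels=1,
                                             rate=self.RATE,
                                             output=True,
                                             frames_per_buffer=self.CHUNK)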
Example #11
    def __init__(self, API_KEY, URL, enviornment_id, collection_id,
                 NLU_API_KEY, NLU_URL, ASSISTANT_API_KEY, ASSISTANT_URL,
                 ASSISSTANT_ID, S2T_KEY, S2T_URL, SMMY_API_KEY):
        '''
        Initialize a hindsight chatbot

        :param API_KEY: IBM Watson Discovery API Key
        :param URL: IBM Watson Discovery base url
        :param enviornment_id: IBM environment id
        :param collection_id: IBM document collection id
        :return:
        '''
        self.chat_states = {'add_mode': 1, 'ask_mode': 2}
        self.speech_mode_enabled = False

        self.intents = {
            'show_notes': 1,
            'summarize_notes': 2,
            'sentiment_notes': 3
        }

        self.state = self.chat_states['add_mode']
        self.prompt = '>>> '
        self.chatprompt = '\t~~~ '
        self.state_prompt = 'Add a note: '

        self.discovery = DiscoveryV1(version='2018-12-03',
                                     iam_apikey=API_KEY,
                                     url=URL)

        self.nlu = NaturalLanguageUnderstandingV1(version='2018-11-16',
                                                  iam_apikey=NLU_API_KEY,
                                                  url=NLU_URL)

        self.assistant = AssistantV2(version='2018-11-08',
                                     iam_apikey=ASSISTANT_API_KEY,
                                     url=ASSISTANT_URL)

        self.session_id = self.assistant.create_session(
            assistant_id=ASSISSTANT_ID).get_result()['session_id']

        self.enviornment_id = enviornment_id
        self.collection_id = collection_id
        self.assistant_id = ASSISSTANT_ID

        self.ROOT_PATH = sys.path[0]

        self.METADATA_PATH = self.ROOT_PATH + '/notes_metadata'
        if not os.path.exists(self.METADATA_PATH):
            os.makedirs(self.METADATA_PATH)

        self.GLOBAL_ENTITIES = self.ROOT_PATH + '/notes_metadata/global_entities.p'
        if not os.path.exists(self.GLOBAL_ENTITIES):
            t = {'NULL': 0}
            pickle.dump(t, open(self.GLOBAL_ENTITIES, "wb"))

        self.GLOBAL_DOC_IDS = self.ROOT_PATH + '/notes_metadata/global_doc_ids.p'
        if not os.path.exists(self.GLOBAL_DOC_IDS):
            t = {'NULL': '/'}
            pickle.dump(t, open(self.GLOBAL_DOC_IDS, "wb"))

        self.NOTES_PATH = self.ROOT_PATH + '/notes.html'
        if not os.path.exists(self.NOTES_PATH):
            os.makedirs(self.NOTES_PATH)

        self.INTENT_LINES = []
        if not os.path.exists(self.ROOT_PATH + '/intent_training_data.csv'):
            print('!!! ERROR: ./scripts/intent_training_data.csv required')
            quit()
        lines = open(self.ROOT_PATH + '/intent_training_data.csv').readlines()
        self.INTENT_LINES = [l.strip().split(',')[0] for l in lines]

        self.S2T_KEY = S2T_KEY
        self.S2T_URL = S2T_URL

        self.pyAudio = pyaudio.PyAudio()

        self.SMMY_API_KEY = SMMY_API_KEY
Example #12
    def recognize(self, args, userin, user_full_name, user_prefix):

        with noalsaerr():
            p = pyaudio.PyAudio()  # Create a PyAudio session
        # Create a stream
        stream = p.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            output=True,
            frames_per_buffer=CHUNK)

        try:
            data = stream.read(CHUNK)  # Get first data frame from the microphone
            # Loop over the frames of the audio / data chunks
            audio = None
            # print("START LISTENNING")
            while data != '':
                rms = audioop.rms(data, 2)  # Calculate Root Mean Square of current chunk
                if rms >= THRESHOLD:  # If Root Mean Square value is greater than THRESHOLD constant
                    audio = data
                    silence_counter = 0  # Define silence counter
                    # While silence counter value less than SILENCE_DETECTION constant
                    while silence_counter < SILENCE_DETECTION:
                        data = stream.read(CHUNK)  # Read a new chunk from the stream
                        if LISTENING:
                            stream.write(data, CHUNK)
                        audio = audio + data

                        rms = audioop.rms(data, 2)  # Calculate Root Mean Square of current chunk again
                        if rms < THRESHOLD:  # If Root Mean Square value is less than THRESHOLD constant
                            silence_counter += 1  # Then increase silence counter
                        else:  # Else
                            silence_counter = 0  # Assign zero value to silence counter

                    # print("Analyzing...")
                    stream.stop_stream()

                    audio_data = sr.AudioData(audio, RATE, p.get_sample_size(FORMAT))
                    try:
                        com = self.recognizer.recognize_google(audio_data)
                        print(com)
                        her = VirtualAssistant(args, userin, user_full_name, user_prefix)
                        t = Thread(target=her.command, args=(com,))
                        t.start()
                    except sr.UnknownValueError:
                        # print("Google Speech Recognition could not understand audio")
                        pass
                    except sr.RequestError as e:
                        print("Could not request results from Google Speech Recognition service; {0}".format(e))

                    stream.start_stream()
                    self.reset()

                data = stream.read(CHUNK)  # Read a new chunk from the stream
                if LISTENING:
                    stream.write(data, CHUNK)

        except KeyboardInterrupt:
            stream.stop_stream()
            stream.close()
            p.terminate()
            # self.loop.quit()
            raise KeyboardInterrupt
Example #13
def recognize():
    # Voice Authentication
    FORMAT = pyaudio.paInt16
    CHANNELS = 2
    RATE = 44100
    CHUNK = 1024
    RECORD_SECONDS = 4
    FILENAME = "./test.wav"

    audio = pyaudio.PyAudio()
   
    # start Recording
    stream = audio.open(format=FORMAT, channels=CHANNELS,
                    rate=RATE, input=True,
                    frames_per_buffer=CHUNK)

    time.sleep(2.0)
    print("recording...")
    frames = []

    for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        data = stream.read(CHUNK)
        frames.append(data)
    print("finished recording")


    # stop Recording
    stream.stop_stream()
    stream.close()
    audio.terminate()

    # saving wav file 
    waveFile = wave.open(FILENAME, 'wb')
    waveFile.setnchannels(CHANNELS)
    waveFile.setsampwidth(audio.get_sample_size(FORMAT))
    waveFile.setframerate(RATE)
    waveFile.writeframes(b''.join(frames))
    waveFile.close()

    modelpath = "./gmm_models/"

    gmm_files = [os.path.join(modelpath,fname) for fname in 
                os.listdir(modelpath) if fname.endswith('.gmm')]

    models    = [pickle.load(open(fname,'rb')) for fname in gmm_files]

    speakers   = [fname.split("/")[-1].split(".gmm")[0] for fname 
                in gmm_files]
  
    if len(models) == 0:
        print("No Users in the Database!")
        return
        
    #read test file
    sr,audio = read(FILENAME)

    # extract mfcc features
    vector = extract_features(audio,sr)
    log_likelihood = np.zeros(len(models)) 

    #checking with each model one by one
    for i in range(len(models)):
        gmm = models[i]         
        scores = np.array(gmm.score(vector))
        log_likelihood[i] = scores.sum()

    pred = np.argmax(log_likelihood)
    identity = speakers[pred]
   
    # if the voice is not recognized, terminate the process
    if identity == 'unknown':
            print("Not Recognized! Try again...")
            return
    
    print( "Recognized as - ", identity)

    # face recognition
    print("Keep Your face infront of the camera")
    cap = cv2.VideoCapture(0)
    cap.set(3, 640)
    cap.set(4, 480)

    cascade = cv2.CascadeClassifier('./haarcascades/haarcascade_frontalface_default.xml')
    
    #loading the database 
    database = pickle.load(open('face_database/embeddings.pickle', "rb"))
    
    time.sleep(1.0)
    
    start_time = time.time()
    
    while True:
        curr_time = time.time()
            
        _, frame = cap.read()
        frame = cv2.flip(frame, 1, 0)
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        face = cascade.detectMultiScale(gray, 1.3, 5)
         
        name = 'unknown'
        
        
        if len(face) == 1:

            for (x, y, w, h) in face:
                roi = frame[y-10:y+h+10, x-10:x+w+10]
            
                fh, fw = roi.shape[:2]
                min_dist = 100
                
                # make sure the face is of the required height and width
                if fh < 20 or fw < 20:
                    continue

                
                #resizing image as required by the model
                img = cv2.resize(roi, (96, 96))

                #128 d encodings from pre-trained model
                encoding = img_to_encoding(img)
                
                # loop over all the recorded encodings in database 
                for knownName in database:
                    # find the similarity between the input encodings and recorded encodings in database using L2 norm
                    dist = np.linalg.norm(np.subtract(database[knownName], encoding) )
                    # check if minimum distance or not
                    if dist < min_dist:
                        min_dist = dist
                        name = knownName

            # if min dist is less than the threshold value and face and voice match, unlock the door
            if min_dist <= 0.4 and name == identity:
                print ("Door Unlocked! Welcome " + str(name))
                break

        #open the cam for 3 seconds
        if curr_time - start_time >= 3:
            break    

        cv2.waitKey(1)
        cv2.imshow('frame', frame)
        
    cap.release()
    cv2.destroyAllWindows()
   
    if len(face) == 0:
        print('There was no face found in the frame. Try again...')
        
    elif len(face) > 1:
        print("More than one faces found. Try again...")
        
    elif min_dist > 0.4 or name != identity:
        print("Not Recognized! Try again...")
Example #14
port = 23333
addr = "239.192.0.233"
buf_size = 65536

pygame.init()
pygame.camera.init()
size = (128, 96)
cam = pygame.camera.Camera("/dev/video0", size)

NUM_SAMPLES = 2000
framerate = 8000
channels = 1
sampwidth = 2
sleep_time = 0.25

pin = pyaudio.PyAudio()
streamin = pin.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=framerate,
                    input=True,
                    frames_per_buffer=NUM_SAMPLES)
pout = pyaudio.PyAudio()
streamout = pout.open(format=pyaudio.paInt16,
                      channels=1,
                      rate=framerate,
                      output=True)

TYPE = 3


def init():
Example #15
def find_input_devices():
    pa = pyaudio.PyAudio()
    for i in range(pa.get_device_count()):
        devinfo = pa.get_device_info_by_index(i)
        print("Device %d: %s" % (i, devinfo["name"]))
Example #16
 def __init__(self, freq):
     self.audio = pyaudio.PyAudio()
     self.freq = freq
     self.stream = audio.get_stream(self.audio, output=True)
     self.ping_buffer = make_buffer_from_bit_pattern(
         self.bitstream, self.freq, 0)
Example #17
    CHANNELS = 1

    CHUNK_SIZE = int(FRAME_LEN * SAMPLE_RATE)
    asr = FrameASR(model_definition={
        'sample_rate': SAMPLE_RATE,
        'AudioToMelSpectrogramPreprocessor': cfg.preprocessor.params,
        'JasperEncoder': cfg.encoder.params,
        'labels': cfg.decoder.params.vocabulary
    },
                   frame_len=FRAME_LEN,
                   frame_overlap=2,
                   offset=4)

    asr.reset()

    p = pa.PyAudio()
    #print('Available audio input devices:')
    input_devices = []
    for i in range(p.get_device_count()):
        dev = p.get_device_info_by_index(i)
        if dev.get('maxInputChannels'):
            input_devices.append(i)
            #print(i, dev.get('name'))

    if len(input_devices):
        dev_idx = -2
        while dev_idx not in input_devices:
            # print('Please type input device ID:')
            dev_idx = 3  # 3 for virtual cable

        empty_counter = 0
Example #18
def main():
    if len(sys.argv) != 4:
        print("Error!!!!")
        exit()
    ip = sys.argv[1]  # Server's ip (argv: argument)
    port = sys.argv[2]  # Server's port
    identity = sys.argv[3].encode('ascii')
    connected = False

    context = zmq.Context()
    s = context.socket(zmq.DEALER)
    s.identity = identity
    s.connect("tcp://{}:{}".format(
        ip,
        port))  # braces because the ip and port parameters are passed in

    print("Started client with id {}".format(identity))

    poller = zmq.Poller()
    poller.register(sys.stdin, zmq.POLLIN)
    poller.register(s, zmq.POLLIN)
    FORMAT = pyaudio.paInt16
    CHANNELS = 2
    RATE = 44100
    CHUNK = 1024
    RECORD_SECONDS = 0.3

    p = pyaudio.PyAudio()
    first = True

    global queue

    print("\n----Menu----")
    print("- 'bring' {id de usuario}  ......... Invitar a sesion (sin llaves)")
    print("- 'exit'                   ......... Salir del programa")

    threads = []
    while True:
        socks = dict(poller.poll())
        if s in socks:
            op, *msg = s.recv_multipart()
            if op.decode() == "connect":
                connected = True
            elif op.decode() == "play":
                # RECEIVING FRAMES
                if msg[0] in queue:
                    queue[msg[0]].append(msg[1:])
                else:
                    queue[msg[0]] = []
                    queue[msg[0]].append(msg[1:])
                    threads.append(
                        threading.Thread(target=play, args=(msg[0], )))
                    threads[-1].start()
        if sys.stdin.fileno() in socks:
            command = input()
            command = command.split()
            if command[0] == "bring":
                s.send_multipart(
                    [bytes(command[0], 'ascii'),
                     bytes(command[1], 'ascii')])
                connected = True
            elif command[0] == "exit":
                s.send_multipart(
                    [bytes(command[0], 'ascii'),
                     bytes("NA", 'ascii')])
                break
            else:
                print(' Operation not supported')
        if connected:
            if first:
                stream = p.open(format=FORMAT,
                                channels=CHANNELS,
                                rate=RATE,
                                input=True,
                                output=False,
                                frames_per_buffer=CHUNK)

                first = False
            frames = [bytes('send', 'ascii')]
            for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
                frames.append(stream.read(CHUNK))
            # SENDING FRAMES
            s.send_multipart(frames)

    stream.stop_stream()
    stream.close()
    p.terminate()
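# play() and the global `queue` dict are defined elsewhere in the original program. A
# hypothetical sketch of the playback side: each thread drains the frame lists queued for
# one sender and writes them to an output stream (the audio constants mirror the ones in
# main(); the polling interval is an assumption).
import time

def play(sender_id):
    pa = pyaudio.PyAudio()
    out = pa.open(format=pyaudio.paInt16, channels=2, rate=44100,
                  output=True, frames_per_buffer=1024)
    while True:
        if queue.get(sender_id):
            # each queue entry is the list of CHUNK-sized frames from one multipart message
            for frame in queue[sender_id].pop(0):
                out.write(frame)
        else:
            time.sleep(0.01)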
Example #19
def main():
    mac_addr = open('/sys/class/net/wlan0/address').readline()

    # Connect to db
    con = MySQLdb.Connection(host=HOST,
                             port=PORT,
                             user=USER,
                             passwd=PASSWORD,
                             db=DB)
    c = con.cursor()
    c.execute(
        '''CREATE TABLE IF NOT EXISTS zeroData(temp FLOAT, pres FLOAT, hum FLOAT, gas FLOAT, lux INTEGER, db FLOAT, dt DATETIME)'''
    )

    # Initialize db
    parser = argparse.ArgumentParser()
    parser.add_argument("db", help="zeroData")
    parser.add_argument("token", help="35d4aa441b94cdbae7404050edd3fad6")
    args = parser.parse_args()
    corlysis_params = {
        "db": args.db,
        "u": "token",
        "p": args.token,
        "precision": "ms"
    }

    # Initialize sensor
    bme = bme680.BME680(i2c_addr=0x77)
    bme.set_humidity_oversample(bme680.OS_2X)
    bme.set_pressure_oversample(bme680.OS_4X)
    bme.set_temperature_oversample(bme680.OS_8X)
    bme.set_filter(bme680.FILTER_SIZE_3)
    bme.set_gas_status(bme680.ENABLE_GAS_MEAS)

    # Initialize USB mic
    pyaud = pyaudio.PyAudio()
    stream = pyaud.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=32000,
                        input_device_index=2,
                        input=True)

    payload = ""
    counter = 1
    problem_counter = 0

    now = time.strftime('%Y-%m-%d %H:%M:%S')
    print("Readings began " + now)
    print("Press ctrl+c to end readings and close connection.")

    animation = "|/-\\"
    aniCount = 0

    # Main loop
    while (True):
        try:
            # Get time for corlysis and db
            unix_time_ms = int(time.time() * 1000)
            now = time.strftime('%Y-%m-%d %H:%M:%S')

            # Read from BME
            bme.get_sensor_data()
            tempCelcius = float("{0:.2f}".format(bme.data.temperature))
            # Convert the above variable to fahrenheit
            temperature = float(tempCelcius * (9 / 5) + 32)
            pressure = float("{0:.2f}".format(bme.data.pressure))
            humidity = float("{0:.2f}".format(bme.data.humidity))
            gas = float("{0:.2f}".format(bme.data.gas_resistance))

            # Read from lux sensor
            tsl = TSL2561(debug=True)
            luxVal = tsl.lux()

            # Read from USB mic
            rawsamps = stream.read(2048, exception_on_overflow=False)
            samps = numpy.frombuffer(rawsamps, dtype=numpy.int16)
            deciVal = analyse.loudness(samps) + 65

            line = "sensors_data temperature={},pressure={},humidity={},luxVal={},decib={} {}\n".format(
                temperature, pressure, humidity, luxVal, deciVal, unix_time_ms)
            payload += line

            if counter % SENDING_PERIOD == 0:
                try:
                    # try to send data to cloud
                    r = requests.post(URL,
                                      params=corlysis_params,
                                      data=payload)
                    if r.status_code != 204:
                        raise Exception("data not written")
                    payload = ""
                except:
                    problem_counter += 1
                    print('cannot write to InfluxDB')
                    if problem_counter == MAX_LINES_HISTORY:
                        problem_counter = 0
                        payload = ""

            counter += 1

            # Print animation
            sys.stdout.write("\rCollecting data... " + animation[aniCount])
            sys.stdout.flush()
            aniCount += 1
            if (aniCount == 4):
                aniCount = 0

            time_diff_ms = int(time.time() * 1000) - unix_time_ms
            # print(time_diff_ms)
            if time_diff_ms < READING_DATA_PERIOD_MS:
                time.sleep((READING_DATA_PERIOD_MS - time_diff_ms) / 1000.0)

            values = (mac_addr, temperature, pressure, humidity, gas, luxVal,
                      deciVal, now)
            add_val = ("INSERT INTO data "
                       "(mac, temp, pres, hum, gas, lux, db, dt)"
                       "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)")
            c.execute(add_val, values)
            con.commit()

        except KeyboardInterrupt:
            con.close()
            break

        except Exception as e:
            print(e)
Example #20
__author__ = 'Victor'
import sys
import math
import wave
import struct
import curses
import pyaudio
import numpy as np
import matplotlib.pyplot as plt

standard = curses.initscr()
standard.nodelay(True)
curses.noecho()
curses.cbreak()

pythonAudioObject = pyaudio.PyAudio()
MODE = sys.argv[1]
FOLD = 1
SAMPLE_RATE = 44100
CHANNELS = 2
WIDTH = 2

try:
    IterationsN = int(sys.argv[3])
except (ValueError, IndexError):
    print('The third argument has to be a number.')
    sys.exit()


def main():
    standard.addstr('Noise-cancelling live')
Example #21
def arduino_soundlight():
    chunk = 2**11  # Change if too fast/slow, never less than 2**11
    scale = 50  # Change if too dim/bright
    exponent = 5  # Change if too little/too much difference between loud and quiet sounds
    samplerate = 44100

    # CHANGE THIS TO CORRECT INPUT DEVICE
    # Enable stereo mixing in your sound card
    # to make you sound output an input
    # Use list_devices() to list all your input devices
    device = 3

    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=44100,
                    input=True,
                    frames_per_buffer=chunk,
                    input_device_index=device)

    print "Starting, use Ctrl+C to stop"
    try:
        #ser = serial.Serial(
        #port='com3',
        #timeout=1
        #)
        while True:
            data = stream.read(chunk)
            '''
            # Old RMS code, will only show the volume

            rms   = audioop.rms(data, 2)

            #level = min(rms / (2.0 ** 11) * scale, 1.0)
            level = max(min(rms / scale, 1.0), 0.0)
            level = level**exponent
            level = int(level * 255)

            print level
            #ser.write(chr(level))
            '''

            # Do FFT
            levels = calculate_levels(data, chunk, samplerate)
            algo = []
            # Make it look better and send to serial
            for level in levels:
                level = max(min(level / scale, 1.0), 0.0)
                level = level**exponent
                level = int(level * 255)

                algo.append(level)
                #print '>' * level,
                #sys.stdout.flush()
                #ser.write(chr(level))
            '''print '>' * algo[0] + '\r'
            print '>' * algo[1] + '\r'
            print '>' * algo[2] + '\r'
            print '>' * algo[3] + '\r'
            print '>' * algo[4] + '\r'
            print '>' * algo[5] + '\r',
            sys.stdout.flush()'''
            #s = ser.read(6)
            if (((algo[2] + algo[3] + algo[0] + algo[1]) / 2) >= 5):
                para.setData(2)
            else:
                para.setData(0)
    except KeyboardInterrupt:
        pass
    finally:
        print "\nStopping"
        stream.close()
        p.terminate()
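
A note on the input_device_index above: the comment refers to a list_devices() helper that is not shown in the snippet. A minimal sketch of such a helper, using PyAudio's standard device-query calls (the name list_devices is only the example's own convention):

def list_devices():
    # Print the index and name of every capture-capable device PyAudio can see.
    p = pyaudio.PyAudio()
    try:
        for i in range(p.get_device_count()):
            info = p.get_device_info_by_index(i)
            if info.get('maxInputChannels', 0) > 0:
                print("%d: %s" % (i, info['name']))
    finally:
        p.terminate()
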
Example #22
    def worker(self):
        audio = pyaudio.PyAudio()

        print('\n*******************************************')
        print('RHAPSODY MODULE-I INPUT')
        print('*******************************************\n')
        print('\n===========================================')
        print('STARTED RECORDING')
        print('===========================================\n')

        for i in range(1, 4):
            print('\n===========================================')
            print(str(i) + '...')
            print('===========================================\n')
            sleep(1)

        stream = audio.open(format=self.FORMAT,
                            channels=self.CHANNELS,
                            rate=self.RATE,
                            input=True,
                            frames_per_buffer=self.CHUNK)

        f = []

        for i in range(0, int(self.RATE / self.CHUNK * self.RECORD_SECONDS)):
            data = stream.read(self.CHUNK)
            f.append(data)

        print('\n===========================================')
        print('DONE RECORDING')
        print('===========================================\n')

        stream.stop_stream()
        stream.close()
        audio.terminate()

        wf = wave.open(self.WAVE_OUTPUT_FILENAME, 'wb')
        wf.setnchannels(self.CHANNELS)
        wf.setsampwidth(audio.get_sample_size(self.FORMAT))
        wf.setframerate(self.RATE)
        wf.writeframes(b''.join(f))
        wf.close()
        """""" """""" """""" """""" """""" """""" """
        1 - Loading File
        """ """""" """""" """""" """""" """""" """"""
        filename = self.WAVE_OUTPUT_FILENAME
        y, sr = librosa.load(filename)
        """""" """""" """""" """""" """""" """""" """
        2 - Get Tempo == bpm
        """ """""" """""" """""" """""" """""" """"""
        tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
        print('\n===========================================')
        print('Estimated tempo: {:.2f} beats per minute'.format(tempo))
        print('===========================================\n')

        # generate csv files with beat times
        #CSV_FILENAME = self.WAVE_OUTPUT_FILENAME_NO_EXTENSION + ".csv"

        beat_times = librosa.frames_to_time(beat_frames, sr=sr)
        CSV_FILENAME = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "Recordings",
                         self.final + ".csv"))
        librosa.output.times_csv(CSV_FILENAME, beat_times)

        # WRITING A FILE WITH THE TEMPO
        #TEXT_FILENAME = self.WAVE_OUTPUT_FILENAME_NO_EXTENSION + ".txt"
        TEXT_FILENAME = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "Recordings",
                         self.final + ".txt"))
        bpm_value = open(TEXT_FILENAME, 'w')
        tempo_text = str(tempo) + '\n'
        bpm_value.write(tempo_text)
        """""" """""" """""" """""" """""" """""" """
        3 - Get Notes
        """ """""" """""" """""" """""" """""" """"""
        hz = librosa.feature.chroma_cqt(y=y, sr=sr)

        ## GET STRONGEST OCTAVE
        strongestOctave = 0
        strongestOctave_sum = 0
        for octave in range(len(hz)):
            sum = 0
            for frame in hz[octave]:
                sum = sum + frame
            if sum > strongestOctave_sum:
                strongestOctave_sum = sum
                strongestOctave = octave

        ## GET HIGHEST HZ FOR EACH TIME FRAME
        strongestHz = []
        for i in range(len(hz[0])):
            strongestHz.append(0)

        notes = []
        for i in range(len(hz[0])):
            notes.append(0)

        for frame_i in range(len(hz[0])):
            strongest_temp = 0
            for octave_i in range(len(hz)):

                if hz[octave_i][frame_i] > strongest_temp:
                    strongest_temp = hz[octave_i][frame_i]
                    strongestHz[frame_i] = octave_i + 1
                    notes[frame_i] = librosa.hz_to_note(hz[octave_i][frame_i])

        # C C# D D# E F F# G G# A  A# B
        # 1 2  3 4  5 6 7  8 9  10 11 12
        strongestHz_sum = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        for note in strongestHz:
            strongestHz_sum[note - 1] = strongestHz_sum[note - 1] + 1

        for i in range(len(strongestHz_sum)):
            strongestHz_sum[i] = float(strongestHz_sum[i]) / len(strongestHz)

        noteSorted = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        for num in range(len(noteSorted)):
            biggest = strongestHz_sum.index(max(strongestHz_sum))
            noteSorted[num] = biggest + 1
            strongestHz_sum[biggest] = strongestHz_sum[biggest] - 0.25

        for note in noteSorted:
            noteString = str(note) + '\n'
            bpm_value.write(noteString)

        bpm_value.close()

        print('\n===========================================')
        print('RECORDING ANALYSIS COMPLETED SUCCESSFULLY!!!')
        print('===========================================\n')

        self.finished.emit()
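
One portability note on the worker above: the librosa.output module (and with it librosa.output.times_csv) was removed in librosa 0.8, so on newer installs the beat times have to be written out manually. A minimal sketch, assuming beat_times and CSV_FILENAME are the values computed above:

import numpy as np

# Drop-in replacement for librosa.output.times_csv on librosa >= 0.8:
# one beat time per row, in seconds.
np.savetxt(CSV_FILENAME, beat_times, fmt='%.5f', delimiter=',')
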
Example #23
File: rhino_demo.py  Project: zexee/rhino
    def run(self):
        """
         Creates an input audio stream, initializes wake word detection (Porcupine) and speech to intent (Rhino)
         engines, and monitors the audio stream for occurrences of the wake word and then infers the intent from speech
         command that follows.
         """

        porcupine = None
        rhino = None
        pa = None
        audio_stream = None

        wake_phrase_detected = False
        intent_extraction_is_finalized = False

        try:
            porcupine = Porcupine(
                library_path=self._porcupine_library_path,
                model_file_path=self._porcupine_model_file_path,
                keyword_file_paths=[self._porcupine_keyword_file_path],
                sensitivities=[self._porcupine_sensitivity])

            rhino = Rhino(
                library_path=self._rhino_library_path,
                model_file_path=self._rhino_model_file_path,
                context_file_path=self._rhino_context_file_path)

            print()
            print('****************************** context ******************************')
            print(rhino.context_expressions)
            print('*********************************************************************')
            print()

            pa = pyaudio.PyAudio()

            audio_stream = pa.open(
                rate=porcupine.sample_rate,
                channels=1,
                format=pyaudio.paInt16,
                input=True,
                frames_per_buffer=porcupine.frame_length,
                input_device_index=self._input_device_index)

            # NOTE: This is true now and will possibly remain true forever. If it ever changes, the logic below needs to change.
            assert porcupine.frame_length == rhino.frame_length

            while True:
                pcm = audio_stream.read(porcupine.frame_length)
                pcm = struct.unpack_from("h" * porcupine.frame_length, pcm)

                if self._output_path is not None:
                    self._recorded_frames.append(pcm)

                if not wake_phrase_detected:
                    wake_phrase_detected = porcupine.process(pcm)
                    if wake_phrase_detected:
                        print('detected wake phrase')
                elif not intent_extraction_is_finalized:
                    intent_extraction_is_finalized = rhino.process(pcm)
                else:
                    if rhino.is_understood():
                        intent, slot_values = rhino.get_intent()
                        print()
                        print('intent: %s' % intent)
                        print('---')
                        for slot, value in slot_values.items():
                            print('%s: %s' % (slot, value))
                        print()
                    else:
                        print("didn't understand the command")

                    rhino.reset()
                    wake_phrase_detected = False
                    intent_extraction_is_finalized = False

        except KeyboardInterrupt:
            print('stopping ...')

        finally:
            if porcupine is not None:
                porcupine.delete()

            if rhino is not None:
                rhino.delete()

            if audio_stream is not None:
                audio_stream.close()

            if pa is not None:
                pa.terminate()

            if self._output_path is not None and len(self._recorded_frames) > 0:
                recorded_audio = np.concatenate(self._recorded_frames, axis=0).astype(np.int16)
                soundfile.write(self._output_path, recorded_audio, samplerate=porcupine.sample_rate, subtype='PCM_16')
Example #24

def play_the_recording(file_path):
    p = vlc.MediaPlayer(file_path)
    p.play()


if __name__ == '__main__':
    filename = 'sample.wav'
    while True:
        print()
        command = input('Enter q to stop. Otherwise, press any key: ')
        if command == 'q':
            break

        player = pyaudio.PyAudio()
        record_to_file(filename, player)
        player.terminate()

        # Speech-to-text: Uzbek language
        texts = speech_to_text(filename, 'uz')

        # Translations Uzbek -> English
        textEn = translate_text(texts[0], 'uz', 'en')

        # Translation English -> Uzbek
        textUz = translate_text(textEn, 'en', 'uz')

        # Text-to-Speech: Uzbek language is not supported. Using English instead.
        output_file = text_to_speech(textUz, 'en')
Example #25
def localize():
    global switch_beamforming
    global DO_BEAMFORM
    # Setup search space
    source_plane = OrientedSourcePlane(SOURCE_PLANE_NORMAL, SOURCE_PLANE_UP,
                                       SOURCE_PLANE_OFFSET)
    space = SearchSpace(MIC_LOC, CAMERA_LOC, [source_plane])

    # Setup pyaudio instances
    pa = pyaudio.PyAudio()
    helper = AudioHelper(pa)
    localizer = KalmanTrackingLocalizer(mic_positions=mic_layout,
                                        search_space=space,
                                        mic_forward=MIC_FORWARD,
                                        mic_above=MIC_ABOVE,
                                        trans_mat=STATE_TRANSITION_MAT,
                                        state_cov=STATE_TRANSITION_MAT,
                                        emission_mat=EMISSION_MAT,
                                        emission_cov=EMISSION_COV,
                                        dft_len=FFT_LENGTH,
                                        sample_rate=SAMPLE_RATE,
                                        n_theta=N_THETA,
                                        n_phi=N_PHI)
    beamformer = BeamFormer(mic_layout, SAMPLE_RATE)

    # Setup STFT object
    stft = StftManager(dft_length=FFT_LENGTH,
                       window_length=WINDOW_LENGTH,
                       hop_length=HOP_LENGTH,
                       use_window_fcn=True,
                       n_channels=NUM_CHANNELS_IN,
                       dtype=DATA_TYPE)

    # Setup devices
    in_device = helper.get_input_device_from_user()
    if PLAY_AUDIO:
        out_device = helper.get_output_device_from_user()
    else:
        out_device = helper.get_default_output_device_info()

    # Setup streams
    in_stream = pa.open(rate=SAMPLE_RATE,
                        channels=NUM_CHANNELS_IN,
                        format=SAMPLE_TYPE,
                        frames_per_buffer=FRAMES_PER_BUF,
                        input=True,
                        input_device_index=int(in_device['index']),
                        stream_callback=read_in_data)
    out_stream = pa.open(rate=SAMPLE_RATE,
                         channels=NUM_CHANNELS_OUT,
                         format=SAMPLE_TYPE,
                         output=True,
                         frames_per_buffer=FRAMES_PER_BUF,
                         output_device_index=int(out_device['index']),
                         stream_callback=write_out_data)

    # Start recording/playing back
    in_stream.start_stream()
    out_stream.start_stream()

    # Start thread to check for user quit
    quit_thread = threading.Thread(target=check_for_quit)
    quit_thread.start()

    # Setup directions and alignment matrices
    direcs = localizer.get_directions()
    align_mats = localizer.get_pos_align_mat()

    # Plotting setup
    if PLOT_POLAR:
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='polar')
        ax.set_rlim(0, 1)
        plt.show(block=False)
        # Setup space for plotting in new coordinates
        spher_coords = localizer.get_spher_directions()
        theta = spher_coords[1, :]
        pol_plot, = plt.plot(theta, np.ones(theta.shape))
        post_plot, = plt.plot(theta, np.ones(theta.shape), 'green')
        ax.set_ylim(0, 1)
        if DO_BEAMFORM:
            pol_beam_plot, = plt.plot(theta, np.ones(theta.shape), 'red')
    if PLOT_CARTES:
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        plt.show(block=False)
        x = localizer.to_spher_grid(direcs[0, :])
        y = localizer.to_spher_grid(direcs[1, :])
        z = localizer.to_spher_grid(direcs[2, :])
        #scat = ax.scatter(x, y, z, s=100)
    if EXTERNAL_PLOT:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.show(block=False)

    count = 0
    try:
        global done
        while in_stream.is_active() or out_stream.is_active():
            data_available = in_buf.wait_for_read(WINDOW_LENGTH, TIMEOUT)
            if data_available:
                if switch_beamforming:
                    DO_BEAMFORM = not DO_BEAMFORM
                    switch_beamforming = False
                # Get data from the circular buffer
                data = in_buf.read_samples(WINDOW_LENGTH)
                # Perform an stft
                stft.performStft(data)
                # Process dfts from windowed segments of input
                dfts = stft.getDFTs()
                rffts = mat.to_all_real_matlab_format(dfts)
                d, energy = localizer.get_distribution_real(
                    rffts[:, :, 0], 'gcc')  # Use first hop
                post = localizer.get_distribution(rffts[:, :, 0])
                ind = np.argmax(post)
                u = 1.5 * direcs[:, ind]  # Direction of arrival
                #if energy < 500:
                #    continue

                # Do beam forming
                if DO_BEAMFORM:
                    align_mat = align_mats[:, :, ind]
                    filtered = beamformer.filter_real(rffts, align_mat)
                    mat.set_dfts_real(dfts, filtered, n_channels=2)

                # Take care of plotting
                if count % 1 == 0:
                    if PLOT_CARTES:
                        ax.cla()
                        ax.grid(False)
                        d = localizer.to_spher_grid(
                            post / (np.max(post) + consts.EPS))
                        #d = localizer.to_spher_grid(d / (np.max(d) + consts.EPS))
                        ax.scatter(x, y, z, c=d, s=40)
                        #ax.plot_surface(x, y, z, rstride=1, cstride=1, facecolor=plt.cm.gist_heat(d))
                        ax.plot([0, u[0]], [0, u[1]], [0, u[2]],
                                c='black',
                                linewidth=3)
                        if DO_BEAMFORM:
                            if np.max(np.abs(response)) > 1:
                                response /= np.max(np.abs(response))
                            X = response * x
                            Y = response * y
                            Z = response * z
                            ax.plot_surface(X,
                                            Y,
                                            Z,
                                            rstride=1,
                                            cstride=1,
                                            color='white')
                        ax.set_xlim(-1, 1)
                        ax.set_ylim(-1, 1)
                        ax.set_zlim(0, 1)
                        #ax.view_init(90, -90)
                        fig.canvas.draw()
                    if PLOT_2D:
                        # Get unconditional distribution
                        dist = localizer.to_spher_grid(d)
                        dist -= np.min(dist)
                        dist /= (np.sum(dist) + consts.EPS)
                        sample_mat[:, :-1] = sample_mat[:, 1:]
                        sample_mat[:, -1] = dist
                        # Get kalman estimate
                        maxind = np.argmax(post)
                        estimate_mat[:-1] = estimate_mat[1:]
                        estimate_mat[-1] = maxind
                        plot_2d.set_array(sample_mat)
                        state_est_plot.set_ydata(estimate_mat)
                        plt.draw()
                count += 1

                # Get the istft of the processed data
                if PLAY_AUDIO or RECORD_AUDIO:
                    new_data = stft.performIStft()
                    new_data = out_buf.reduce_channels(new_data,
                                                       NUM_CHANNELS_IN,
                                                       NUM_CHANNELS_OUT)
                    # Write out the new, altered data
                    if PLAY_AUDIO:
                        if out_buf.get_available_write() >= WINDOW_LENGTH:
                            out_buf.write_samples(new_data)
                    if RECORD_AUDIO:
                        if record_buf.get_available_write() >= WINDOW_LENGTH:
                            record_buf.write_samples(new_data)

    except KeyboardInterrupt:
        print "Program interrupted"
        done = True

    print "Cleaning up"
    in_stream.stop_stream()
    in_stream.close()
    out_stream.stop_stream()
    out_stream.close()
    pa.terminate()

    # Take care of output file
    if RECORD_AUDIO:
        print "Writing output file"
        make_wav()

    print "Done"
Example #26
    def UploadSignal(self):

        self.ui.GVOriginal.clear()
        filePaths = QtWidgets.QFileDialog.getOpenFileNames(
            self, 'Open File', "~/Desktop/sigViews", '*.wav')

        for filePath in filePaths:
            for self.f in filePath:
                if self.f == '*':
                    break
                p = pyaudio.PyAudio()
                self.waveFile = wave.open(self.f, 'rb')

                # wav1 = wave.open(f,'rb')
                # self.ywav1=wav1.readframes(-1)
                # self.ywav1 =np.fromstring(self.ywav1,'Int16')
                # fs=wav1.getframerate()
                # self.xwav1=np.linspace(0,len(self.ywav1)/fs,num=len(self.ywav1))
                # print("length of signal")
                # print(len(self.xwav1))

                # self.ui.GVOriginal.plot(self.xwav1,self.ywav1, pen='b')

                self.format = p.get_format_from_width(
                    self.waveFile.getsampwidth())

                channel = self.waveFile.getnchannels()
                self.rate = self.waveFile.getframerate()
                self.frame = self.waveFile.getnframes()
                self.stream = p.open(
                    format=self.format,  # DATA needed for streaming
                    channels=channel,
                    rate=self.rate,
                    output=True)

                #durationF = self.frame / float(self.rate)
                self.data_int = self.waveFile.readframes(self.frame)
                self.data_plot = np.fromstring(self.data_int, 'Int16')
                self.data_plot.shape = -1, 2

                self.data_plot = self.data_plot.T  # Y-axis
                self.ywav1 = self.data_plot
                print('original data', self.ywav1)

                self.time = np.arange(0, self.frame) * (1.0 / self.rate
                                                        )  #X-axis
                self.xwav1 = self.time
                #fft_frame = np.fft.rfft(current_frame)

                self.ywav1min = np.nanmin(self.ywav1[1])
                self.ywav1max = np.nanmax(self.ywav1[1])
                self.ui.GVOriginal.setXRange(self.xwav1[0], self.xwav1[-1])
                self.ui.GVOriginal.plotItem.getViewBox().setLimits(
                    xMin=self.xwav1[0],
                    xMax=self.xwav1[-1],
                    yMin=self.ywav1min - self.ywav1min * 0.1,
                    yMax=self.ywav1max + self.ywav1max * 0.1)

                self.ui.GVOriginal.plot(self.xwav1, self.ywav1[1], pen='b')

                #===============================================Fourier Transform===============================================#

                self.fs_rate, self.spf = wavfile.read(self.f)

                # print("araay",self.spf.shape)
                print("Frequency sampling", self.fs_rate)

                l_audio = len(self.spf.shape)
                print("Channels", l_audio)
                if l_audio == 2:
                    self.spf = self.spf.mean(
                        axis=1)  # To make it a mono signal, 1 channel only

                N = self.spf.shape[0]  # Give number of rows
                print("complete Sampling N", N)
                secs = N / float(self.fs_rate)
                print("secs", secs)
                Ts = 1.0 / self.fs_rate  # sampling interval in time
                print("Timestep between Ts", Ts)
                t = np.arange(0, secs, Ts)  # scipy.arange is gone from modern SciPy; use NumPy directly

                self.FFT = abs(scipy.fftpack.fft(self.spf))  # scipy.fft is a module in modern SciPy, not a callable

                self.freqs = scipy.fftpack.fftfreq(
                    self.spf.size, t[1] - t[0]
                )  # Return the Discrete Fourier Transform sample frequencies. t[1]-t[0] is the sample spacing

                FFT_side = self.FFT[range(
                    N // 2
                )]  # one side FFT range, remove the negative part (starts from zero)
                self.FFT_sideArr = np.array(FFT_side)

                self.bands = np.array_split(self.FFT_sideArr, 10)
                # self.bands=np.array_split(self.FFT,20)
                print('length of band', int(len(self.bands[1])))

                self.BandSize = int(len(self.FFT_sideArr) / 10)

                self.phase = np.angle(scipy.fftpack.fft(
                    self.spf))  # phase, we will use it later

                freqs_side = self.freqs[range(N // 2)]
                self.fft_freqs_side = np.array(freqs_side)

                self.ui.GVFourier.plot(self.freqs, self.FFT, pen='g')
                QtCore.QCoreApplication.processEvents()
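
The phase spectrum is saved above "for later" because magnitude-only edits cannot be turned back into audio on their own. A minimal sketch of that inverse step, assuming self.FFT and self.phase are the full-length magnitude and phase computed above (per-band gains would be applied to the magnitude first):

import numpy as np

# Resynthesize a time-domain signal from a (possibly re-weighted) magnitude
# spectrum and the original phase; np.real drops the negligible imaginary residue.
rebuilt = np.real(np.fft.ifft(self.FFT * np.exp(1j * self.phase)))
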
Example #27
print('Press Ctrl-C to quit.')
last_touched = cap.touched()
record_state = True
is_recording = [
    False, False, False, False, False, False, False, False, False, False,
    False, False
]
is_playing = [
    False, False, False, False, False, False, False, False, False, False,
    False, False
]
my_thread_record = None
my_thread_play = None
is_pygame_init = False
my_thread_init = None
audio = pyaudio.PyAudio()  # create pyaudio instantiation


def loop_record2(x):
    form_1 = pyaudio.paInt16  # 16-bit resolution
    chans = 1  # 1 channel
    samp_rate = 44100  # 44.1kHz sampling rate
    chunk = 4096  # 2^12 samples for buffer
    record_secs = 120  # seconds to record
    dev_index = 2  # device index found by p.get_device_info_by_index(ii)
    file_array = [
        'test0.wav', 'test1.wav', 'test2.wav', 'test3.wav', 'test4.wav',
        'test5.wav', 'test6.wav', 'test7.wav', 'test8.wav', 'test9.wav',
        'test10.wav'
    ]
    wav_output_filename = file_array[x]  # name of .wav file
def speakerRecog():
    #Recording Phase
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 2
    RATE = 44100
    RECORD_SECONDS = 5
    WAVE_OUTPUT_FILENAME = "./SR/samples/test.wav"

    p = pyaudio.PyAudio()

    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    print("* recording")

    frames = []

    for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        data = stream.read(CHUNK)
        frames.append(data)

    print("* done recording")

    stream.stop_stream()
    stream.close()
    p.terminate()

    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
    wf.close()

    #Now the recording is stored in test.wav
    #We will now test the recording with the gmm models

    source = "./SR/samples/"
    modelpath = "./SR/gmm_models/"
    test_file = "./SR/testing_sample_list.txt"
    file_paths = open(test_file, 'r')

    gmm_files = [
        os.path.join(modelpath, fname) for fname in os.listdir(modelpath)
        if fname.endswith(".gmm")
    ]
    print(gmm_files)

    #Load the Gaussian gender Models
    models = [cPickle.load(open(fname, 'rb')) for fname in gmm_files]
    speakers = [fname.split("/")[-1].split(".gmm")[0] for fname in gmm_files]

    # Read the test directory and get the list of test audio files
    for path in file_paths:

        path = path.strip()
        print(path)
        sr, audio = read(source + path)
        vector = extract_features(audio, sr)

        log_likelihood = np.zeros(len(models))

    for i in range(len(models)):
        gmm = models[i]  #checking with each model one by one
        scores = np.array(gmm.score(vector))
        log_likelihood[i] = scores.sum()

    winner = np.argmax(log_likelihood)
    print("\tdetected as - ", speakers[winner])
    return speakers[winner]
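
speakerRecog assumes per-speaker GMMs already exist as .gmm pickles in modelpath. A minimal sketch of how one such model could be produced with scikit-learn, assuming extract_features is the same helper used above and train_audio/sr come from a speaker's enrollment recording:

import pickle
from sklearn.mixture import GaussianMixture

features = extract_features(train_audio, sr)        # (n_frames, n_dims) feature array
gmm = GaussianMixture(n_components=16, covariance_type='diag', n_init=3)
gmm.fit(features)
# newer scikit-learn exposes per-frame log-likelihoods via gmm.score_samples(features)
with open('./SR/gmm_models/speaker1.gmm', 'wb') as fh:
    pickle.dump(gmm, fh)
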
Example #29
max_fps = 60
width_of_col = 1
scale = 1
skip_under = 0
file_path = "./File0161.wav"

#Init
pygame.init()
screen = pygame.display.set_mode(size)
pygame.display.set_caption("AV")
done = False
clock = pygame.time.Clock()

wf = wave.open(file_path, "rb")

p = pyaudio.PyAudio()
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True,
                input=True)

while not done:
    # --- Main event loop
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            done = True

    temp = wf.readframes(chunk)

    if len(temp) < 4096:
Example #30
    def GoRun(self):  #main button callback for collecting new data
        self.Status = True
        self.fs = 44100  #set sample rate, default to 44100
        iters = 1000  # (mostly) deprecated
        chunkSize = 8192  #number of samples to read in at once
        windowSize = 3  #number of seconds to plot at once
        numSamples = iters * chunkSize

        #set up an audio stream
        p = pyaudio.PyAudio()
        audioStream = p.open(format=pyaudio.paInt16,
                             channels=1,
                             rate=self.fs,
                             input=True,
                             frames_per_buffer=chunkSize)

        #empty out the recording
        self.Recording = np.zeros(numSamples, dtype=np.int16)
        self.Formants = np.zeros((100, 5), dtype=np.float32)
        self.FormantTime = np.zeros(100, dtype=np.float32)
        self.Pitch = np.zeros(100, dtype=np.float32)
        self.PitchTime = np.zeros(100, dtype=np.float32)
        FormantCount = 0
        PitchCount = 0

        #set up our axes
        ax = self.RawPlot.figure.add_subplot(111)
        f0ax = self.FundamentalFrequenncyPlot.figure.add_subplot(111)
        f0ax.tick_params(axis='x',
                         which='both',
                         bottom=False,
                         top=False,
                         labelbottom=False)
        f0ax.set_position([0.35, 0.05, 0.6, 0.93])
        formantAx = self.FormantPlot.figure.add_subplot(111)
        tractAx = self.VocalTractPlot.figure.add_subplot(111)
        tractAx.tick_params(axis='x',
                            which='both',
                            bottom=False,
                            top=False,
                            labelbottom=False)
        tractAx.set_position([0.35, 0.05, 0.6, 0.93])
        tractAx.set_ylabel('Vocal Tract Length (cm)')
        tractAx.set_ylim((0, 25))
        tractAx.set_xlim((0, 0.8))

        c = 34300  # speed of sound in cm/s
        maxPitchLag = 3
        maxVocalLag = 3
        ds_rate = 3
        #set up time vector
        print('Beginning New Recording')
        time = np.linspace(0, numSamples / self.fs, numSamples)
        i = 0
        try:  #using try/except to enable keyboard interrupt
            start = ti.time()
            while self.Status:  #keep going forever, or until keyboard interrupt
                t = (i + 1) * chunkSize

                if t > len(self.Recording
                           ):  # add space to the recording if necessary
                    extraSpace = np.zeros(numSamples, dtype=np.int16)
                    self.Recording = np.concatenate(
                        [self.Recording, extraSpace], axis=None)
                    time = np.linspace(0,
                                       len(self.Recording) / self.fs,
                                       len(self.Recording))

                # pull a chunk from our audio stream
                data = PyAudioTest.getChunk(chunkSize, audioStream, Random=0)
                data_ds = data[0:chunkSize:ds_rate]  # downsampled copy of data
                # it's generally a good idea to lowpass filter before downsampling,
                # but to save computational time this is skipped here.
                # our data is ~mostly~ band-limited, so I don't expect this to be a huge problem
                # (a decimate-based sketch of the filtered alternative appears after this example)

                # add chunk to our recording
                self.Recording[i * chunkSize:(i + 1) * chunkSize] = data

                # get f0 and update f0 plot
                # use my hack method for getting f0
                #clipData = PyAudioTest.centerClip(data)
                #acf = PyAudioTest.autocorr(clipData)
                #f0 = PyAudioTest.getF0(acf, self.fs)

                # use yin implementation instead
                # yin's original implementation called for filtering,
                # which we have not yet implemented for computational reasons
                data_hamming = data * np.hamming(chunkSize)
                df = yin.differenceFunction(data_hamming, chunkSize,
                                            self.fs / 75)
                cmndf = yin.cumulativeMeanNormalizedDifferenceFunction(
                    df, len(df))
                f0 = yin.getPitch(cmndf,
                                  self.fs / 500,
                                  self.fs / 75,
                                  harmo_th=0.35)

                if f0:  # if f0 is detected, update our graph
                    # store the pitch and time
                    self.Pitch[PitchCount] = 1.0 * self.fs / f0
                    self.PitchTime[PitchCount] = 1.0 * (
                        t - chunkSize / 2) / self.fs
                    PitchCount += 1
                    # add space if needed
                    if PitchCount >= len(self.PitchTime):
                        self.Pitch = np.concatenate(
                            (self.Pitch, np.zeros(200, dtype=np.float32)))
                        self.PitchTime = np.concatenate(
                            (self.PitchTime, np.zeros(200, dtype=np.float32)))

                    #get pitches from the last 3 seconds
                    RecentPitches = []
                    pitchIDX = PitchCount - 1
                    while self.PitchTime[pitchIDX] >= 1.0 * (
                            t - chunkSize /
                            2) / self.fs - maxPitchLag and pitchIDX >= 0:
                        RecentPitches.append(self.Pitch[pitchIDX])
                        pitchIDX -= 1

                    #get mean and std
                    meanPitch = np.mean(RecentPitches)
                    if len(RecentPitches) == 1:
                        stdPitch = 25
                    else:
                        stdPitch = np.std(RecentPitches)

                    #plot
                    f0ax.bar([0], [2.0 * stdPitch],
                             bottom=[meanPitch - stdPitch])
                    f0ax.set_ylabel('Fundamental Frequency (Hz)')
                    f0ax.set_ylim((0, 500))
                    f0ax.set_xlim((0, 0.8))
                    self.FundamentalFrequenncyPlot.draw()

                formantAx.clear()
                formantAx.hold(True)
                if f0:  # if f0 is detected search for formants
                    #make PSD
                    fBins, PSD = sp.signal.periodogram(data_ds,
                                                       self.fs / ds_rate)
                    PSD = 20 * np.log10(PSD)  #convert to dB
                    try:
                        Formants = FormantFinder.findFormantsLPC(
                            data_ds, self.fs /
                            ds_rate)  # look for formants using LPC method
                        for f in range(
                                len(Formants
                                    )):  # plot the formants as  vertical lines
                            formantAx.plot([Formants[f], Formants[f]],
                                           [-100, 75],
                                           color='red')

                        formantAx.plot(fBins, PSD)
                        formantAx.set_title('Power Spectrum - Formants')
                        formantAx.set_xlabel('Frequency (Hz)')
                        formantAx.set_ylabel('Power (dB)')
                        formantAx.set_ylim((-90, 90))
                        formantAx.set_xlim((0, 5000))
                        '''
                        formantAx.bar(range(len(Formants)), Formants)
                        formantAx.set_xlabel('Formant number')
                        formantAx.set_ylabel('Frequency (Hz)')
                        formantAx.set_title('Formants Frequencies')
                        formantAx.set_xlim((0, 4.8))
                        formantAx.set_ylim((0, 5000))
                        formantAx.set_xticks([0.4, 1.4, 2.4, 3.4, 4.4])
                        formantAx.set_xticklabels(['F1', 'F2', 'F3', 'F4', 'F5'])
                        '''
                        self.FormantPlot.draw()
                        formantAx.hold(False)

                        #store Formants
                        if len(Formants) >= 5:
                            self.Formants[FormantCount, 0:5] = Formants[0:5]
                        else:
                            self.Formants[FormantCount,
                                          0:len(Formants)] = Formants
                        self.FormantTime[FormantCount] = 1.0 * (
                            t - chunkSize / 2) / self.fs
                        FormantCount += 1
                        # add space if needed
                        if FormantCount >= len(self.FormantTime):
                            self.Formants = np.concatenate(
                                (self.Formants,
                                 np.zeros((200, 5), dtype=np.float32)))
                            self.FormantTime = np.concatenate(
                                (self.FormantTime,
                                 np.zeros(200, dtype=np.float32)))

                        #detect recent vocal tract lengths
                        RecentTractLength = []
                        tractIDX = FormantCount - 1
                        while self.FormantTime[tractIDX] >= 1.0 * (
                                t - chunkSize /
                                2) / self.fs - maxVocalLag and tractIDX >= 0:
                            RecentTractLength.append(
                                FormantFinder.getVocalTractLength(
                                    self.Formants[tractIDX, :],
                                    c,
                                    method='lammert'))
                            tractIDX -= 1

                        # get mean, std
                        meanTractLength = np.median(RecentTractLength)
                        if len(RecentTractLength) == 1:
                            stdTractLength = 2
                        else:
                            stdTractLength = np.std(RecentTractLength)

                        # plot bar
                        tractAx.bar([0], [2 * stdTractLength],
                                    bottom=[meanTractLength - stdTractLength])
                        tractAx.set_ylabel('Vocal Tract Length (cm)')
                        tractAx.set_ylim((0, 25))
                        tractAx.set_xlim((0, 0.8))
                        self.VocalTractPlot.draw()

                    except (RuntimeError
                            ):  #formant detection can throw errors sometimes
                        Formants = np.zeros(3)

                else:  # if no f0, basically do nothing
                    fBins = np.linspace(0, self.fs / 2, 10)
                    PSD = np.zeros(10)

                # update our raw data plot, but only every third chunk, because it's time consuming
                if t > windowSize * self.fs and i % 3 == 0:
                    ax.plot(time[t - windowSize * self.fs:t],
                            self.Recording[t - windowSize * self.fs:t])
                    ax.set_title('Raw Waveform')
                    ax.set_xlabel('Time (s)')
                    ax.set_ylabel('amplitude')
                    self.RawPlot.draw()
                i += 1

                #check for incoming button clicks i.e. stop button
                QtCore.QCoreApplication.processEvents()

        except (
                KeyboardInterrupt, SystemExit
        ):  # in case of a keyboard interrupt or system exit, clean house
            self.FormantPlot.draw()
            self.RawPlot.draw()
            self.FundamentalFrequenncyPlot.draw()
            self.Pitch = self.Pitch[0:PitchCount]
            self.PitchTime = self.PitchTime[0:PitchCount]
            self.Formants = self.Formants[0:FormantCount, :]
            self.FormantTime = self.FormantTime[0:FormantCount]
            print('Recording Completed')
            self.Recording = self.Recording[0:t]
            print('recorded time is')
            print(1.0 * t / self.fs)
            print('elapsed time is:')
            print(ti.time() - start)
            return True

        self.Pitch = self.Pitch[0:PitchCount]
        self.PitchTime = self.PitchTime[0:PitchCount]
        self.Formants = self.Formants[0:FormantCount, :]
        self.FormantTime = self.FormantTime[0:FormantCount]
        print('Recording Completed')
        self.Recording = self.Recording[0:t]
        print('recorded time is')
        print(1.0 * t / self.fs)
        print('elapsed time is:')
        print(ti.time() - start)
        return True
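
On the downsampling caveat noted in GoRun (the comment about lowpass filtering before decimation): if the extra computation is acceptable, scipy.signal.decimate performs the anti-aliasing filter and the sample dropping in one call. A minimal sketch, assuming data and ds_rate are the chunk and rate used above:

from scipy import signal

# decimate() low-pass filters before keeping every ds_rate-th sample,
# avoiding the aliasing risk of the plain slice data[0:chunkSize:ds_rate].
data_ds = signal.decimate(data, ds_rate, ftype='fir', zero_phase=True)
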