def main():
    pkt_tx_queue = []

    def audio_frame_ready(data):
        pkt_tx_queue.append(data)

    lora_ctl = lora.LoRaController()
    spk = speaker.Speaker(DAC('P22'))
    adc = ADC()
    apin = adc.channel(pin='P13', attn=ADC.ATTN_11DB)
    uphone = microphone.Microphone(apin, audio_frame_ready)
    tlk_btn = talk_button.TalkButton(uphone)
    print('Started ...')
    flash(0x007f00)  # green

    while True:
        Timer.sleep_us(1000)

        # Handle the RX packets
        # TODO: refactor to use callback mechanism.
        while True:
            data = lora_ctl.recv()
            if data:
                spk.enque(data)
            else:
                break

        # Handle the TX queue (see the queue.Queue sketch below)
        # TODO: refactor to use Python synchronized Queue.
        while pkt_tx_queue:
            data = pkt_tx_queue.pop(0)
            print('.')
            lora_ctl.send(data)

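# The TODO above asks for a synchronized queue in place of the plain list.
# A minimal sketch of that refactor for CPython (queue.Queue is thread-safe;
# a MicroPython port would need its own lock or a deque from ucollections).
# drain_tx_queue is a hypothetical helper name, not from the original code.
import queue

pkt_tx_queue = queue.Queue()

def audio_frame_ready(data):
    # Producer side: called whenever the microphone has a frame ready.
    pkt_tx_queue.put(data)

def drain_tx_queue(lora_ctl):
    # Consumer side: send everything queued, returning as soon as it is empty.
    while True:
        try:
            data = pkt_tx_queue.get_nowait()
        except queue.Empty:
            break
        lora_ctl.send(data)
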
def start_recording(self):
    """ Start recording audio from the microphone and classify the audio.
    Note we use a background thread to process the audio and we set up a UI
    animation function to draw the sliding spectrogram image; this way the UI
    update doesn't interfere with the smoothness of the microphone readings. """
    if self.microphone is None:
        self.microphone = microphone.Microphone(False)
    self.stop()
    num_channels = 1
    self.microphone.open(self.featurizer.input_size, self.sample_rate, num_channels,
                         self.input_device)

    def update_func(frame_index):
        # this is an animation callback to update the UI every 33 milliseconds.
        self.process_output()
        self.set_spectrogram_image()
        if not self.reading_input:
            self.after(1, self.on_stopped)
        return (self.spectrogram_image,)

    if self.animation:
        self.animation.event_source.stop()

    self.reading_input = True
    # Start animation timer for updating the UI (e.g. spectrogram image) (30 fps is usually fine)
    self.animation = animation.FuncAnimation(self.features_figure, update_func,
                                             interval=33, blit=True)

    # start background thread to read and classify the recorded audio.
    self.featurizer.open(self.microphone)
    self.read_input_thread = Thread(target=self.on_read_features, args=())
    self.read_input_thread.daemon = True
    self.read_input_thread.start()

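# The docstring above describes the pattern: a background thread owns the
# blocking audio reads while a matplotlib FuncAnimation timer refreshes the
# UI. A self-contained sketch of that split; audio_queue and read_audio are
# illustrative names, and random data stands in for real microphone frames.
import queue
import threading
import time

import matplotlib.pyplot as plt
import numpy as np
from matplotlib import animation

audio_queue = queue.Queue()

def read_audio():
    # Background thread: blocking audio reads never wait on the UI.
    while True:
        audio_queue.put(np.random.rand(128))  # stand-in for a mic read
        time.sleep(0.01)

fig, ax = plt.subplots()
(line,) = ax.plot(np.zeros(128))
ax.set_ylim(0, 1)

def update_func(frame_index):
    # UI timer callback: drain the queue and redraw; no audio I/O here.
    while not audio_queue.empty():
        line.set_ydata(audio_queue.get())
    return (line,)

threading.Thread(target=read_audio, daemon=True).start()
anim = animation.FuncAnimation(fig, update_func, interval=33, blit=True)
plt.show()
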
def run(self):
    self.mic = microphone.Microphone()
    self.FILE = self.mic.listen()
    self.NewSession.emit()
    if os.path.getsize(self.FILE) > 0:
        # iFlytek (讯飞) speech recognition
        res = msc_wrapper.sendwav(self.FILE)
        if res and u'失败' in res:  # the service reported "failure"; restart it
            msc_wrapper.regEnd()
            msc_wrapper.regStart()
        else:
            if res:
                self.regResult.emit(res.decode('utf-8'))
        # Google speech recognition (disabled)
        '''
        url = 'http://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium&lang=zh-CN&maxresults=10'
        audio = open(self.FILE, 'rb').read()
        headers = {'Content-Type': 'audio/L16; rate=16000'}
        try:
            req = urllib2.Request(url, audio, headers)
            response = urllib2.urlopen(req, timeout=10)
            print response.read().decode('UTF-8')
            exec('rest =' + str(response.read().decode('UTF-8')))
            if rest:
                if rest['status'] == 0:
                    googlevoice.get(str(rest['hypotheses'][0]['utterance']))
            print "translate end", self.ResultNum, self.FILE
        except Exception, e:
            print e
        '''
        # res = requests.post(url, data=audio, timeout=0.3, headers=headers)
        # print res.text
    os.remove(self.FILE)

def __init__(self):
    threading.Thread.__init__(self)
    ip = getMyIP()
    port = 6000
    self.serverSock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    self.serverSock.bind((ip, port))
    print 'My IP is ' + ip + ' and port used is ' + str(port)
    self.serverSock.listen(1)
    self.mic = microphone.Microphone()
    self.mic.start()

def test_keyword_spotter(featurizer_model, classifier_model, categories, wav_files, threshold,
                         sample_rate, output_speaker=False, auto_scale=False, reset=False):
    predictor = classifier.AudioClassifier(classifier_model, categories, threshold, SMOOTHING)
    transform = featurizer.AudioTransform(featurizer_model, predictor.input_size)

    if transform.using_map != predictor.using_map:
        raise Exception("cannot mix .ell and compiled models")

    the_speaker = None
    if output_speaker:
        the_speaker = speaker.Speaker()

    results = []
    if wav_files:
        if not os.path.isdir(wav_files):
            raise Exception("--wav_files {} dir not found".format(wav_files))
        file_list = os.listdir(wav_files)
        file_list.sort()
        for filename in file_list:
            ext = os.path.splitext(filename)[1]
            if ext != ".wav":
                print("Skipping non-wav file: ", filename)
            else:
                reader = wav_reader.WavReader(sample_rate, CHANNELS, auto_scale)
                path = os.path.join(wav_files, filename)
                print("opening ", path)
                reader.open(path, transform.input_size, the_speaker)
                result = get_prediction(reader, transform, predictor, categories)
                results += [result]
                if reset:
                    predictor.reset()
    else:
        reader = microphone.Microphone(True, True)
        reader.open(transform.input_size, sample_rate, CHANNELS)
        print("Please type 'x' and enter to terminate this app...")
        result = get_prediction(reader, transform, predictor, categories)
        results += [result]

    return results

def listen(self, device='mic', nparray=None, file=None):
    import matplotlib.pyplot as plt
    Bd = self.BAUD
    fs = self.RATE
    carrier = self.CARRIER
    threshold = self.THRESHOLD
    bandwidth = self.BANDWIDTH
    N = self.FILTER_SIZE
    sintonia = self.SINTONIA

    # Record from the microphone when neither an array nor a file is given.
    # (The original `if nparray or file is None` parsed as
    # `nparray or (file is None)`, which chose the microphone even when an
    # array was passed.)
    if nparray is None and file is None:
        mic = microphone.Microphone()
        chunk = round(fs / Bd)
        try:
            S = np.array([])
            while True:
                print('Procurando sinal... ', end='\r', flush=True)  # "Searching for signal..."
                data = np.array(mic.get_mic_data(chunk=chunk))
                tone = data * (2**15 - 1) / np.max(np.abs(data))
                tone = tone.astype(np.int16)
                # 3400 Hz marks the start of a transmission
                if fsk.sintonizado(tone, fs, 3400, 20, N, sintonia):
                    print(f'### BAUD {Bd} @ CARRIER {carrier} Hz')
                    break
            while True:
                print('Recebendo mensagem... ', end='\r', flush=True)  # "Receiving message..."
                data = np.array(mic.get_mic_data(chunk=chunk))
                tone = data * (2**15 - 1) / np.max(np.abs(data))
                tone = tone.astype(np.int16)
                # 3800 Hz marks the end of the transmission
                if fsk.sintonizado(tone, fs, 3800, 20, N, sintonia):
                    S = np.append(S, tone)
                    C, encoded_msg = fsk.demodulate(S, fs, Bd, carrier, threshold, bandwidth, N)
                    msg = fsk.decode_sanduiche(encoded_msg)
                    msg = fsk.decode_ascii(msg)
                    self.MESSAGE = msg
                    print(f"Mensagem recebida: {msg}")  # "Message received"
                    print("Fim da transmissão")  # "End of transmission"
                    break
                else:
                    S = np.append(S, tone)
        except KeyboardInterrupt:
            print('Transmissão encerrada')  # "Transmission terminated"
        mic.close()
        self.ENCODED_SIGNAL = S

    if nparray is not None:
        C, encoded_msg = fsk.demodulate(nparray, fs, Bd, carrier, threshold, bandwidth, N)
        self.MESSAGE = fsk.decode_ascii(encoded_msg)
        self.ENCODED_SIGNAL = C
        print(self.MESSAGE, flush=True, end='')

def test_keyword_spotter(featurizer_model, classifier_model, categories, wav_file, threshold,
                         sample_rate, output_speaker=False):
    predictor = classifier.AudioClassifier(classifier_model, categories, threshold, SMOOTHING)
    transform = featurizer.AudioTransform(featurizer_model, predictor.input_size)

    if transform.using_map != predictor.using_map:
        raise Exception("cannot mix .ell and compiled models")

    # set up inputs and outputs
    if wav_file:
        the_speaker = None
        if output_speaker:
            the_speaker = speaker.Speaker()
        reader = wav_reader.WavReader(sample_rate, CHANNELS)
        reader.open(wav_file, transform.input_size, the_speaker)
    else:
        reader = microphone.Microphone(True)
        reader.open(transform.input_size, sample_rate, CHANNELS)
        print("Please type 'x' and enter to terminate this app...")

    transform.open(reader)
    results = None
    try:
        while True:
            feature_data = transform.read()
            if feature_data is None:
                break
            else:
                prediction, probability, label = predictor.predict(feature_data)
                if probability is not None:
                    if not results or results[1] < probability:
                        results = (prediction, probability, label)
                    percent = int(100 * probability)
                    print("<<< DETECTED ({}) {}% '{}' >>>".format(prediction, percent, label))
    except KeyboardInterrupt:
        pass

    transform.close()
    average_time = predictor.avg_time() + transform.avg_time()
    print("Average processing time: {}".format(average_time))
    if results is None:
        raise Exception("test_keyword_spotter failed to find any predictions!")
    return tuple(list(results) + [average_time])

def run(self):
    import json
    self.mic = microphone.Microphone()
    self.FILE = self.mic.listen()
    self.NewSession.emit()
    url = 'http://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium&lang=zh-CN&maxresults=10'
    audio = open(self.FILE, 'rb').read()
    headers = {'Content-Type': 'audio/L16; rate=16000'}
    try:
        req = urllib2.Request(url, audio, headers)
        response = urllib2.urlopen(req, timeout=10)
        # Read the body once; the original called response.read() twice, and
        # the second read of an exhausted response returns an empty string.
        body = response.read().decode('UTF-8')
        print body
        # json.loads replaces the original exec('rest =' + body), which
        # evaluated the raw response text as code.
        rest = json.loads(body)
        if rest:
            if rest['status'] == 0:
                googlevoice.get(str(rest['hypotheses'][0]['utterance']))
        print "translate end", self.ResultNum, self.FILE
    except Exception, e:
        print e

def run(self):
    self.mic = microphone.Microphone()
    self.FILE = self.mic.listen()
    self.NewSession.emit()
    url = 'http://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium&lang=zh-CN&maxresults=10'
    audio = open(self.FILE, 'rb').read()
    headers = {'Content-Type': 'audio/L16; rate=16000'}
    try:
        req = urllib2.Request(url, audio, headers)
        response = urllib2.urlopen(req, timeout=10)
        print response.read().decode('UTF-8')
        print "translate end", self.ResultNum, self.FILE
    except:
        print u"网络状况不好或者其他错误发生"  # "Bad network conditions or some other error occurred"
    # res = requests.post(url, data=audio, timeout=0.3, headers=headers)
    # print res.text
    os.remove(self.FILE)

def main(rate, channels, duration):
    sample_width = 2  # 16 bit
    mic = microphone.Microphone(auto_scale=False)
    mic.open(1024, rate, channels)
    start = time.time()
    print("Recording for {} seconds...".format(duration))
    with wave.open("audio.wav", "wb") as writer:
        writer.setnchannels(channels)
        writer.setsampwidth(sample_width)
        writer.setframerate(rate)
        while time.time() < start + duration:
            buffer = mic.read()
            int16 = buffer.astype(np.int16)
            writer.writeframes(int16)
    print("finished, press ctrl+c")

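# A stdlib-only companion sketch: read the "audio.wav" written above back in
# and sanity-check it. read_wav is a hypothetical helper, not part of the
# snippet above.
import wave

import numpy as np

def read_wav(path="audio.wav"):
    # Return the recorded samples as an int16 numpy array.
    with wave.open(path, "rb") as reader:
        print("channels={} rate={} frames={}".format(
            reader.getnchannels(), reader.getframerate(), reader.getnframes()))
        frames = reader.readframes(reader.getnframes())
    return np.frombuffer(frames, dtype=np.int16)

samples = read_wav()
print("peak amplitude:", np.abs(samples).max())
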
def audio_loopback():
    def handle_audio(data):
        spk.enque(data)

    int_mode = False
    spk = speaker.Speaker(DAC('P22'), int_mode=int_mode, debug=False)
    adc = ADC()
    apin = adc.channel(pin='P13', attn=ADC.ATTN_11DB)
    uphone = microphone.Microphone(apin, handle_audio, int_mode=int_mode)
    tlk_btn = talk_button.TalkButton(uphone)
    print('Audio playback ...')
    flash(0x000010)  # Dark blue

    while True:
        if int_mode:
            Timer.sleep_us(1000000)
        else:
            uphone.loop()
            spk.loop()

def start_recording(self):
    """ Start recording audio from the microphone and classify the audio.
    Note we use a background thread to process the audio and we set up a UI
    animation function to draw the sliding spectrogram image; this way the UI
    update doesn't interfere with the smoothness of the microphone readings. """
    self.stop()
    input_channel = None
    if self.serial_port:
        import serial_reader
        self.serial = serial_reader.SerialReader(0.001)
        self.serial.open(self.featurizer.input_size, self.serial_port)
        input_channel = self.serial
    else:
        if self.microphone is None:
            self.microphone = microphone.Microphone(
                auto_scale=self.auto_scale, console=False)
        num_channels = 1
        self.microphone.open(self.featurizer.input_size, self.sample_rate, num_channels,
                             self.input_device)
        input_channel = self.microphone

    def update_func(frame_index):
        return self.on_ui_update()

    if self.animation:
        self.animation.event_source.stop()

    self.reading_input = True
    # Start animation timer for updating the UI (e.g. spectrogram image)
    self.animation = self.spectrogram_widget.begin_animation(update_func)

    # start background thread to read and classify the recorded audio.
    self.featurizer.open(input_channel)
    self.read_input_thread = Thread(target=self.on_read_features, args=())
    self.read_input_thread.daemon = True
    self.read_input_thread.start()

def __init__(self):
    self._microphone = microphone.Microphone()
    self._speech = speech.SpeechRecognizer(self._microphone)
    self._lights = lights.Lights()
    self._keywords = commands.Dispatcher()
    self._commands = commands.Dispatcher(junk=config.GRAMMAR_JUNK)
    self._happiness = 0   # Value that goes from -3 to 3
    self._brightness = 2  # Value that goes from 0 to 3
    self._hue = 0.0
    self._animations = collections.deque()
    self._push_animation(self._idle_animation())
    self._listen_animation = self._create_pulse_animation(80.0, 2.0)
    self._state_lock = threading.RLock()
    # Configure the keywords and their associated callbacks.
    for w in config.WAKE_WORDS:
        self._keywords.register(w, self._wake)
    for w in config.HAPPY_WORDS:
        self._keywords.register(w, self._increment_happiness, 1)
    for w in config.SAD_WORDS:
        self._keywords.register(w, self._increment_happiness, -1)
    # Configure the command parsing based on Jackson's grammar (from
    # commands.gram). Right now this is all manual configuration and
    # care must be taken to ensure the logic below and commands.gram
    # are kept up to date.
    self._commands.register('wink', self._wink)
    self._commands.register('spectrum', self._spectrum)
    self._commands.register('sparkle', self._sparkle)
    self._commands.register('knight rider', self._knight_rider)
    self._commands.register('brighter', self._increment_brightness, 1)
    self._commands.register('dimmer', self._increment_brightness, -1)
    self._commands.register_starts_with('show me', self._change_animation)
    self._commands.register_starts_with('light up', self._change_color)
    self._commands.register_starts_with('change', self._change)
    self._commands.register_starts_with('set', self._change)
    self._commands.register_starts_with('update', self._change)
    self._commands.register_starts_with('modify', self._change)

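# commands.Dispatcher is the project's own module; the snippet above only
# shows its call sites. A minimal sketch of what such a dispatcher might look
# like, assuming register() matches whole phrases, register_starts_with()
# matches prefixes, and extra register arguments are forwarded to the
# callback. This is illustrative, not the project's actual implementation.
class Dispatcher:
    def __init__(self, junk=()):
        self._junk = set(junk)  # filler words stripped before matching
        self._exact = {}        # phrase -> (callback, args)
        self._prefix = {}       # prefix -> (callback, args)

    def register(self, phrase, callback, *args):
        self._exact[phrase] = (callback, args)

    def register_starts_with(self, prefix, callback, *args):
        self._prefix[prefix] = (callback, args)

    def dispatch(self, text):
        words = [w for w in text.lower().split() if w not in self._junk]
        text = ' '.join(words)
        if text in self._exact:
            callback, args = self._exact[text]
            return callback(*args)
        for prefix, (callback, args) in self._prefix.items():
            if text.startswith(prefix):
                # Hand the remainder ("show me a rainbow" -> "a rainbow")
                # to the handler along with any registered arguments.
                return callback(text[len(prefix):].strip(), *args)
        return None
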
def __init__(self, IP, port):
    threading.Thread.__init__(self)
    self.clientSock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    self.clientSock.connect((IP, port))
    self.mic = microphone.Microphone()
    self.mic.start()

def listen(self, device='mic', nparray=None, file=None):
    import matplotlib.pyplot as plt
    Bd = self.BAUD
    fs = self.RATE
    carrier = self.CARRIER
    threshold = self.THRESHOLD
    bandwidth = self.BANDWIDTH
    N = self.FILTER_SIZE

    # Record from the microphone when neither an array nor a file is given.
    # (Same precedence bug as the other variant: `if nparray or file is None`.)
    if nparray is None and file is None:
        mic = microphone.Microphone()
        chunk = round(fs / Bd) * 8
        # print(f"### BAUD {Bd} @ CARRIER {carrier} Hz")
        # data = np.array(mic.get_mic_data(chunk))
        # C, encoded_msg = fsk.demodulate(data, fs, Bd, carrier, threshold, bandwidth, N)
        # print(encoded_msg, len(C), chunk)
        # plt.plot(data)
        # plt.show()
        # self.MESSAGE = fsk.decode_ascii(encoded_msg)
        # print(self.MESSAGE, flush=True, end='')
        try:
            S = []
            while True:
                print('Procurando sinal... ', end='\r', flush=True)  # "Searching for signal..."
                data = np.array(mic.get_mic_data(chunk=chunk))
                # tone = tone.astype(np.int16)
                tone = data * (2**15 - 1) / np.max(np.abs(data))
                if fsk.sintonizado(tone, fs, 3400, 200, 500, 5):
                    # fixed: the original interpolated an undefined {BD} and
                    # printed the sample rate {fs} where the carrier belongs
                    print(f'### BAUD {Bd} @ CARRIER {carrier} Hz')
                    break
                else:
                    continue
            while True:
                data = np.array(mic.get_mic_data(chunk=chunk))
                tone = data * (2**15 - 1) / np.max(np.abs(data))
                # tone = tone.astype(np.int16)
                C, encoded_msg = fsk.demodulate(tone, fs, Bd, carrier, threshold, bandwidth, N)
                byte = fsk.decode_ascii(encoded_msg)
                # time.sleep(1/Bd)
                if '§' in byte:
                    break
                else:
                    print(byte, end='', flush=True)
                    # print(len(encoded_msg), flush=True, end='\n')
                # 3800 Hz marks the end of the transmission
                if fsk.sintonizado(data, 44100, 3800, 200, 500, 5):
                    print("Fim da transmissão")  # "End of transmission"
                    break
                else:
                    data = np.array(mic.get_mic_data(chunk=chunk)) / 2**15
                    C, encoded_msg = fsk.demodulate(
                        data, fs, Bd, carrier, threshold, bandwidth, N)
                    byte = fsk.decode_ascii(encoded_msg)
        except KeyboardInterrupt:
            print('Transmissão encerrada')  # "Transmission terminated"
        print(byte)
        self.ENCODED_SIGNAL = C

    if nparray is not None:
        C, encoded_msg = fsk.demodulate(nparray, fs, Bd, carrier, threshold, bandwidth, N)
        self.MESSAGE = fsk.decode_ascii(encoded_msg)
        self.ENCODED_SIGNAL = C
        print(self.MESSAGE, flush=True, end='')

predictor = classifier.AudioClassifier(args.classifier, args.categories, args.threshold,
                                       SMOOTHING)
transform = featurizer.AudioTransform(args.featurizer, predictor.input_size)

if transform.using_map != predictor.using_map:
    raise Exception("cannot mix .ell and compiled models")

# set up inputs and outputs
if args.wav_file:
    output_speaker = None
    if args.speaker:
        output_speaker = speaker.Speaker()
    reader = wav_reader.WavReader(args.sample_rate, CHANNELS)
    reader.open(args.wav_file, transform.input_size, output_speaker)
else:
    reader = microphone.Microphone(True)
    reader.open(transform.input_size, args.sample_rate, CHANNELS)
    print("Please type 'x' and enter to terminate this app...")

transform.open(reader)

try:
    while True:
        feature_data = transform.read()
        if feature_data is None:
            break
        else:
            prediction, probability, label = predictor.predict(feature_data)
            if probability is not None:
                percent = int(100 * probability)
                print("<<< DETECTED ({}) {}% '{}' >>>".format(prediction, percent, label))
# The snippet was cut off before its except clause; a bare try block is a
# syntax error, so close it the same way the companion function above does.
except KeyboardInterrupt:
    pass

# else:
#     byte += bit
#
# for byte in bytearray:
#     s = fsk.generate_tones(byte, fc, Bd, carrier)
#     tone = s * (2**15 - 1) / np.max(np.abs(s))
#     tone = tone.astype(np.int16)
#     sd.play(tone, fc)
#     status = sd.wait()
#     C, encoded_msg = fsk.demodulate(s, fc, Bd, carrier, 20, bandwidth, N)
#     print(fsk.decode_ascii(encoded_msg), end='', flush=True)

# s = fsk.generate_tones(bmsg, fc, Bd, carrier)
# white_noise = np.random.normal(0, 0.5, size=len(s)) * 0
# s = s + white_noise

mic = microphone.Microphone()
s = np.array(mic.get_mic_data())
C, encoded_msg = fsk.demodulate(s, fc, Bd, carrier, 500, bandwidth, N)
# self.MESSAGE = fsk.decode_ascii(encoded_msg)
# print(self.MESSAGE, flush=True, end='')
# C, encoded_msg = fsk.demodulate(s, fc, Bd, carrier, 5, bandwidth, N)

# Reassemble the decoded bit string into ASCII characters, 8 bits at a time.
string = ''.join(
    [chr(int(encoded_msg[i:i + 8], 2)) for i in range(0, len(encoded_msg), 8)])

# print('Mensagem original: {}\n'.format(msg))
print('Mensagem decodificada: {}\n'.format(string))  # "Decoded message"
print('Tamanho do sinal transmitido: {} MB'.format(s.nbytes / 1e6))  # "Size of the transmitted signal" (unit was "Mb")
plt.plot(C)

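# fsk.demodulate above is the project's own routine. As a rough illustration
# of the underlying idea, binary FSK can be demodulated per symbol by
# comparing signal energy at the mark and space frequencies; the Goertzel
# recurrence below is a standard way to measure that energy. All names and
# frequencies here are illustrative, not taken from the project.
import numpy as np

def goertzel_power(x, fs, f):
    # Energy of x at frequency f via the Goertzel recurrence.
    k = 2.0 * np.cos(2.0 * np.pi * f / fs)
    s1 = s2 = 0.0
    for sample in x:
        s0 = sample + k * s1 - s2
        s2, s1 = s1, s0
    return s1 * s1 + s2 * s2 - k * s1 * s2

def fsk_demod(signal, fs, baud, f_space, f_mark):
    # One chunk per symbol; the stronger of the two tones wins.
    n = int(fs / baud)
    bits = ''
    for i in range(0, len(signal) - n + 1, n):
        chunk = signal[i:i + n]
        mark = goertzel_power(chunk, fs, f_mark)
        space = goertzel_power(chunk, fs, f_space)
        bits += '1' if mark > space else '0'
    return bits

# Round trip: encode the bits of 'A' at 100 Bd and demodulate them again.
fs, baud, f_space, f_mark = 44100, 100, 1200, 2200
bits = '01000001'
t = np.arange(int(fs / baud)) / fs
tone = np.concatenate(
    [np.sin(2 * np.pi * (f_mark if b == '1' else f_space) * t) for b in bits])
print(fsk_demod(tone, fs, baud, f_space, f_mark))  # expect 01000001
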
import sys
sys.path += ["d:/git/ell/ell/tools/utilities/pythonlibs/audio"]

import os  # needed for os.path below; missing from the original snippet
import time

import numpy as np

script_dir = os.path.dirname(os.path.abspath(__file__))
sys.path += [script_dir]
sys.path += [os.getcwd()]

import microphone
from compiled_classifier import model
from compiled_featurizer import mfcc

THRESHOLD = 0.8

mic = microphone.Microphone(True)
mic.open(256, 8000, 1)

categories = [x.strip() for x in open('categories.txt', 'r').readlines()]

transform = mfcc.MfccWrapper()

class Classifier(model.ModelWrapper):
    def __init__(self):
        super(Classifier, self).__init__()
        self.last_vad = None

    def VadCallback(self, buffer):
        vad = buffer[0]
        if vad != self.last_vad:
            print("vad={}".format(vad))
            self.last_vad = vad

# (fragment from inside the frames-per-second reporting loop)
if time.time() - 0.5 > prev_fps_update:
    prev_fps_update = time.time()
    print('FPS {:.0f} / {:.0f}'.format(fps, config.FPS))


# Number of audio samples to read every time frame
samples_per_frame = int(config.MIC_RATE / config.FPS)

# Array containing the rolling audio sample window
y_roll = np.random.rand(config.N_ROLLING_HISTORY, samples_per_frame) / 1e16

visualization_effect = visualize_energy
"""Visualization effect to display on the LED strip"""

# Global microphone
mic = microphone.Microphone(microphone_update)


def update_lamp_effect(mode):
    global lamp_effect
    global r_filt, g_filt, b_filt
    if mode == 100:
        lamp_effect = 'BUBBLE'
        r_filt = dsp.ExpFilter(np.tile(0.01, config.N_PIXELS // 2),
                               alpha_decay=0.5, alpha_rise=0.99)
        # The original snippet is truncated here; alpha_rise is assumed to
        # mirror r_filt above.
        g_filt = dsp.ExpFilter(np.tile(0.01, config.N_PIXELS // 2),
                               alpha_decay=0.5, alpha_rise=0.99)

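# dsp.ExpFilter is the smoothing primitive above: a fast "attack" when the
# input rises (alpha_rise) and a slow "release" when it falls (alpha_decay).
# A minimal sketch of such an asymmetric exponential filter, assuming that is
# how dsp.ExpFilter behaves; the class below is illustrative, not the
# project's actual implementation.
import numpy as np

class ExpFilter:
    def __init__(self, val, alpha_decay=0.5, alpha_rise=0.5):
        self.value = np.asarray(val, dtype=float)
        self.alpha_decay = alpha_decay  # smoothing factor when the input falls
        self.alpha_rise = alpha_rise    # smoothing factor when the input rises

    def update(self, value):
        value = np.asarray(value, dtype=float)
        # Pick the fast constant where the input exceeds the current value.
        alpha = np.where(value > self.value, self.alpha_rise, self.alpha_decay)
        self.value = alpha * value + (1.0 - alpha) * self.value
        return self.value

# A brightness envelope that jumps up quickly and fades out slowly.
env = ExpFilter(np.zeros(3), alpha_decay=0.05, alpha_rise=0.9)
print(env.update([1.0, 0.5, 0.0]))  # rises fast toward the new peaks
print(env.update([0.0, 0.0, 0.0]))  # decays slowly back toward zero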