def start_playing(self, filename):
    """
    Play a wav file and classify its audio.

    The wav file is read on a background thread, and a UI animation callback
    draws the sliding spectrogram image, so that UI updates do not interfere
    with the smoothness of the audio playback.

    :param filename: wav file to play; handed to WavReader.open as-is
    """
    # the speaker is only created the first time something is played
    if self.speaker is None:
        self.speaker = speaker.Speaker()

    # stop() is defined elsewhere; it runs before the new file is opened
    self.stop()
    self.reading_input = False

    reader = wav_reader.WavReader(self.sample_rate, self.channels, self.auto_scale)
    self.wav_file = reader
    reader.open(filename, self.featurizer.input_size, self.speaker)

    # cancel the timer of any animation left over from a previous run
    if self.animation:
        self.animation.event_source.stop()

    self.reading_input = True

    # Start animation timer for updating the UI (e.g. spectrogram image);
    # the frame index supplied to the callback is ignored.
    self.animation = self.spectrogram_widget.begin_animation(
        lambda frame_index: self.on_ui_update())

    # start background thread to read and classify the audio.
    self.featurizer.open(self.wav_file)
    worker = Thread(target=self.on_read_features, args=(), daemon=True)
    self.read_input_thread = worker
    worker.start()
def start_playing(self, filename):
    """
    Play a wav file and classify its audio.

    The wav file is read on a background thread, and a UI animation callback
    draws the sliding spectrogram image, so that UI updates do not interfere
    with the smoothness of the audio playback.

    :param filename: wav file to play; handed to WavReader.open as-is
    """
    # the speaker is only created the first time something is played
    if self.speaker is None:
        self.speaker = speaker.Speaker()

    # stop() is defined elsewhere; it runs before the new file is opened
    self.stop()
    self.reading_input = False

    reader = wav_reader.WavReader(self.sample_rate, self.channels)
    self.wav_file = reader
    reader.open(filename, self.featurizer.input_size, self.speaker)

    def redraw(frame_index):
        # Per-frame UI work: handle pending output, schedule on_stopped once
        # reading has ended, then refresh the spectrogram image.
        self.process_output()
        if not self.reading_input:
            self.after(1, self.on_stopped)
        self.set_spectrogram_image()
        # blit=True below expects the redrawn artists back as a sequence
        return (self.spectrogram_image,)

    # cancel the timer of any animation left over from a previous run
    if self.animation:
        self.animation.event_source.stop()

    self.reading_input = True

    # Start animation timer for updating the UI (e.g. spectrogram image)
    # (30 fps is usually fine)
    self.animation = animation.FuncAnimation(
        self.features_figure, redraw, interval=33, blit=True)

    # start background thread to read and classify the audio.
    self.featurizer.open(self.wav_file)
    worker = Thread(target=self.on_read_features, args=(), daemon=True)
    self.read_input_thread = worker
    worker.start()
def main():
    """
    Run the radio loop forever: received LoRa packets are queued on the
    speaker, and audio frames delivered by the microphone callback are sent
    out over LoRa.
    """
    pkt_tx_queue = []

    def audio_frame_ready(data):
        # microphone callback: park the frame until the main loop sends it
        pkt_tx_queue.append(data)

    lora_ctl = lora.LoRaController()
    spk = speaker.Speaker(DAC('P22'))
    apin = ADC().channel(pin='P13', attn=ADC.ATTN_11DB)
    uphone = microphone.Microphone(apin, audio_frame_ready)
    # NOTE(review): never read again; presumably kept so the button object
    # stays referenced - confirm before removing.
    tlk_btn = talk_button.TalkButton(uphone)

    print('Started ...')
    flash(0x007f00)  # green

    while True:
        Timer.sleep_us(1000)

        # Handle the RX packets: drain until recv() returns something falsy.
        # TODO: refactor to use callback mechanism.
        packet = lora_ctl.recv()
        while packet:
            spk.enque(packet)
            packet = lora_ctl.recv()

        # Handle the TX queue, oldest frame first.
        # TODO: refactor to use Python synchronized Queue.
        while pkt_tx_queue:
            outgoing = pkt_tx_queue.pop(0)
            print('.')
            lora_ctl.send(outgoing)
def play_sound(wavfile):
    """
    Open `wavfile` with a WavReader that is given a Speaker, then read it
    to the end.

    :param wavfile: wav file passed to WavReader.open
    """
    import speaker
    import wav_reader

    player = wav_reader.WavReader()
    player.open(wavfile, 512, speaker.Speaker())

    # The buffers themselves are discarded; read() is only called until it
    # reports the end of the file by returning None.
    while player.read() is not None:
        pass
def test_keyword_spotter(featurizer_model, classifier_model, categories, wav_files, threshold, sample_rate,
                         output_speaker=False, auto_scale=False, reset=False):
    """
    Run the featurizer + classifier over every .wav file in a directory, or
    over the microphone when no directory is given.

    :param featurizer_model: model passed to featurizer.AudioTransform
    :param classifier_model: model passed to classifier.AudioClassifier
    :param categories: categories passed to the classifier and to get_prediction
    :param wav_files: directory of .wav files; falsy means use the microphone
    :param threshold: classifier threshold
    :param sample_rate: sample rate used to open the audio input
    :param output_speaker: when true, a Speaker is handed to each wav reader
    :param auto_scale: forwarded to WavReader
    :param reset: when true, predictor.reset() is called after each wav file
    :return: list with one get_prediction result per processed input
    :raises Exception: when the two models are of different kinds, or when
        wav_files is given but is not a directory
    """
    predictor = classifier.AudioClassifier(classifier_model, categories, threshold, SMOOTHING)
    transform = featurizer.AudioTransform(featurizer_model, predictor.input_size)

    if transform.using_map != predictor.using_map:
        raise Exception("cannot mix .ell and compiled models")

    the_speaker = speaker.Speaker() if output_speaker else None

    results = []

    if not wav_files:
        # live input: a single prediction run over the microphone
        reader = microphone.Microphone(True, True)
        reader.open(transform.input_size, sample_rate, CHANNELS)
        print("Please type 'x' and enter to terminate this app...")
        results.append(get_prediction(reader, transform, predictor, categories))
        return results

    if not os.path.isdir(wav_files):
        raise Exception("--wav_files {} dir not found".format(wav_files))

    # files are visited in sorted name order
    for filename in sorted(os.listdir(wav_files)):
        if os.path.splitext(filename)[1] != ".wav":
            print("Skipping non-wav file: ", filename)
            continue

        reader = wav_reader.WavReader(sample_rate, CHANNELS, auto_scale)
        path = os.path.join(wav_files, filename)
        print("opening ", path)
        reader.open(path, transform.input_size, the_speaker)
        results.append(get_prediction(reader, transform, predictor, categories))
        if reset:
            predictor.reset()

    return results
def test_keyword_spotter(featurizer_model, classifier_model, categories, wav_file, threshold, sample_rate,
                         output_speaker=False):
    """
    Run the featurizer + classifier over one wav file, or over the microphone
    when no file is given, and report the most probable detection.

    :param featurizer_model: model passed to featurizer.AudioTransform
    :param classifier_model: model passed to classifier.AudioClassifier
    :param categories: categories passed to the classifier
    :param wav_file: wav file to read; falsy means use the microphone
    :param threshold: classifier threshold
    :param sample_rate: sample rate used to open the audio input
    :param output_speaker: when true, a Speaker is handed to the wav reader
    :return: tuple (prediction, probability, label, average_time) for the
        detection with the highest probability
    :raises Exception: when the two models are of different kinds, or when
        no prediction with a probability was produced
    """
    predictor = classifier.AudioClassifier(classifier_model, categories, threshold, SMOOTHING)
    transform = featurizer.AudioTransform(featurizer_model, predictor.input_size)

    if transform.using_map != predictor.using_map:
        raise Exception("cannot mix .ell and compiled models")

    # set up inputs and outputs
    if wav_file:
        the_speaker = speaker.Speaker() if output_speaker else None
        reader = wav_reader.WavReader(sample_rate, CHANNELS)
        reader.open(wav_file, transform.input_size, the_speaker)
    else:
        reader = microphone.Microphone(True)
        reader.open(transform.input_size, sample_rate, CHANNELS)
        print("Please type 'x' and enter to terminate this app...")

    transform.open(reader)
    results = None
    try:
        while True:
            feature_data = transform.read()
            if feature_data is None:
                break
            prediction, probability, label = predictor.predict(feature_data)
            if probability is None:
                continue
            # remember only the detection with the highest probability
            if not results or results[1] < probability:
                results = (prediction, probability, label)
            percent = int(100 * probability)
            print("<<< DETECTED ({}) {}% '{}' >>>".format(prediction, percent, label))
    except KeyboardInterrupt:
        # Ctrl+C simply ends the run
        pass
    finally:
        # close the transform even when read()/predict() raises, so the
        # input is not left open on the error path
        transform.close()

    average_time = predictor.avg_time() + transform.avg_time()
    print("Average processing time: {}".format(average_time))
    if results is None:
        raise Exception("test_keyword_spotter failed to find any predictions!")
    return results + (average_time,)
def audio_loopback():
    """
    Loop microphone audio straight back to the speaker, forever.

    With int_mode off (the hard-coded setting) the microphone and speaker
    are serviced by calling their loop() methods; with it on, this function
    only sleeps.
    """
    int_mode = False

    def handle_audio(data):
        # microphone callback: queue each frame on the speaker
        spk.enque(data)

    spk = speaker.Speaker(DAC('P22'), int_mode=int_mode, debug=False)
    apin = ADC().channel(pin='P13', attn=ADC.ATTN_11DB)
    uphone = microphone.Microphone(apin, handle_audio, int_mode=int_mode)
    # NOTE(review): never read again; presumably kept so the button object
    # stays referenced - confirm before removing.
    tlk_btn = talk_button.TalkButton(uphone)

    print('Audio playpack ...')
    flash(0x000010)  # Dark blue

    # int_mode never changes, so the mode is chosen once, outside the loop
    if int_mode:
        while True:
            Timer.sleep_us(1000000)

    while True:
        uphone.loop()
        spk.loop()
def __init__(self, name):
    """
    Create the trader and speaker, copy the credentials from the keys module
    onto them, set up the loggers and build the command table.

    :param name: stored as self.name
    """
    self.name = name

    # collaborators and their credentials
    self.trader = trader.Trader()
    self.speaker = speaker.Speaker()
    self.trader.binance_public = keys.binance_public
    self.trader.binance_private = keys.binance_private
    self.speaker.tele_token = keys.telegram_token
    self.speaker.tele_chatid = keys.telegram_chatid

    # Other states: "sleeping" (only listening) and
    # "awake" (listening, trading and sending updates)
    self.state = "shut down"

    # Set-up loggers
    self.error_log = setup_logger("Error logger", PATH + '/error_log.txt', logging.ERROR)
    self.trade_log = setup_logger("Trade logger", PATH + '/trade_log.txt', logging.INFO)

    # no worker threads exist yet and none of the activities is running
    self.listening_thread = self.sending_thread = self.trading_thread = None
    self.listening = self.trading = self.sending = False

    self.verbose = True
    self.show_too_low = False

    # command text -> handler; 'Get <attribute>' has no handler here
    self.commands = {
        'Go sleep': self.go_sleep,
        'Wake up!': self.wake_up,
        'Shutdown': self.shut_down,
        'Talk to me': self.set_verbose,
        'Be quiet': self.set_verbose,
        'Last trade': self.last_trade,
        'Show too low': self.set_show_too_low,
        'You alright?': self.get_state,
        'Get <attribute>': None,
    }
def analys(self):
    """
    Work out the reply for the current message.

    Every handler is called once, in a fixed order, and the first truthy
    answer is returned. When no handler answers, the reply comes from
    speaker.Speaker(self.text); an empty reply from it is replaced by a
    hint (in Russian) telling the user how to list the available commands.

    :return: the reply
    """
    # All handlers run up front, before any answer is chosen, so each one
    # is invoked on every message regardless of which answer wins.
    answers = [
        self.is_hello(),
        self.is_commands(),
        self.is_first_command(),
        self.is_second_command(),
        self.is_third_command(),
        self.is_fourth_command(),
        self.is_fifth_command(),
        self.is_sixth_command(),
        self.add_admin(),
        self.remove_admin(),
        self.all_admins(),
    ]

    for answer in answers:
        if answer:
            return answer

    # nothing matched: fall back to the generic speaker
    reply = speaker.Speaker(self.text).get_result()
    if reply == '':
        reply = "чтобы посмотреть возможные команды, напиши слово \"Команды\""
    return reply
def __init__(self):
    """
    Set the microphone stream parameters and the command vocabulary, create
    the light, speaker and song helpers, and build the pocketsphinx decoder.
    """
    # --- Microphone stream config ---
    self.CHUNK = 1024  # CHUNKS of bytes to read each time from mic
    self.FORMAT = pyaudio.paInt16
    self.CHANNELS = 1
    self.RATE = 16000

    # Silence limit in seconds: the maximum amount of seconds where only
    # silence is recorded. When this time passes the recording finishes
    # and the file is decoded.
    self.SILENCE_LIMIT = 1

    # Previous audio (in seconds) to prepend. When noise is detected, this
    # much of the previously recorded audio is prepended, which helps to
    # prevent chopping the beginning of the phrase.
    self.PREV_AUDIO = 0.5

    self.THRESHOLD = 3500
    self.num_phrases = -1

    # --- Command vocabulary ---
    self.MODULES = ["light", "time", "alarm", "remind", "song", "article"]
    self.STATES = ["tell", "on", "off", "set", "unset", "play", "stop", "use"]

    # --- Helpers; the song helper is given the speaker ---
    self.light = Light()
    self.speaker = speaker.Speaker()
    self.song = Song(self.speaker)

    # --- Decoder ---
    # This will need to be modified according to where the pocketsphinx
    # folder is.
    model_dir = "en-adapt"

    # Create a decoder with certain model
    config = Decoder.default_config()
    config.set_string('-hmm', os.path.join(model_dir, 'en-us-adapt'))
    config.set_string('-lm', os.path.join(model_dir, 'alfred/alfred.lm'))
    config.set_string('-dict', os.path.join(model_dir, 'alfred/alfred.dic'))

    # Creates decoder object for streaming data.
    self.decoder = Decoder(config)
def is_closed(self):
    """Return True when self.wav_reader1 is None, False otherwise."""
    return self.wav_reader1 is None


if __name__ == "__main__":
    # Command-line demo: mix noise into a wav file and read the mix through
    # to the end, with a Speaker handed to the mixer.
    parser = argparse.ArgumentParser("Test the AudioNoiseMixer class")
    parser.add_argument("--wav_file", "-w", help=".wav file to process")
    parser.add_argument("--noise_dir", "-n", help="directory of .wav files containing noise")
    parser.add_argument("--mix_ratio", "-r", type=float, default=0.1, help="how much noise to add")
    args = parser.parse_args()

    # every .wav file directly inside the noise directory
    noise_dir = args.noise_dir
    noise_files = [os.path.join(noise_dir, f)
                   for f in os.listdir(noise_dir)
                   if os.path.splitext(f)[1] == ".wav"]

    # The Speaker instance gets its own name so that the imported `speaker`
    # module is not rebound at module scope and stays usable by the rest of
    # the file.
    output_speaker = speaker.Speaker()
    mixer = AudioNoiseMixer(noise_files, mix_ratio=args.mix_ratio, mix_percent=1)
    reader = wav_reader.WavReader(16000, 1)
    reader.open(args.wav_file, 512)
    mixer.open(reader, output_speaker)

    # read() returns None once the input is exhausted; the data is discarded
    while mixer.read() is not None:
        pass
    print("finished")
def ask(s): return str(raw_input(str(s) + "\n> ")) #vision_source = source.FileSource() vision_source = source.IPSource() print "Starting scrabble vision..." sv = vision.ScrabbleVision(source=vision_source) sv.start() while not sv.started: pass print "Scrabble vision started. Ready." print "Starting speaker..." voice = speaker.Speaker() voice.start() PICKLE_FILENAME = "game.state" if len(sys.argv) == 2: filename = sys.argv[1] (scoreboard, game_board) = pickle.load(open(filename, "rb")) print "Game recovered from file" voice.say("Resuming game!") else: #Find out our players player_count = int(ask("How many players?")) player_list = [] for i in range(1, player_count + 1): x = ask("What is Player %d's name?" % i)
parser.add_argument("--threshold", "-t", help="Classifier threshold (default 0.6)", default=THRESHOLD, type=float) parser.add_argument("--speaker", help="Output audio to the speaker.", action='store_true') args = parser.parse_args() predictor = classifier.AudioClassifier(args.classifier, args.categories, args.threshold, SMOOTHING) transform = featurizer.AudioTransform(args.featurizer, predictor.input_size) if transform.using_map != predictor.using_map: raise Exception("cannot mix .ell and compiled models") # set up inputs and outputs if args.wav_file: output_speaker = None if args.speaker: output_speaker = speaker.Speaker() reader = wav_reader.WavReader(args.sample_rate, CHANNELS) reader.open(args.wav_file, transform.input_size, output_speaker) else: reader = microphone.Microphone(True) reader.open(transform.input_size, args.sample_rate, CHANNELS) print("Please type 'x' and enter to terminate this app...") transform.open(reader) try: while True: feature_data = transform.read() if feature_data is None: break else:
def __init__(self, mastername):
    """
    Store the master's name and create a Speaker.

    :param mastername: stored as self.mastername
    """
    self.mastername = mastername
    voice = speaker.Speaker()
    self.speaker = voice
help="Classifier threshold (default 0.6)", default=THRESHOLD, type=float) args = parser.parse_args() predictor = classifier.AudioClassifier(args.classifier, args.categories, [0], args.threshold, SMOOTHING) transform = featurizer.AudioTransform(args.featurizer, predictor.input_size) if transform.using_map != predictor.using_map: raise Exception("cannot mix .ell and compiled models") # setup inputs and outputs if args.wav_file: speaker = speaker.Speaker() # output wav file to speakers at the same time reader = wav_reader.WavReader(args.sample_rate, CHANNELS) reader.open(args.wav_file, transform.input_size, speaker) else: reader = microphone.Microphone(True) reader.open(transform.input_size, args.sample_rate, CHANNELS) print("Please type 'x' and enter to terminate this app...") transform.open(reader) try: while True: feature_data = transform.read() if feature_data is None: break else:
feat_dict = read_img_features(features) train_env = R2RBatch(feat_dict, batch_size=64, splits=['train'], tokenizer=tok) log_dir = "snap/speaker/state_dict/best_val_seen_bleu" val_env_names = ['val_unseen', 'val_seen'] featurized_scans = set([key.split("_")[0] for key in list(feat_dict.keys())]) val_envs = OrderedDict(((split, (R2RBatch(feat_dict, batch_size=args.batchSize, splits=[split], tokenizer=tok), Evaluation([split], featurized_scans, tok))) for split in val_env_names)) listner = Seq2SeqAgent(train_env, "", tok, 35) speaker = speaker.Speaker(train_env, listner, tok) speaker.load(log_dir) speaker.env = train_env results = {} for env_name, (env, evaluator) in val_envs.items(): print("............ Evaluating %s ............." % env_name) speaker.env = env path2inst, loss, word_accu, sent_accu = speaker.valid() r = defaultdict(dict) for path_id in path2inst.keys(): # internal_bleu = evaluator.compute_internal_bleu_score(path_id) # if internal_bleu == 1.0: # import pdb; # pdb.set_trace()