def train_on_audio(self, fn: str):
    """Run through a single audio file"""
    save_test = random() > 0.8
    audio = load_audio(fn)
    num_chunks = len(audio) // self.args.chunk_size

    self.listener.clear()

    for i, chunk in enumerate(chunk_audio(audio, self.args.chunk_size)):
        print('\r' + str(i * 100. / num_chunks) + '%', end='', flush=True)
        # Slide the rolling buffer: drop the oldest samples, append the chunk
        self.audio_buffer = np.concatenate((self.audio_buffer[len(chunk):], chunk))
        conf = self.listener.update(chunk)
        if conf > self.args.threshold:
            # Activation on non-wake-word audio: save the buffer as a new
            # "generated" negative sample
            self.samples_since_train += 1
            name = splitext(basename(fn))[0] + '-' + str(i) + '.wav'
            name = join(self.args.folder, 'test' if save_test else '',
                        'not-wake-word', 'generated', name)
            save_audio(name, self.audio_buffer)
            print()
            print('Saved to:', name)

        # Retrain periodically once enough new samples have accumulated
        if not save_test and self.samples_since_train >= self.args.delay_samples and \
                self.args.epochs > 0:
            self.samples_since_train = 0
            self.retrain()
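# The audio_buffer update above implements a fixed-length sliding window
# with np.concatenate; the same trick reappears in vectors_from_fn below.
# A minimal standalone sketch of the mechanism (buffer and chunk sizes are
# made up for illustration, not the real args):
def _sliding_buffer_demo():
    buffer_len, chunk_size = 8, 2
    audio_buffer = np.zeros(buffer_len)
    for k in range(1, 4):
        chunk = np.full(chunk_size, float(k))
        # Drop the oldest len(chunk) samples and append the new chunk, so
        # the buffer always holds the most recent buffer_len samples
        audio_buffer = np.concatenate((audio_buffer[len(chunk):], chunk))
    print(audio_buffer)  # [0. 0. 1. 1. 2. 2. 3. 3.]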
def chunk_audio_pieces(self, pieces, chunk_size):
    """Convert chunks of audio into a series of equally sized pieces"""
    left_over = np.array([])
    for piece in pieces:
        if left_over.size == 0:
            combined = piece
        else:
            combined = np.concatenate([left_over, piece], axis=-1)
        for chunk in chunk_audio(combined.T, chunk_size):
            yield chunk.T
        # Carry the ragged tail (samples past the last full chunk) of the
        # combined buffer into the next iteration; this is an empty slice
        # when the length divides evenly
        remainder = combined.shape[-1] % chunk_size
        left_over = combined[..., combined.shape[-1] - remainder:]
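# The methods here lean on a chunk_audio helper whose definition is not
# shown. A minimal sketch of the behavior they assume -- yield consecutive
# fixed-size windows and drop any ragged tail (exactly what the left_over
# bookkeeping above compensates for). This is an illustrative stand-in,
# not necessarily the real implementation:
def _chunk_audio_sketch(audio, chunk_size):
    # Yield consecutive full windows along the first axis; trailing
    # samples short of a full chunk are dropped
    for i in range(chunk_size, len(audio) + 1, chunk_size):
        yield audio[i - chunk_size:i]
# Example: list(_chunk_audio_sketch(np.arange(10), 4))
# -> [array([0, 1, 2, 3]), array([4, 5, 6, 7])]; samples 8-9 are the tail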
def vectors_from_fn(self, fn: str):
    """
    Run through a single background audio file, overlaying with wake words.
    Generates (mfccs, target) where mfccs is a series of mfcc values and
    target is a single integer classification of the target network output
    for that chunk
    """
    audio = load_audio(fn)
    audio_volume = self.calc_volume(audio)
    audio_volume *= 0.4 + 0.5 * random()
    audio = self.normalize_volume_to(audio, audio_volume)

    self.listener.clear()
    chunked_bg = chunk_audio(audio, self.args.chunk_size)
    chunked_ww = self.chunk_audio_pieces(
        self.generate_wakeword_pieces(audio_volume), self.args.chunk_size)

    for i, (chunk_bg, (chunk_ww, targets)) in enumerate(zip(chunked_bg, chunked_ww)):
        # Mix the background and wake-word streams, then slide the rolling
        # label and audio buffers in lockstep
        chunk = self.merge(chunk_bg, chunk_ww, 0.6)
        self.vals_buffer = np.concatenate(
            (self.vals_buffer[len(targets):], targets))
        self.audio_buffer = np.concatenate(
            (self.audio_buffer[len(chunk):], chunk))
        mfccs = self.listener.update_vectors(chunk)
        # Fraction of the buffer covered by the longest run of samples
        # labeled as wake word
        percent_overlapping = self.max_run_length(
            self.vals_buffer, 1) / len(self.vals_buffer)

        if self.vals_buffer[-1] == 0 and percent_overlapping > 0.8:
            # The wake word just ended and dominated the buffer: positive
            target = 1
        elif percent_overlapping < 0.5:
            # Mostly background audio: negative
            target = 0
        else:
            # Ambiguous overlap: skip this chunk entirely
            continue

        if random() < self.args.save_prob:
            # Occasionally dump the raw buffer for manual inspection
            name = splitext(basename(fn))[0]
            wav_file = join('debug', 'ww' if target == 1 else 'nww',
                            '{} - {}.wav'.format(name, i))
            save_audio(wav_file, self.audio_buffer)
        yield mfccs, target
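# A hedged usage sketch for the generator above: collect (mfccs, target)
# pairs from a set of background files into training arrays. The trainer
# instance and background_files list are assumed to exist; the shapes in
# the comments are illustrative:
def _collect_vectors(trainer, background_files):
    inputs, outputs = [], []
    for fn in background_files:
        for mfccs, target in trainer.vectors_from_fn(fn):
            inputs.append(mfccs)    # one window of MFCC features
            outputs.append(target)  # 1 = wake word just ended, 0 = not
    # X: (num_samples, timesteps, n_mfcc); y: (num_samples, 1)
    return np.array(inputs), np.array(outputs)[:, np.newaxis]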