def train_on_audio(self, fn: str):
    """Run through a single audio file, harvesting false activations.

    Streams the file chunk-by-chunk through the listener; whenever the
    network's confidence exceeds the threshold, the current audio buffer
    is saved as a generated not-wake-word sample.  After enough new
    samples have accumulated, the model is retrained.

    Args:
        fn: Path of the wav file to stream through the listener
    """
    # Roughly 20% of the generated samples are routed to the test set
    save_test = random() > 0.8
    audio = load_audio(fn)
    # Guard the progress denominator: a clip shorter than one chunk
    # previously raised ZeroDivisionError on the first print below
    num_chunks = max(1, len(audio) // self.args.chunk_size)
    self.listener.clear()

    for i, chunk in enumerate(chunk_audio(audio, self.args.chunk_size)):
        print('\r' + str(i * 100. / num_chunks) + '%', end='', flush=True)
        # Sliding window over the most recent audio samples
        self.audio_buffer = np.concatenate((self.audio_buffer[len(chunk):], chunk))
        conf = self.listener.update(chunk)
        if conf > self.args.threshold:
            # False activation on background audio: save the buffer as a
            # generated not-wake-word sample
            self.samples_since_train += 1
            name = splitext(basename(fn))[0] + '-' + str(i) + '.wav'
            name = join(self.args.folder, 'test' if save_test else '',
                        'not-wake-word', 'generated', name)
            save_audio(name, self.audio_buffer)
            print()
            print('Saved to:', name)
        # Retrain only on the training partition, once enough new
        # samples have been collected and training is enabled
        if not save_test and self.samples_since_train >= self.args.delay_samples and \
                self.args.epochs > 0:
            self.samples_since_train = 0
            self.retrain()
def main():
    """Inflate a dataset by mixing noise into each sample.

    For every wav referenced by the train data, writes
    ``inflation_factor`` noised copies into a mirrored directory tree
    under ``output_folder``.
    """
    args = create_parser(usage).parse_args()
    args.tags_file = abspath(args.tags_file) if args.tags_file else None
    args.folder = abspath(args.folder)
    args.output_folder = abspath(args.output_folder)
    noise_min, noise_max = args.noise_ratio_low, args.noise_ratio_high

    data = TrainData.from_both(args.tags_file, args.folder, args.folder)
    noise_data = NoiseData(args.noise_folder)
    print('Data:', data)

    def translate_filename(source: str, n=0) -> str:
        # Map a source path to its mirror under output_folder, inserting
        # the inflation index n before the extension when n > 0
        assert source.startswith(args.folder)
        relative_file = source[len(args.folder):].strip(os.path.sep)
        if n > 0:
            base, ext = splitext(relative_file)
            relative_file = base + '.' + str(n) + ext
        return join(args.output_folder, relative_file)

    all_filenames = sum(data.train_files + data.test_files, [])
    # Guard the denominator: with exactly one input file the original
    # expression divided by zero
    progress_total = max(1, len(all_filenames) - 1)
    for i, filename in enumerate(all_filenames):
        print('{0:.2%} \r'.format(i / progress_total), end='', flush=True)
        audio = load_audio(filename)
        for n in range(args.inflation_factor):
            altered = noise_data.noised_audio(
                audio, noise_min + (noise_max - noise_min) * random())
            output_filename = translate_filename(filename, n)
            makedirs(dirname(output_filename), exist_ok=True)
            save_audio(output_filename, altered)
    print('Done!')

    # Mirror the tags file too if it lives inside the source folder
    if args.tags_file and args.tags_file.startswith(args.folder):
        shutil.copy2(args.tags_file, translate_filename(args.tags_file))
def load_vector(name: str, vectorizer: Callable = None) -> np.ndarray:
    """Loads and caches a vector input from a wav or npy file.

    .npy inputs are loaded directly; wav files are vectorized and the
    result cached under .cache/, keyed on an md5 of the current params
    so a parameter change invalidates the cache.

    Args:
        name: Path to a .wav or .npy file
        vectorizer: Feature extractor; defaults per pr.use_delta
    Returns:
        The cached or freshly computed feature array
    """
    # (removed dead commented-out debug prints)
    import os
    vectorizer = vectorizer or (vectorize_delta if pr.use_delta else vectorize)
    save_name = name if name.endswith('.npy') else os.path.join(
        '.cache', hashlib.md5(
            str(sorted(pr.__dict__.values())).encode()
        ).hexdigest(), vectorizer.__name__ + '.' + name + '.npy')
    if os.path.isfile(save_name):
        return np.load(save_name)
    print('Loading ' + name + '...')
    os.makedirs(os.path.dirname(save_name), exist_ok=True)
    vec = vectorizer(load_audio(name))
    np.save(save_name, vec)
    return vec
def play_audio(audio_file):
    """Play *audio_file* on the shared PyAudio stream, replacing any
    clip already playing.

    NOTE(review): this is a closure — it reads `stream`, `stop_event`,
    `p`, and `pr` from an enclosing scope not visible here.  The
    `stop_event` polarity appears inverted (set == "keep playing",
    clear == "stop requested"); confirm against the enclosing function.
    """
    nonlocal stream
    if stream:
        # Request the current writer thread to stop (clear), then wait
        # for it to acknowledge (it re-sets the event) before tearing
        # down the old stream.
        stop_event.clear()
        stop_event.wait()
        stream.stop_stream()
        stream.close()
        stream = None
    # Keep only the most recent buffer_samples of the clip
    audio = load_audio(audio_file)[-pr.buffer_samples:]
    # Volume normalization; denominator mixes mean + 4*std with the peak
    audio /= 2 * min(audio.mean() + 4 * audio.std(), abs(audio).max())
    stream = p.open(format=paFloat32, channels=1, rate=pr.sample_rate,
                    output=True)
    stream.start_stream()

    def write_audio():
        # Feed float32 samples to PyAudio in fixed-size chunks on a
        # background thread.
        data = audio.astype('float32').tostring()
        chunk_size = 1024
        for pos in range(chunk_size, len(data) + chunk_size, chunk_size):
            if not stop_event.is_set():
                # Stop requested by the main thread: acknowledge and exit
                stop_event.set()
                return
            stream.write(data[pos - chunk_size:pos])
        # Finished writing: idle until a stop is requested, then ack
        while stop_event.is_set():
            sleep(chunk_size / pr.sample_rate)
        stop_event.set()

    Thread(target=write_audio, daemon=True).start()
def __init__(self, noise_folder: str):
    """Load every wav file found in *noise_folder* as noise source data."""
    self.noise_data = []
    for wav_path in glob(join(noise_folder, '*.wav')):
        self.noise_data.append(load_audio(wav_path))
    # Cursor state for walking through the loaded noise clips
    self.noise_data_id = 0
    self.noise_pos = 0
    self.repeat_count = 0
def run(self):
    """Evaluate every wav in the configured folder, printing per-file
    activation metrics and an aggregate total."""
    total = Metric(chunk_size=self.args.chunk_size)
    for i in glob(join(self.args.folder, '*.wav')):
        audio = load_audio(i)
        if audio.size == 0:
            # Skip empty/unreadable files rather than crashing
            continue
        predictions = self.evaluate(audio)
        # trigger_level=0: the detector fires as soon as a prediction
        # crosses the sensitivity threshold
        detector = TriggerDetector(self.args.chunk_size, trigger_level=0,
                                   sensitivity=self.args.threshold)
        metric = Metric(
            chunk_size=self.args.chunk_size,
            seconds=len(audio) / pr.sample_rate,
            # Chunks whose raw prediction exceeds the threshold
            activated_chunks=(predictions > detector.sensitivity).sum(),
            # Distinct activations per the trigger detector's debouncing
            activations=sum(detector.update(i) for i in predictions),
            activation_sum=predictions.sum())
        total.add(metric)
        print()
        print(metric.info_string(basename(i)))
        # Release the audio buffer before loading the next file
        del audio
    print()
    print()
    print(total.info_string('Total'))
def generate_wakeword_pieces(self, volume):
    """Generates chunks of audio that represent the wakeword stream"""
    while True:
        # Coin flip: wake-word sample (1) or not-wake-word sample (0)
        target = int(random() > 0.5)
        files_it = self.pos_files_it if target else self.neg_files_it
        sample = load_audio(next(files_it))
        yield self.layer_with(
            self.normalize_volume_to(sample, volume), target)
        # Follow each sample with 0.5–2.5 s of silence, labeled 0
        silence_samples = int(pr.sample_rate * (0.5 + 2.0 * random()))
        yield self.layer_with(np.zeros(silence_samples), 0)
def load_vector(name: str, vectorizer: Callable = None) -> np.ndarray:
    """Loads and caches a vector input from a wav or npy file"""
    if vectorizer is None:
        vectorizer = vectorize_delta if pr.use_delta else vectorize
    # .npy files are their own cache; wavs resolve to a cache location
    cache_path = name if name.endswith('.npy') else get_cache_file(name)
    if os.path.isfile(cache_path):
        return np.load(cache_path)
    os.makedirs(os.path.dirname(cache_path), exist_ok=True)
    features = vectorizer(load_audio(name))
    np.save(cache_path, features)
    return features
def __load_files(kw_files: list, nkw_files: list,
                 vectorizer: Callable = None, shuffle=True) -> tuple:
    """Load and vectorize wake-word and not-wake-word audio files.

    Features are pulled through a Pyache disk cache keyed on the
    vectorization parameter hash, so unchanged files are not recomputed.

    Args:
        kw_files: Filenames of wake-word samples (labeled 1.0)
        nkw_files: Filenames of not-wake-word samples (labeled 0.0)
        vectorizer: Feature extractor; defaults per pr.use_delta
        shuffle: Whether to shuffle the combined dataset
    Returns:
        (inputs, outputs) numpy arrays, aligned and optionally shuffled
    """
    # Single import here; the duplicate re-import further down was removed
    from precise.params import pr
    input_parts = []
    output_parts = []
    vectorizer = vectorizer or (vectorize_delta if pr.use_delta else vectorize)
    cache = Pyache('.cache', lambda x: vectorizer(load_audio(x)),
                   pr.vectorization_md5_hash())

    def add(filenames, output):
        # Load (cached) feature vectors and pair each with its label
        def on_loop():
            on_loop.i += 1
            print('\r{0:.2%} '.format(on_loop.i / len(filenames)),
                  end='', flush=True)
        on_loop.i = 0

        new_inputs = cache.load(filenames, on_loop=on_loop)
        new_outputs = np.array([[output] for _ in range(len(new_inputs))])
        # Keep array ranks consistent even when no files were given
        if new_inputs.size == 0:
            new_inputs = np.empty((0, pr.n_features, pr.feature_size))
        if new_outputs.size == 0:
            new_outputs = np.empty((0, 1))
        input_parts.append(new_inputs)
        output_parts.append(new_outputs)
        print('\r \r', end='', flush=True)

    print('Loading wake-word...')
    add(kw_files, 1.0)
    print('Loading not-wake-word...')
    add(nkw_files, 0.0)

    inputs = np.concatenate(input_parts) if input_parts else np.empty(
        (0, pr.n_features, pr.feature_size))
    outputs = np.concatenate(output_parts) if output_parts else np.empty(
        (0, 1))
    shuffle_ids = np.arange(len(inputs))
    if shuffle:
        np.random.shuffle(shuffle_ids)
    return inputs[shuffle_ids], outputs[shuffle_ids]
def load_vector(name: str, vectorizer: Callable = vectorize) -> np.ndarray:
    """Loads and caches a vector input from a wav or npy file

    NOTE(review): the cache directory is keyed on abs(hash(pr)); if `pr`
    does not define a value-based __hash__, this falls back to an
    identity-based hash that changes each process, so the on-disk cache
    would never hit across runs — confirm against precise.params.
    """
    import os
    # .npy inputs are loaded directly; wavs go through the .cache tree
    save_name = name if name.endswith('.npy') else os.path.join(
        '.cache', str(abs(hash(pr))),
        vectorizer.__name__ + '.' + name + '.npy')
    if os.path.isfile(save_name):
        return np.load(save_name)
    print('Loading ' + name + '...')
    os.makedirs(os.path.dirname(save_name), exist_ok=True)
    vec = vectorizer(load_audio(name))
    np.save(save_name, vec)
    return vec
def vectors_from_fn(self, fn: str):
    """
    Run through a single background audio file, overlaying with wake words.
    Generates (mfccs, target) where mfccs is a series of mfcc values and
    target is a single integer classification of the target network output for that chunk
    """
    audio = load_audio(fn)
    audio_volume = self.calc_volume(audio)
    # Randomly attenuate the background to 0.4–0.9 of its volume
    audio_volume *= 0.4 + 0.5 * random()
    audio = self.normalize_volume_to(audio, audio_volume)

    self.listener.clear()
    chunked_bg = chunk_audio(audio, self.args.chunk_size)
    chunked_ww = self.chunk_audio_pieces(
        self.generate_wakeword_pieces(audio_volume), self.args.chunk_size)

    for i, (chunk_bg, (chunk_ww, targets)) in enumerate(zip(chunked_bg, chunked_ww)):
        # Mix the wake-word audio over the background at 0.6 weight
        chunk = self.merge(chunk_bg, chunk_ww, 0.6)
        # Sliding windows: per-sample labels and the raw audio
        self.vals_buffer = np.concatenate(
            (self.vals_buffer[len(targets):], targets))
        self.audio_buffer = np.concatenate(
            (self.audio_buffer[len(chunk):], chunk))
        mfccs = self.listener.update_vectors(chunk)
        # Fraction of the label buffer covered by the longest run of 1s
        percent_overlapping = self.max_run_length(
            self.vals_buffer, 1) / len(self.vals_buffer)

        # Label 1 only when the wake word has just ended (last label 0)
        # and dominated the buffer; clearly-background chunks get 0;
        # ambiguous chunks are skipped entirely.
        if self.vals_buffer[-1] == 0 and percent_overlapping > 0.8:
            target = 1
        elif percent_overlapping < 0.5:
            target = 0
        else:
            continue

        if random() > 1.0 - self.args.save_prob:
            # Occasionally dump the labeled window for manual inspection
            name = splitext(basename(fn))[0]
            wav_file = join('debug', 'ww' if target == 1 else 'nww',
                            '{} - {}.wav'.format(name, i))
            save_audio(wav_file, self.audio_buffer)
        yield mfccs, target
def play_wav(filename, p: PyAudio): audio = load_audio(filename)