Example #1
    def train_on_audio(self, fn: str):
        """Run through a single audio file"""
        save_test = random() > 0.8  # Hold out roughly 20% of files as test data
        audio = load_audio(fn)
        num_chunks = len(audio) // self.args.chunk_size

        self.listener.clear()

        for i, chunk in enumerate(chunk_audio(audio, self.args.chunk_size)):
            print('\r' + str(i * 100. / num_chunks) + '%', end='', flush=True)
            # Slide the fixed-length audio buffer forward by one chunk
            self.audio_buffer = np.concatenate((self.audio_buffer[len(chunk):], chunk))
            conf = self.listener.update(chunk)
            if conf > self.args.threshold:
                # False activation: save the buffered audio as a negative sample
                self.samples_since_train += 1
                name = splitext(basename(fn))[0] + '-' + str(i) + '.wav'
                name = join(self.args.folder, 'test' if save_test else '', 'not-wake-word',
                            'generated', name)
                save_audio(name, self.audio_buffer)
                print()
                print('Saved to:', name)

            # Retrain once enough new negative samples have accumulated
            if not save_test and self.samples_since_train >= self.args.delay_samples and \
                    self.args.epochs > 0:
                self.samples_since_train = 0
                self.retrain()
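
These examples lean on helpers from the surrounding project (load_audio, save_audio, chunk_audio) that are not shown. As a minimal sketch, assuming chunk_audio simply yields consecutive, non-overlapping windows of chunk_size samples along the first axis and drops any trailing partial window, it could look like this (the real implementation may differ):

import numpy as np

def chunk_audio(audio: np.ndarray, chunk_size: int):
    """Yield consecutive, non-overlapping windows of chunk_size samples."""
    for i in range(chunk_size, len(audio) + 1, chunk_size):
        yield audio[i - chunk_size:i]

Dropping the partial tail matters here: chunk_audio_pieces in Example #2 exists precisely to carry that remainder over to the next piece.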
Example #2
    def chunk_audio_pieces(self, pieces, chunk_size):
        """Convert chunks of audio into a series of equally sized pieces"""
        left_over = np.array([])
        for piece in pieces:
            # Prepend whatever was carried over from the previous piece
            if left_over.size == 0:
                combined = piece
            else:
                combined = np.concatenate([left_over, piece], axis=-1)
            for chunk in chunk_audio(combined.T, chunk_size):
                yield chunk.T
            # Carry over the tail samples that didn't fill a whole chunk.
            # The remainder must come from `combined` (not `piece`), and a
            # remainder of 0 must produce an empty carry-over: arr[-0:]
            # returns the entire array, which would duplicate audio.
            remainder = combined.shape[-1] % chunk_size
            left_over = combined[..., -remainder:] if remainder else np.array([])
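
As a quick sanity check of the carry-over logic, the method can be rewritten as a standalone free function and run on two synthetic 1-D pieces (this test harness is illustrative, not part of the original project):

import numpy as np

def chunk_audio(audio, chunk_size):
    for i in range(chunk_size, len(audio) + 1, chunk_size):
        yield audio[i - chunk_size:i]

def chunk_audio_pieces(pieces, chunk_size):
    left_over = np.array([])
    for piece in pieces:
        combined = piece if left_over.size == 0 else \
            np.concatenate([left_over, piece], axis=-1)
        for chunk in chunk_audio(combined.T, chunk_size):
            yield chunk.T
        remainder = combined.shape[-1] % chunk_size
        left_over = combined[..., -remainder:] if remainder else np.array([])

# Pieces of 5 and 7 samples are re-chunked into equal pieces of 4,
# with the leftover sample of the first piece carried into the second:
pieces = [np.arange(5), np.arange(5, 12)]
print([c.tolist() for c in chunk_audio_pieces(pieces, 4)])
# -> [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]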
Example #3
    def vectors_from_fn(self, fn: str):
        """
        Run through a single background audio file, overlaying with wake words.
        Generates (mfccs, target) where mfccs is a series of mfcc values and
        target is the desired network output (0 or 1) for that chunk
        """
        audio = load_audio(fn)
        audio_volume = self.calc_volume(audio)
        audio_volume *= 0.4 + 0.5 * random()  # Randomize volume to 40-90%
        audio = self.normalize_volume_to(audio, audio_volume)

        self.listener.clear()
        chunked_bg = chunk_audio(audio, self.args.chunk_size)
        chunked_ww = self.chunk_audio_pieces(
            self.generate_wakeword_pieces(audio_volume), self.args.chunk_size)

        for i, (chunk_bg, (chunk_ww, targets)) in \
                enumerate(zip(chunked_bg, chunked_ww)):
            # Overlay the wake word chunk onto the background chunk
            chunk = self.merge(chunk_bg, chunk_ww, 0.6)
            # Slide the fixed-length label and audio buffers forward by one chunk
            self.vals_buffer = np.concatenate(
                (self.vals_buffer[len(targets):], targets))
            self.audio_buffer = np.concatenate(
                (self.audio_buffer[len(chunk):], chunk))
            mfccs = self.listener.update_vectors(chunk)
            # Fraction of the buffer covered by the longest run of wake word labels
            percent_overlapping = (self.max_run_length(self.vals_buffer, 1)
                                   / len(self.vals_buffer))

            if self.vals_buffer[-1] == 0 and percent_overlapping > 0.8:
                target = 1  # Wake word just ended: the network should activate
            elif percent_overlapping < 0.5:
                target = 0  # Mostly background: the network should stay quiet
            else:
                continue  # Ambiguous overlap: skip the noisy label entirely

            if random() < self.args.save_prob:
                # Occasionally dump the buffer to a debug folder for inspection
                name = splitext(basename(fn))[0]
                wav_file = join('debug', 'ww' if target == 1 else 'nww',
                                '{} - {}.wav'.format(name, i))
                save_audio(wav_file, self.audio_buffer)
            yield mfccs, target
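
vectors_from_fn also relies on a max_run_length helper that is not shown. Judging only from the call site, it should return the length of the longest consecutive run of a given value, so a plausible sketch (an assumption, not the project's actual code) is:

import numpy as np

def max_run_length(arr: np.ndarray, value) -> int:
    """Length of the longest consecutive run of `value` in `arr`."""
    best = run = 0
    for x in arr:
        run = run + 1 if x == value else 0
        best = max(best, run)
    return best

assert max_run_length(np.array([0, 1, 1, 1, 0, 1]), 1) == 3

With that reading, percent_overlapping measures how much of the rolling buffer is covered by a single uninterrupted stretch of wake word labels, which is why a value above 0.8 combined with a trailing 0 label marks a just-completed wake word.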