Example 1
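Inflate a training set with noised copies: every audio file under args.folder is overlaid with random noise at a ratio drawn between noise_ratio_low and noise_ratio_high, and the results are written to a mirrored tree under args.output_folder.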
import os
import shutil
from os import makedirs
from os.path import abspath, dirname, join, splitext
from random import random

# create_parser, usage, TrainData, NoiseData, load_audio and save_audio are
# helpers from the surrounding package.


def main():
    args = create_parser(usage).parse_args()
    args.tags_file = abspath(args.tags_file) if args.tags_file else None
    args.folder = abspath(args.folder)
    args.output_folder = abspath(args.output_folder)
    noise_min, noise_max = args.noise_ratio_low, args.noise_ratio_high

    data = TrainData.from_both(args.tags_file, args.folder, args.folder)
    noise_data = NoiseData(args.noise_folder)
    print('Data:', data)

    # Map a path under args.folder to the equivalent path under
    # args.output_folder, optionally inserting a copy index n before the extension
    def translate_filename(source: str, n=0) -> str:
        assert source.startswith(args.folder)
        relative_file = source[len(args.folder):].strip(os.path.sep)
        if n > 0:
            base, ext = splitext(relative_file)
            relative_file = base + '.' + str(n) + ext
        return join(args.output_folder, relative_file)

    # Flatten the nested train/test file lists into a single list
    all_filenames = sum(data.train_files + data.test_files, [])
    for i, filename in enumerate(all_filenames):
        # Progress indicator (guard the denominator for single-file datasets)
        print('{0:.2%}  \r'.format(i / max(1, len(all_filenames) - 1)), end='', flush=True)

        audio = load_audio(filename)
        # Write args.inflation_factor noised copies of each file, each with a
        # noise ratio drawn uniformly from [noise_min, noise_max)
        for n in range(args.inflation_factor):
            altered = noise_data.noised_audio(audio, noise_min + (noise_max - noise_min) * random())
            output_filename = translate_filename(filename, n)

            makedirs(dirname(output_filename), exist_ok=True)
            save_audio(output_filename, altered)

    print('Done!')

    # Mirror the tags file into the output tree if it lives inside the input folder
    if args.tags_file and args.tags_file.startswith(args.folder):
        shutil.copy2(args.tags_file, translate_filename(args.tags_file))
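Example 2
Stream one audio file through the listener chunk by chunk: any chunk whose activation confidence exceeds the threshold is saved as a generated 'not-wake-word' sample (a captured false positive), and the model is periodically retrained.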
    # Assumes module-level imports: numpy as np, random.random and
    # os.path.{splitext, basename, join}, plus the package helpers
    # load_audio, save_audio and chunk_audio.
    def train_on_audio(self, fn: str):
        """Run through a single audio file"""
        save_test = random() > 0.8  # hold out roughly 20% of files as test samples
        audio = load_audio(fn)
        num_chunks = len(audio) // self.args.chunk_size

        self.listener.clear()

        for i, chunk in enumerate(chunk_audio(audio, self.args.chunk_size)):
            print('\r' + str(i * 100. / num_chunks) + '%', end='', flush=True)
            # Slide the rolling audio buffer: drop the oldest samples, append the new chunk
            self.audio_buffer = np.concatenate((self.audio_buffer[len(chunk):], chunk))
            conf = self.listener.update(chunk)
            if conf > self.args.threshold:
                # An activation on this audio is a false positive; save the
                # buffered audio as a generated 'not-wake-word' sample
                self.samples_since_train += 1
                name = splitext(basename(fn))[0] + '-' + str(i) + '.wav'
                name = join(self.args.folder, 'test' if save_test else '', 'not-wake-word',
                            'generated', name)
                save_audio(name, self.audio_buffer)
                print()
                print('Saved to:', name)

            # Retrain once enough new samples have accumulated (training files only)
            if not save_test and self.samples_since_train >= self.args.delay_samples and \
                    self.args.epochs > 0:
                self.samples_since_train = 0
                self.retrain()
Example 3
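A module-level activation callback: write the current audio buffer to a numbered WAV file in the save directory, tracking the counter in a global.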
def on_activation_safe():
    # Relies on module-level state (args, session_id, audio_buffer, chunk_num)
    # and the package's join/save_audio helpers
    global chunk_num
    nm = join(
        args.save_dir,
        args.save_prefix + session_id + '.' + str(chunk_num) + '.wav')
    save_audio(nm, audio_buffer)
    print()
    print('Saved to ' + nm + '.')
    chunk_num += 1
Example 4
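The same pattern as a method: signal the activation, then optionally save the buffered audio under an incrementing per-session filename.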
    def on_activation(self):
        activate_notify()  # package helper that signals the activation

        if self.args.save_dir:
            nm = join(
                self.args.save_dir, self.args.save_prefix + self.session_id +
                '.' + str(self.chunk_num) + '.wav')
            save_audio(nm, self.audio_buffer)
            print()
            print('Saved to ' + nm + '.')
            self.chunk_num += 1
Example 5
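A standalone variant that plays an activation sound through aplay before saving the buffer.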
def on_activation():
    # Popen comes from the subprocess module; args, session_id, audio_buffer
    # and chunk_num are module-level state
    Popen(['aplay', '-q', 'data/activate.wav'])  # play the activation sound
    if args.save_dir:
        global chunk_num
        nm = join(
            args.save_dir,
            args.save_prefix + session_id + '.' + str(chunk_num) + '.wav')
        save_audio(nm, audio_buffer)
        print()
        print('Saved to ' + nm + '.')
        chunk_num += 1
Example 6
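A generator that overlays wake-word recordings onto background audio chunk by chunk, yielding (mfccs, target) pairs for training and optionally saving debug WAVs of the positive and negative windows.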
    # Assumes module-level imports: numpy as np, random.random and
    # os.path.{splitext, basename, join}, plus the package helpers
    # load_audio, save_audio and chunk_audio.
    def vectors_from_fn(self, fn: str):
        """
        Run through a single background audio file, overlaying with wake words.
        Generates (mfccs, target) where mfccs is a series of mfcc values and
        target is a single integer classification of the target network output for that chunk
        """
        audio = load_audio(fn)
        audio_volume = self.calc_volume(audio)
        audio_volume *= 0.4 + 0.5 * random()  # scale to 40-90% of the measured volume
        audio = self.normalize_volume_to(audio, audio_volume)

        self.listener.clear()
        chunked_bg = chunk_audio(audio, self.args.chunk_size)
        chunked_ww = self.chunk_audio_pieces(
            self.generate_wakeword_pieces(audio_volume), self.args.chunk_size)

        for i, (chunk_bg, (chunk_ww, targets)) in enumerate(
                zip(chunked_bg, chunked_ww)):
            chunk = self.merge(chunk_bg, chunk_ww, 0.6)
            # Slide the rolling label and audio buffers forward by one chunk
            self.vals_buffer = np.concatenate(
                (self.vals_buffer[len(targets):], targets))
            self.audio_buffer = np.concatenate(
                (self.audio_buffer[len(chunk):], chunk))
            mfccs = self.listener.update_vectors(chunk)
            percent_overlapping = self.max_run_length(
                self.vals_buffer, 1) / len(self.vals_buffer)

            if self.vals_buffer[-1] == 0 and percent_overlapping > 0.8:
                target = 1  # wake word just ended and dominates the buffer
            elif percent_overlapping < 0.5:
                target = 0  # mostly background audio
            else:
                continue  # ambiguous window; skip it

            # With probability save_prob, dump a debug WAV of this window
            if random() > 1.0 - self.args.save_prob:
                name = splitext(basename(fn))[0]
                wav_file = join('debug', 'ww' if target == 1 else 'nww',
                                '{} - {}.wav'.format(name, i))
                save_audio(wav_file, self.audio_buffer)
            yield mfccs, target
Example 7
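A small helper that writes self.count constant-valued WAV files of a given duration, presumably as synthetic test fixtures.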
    def generate_samples(self, folder, name, value, duration):
        # pr appears to be the package's global audio parameters
        # (pr.sample_rate is the sample rate); name is a filename template
        # formatted with the sample index
        for i in range(self.count):
            save_audio(join(folder, name.format(i)),
                       np.array([value] * int(duration * pr.sample_rate)))
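All seven examples call the same helper with the shape save_audio(path, audio) for a NumPy audio array. For orientation only, here is a minimal sketch of what such a helper might look like, assuming a mono float array in [-1, 1] written as 16-bit PCM at a hypothetical fixed 16 kHz rate; the project's real implementation likely differs (for instance, it would take the rate from its parameters object):

import wave

import numpy as np


def save_audio(filename: str, audio: np.ndarray):
    """Minimal sketch: write a float array in [-1, 1] as mono 16-bit PCM."""
    data = (np.clip(audio, -1.0, 1.0) * 32767).astype('<i2')
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(1)      # mono
        wf.setsampwidth(2)      # 16-bit samples
        wf.setframerate(16000)  # assumed sample rate
        wf.writeframes(data.tobytes())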