Code example #1
class TrainIncrementalScript(TrainScript):
    usage = Usage('''
        Train a model to inhibit activation by
        marking false activations and retraining

        :-e --epochs int 1
            Number of epochs to train before continuing evaluation

        :-ds --delay-samples int 10
            Number of false activations to save before re-training

        :-c --chunk-size int 2048
            Number of samples between testing the neural network

        :-r --random-data-folder str data/random
            Folder with properly encoded wav files of
            random audio that should not cause an activation

        :-th --threshold float 0.5
            Network output to be considered activated

        ...
    ''') | TrainScript.usage

    def __init__(self, args):
        super().__init__(args)

        for i in (
                join(self.args.folder, 'not-wake-word', 'generated'),
                join(self.args.folder, 'test', 'not-wake-word', 'generated')
        ):
            makedirs(i, exist_ok=True)

        self.trained_fns = load_trained_fns(self.args.model)
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)

        params = ModelParams(
            skip_acc=self.args.no_validation, extra_metrics=self.args.extra_metrics,
            loss_bias=1.0 - self.args.sensitivity
        )
        model = create_model(self.args.model, params)
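        # Attach the freshly created in-memory model to the listener's runner so
        # that weights updated by retrain() are used directly for the confidence
        # scores returned by listener.update() in train_on_audio()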
        self.listener = Listener(self.args.model, self.args.chunk_size, runner_cls=KerasRunner)
        self.listener.runner = KerasRunner(self.args.model)
        self.listener.runner.model = model
        self.samples_since_train = 0

    @staticmethod
    def load_data(args: Any):
        data = TrainData.from_tags(args.tags_file, args.tags_folder)
        return data.load(True, not args.no_validation)

    def retrain(self):
        """Train for a session, pulling in any new data from the filesystem"""
        folder = TrainData.from_folder(self.args.folder)
        train_data, test_data = folder.load(True, not self.args.no_validation)

        train_data = TrainData.merge(train_data, self.sampled_data)
        test_data = TrainData.merge(test_data, self.test)
        train_inputs, train_outputs = train_data
        print()
        try:
            self.listener.runner.model.fit(
                train_inputs, train_outputs, self.args.batch_size, self.epoch + self.args.epochs,
                validation_data=test_data, callbacks=self.callbacks, initial_epoch=self.epoch
            )
        finally:
            self.listener.runner.model.save(self.args.model)

    def train_on_audio(self, fn: str):
        """Run through a single audio file"""
        save_test = random() > 0.8
        audio = load_audio(fn)
        num_chunks = len(audio) // self.args.chunk_size

        self.listener.clear()

        for i, chunk in enumerate(chunk_audio(audio, self.args.chunk_size)):
            print('\r' + str(i * 100. / num_chunks) + '%', end='', flush=True)
            self.audio_buffer = np.concatenate((self.audio_buffer[len(chunk):], chunk))
            conf = self.listener.update(chunk)
            if conf > self.args.threshold:
                self.samples_since_train += 1
                name = splitext(basename(fn))[0] + '-' + str(i) + '.wav'
                name = join(self.args.folder, 'test' if save_test else '', 'not-wake-word',
                            'generated', name)
                save_audio(name, self.audio_buffer)
                print()
                print('Saved to:', name)

            if not save_test and self.samples_since_train >= self.args.delay_samples and \
                    self.args.epochs > 0:
                self.samples_since_train = 0
                self.retrain()

    def run(self):
        """
        Begin reading through audio files, saving false
        activations and retraining when necessary
        """
        for fn in glob_all(self.args.random_data_folder, '*.wav'):
            if fn in self.trained_fns:
                print('Skipping ' + fn + '...')
                continue

            print('Starting file ' + fn + '...')
            self.train_on_audio(fn)
            print('\r100%                 ')

            self.trained_fns.append(fn)
            save_trained_fns(self.trained_fns, self.args.model)
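
The script keeps a fixed-length rolling buffer of the most recent audio so that a full window can be written out the moment a false activation fires. A minimal sketch of that buffer update, using an illustrative size instead of pr.buffer_samples:

import numpy as np

buffer_samples = 8                           # illustrative; the script uses pr.buffer_samples
audio_buffer = np.zeros(buffer_samples, dtype=float)

chunk = np.array([0.1, 0.2, 0.3])            # one decoded chunk of incoming audio
# drop the oldest len(chunk) samples and append the new chunk on the right,
# so the buffer always holds the most recent buffer_samples of audio
audio_buffer = np.concatenate((audio_buffer[len(chunk):], chunk))
assert len(audio_buffer) == buffer_samples
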
Code example #2
class TrainGeneratedScript(BaseScript):
    usage = Usage('''
        Train a model on infinitely generated batches

        :model str
            Keras .net model file to load from and write to

        :-e --epochs int 100
            Number of epochs to train on

        :-b --batch-size int 200
            Number of samples in each batch

        :-t --steps-per-epoch int 100
            Number of steps that are considered an epoch

        :-c --chunk-size int 2048
            Number of audio samples between generating a training sample

        :-r --random-data-folder str data/random
            Folder with properly encoded wav files of
            random audio that should not cause an activation

        :-s --sensitivity float 0.2
            Weighted loss bias. Higher values decrease false negatives
            but increase false positives

        :-sb --save-best
            Only save the model each epoch if its stats improve

        :-nv --no-validation
            Disable accuracy and validation calculation
            to improve speed during training

        :-mm --metric-monitor str loss
            Metric used to determine when to save

        :-em --extra-metrics
            Add extra metrics during training

        :-p --save-prob float 0.0
            Probability of saving audio into debug/ww and debug/nww folders

        ...
    ''') | TrainData.usage
    """A trainer the runs on generated data by overlaying wakewords on background audio"""
    def __init__(self, args):
        super().__init__(args)
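        # audio_buffer keeps the most recent pr.buffer_samples of mixed audio;
        # vals_buffer keeps the matching per-sample labels (1 inside an overlaid
        # wake word, 0 otherwise) so a training target can be derived per chunk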
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)
        self.vals_buffer = np.zeros(pr.buffer_samples, dtype=float)

        params = ModelParams(skip_acc=args.no_validation,
                             extra_metrics=args.extra_metrics,
                             loss_bias=1.0 - args.sensitivity)
        self.model = create_model(args.model, params)
        self.listener = Listener('',
                                 args.chunk_size,
                                 runner_cls=lambda x: None)

        from keras.callbacks import ModelCheckpoint, TensorBoard
        checkpoint = ModelCheckpoint(args.model,
                                     monitor=args.metric_monitor,
                                     save_best_only=args.save_best)
        epoch_fiti = Fitipy(splitext(args.model)[0] + '.epoch')
        self.epoch = epoch_fiti.read().read(0, int)

        def on_epoch_end(_a, _b):
            self.epoch += 1
            epoch_fiti.write().write(self.epoch, str)

        self.model_base = splitext(self.args.model)[0]

        self.callbacks = [
            checkpoint,
            TensorBoard(log_dir=self.model_base + '.logs'),
            LambdaCallback(on_epoch_end=on_epoch_end)
        ]

        self.data = TrainData.from_both(args.tags_file, args.tags_folder,
                                        args.folder)
        pos_files, neg_files = self.data.train_files
        self.neg_files_it = iter(cycle(neg_files))
        self.pos_files_it = iter(cycle(pos_files))

    def layer_with(self, sample: np.ndarray, value: int) -> np.ndarray:
        """Create an identical 2d array where the second row is filled with value"""
        b = np.full((2, len(sample)), value, dtype=float)
        b[0] = sample
        return b
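    # e.g. layer_with(np.array([0.1, 0.2]), 1) == [[0.1, 0.2], [1.0, 1.0]]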

    def generate_wakeword_pieces(self, volume):
        """Generates chunks of audio that represent the wakeword stream"""
        while True:
            target = 1 if random() > 0.5 else 0
            it = self.pos_files_it if target else self.neg_files_it
            sample_file = next(it)
            yield self.layer_with(
                self.normalize_volume_to(load_audio(sample_file), volume),
                target)
            yield self.layer_with(
                np.zeros(int(pr.sample_rate * (0.5 + 2.0 * random()))), 0)

    def chunk_audio_pieces(self, pieces, chunk_size):
        """Convert chunks of audio into a series of equally sized pieces"""
        left_over = np.array([])
        for piece in pieces:
            if left_over.size == 0:
                combined = piece
            else:
                combined = np.concatenate([left_over, piece], axis=-1)
            for chunk in chunk_audio(combined.T, chunk_size):
                yield chunk.T
            left_over = piece[-(len(piece) % chunk_size):]

    def calc_volume(self, sample: np.ndarray):
        """Find the RMS of the audio"""
        return sqrt(np.mean(np.square(sample)))

    def normalize_volume_to(self, sample, volume):
        """Normalize the volume to a certain RMS"""
        return volume * sample / self.calc_volume(sample)

    def merge(self, a, b, ratio):
        """Perform a weighted sum of a and b. ratio=1.0 means 100% of b and 0% of a"""
        return (1.0 - ratio) * a + ratio * b
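    # e.g. merge(a, b, 0.6) == 0.4 * a + 0.6 * b: 60% wake-word chunk over 40% background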

    @staticmethod
    def max_run_length(x: np.ndarray, val: int):
        """Finds the maximum continuous length of the given value in the sequence"""
        if x.size == 0:
            return 0
        else:
            y = np.array(x[1:] != x[:-1])
            i = np.append(np.where(y), len(x) - 1)
            run_lengths = np.diff(np.append(-1, i))
            run_length_values = x[i]
            return max([
                rl for rl, v in zip(run_lengths, run_length_values) if v == val
            ],
                       default=0)
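    # e.g. max_run_length(np.array([0, 1, 1, 1, 0, 1]), 1) == 3: the longest
    # unbroken run of ones in the label buffer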

    def vectors_from_fn(self, fn: str):
        """
        Run through a single background audio file, overlaying with wake words.
        Generates (mfccs, target) where mfccs is a series of mfcc values and
        target is a single integer classification of the target network output for that chunk
        """
        audio = load_audio(fn)
        audio_volume = self.calc_volume(audio)
        audio_volume *= 0.4 + 0.5 * random()
        audio = self.normalize_volume_to(audio, audio_volume)

        self.listener.clear()
        chunked_bg = chunk_audio(audio, self.args.chunk_size)
        chunked_ww = self.chunk_audio_pieces(
            self.generate_wakeword_pieces(audio_volume), self.args.chunk_size)

        for i, (chunk_bg, (chunk_ww,
                           targets)) in enumerate(zip(chunked_bg, chunked_ww)):
            chunk = self.merge(chunk_bg, chunk_ww, 0.6)
            self.vals_buffer = np.concatenate(
                (self.vals_buffer[len(targets):], targets))
            self.audio_buffer = np.concatenate(
                (self.audio_buffer[len(chunk):], chunk))
            mfccs = self.listener.update_vectors(chunk)
            percent_overlapping = self.max_run_length(
                self.vals_buffer, 1) / len(self.vals_buffer)

            if self.vals_buffer[-1] == 0 and percent_overlapping > 0.8:
                target = 1
            elif percent_overlapping < 0.5:
                target = 0
            else:
                continue

            if random() > 1.0 - self.args.save_prob:
                name = splitext(basename(fn))[0]
                wav_file = join('debug', 'ww' if target == 1 else 'nww',
                                '{} - {}.wav'.format(name, i))
                save_audio(wav_file, self.audio_buffer)
            yield mfccs, target

    @staticmethod
    def samples_to_batches(samples: Iterable, batch_size: int):
        """Chunk a series of network inputs and outputs into larger batches"""
        it = iter(samples)
        while True:
            with suppress(StopIteration):
                batch_in, batch_out = [], []
                for i in range(batch_size):
                    sample_in, sample_out = next(it)
                    batch_in.append(sample_in)
                    batch_out.append(sample_out)
            if not batch_in:
                return  # sample stream exhausted; PEP 479 disallows raising StopIteration inside a generator
            yield np.array(batch_in), np.array(batch_out)

    def generate_samples(self):
        """Generate training samples (network inputs and outputs)"""
        filenames = glob_all(self.args.random_data_folder, '*.wav')
        shuffle(filenames)
        while True:
            for fn in filenames:
                for x, y in self.vectors_from_fn(fn):
                    yield x, y

    def run(self):
        """Train the model on randomly generated batches"""
        _, test_data = self.data.load(train=False, test=True)
        try:
            self.model.fit_generator(self.samples_to_batches(
                self.generate_samples(), self.args.batch_size),
                                     steps_per_epoch=self.args.steps_per_epoch,
                                     epochs=self.epoch + self.args.epochs,
                                     validation_data=test_data,
                                     callbacks=self.callbacks,
                                     initial_epoch=self.epoch)
        finally:
            self.model.save(self.args.model)
            save_params(self.args.model)
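
The chunk labelling in vectors_from_fn can be tried in isolation. The sketch below reuses the thresholds from the listing (an overlap fraction above 0.8 for a positive, below 0.5 for a negative); the toy buffer and the simplified run-length helper are illustrative, not part of the script:

import numpy as np

def max_run_length(x, val):
    # longest unbroken run of `val` in a 1-D array (same idea as the listing above)
    best = run = 0
    for v in x:
        run = run + 1 if v == val else 0
        best = max(best, run)
    return best

def label_for(vals_buffer):
    # 1 = a wake word just ended and dominated the buffer, 0 = mostly background,
    # None = ambiguous (the training loop skips these chunks)
    overlap = max_run_length(vals_buffer, 1) / len(vals_buffer)
    if vals_buffer[-1] == 0 and overlap > 0.8:
        return 1
    if overlap < 0.5:
        return 0
    return None

buf = np.array([1] * 9 + [0])   # toy label buffer: a long wake word followed by silence
print(label_for(buf))           # -> 1
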
Code example #3
class IncrementalTrainer(Trainer):
    def __init__(self):
        super().__init__(create_parser(usage))

        for i in (join(self.args.folder, 'not-wake-word', 'generated'),
                  join(self.args.folder, 'test', 'not-wake-word',
                       'generated')):
            makedirs(i, exist_ok=True)

        self.trained_fns = load_trained_fns(self.args.model)
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)

        params = ModelParams(skip_acc=self.args.no_validation,
                             extra_metrics=self.args.extra_metrics,
                             loss_bias=1.0 - self.args.sensitivity)
        model = create_model(self.args.model, params)
        self.listener = Listener(self.args.model,
                                 self.args.chunk_size,
                                 runner_cls=KerasRunner)
        self.listener.runner = KerasRunner(self.args.model)
        self.listener.runner.model = model
        self.samples_since_train = 0

    @staticmethod
    def load_data(args: Any):
        data = TrainData.from_tags(args.tags_file, args.tags_folder)
        return data.load(True, not args.no_validation)

    def retrain(self):
        """Train for a session, pulling in any new data from the filesystem"""
        folder = TrainData.from_folder(self.args.folder)
        train_data, test_data = folder.load(True, not self.args.no_validation)

        train_data = TrainData.merge(train_data, self.sampled_data)
        test_data = TrainData.merge(test_data, self.test)
        train_inputs, train_outputs = train_data
        print()
        try:
            self.listener.runner.model.fit(train_inputs,
                                           train_outputs,
                                           self.args.batch_size,
                                           self.epoch + self.args.epochs,
                                           validation_data=test_data,
                                           callbacks=self.callbacks,
                                           initial_epoch=self.epoch)
        finally:
            self.listener.runner.model.save(self.args.model)

    def train_on_audio(self, fn: str):
        """Run through a single audio file"""
        save_test = random() > 0.8
        audio = load_audio(fn)
        num_chunks = len(audio) // self.args.chunk_size

        self.listener.clear()

        for i, chunk in enumerate(chunk_audio(audio, self.args.chunk_size)):
            print('\r' + str(i * 100. / num_chunks) + '%', end='', flush=True)
            self.audio_buffer = np.concatenate(
                (self.audio_buffer[len(chunk):], chunk))
            conf = self.listener.update(chunk)
            if conf > self.args.threshold:
                self.samples_since_train += 1
                name = splitext(basename(fn))[0] + '-' + str(i) + '.wav'
                name = join(self.args.folder, 'test' if save_test else '',
                            'not-wake-word', 'generated', name)
                save_audio(name, self.audio_buffer)
                print()
                print('Saved to:', name)

            if not save_test and self.samples_since_train >= self.args.delay_samples and \
                    self.args.epochs > 0:
                self.samples_since_train = 0
                self.retrain()

    def run(self):
        """
        Begin reading through audio files, saving false
        activations and retraining when necessary
        """
        for fn in glob_all(self.args.random_data_folder, '*.wav'):
            if fn in self.trained_fns:
                print('Skipping ' + fn + '...')
                continue

            print('Starting file ' + fn + '...')
            self.train_on_audio(fn)
            print('\r100%                 ')

            self.trained_fns.append(fn)
            save_trained_fns(self.trained_fns, self.args.model)
Code example #4
class IncrementalTrainer:
    def __init__(self, args):
        self.args = args
        self.trained_fns = load_trained_fns(args.model)
        pr = inject_params(args.model)
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)

        from keras.callbacks import ModelCheckpoint
        self.checkpoint = ModelCheckpoint(args.model,
                                          monitor=args.metric_monitor,
                                          save_best_only=args.save_best)
        data = TrainData.from_tags(args.tags_file, args.tags_folder)
        self.tags_data = data.load(True, not args.no_validation)

        if not isfile(args.model):
            create_model(args.model, args.no_validation,
                         args.extra_metrics).save(args.model)
        self.listener = Listener(args.model,
                                 args.chunk_size,
                                 runner_cls=KerasRunner)

    def retrain(self):
        """Train for a session, pulling in any new data from the filesystem"""
        folder = TrainData.from_folder(self.args.folder)
        train_data, test_data = folder.load(True, not self.args.no_validation)

        train_data = TrainData.merge(train_data, self.tags_data[0])
        test_data = TrainData.merge(test_data, self.tags_data[1])
        print()
        try:
            self.listener.runner.model.fit(*train_data,
                                           self.args.batch_size,
                                           self.args.epochs,
                                           validation_data=test_data,
                                           callbacks=[self.checkpoint])
        finally:
            self.listener.runner.model.save(self.args.model)

    def train_on_audio(self, fn: str):
        """Run through a single audio file"""
        save_test = random() > 0.8
        samples_since_train = 0
        audio = load_audio(fn)
        num_chunks = len(audio) // self.args.chunk_size

        self.listener.clear()

        for i, chunk in enumerate(chunk_audio(audio, self.args.chunk_size)):
            print('\r' + str(i * 100. / num_chunks) + '%', end='', flush=True)
            self.audio_buffer = np.concatenate(
                (self.audio_buffer[len(chunk):], chunk))
            conf = self.listener.update(chunk)
            if conf > 0.5:
                samples_since_train += 1
                name = splitext(basename(fn))[0] + '-' + str(i) + '.wav'
                name = join(self.args.folder, 'test' if save_test else '',
                            'not-wake-word', 'generated', name)
                save_audio(name, self.audio_buffer)
                print()
                print('Saved to:', name)

            if not save_test and samples_since_train >= self.args.delay_samples and self.args.epochs > 0:
                samples_since_train = 0
                self.retrain()

    def train_incremental(self):
        """
        Begin reading through audio files, saving false
        activations and retraining when necessary
        """
        for fn in glob_all(self.args.random_data_folder, '*.wav'):
            if fn in self.trained_fns:
                print('Skipping ' + fn + '...')
                continue

            print('Starting file ' + fn + '...')
            self.train_on_audio(fn)
            print('\r100%                 ')

            self.trained_fns.append(fn)
            save_trained_fns(self.trained_fns, self.args.model)
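
For this older variant, wiring the class to the command line might look like the following; create_parser and usage are assumed to be available as in code example #3, and the __main__ wrapper itself is illustrative:

def main():
    # parser construction assumed to match code example #3
    args = create_parser(usage).parse_args()
    IncrementalTrainer(args).train_incremental()

if __name__ == '__main__':
    main()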