def __init__(self, model_name: str, chunk_size: int = -1, runner_cls: type = None):
    """Set up the audio and feature buffers, the model runner and the threshold decoder.

    Args:
        model_name: Model file name; passed to ``inject_params``, to
            ``find_runner`` (when no runner class is given) and to the
            runner constructor.
        chunk_size: Stored on the instance as ``self.chunk_size``.
        runner_cls: Class used to build the runner. When falsy, the class is
            looked up with ``self.find_runner(model_name)``.
    """
    self.chunk_size = chunk_size
    # Start with no buffered audio and an all-zero feature matrix.
    self.window_audio = np.array([])
    params = inject_params(model_name)
    self.pr = params
    self.mfccs = np.zeros((params.n_features, params.n_mfcc))

    if not runner_cls:
        runner_cls = self.find_runner(model_name)
    self.runner = runner_cls(model_name)

    # NOTE(review): threshold_center is read from the bare name `pr`, not from
    # `self.pr` -- confirm both refer to the same parameter object.
    self.threshold_decoder = ThresholdDecoder(params.threshold_config,
                                              pr.threshold_center)
Ejemplo n.º 2
0
    def run(self):
        """Compute or load per-model statistics, then save or plot them.

        When ``args.models`` is non-empty, statistics are calculated for those
        models with ``calc_stats``. Otherwise they are read back from
        ``args.input_file`` and a text summary of each model is printed.

        The statistics are then written to ``args.output_file`` if one is set;
        if not, a false-positive / false-negative curve is plotted for every
        model.
        """
        args = self.args
        if args.models:
            data = TrainData.from_both(args.tags_file, args.tags_folder,
                                       args.folder)
            print('Data:', data)
            filenames = sum(
                data.train_files if args.use_train else data.test_files, [])
            loader = CachedDataLoader(
                partial(data.load,
                        args.use_train,
                        not args.use_train,
                        shuffle=False))
            model_data = calc_stats(args.models, loader, args.use_train,
                                    filenames)
        else:
            # The statistics are stored as a dict inside an object array (see
            # the np.savez call below). NumPy >= 1.16.3 refuses to load object
            # arrays unless allow_pickle=True is passed explicitly.
            # SECURITY: unpickling can execute arbitrary code -- only load
            # input files that come from a trusted source.
            saved = np.load(args.input_file, allow_pickle=True)['data'].item()
            model_data = {
                name: Stats.from_np_dict(data)
                for name, data in saved.items()
            }
            for name, stats in model_data.items():
                print('=== {} ===\n{}\n\n{}\n'.format(name, stats.counts_str(),
                                                      stats.summary_str()))

        if args.output_file:
            np.savez(args.output_file,
                     data={
                         name: stats.to_np_dict()
                         for name, stats in model_data.items()
                     })
        else:
            plt = load_plt()
            decoder = ThresholdDecoder(pr.threshold_config,
                                       pr.threshold_center)
            # Evenly spaced values in (0, 1); the [1:-1] slice drops the 0.0
            # and 1.0 endpoints before each value is passed through encode().
            thresholds = [
                decoder.encode(i)
                for i in np.linspace(0.0, 1.0, args.resolution)[1:-1]
            ]
            for model_name, stats in model_data.items():
                x = [stats.false_positives(i) for i in thresholds]
                y = [stats.false_negatives(i) for i in thresholds]
                plt.plot(x, y, marker='x', linestyle='-', label=model_name)
                if args.labels:
                    # Distinct names so the point coordinates do not rebind
                    # the x / y lists being iterated.
                    for x_pt, y_pt, threshold in zip(x, y, thresholds):
                        plt.annotate('{:.4f}'.format(threshold), (x_pt, y_pt))

            plt.legend()
            plt.xlabel('False Positives')
            plt.ylabel('False Negatives')
            plt.show()
Ejemplo n.º 3
0
class Listener:
    """Keeps a rolling matrix of MFCC vectors built from incoming audio and runs a network on it"""

    def __init__(self,
                 model_name: str,
                 chunk_size: int = -1,
                 runner_cls: type = None):
        """Set up the audio and feature buffers, the model runner and the threshold decoder.

        Args:
            model_name: Model file name; passed to ``inject_params``, to
                ``find_runner`` (when no runner class is given) and to the
                runner constructor.
            chunk_size: Number passed to ``stream.read`` when a file-like
                stream is given to ``update_vectors``.
            runner_cls: Class used to build the runner. When falsy, the class
                is chosen from the model file extension by ``find_runner``.
        """
        self.chunk_size = chunk_size
        # Start with no buffered audio and an all-zero feature matrix.
        self.window_audio = np.array([])
        params = inject_params(model_name)
        self.pr = params
        self.mfccs = np.zeros((params.n_features, params.n_mfcc))

        if not runner_cls:
            runner_cls = self.find_runner(model_name)
        self.runner = runner_cls(model_name)

        # NOTE(review): threshold_center is read from the bare name `pr`, not
        # from `self.pr` -- confirm both refer to the same parameter object.
        self.threshold_decoder = ThresholdDecoder(params.threshold_config,
                                                  pr.threshold_center)

    @staticmethod
    def find_runner(model_name: str) -> Type[Runner]:
        """Return the runner class registered for the extension of ``model_name``.

        Raises:
            ValueError: If the extension is neither ``.net`` nor ``.pb``.
        """
        runners = {'.net': KerasRunner, '.pb': TensorFlowRunner}
        _, ext = splitext(model_name)
        chosen = runners.get(ext)
        if chosen is None:
            raise ValueError('File extension of ' + model_name + ' must be: ' +
                             str(list(runners)))
        return chosen

    def clear(self):
        """Drop all buffered audio and reset the feature matrix to zeros."""
        self.mfccs = np.zeros((self.pr.n_features, self.pr.n_mfcc))
        self.window_audio = np.array([])

    def update_vectors(
            self, stream: Union[BinaryIO, np.ndarray, bytes]) -> np.ndarray:
        """Append new audio to the window and refresh the feature matrix.

        Args:
            stream: A numpy array (used as audio directly), a bytes/bytearray
                chunk, or an object with ``read`` from which
                ``self.chunk_size`` is requested. Byte chunks are converted
                with ``buffer_to_audio``.

        Returns:
            The current ``self.mfccs`` matrix.

        Raises:
            EOFError: If the byte chunk obtained from ``stream`` is empty.
        """
        if isinstance(stream, np.ndarray):
            samples = stream
        else:
            raw = (stream if isinstance(stream, (bytes, bytearray)) else
                   stream.read(self.chunk_size))
            if len(raw) == 0:
                raise EOFError
            samples = buffer_to_audio(raw)

        self.window_audio = np.concatenate((self.window_audio, samples))

        # Not enough audio buffered yet: leave the features untouched.
        if len(self.window_audio) < self.pr.window_samples:
            return self.mfccs

        fresh = vectorize_raw(self.window_audio)
        # Discard hop_samples of buffered audio per feature row produced.
        consumed = len(fresh) * self.pr.hop_samples
        self.window_audio = self.window_audio[consumed:]

        # Keep at most as many new rows as the feature matrix holds, then
        # drop that many of the oldest rows and append the new ones.
        capacity = len(self.mfccs)
        if len(fresh) > capacity:
            fresh = fresh[-capacity:]
        self.mfccs = np.concatenate((self.mfccs[len(fresh):], fresh))

        return self.mfccs

    def update(self, stream: Union[BinaryIO, np.ndarray, bytes]) -> float:
        """Feed new audio, run the network on the features and decode its output.

        Args:
            stream: Audio input, as accepted by ``update_vectors``.

        Returns:
            The runner's output after passing through the threshold decoder.
        """
        features = self.update_vectors(stream)
        network_input = add_deltas(features) if self.pr.use_delta else features
        return self.threshold_decoder.decode(self.runner.run(network_input))
Ejemplo n.º 4
0
def main():
    """Command-line entry point: gather per-model statistics, then save or plot them.

    Statistics come either from evaluating the models named on the command
    line (via ``calc_stats``) or from a previously saved input file. They are
    written to ``args.output_file`` if one is given; otherwise a
    false-positive / false-negative curve is plotted for every model.
    """
    parser = create_parser(usage)
    parser.add_argument(
        'models',
        nargs='*',
        help='Either Keras (.net) or TensorFlow (.pb) models to test')
    args = TrainData.parse_args(parser)
    # With neither models nor an input file given, the folder argument is
    # used as the input file.
    if not args.models and not args.input_file and args.folder:
        args.input_file = args.folder
    # Exactly one of the two sources (models / input file) must be set.
    if bool(args.models) == bool(args.input_file):
        parser.error('Please specify either a list of models or an input file')

    if not args.output_file:
        load_plt()  # Error early if matplotlib not installed
    import numpy as np

    if args.models:
        data = TrainData.from_both(args.tags_file, args.tags_folder,
                                   args.folder)
        print('Data:', data)
        filenames = sum(
            data.train_files if args.use_train else data.test_files, [])
        loader = CachedDataLoader(
            partial(data.load,
                    args.use_train,
                    not args.use_train,
                    shuffle=False))
        model_data = calc_stats(args.models, loader, args.use_train, filenames)
    else:
        # The statistics are stored as a dict inside an object array (see the
        # np.savez call below). NumPy >= 1.16.3 refuses to load object arrays
        # unless allow_pickle=True is passed explicitly.
        # SECURITY: unpickling can execute arbitrary code -- only load input
        # files that come from a trusted source.
        saved = np.load(args.input_file, allow_pickle=True)['data'].item()
        model_data = {
            name: Stats.from_np_dict(data)
            for name, data in saved.items()
        }
        for name, stats in model_data.items():
            print('=== {} ===\n{}\n\n{}\n'.format(name, stats.counts_str(),
                                                  stats.summary_str()))

    if args.output_file:
        np.savez(args.output_file,
                 data={
                     name: stats.to_np_dict()
                     for name, stats in model_data.items()
                 })
    else:
        plt = load_plt()
        decoder = ThresholdDecoder(pr.threshold_config, pr.threshold_center)
        # Evenly spaced values in (0, 1); the [1:-1] slice drops the 0.0 and
        # 1.0 endpoints before each value is passed through encode().
        thresholds = [
            decoder.encode(i)
            for i in np.linspace(0.0, 1.0, args.resolution)[1:-1]
        ]
        for model_name, stats in model_data.items():
            x = [stats.false_positives(i) for i in thresholds]
            y = [stats.false_negatives(i) for i in thresholds]
            plt.plot(x, y, marker='x', linestyle='-', label=model_name)
            if args.labels:
                # Distinct names so the point coordinates do not rebind the
                # x / y lists being iterated.
                for x_pt, y_pt, threshold in zip(x, y, thresholds):
                    plt.annotate('{:.4f}'.format(threshold), (x_pt, y_pt))

        plt.legend()
        plt.xlabel('False Positives')
        plt.ylabel('False Negatives')
        plt.show()