Example #1
    def test_create_driver(self):
        driver = logging.make_driver()
        request = Subject()
        sink = logging.Sink(request=request)
        source = driver.call(sink)

        self.assertIsNone(source)
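
These test excerpts omit their module context. A minimal reconstruction of the setup they appear to assume, where the driver's module path is an assumption rather than something shown on this page:

    import logging as std_logging        # stdlib logging, aliased to avoid a name clash
    from unittest.mock import MagicMock, patch
    from rx.subjects import Subject      # RxPY 1.x; RxPY 3 moved this to rx.subject
    import cyclotron_std.logging as logging  # assumed location of the driver under test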
Example #2
    def test_create_driver(self):
        driver = logging.make_driver()
        request = Subject()
        sink = logging.Sink(request=request)
        source = driver.call(sink)

        self.assertIsInstance(source, logging.Source)
        self.assertIsNotNone(source.response)
Example #3
    def test_log(self, mock_get_logger):
        logger = MagicMock(spec=std_logging.Logger)
        mock_get_logger.return_value = logger

        driver = logging.make_driver()
        request = Subject()
        sink = logging.Sink(request=request)
        driver.call(sink)

        request.on_next(
            logging.Log(logger='foo', level='DEBUG', message='foo msg'))
        mock_get_logger.assert_called_with('foo')
        logger.log.assert_called_with('DEBUG', 'foo msg')
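
The mock_get_logger argument implies a @patch decorator that the excerpt drops. A plausible reconstruction, where the patch target is an assumption about how the driver module resolves getLogger:

    @patch('cyclotron_std.logging.std_logging.getLogger')  # hypothetical target path
    def test_log(self, mock_get_logger):
        ...

test_set_level in Example #4 stacks a second @patch for StreamHandler; since stacked patches inject mocks bottom-up, the lowest decorator binds mock_get_logger and the one above it binds mock_stream_handler.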
Example #4
    def test_set_level(self, mock_get_logger, mock_stream_handler):
        logger = MagicMock(spec=std_logging.Logger)
        stream_handler = MagicMock(spec=std_logging.Handler)
        mock_get_logger.return_value = logger
        mock_stream_handler.return_value = stream_handler

        driver = logging.make_driver()
        request = Subject()
        sink = logging.Sink(request=request)
        driver.call(sink)

        request.on_next(logging.SetLevel(logger='foo', level='DEBUG'))
        mock_get_logger.assert_called_with('foo')
        logger.setLevel.assert_called_with(std_logging.DEBUG)
        mock_stream_handler.assert_called_with()
        stream_handler.setLevel.assert_called_with(std_logging.DEBUG)
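
In stdlib terms, the assertions in test_set_level pin down exactly these calls, which the driver is expected to make:

    import logging as std_logging

    logger = std_logging.getLogger('foo')
    logger.setLevel(std_logging.DEBUG)    # the level name 'DEBUG' resolved to its constant
    handler = std_logging.StreamHandler() # called with no arguments
    handler.setLevel(std_logging.DEBUG)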
Example #5
def deepspeech_server(sources):
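    # aio_scheduler is referenced below but not defined in this excerpt;
    # presumably a module-level asyncio scheduler instance (assumption).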
    argv = sources.argv.argv
    stt = sources.httpd.route
    stt_response = sources.deepspeech.text.share()
    ds_logs = sources.deepspeech.log
    config_data = sources.file.response

    http_ds_error, route_ds_error = make_error_router()

    args = argparse.argparse(
        argv=argv.skip(1).subscribe_on(aio_scheduler),
        parser=Observable.just(
            argparse.Parser(description="deepspeech server")),
        arguments=Observable.from_([
            argparse.ArgumentDef(name='--config',
                                 help="Path of the server configuration file")
        ]))

    config_file = (args.filter(lambda i: i.key == 'config').map(
        lambda i: file.Read(id='config', path=i.value)))
    config = parse_config(config_data).subscribe_on(aio_scheduler)

    logs_config = (config.flat_map(
        lambda i: Observable.from_(i.log.level)
        .map(lambda lvl: logging.SetLevel(logger=lvl.logger, level=lvl.level))
        .concat(Observable.just(logging.SetLevelDone()))))

    logs = Observable.merge(logs_config, ds_logs)
    log_ready = sources.logging.response.take(1)

    ds_stt = (stt.flat_map(lambda i: i.request).map(
        lambda i: deepspeech.SpeechToText(data=i.data, context=i.context)))

    ds_arg = (
        # config is hot; combine_latest keeps its latest value
        # until logging is initialized
        log_ready.combine_latest(
            config, lambda _, i: i).map(lambda i: deepspeech.Initialize(
                model=i.deepspeech.model,
                alphabet=i.deepspeech.alphabet,
                lm=i.deepspeech.lm,
                trie=i.deepspeech.trie,
                features=deepspeech.FeaturesParameters(
                    n_features=i.deepspeech.features.n_features,
                    n_context=i.deepspeech.features.n_context,
                    beam_width=i.deepspeech.features.beam_width,
                    lm_alpha=i.deepspeech.features.lm_alpha,
                    lm_beta=i.deepspeech.features.lm_beta,
                ) if i.deepspeech.features is not None else None)))
    ds = ds_stt.merge(ds_arg)

    http_init = (config.flat_map(lambda i: Observable.from_([
        httpd.Initialize(request_max_size=i.server.http.request_max_size),
        httpd.AddRoute(
            methods=['POST'],
            path='/stt',
            id='stt',
        ),
        httpd.StartServer(host=i.server.http.host, port=i.server.http.port),
    ])))

    http_response = (stt_response
        .let(lambda x: route_ds_error(
            x,
            error_map=lambda e: httpd.Response(
                data="Speech to text error".encode('utf-8'),
                context=e.args[0].context,
                status=500)))
        .map(lambda i: httpd.Response(
            data=i.text.encode('utf-8'),
            context=i.context,
        )))

    http = Observable.merge(http_init, http_response, http_ds_error)

    return DeepspeechSink(file=file.Sink(request=config_file),
                          logging=logging.Sink(request=logs),
                          deepspeech=deepspeech.Sink(speech=ds),
                          httpd=httpd.Sink(control=http))
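
Example #6 below is the same component ported to RxPY 3, where operator chaining moves from methods on Observable to pipe() plus the operators module. A minimal illustration of the style change:

    import rx
    from rx import operators as ops

    # RxPY 1.x:  xs.map(f).filter(p)
    # RxPY 3:    xs.pipe(ops.map(f), ops.filter(p))
    rx.of(1, 2, 3).pipe(
        ops.map(lambda i: i * 2),      # 2, 4, 6
        ops.filter(lambda i: i > 2),   # 4, 6
    ).subscribe(print)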
Example #6
def deepspeech_server(aio_scheduler, sources):
    argv = sources.argv.argv
    stt = sources.httpd.route
    stt_response = sources.deepspeech.text
    ds_logs = sources.deepspeech.log

    http_ds_error, route_ds_error = make_error_router()

    args = parse_arguments(argv)

    read_request, read_response = args.pipe(
        ops.map(lambda i: file.Read(id='config', path=i.value)),
        file.read(sources.file.response),
    )
    read_request = read_request.pipe(
        ops.subscribe_on(aio_scheduler),
    )
    config = parse_config(read_response)

    logs_config = config.pipe(
        ops.flat_map(lambda i: rx.from_(i.log.level, scheduler=ImmediateScheduler())),
        ops.map(lambda i: logging.SetLevel(logger=i.logger, level=i.level)),
    )
    logs = rx.merge(logs_config, ds_logs)

    ds_stt = stt.pipe(
        ops.flat_map(lambda i: i.request),
        ops.map(lambda i: deepspeech.SpeechToText(data=i.data, context=i.context)),
    )

    # config is hot: each configuration value is mapped to an Initialize request
    ds_arg = config.pipe(
        ops.map(lambda i: deepspeech.Initialize(
            model=i.deepspeech.model,
            scorer=deepspeech.Scorer(
                scorer=getattr(i.deepspeech, 'scorer', None),
                lm_alpha=getattr(i.deepspeech, 'lm_alpha', None),
                lm_beta=getattr(i.deepspeech, 'lm_beta', None),
            ),
            beam_width=getattr(i.deepspeech, 'beam_width', None),
        )),
    )
    ds = rx.merge(ds_stt, ds_arg)

    http_init = config.pipe(
        ops.flat_map(lambda i: rx.from_([
            httpd.Initialize(request_max_size=i.server.http.request_max_size),
            httpd.AddRoute(
                methods=['POST'],
                path='/stt',
                id='stt',
                headers=MultiDict([('Content-Type', 'text/plain')]),
            ),
            httpd.StartServer(
                host=i.server.http.host,
                port=i.server.http.port),
        ])),
    )

    http_response = stt_response.pipe(
        route_ds_error(
            error_map=lambda e: httpd.Response(
                data="Speech to text error".encode('utf-8'),
                context=e.args[0].context,
                status=500,
            ),
        ),
        ops.map(lambda i: httpd.Response(
            data=i.text.encode('utf-8'),
            context=i.context,
        )),
    )

    http = rx.merge(http_init, http_response, http_ds_error)

    return DeepspeechSink(
        file=file.Sink(request=read_request),
        logging=logging.Sink(request=logs),
        deepspeech=deepspeech.Sink(speech=ds),
        httpd=httpd.Sink(control=http)
    )
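
Example #6 likewise omits its imports; in RxPY 3 the library names it uses resolve as below (the project-side modules such as file, httpd and deepspeech are its own and are not reconstructed here):

    import rx
    from rx import operators as ops
    from rx.scheduler import ImmediateScheduler
    from multidict import MultiDict   # header container from the multidict package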
Example #7
def extract_features(sources):
    aio_scheduler = AsyncIOScheduler()
    file_response = sources.file.response.share()
    config_sink = config.read_configuration(
        config.Source(file_response=file_response,
                      argv=sources.argv.argv.subscribe_on(aio_scheduler)))
    configuration = config_sink.configuration.share()

    walk_adapter = walk.adapter(sources.walk.response)
    #file_adapter = file.adapter(sources.media_file.response)
    #write_feature_request, write_feature_file = router.make_crossroad_router(file_response)
    media_file_request, feature_file_request, process_path = path_processor.make_path_processor(
        sources.media_file.response, sources.feature_file.response)
    random_cross_request, cross_random = router.make_crossroad_router(
        sources.random.response)

    features = (
        configuration.flat_map(
            lambda configuration: walk_adapter.api
            .walk(configuration.dataset.voxceleb2_path)
            # extract features from files
            .let(
                process_path,
                configuration=configuration,
                #file_adapter=file_adapter,
            )
            # create sets
            .reduce(lambda acc, i: acc + [{
                'file': i,
                'label': label_from_path(i),
                'set': set_from_path(i),
            }], seed=[])
            # todo: shuffle
            .map(train_test_split)
            .flat_map(lambda dataset: Observable.just(dataset['test'])
                .map(pair_set)
                # shuffle apn pairs
                .map(lambda i: random.Shuffle(id='dev_test_set', data=i))
                .let(cross_random)
                .filter(lambda i: i.id == 'dev_test_set')
                .map(lambda i: i.data)
                .map(lambda i: test_dev_split(
                    i, configuration.dataset.dev_set_utterance_count,
                    configuration.dataset.test_set_utterance_count))
                .map(lambda i: {
                    'train': dataset['train'],
                    'dev': i['dev'],
                    'test': i['test'],
                })))
        .share())

    # save dataset json file
    write_dataset_request = (features
        .map(json.dumps)
        .with_latest_from(
            configuration, lambda dataset, configuration: file.Write(
                id='write_dataset',
                path=configuration.dataset.path,
                data=dataset,
                mode='w'))
        .share())

    # random
    random_request = Observable.concat(
        configuration.map(lambda i: random.SetSeed(value=i.random_seed)),
        random_cross_request)

    logs = features
    exit = sources.dataset_file.response.ignore_elements()

    return Sink(file=file.Sink(request=config_sink.file_request),
                dataset_file=file.Sink(request=write_dataset_request),
                media_file=file.Sink(request=media_file_request),
                feature_file=file.Sink(request=feature_file_request),
                logging=logging.Sink(request=logs),
                walk=walk.Sink(request=walk_adapter.sink),
                stop=stop.Sink(control=exit),
                random=random.Sink(request=random_request))
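
Several of these examples call .share() so that one upstream computation feeds every subscriber. A minimal RxPY 3 sketch of the effect:

    import rx
    from rx import operators as ops
    from rx.subject import Subject

    values = Subject()
    shared = values.pipe(
        ops.map(lambda i: i * 10),  # runs once per value, not once per subscriber
        ops.share(),
    )
    shared.subscribe(lambda i: print('a:', i))
    shared.subscribe(lambda i: print('b:', i))
    values.on_next(1)               # both subscribers receive 10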