Exemple #1
0
def model_publisher(scheduler, sources):
    """Build the model publisher dataflow.

    Reads the configuration from the argv/file/http sources, then emits
    the kafka requests needed to create the model topics.

    Args:
        scheduler: Scheduler used when reading the configuration.
        sources: Driver sources (argv, file, http, kafka).

    Returns:
        A ModelPublisherSink with the file, http and kafka requests.
    """
    file_source = sources.file.response.pipe(ops.share())

    # kafka driver bootstrap. fixme
    kafka_source = sources.kafka.response.pipe(
        ops.do_action(print),
        ops.replay(),
        ops.ref_count(),
    )
    # Subscribe immediately so the replay buffer starts filling before any
    # downstream subscriber connects.
    kafka_source.subscribe()

    config, config_read_request, http_request = read_config_from_args(
        sources.argv.argv,
        file_source,
        sources.http.response,
        scheduler=scheduler)

    # Only the first configuration emission is used.
    config = config.pipe(ops.first())

    kafka_request = config.pipe(ops.map(create_model_topics))

    return ModelPublisherSink(
        # config_read_request is a single observable: no merge needed.
        file=file.Sink(request=config_read_request),
        http=http.Sink(request=http_request),
        kafka=kafka.Sink(request=kafka_request),
    )
Exemple #2
0
def audio_encoder(sources):
    """Assemble the audio encoder dataflow.

    Parses the command line to locate the configuration file, initializes
    the encoder and the http server, and forwards transcode requests from
    the http route to the encoder.

    Args:
        sources: Driver sources (argv, file, httpd, encoder).

    Returns:
        A Sink with encoder, file and httpd requests.
    """
    # Parse configuration
    parser = create_arg_parser()

    read_request, read_response = sources.argv.argv.pipe(
        ops.skip(1),  # skip the program name
        argparse.parse(parser),
        ops.filter(lambda i: i.key == 'config'),
        ops.map(lambda i: file.Read(id='config', path=i.value)),
        file.read(sources.file.response),
    )
    config = read_response.pipe(
        ops.filter(lambda i: i.id == "config"),
        ops.flat_map(lambda i: i.data),
        parse_config,
    )

    # Transcode request handling
    encode_init = config.pipe(
        ops.map(
            lambda i: encoder.Initialize(storage_path=i.encode.storage_path)))

    encode_request = sources.httpd.route.pipe(
        ops.filter(lambda i: i.id == 'flac_transcode'),
        ops.flat_map(lambda i: i.request),
        ops.map(lambda i: encoder.EncodeMp3(
            id=i.context, data=i.data, key=i.match_info['key'])),
    )
    encoder_request = rx.merge(encode_init, encode_request)

    # http server
    http_init = config.pipe(
        ops.flat_map(lambda i: rx.from_([
            httpd.Initialize(request_max_size=0),
            httpd.AddRoute(
                methods=['POST'],
                # raw string: avoids the invalid '\.' escape warning
                path=r'/api/transcode/v1/flac/{key:[a-zA-Z0-9-\._]*}',
                id='flac_transcode',
            ),
            httpd.StartServer(host=i.server.http.host, port=i.server.http.port
                              ),
        ])))

    http_response = sources.encoder.response.pipe(
        ops.map(lambda i: httpd.Response(
            data='ok'.encode('utf-8'),
            context=i.id,
        )))
    http = rx.merge(http_init, http_response)

    # merge sink requests
    file_requests = read_request

    return Sink(
        encoder=encoder.Sink(request=encoder_request),
        file=file.Sink(request=file_requests),
        httpd=httpd.Sink(control=http),
    )
Exemple #3
0
def audio_encoder(sources):
    """Assemble the audio encoder dataflow (RxPY 1.x style).

    Parses the command line to locate the configuration file, initializes
    the encoder and the http server, and forwards transcode requests from
    the http route to the encoder.

    Args:
        sources: Driver sources (argv, file, httpd, encoder).

    Returns:
        A Sink with encoder, file and httpd requests.
    """
    # Parse configuration
    read_config_file = (sources.argv.argv.skip(1).let(
        argparse.argparse,
        parser=Observable.just(
            argparse.Parser(description="audio encode server")),
        arguments=Observable.from_([
            argparse.ArgumentDef(name='--config',
                                 help="Path of the server configuration file")
        ])).filter(lambda i: i.key == 'config').map(
            lambda i: file.Read(id='config', path=i.value)))
    config = sources.file.response.let(parse_config)

    # Transcode request handling
    encode_init = (config.map(
        lambda i: encoder.Initialize(storage_path=i.encode.storage_path)))

    encode_request = (
        sources.httpd.route.filter(lambda i: i.id == 'flac_transcode').
        flat_map(lambda i: i.request).map(lambda i: encoder.EncodeMp3(
            id=i.context, data=i.data, key=i.match_info['key'])))
    encoder_request = Observable.merge(encode_init, encode_request)

    # http server
    http_init = (config.flat_map(lambda i: Observable.from_([
        httpd.Initialize(request_max_size=0),
        httpd.AddRoute(
            methods=['POST'],
            # raw string: avoids the invalid '\.' escape warning
            path=r'/api/transcode/v1/flac/{key:[a-zA-Z0-9-\._]*}',
            id='flac_transcode',
        ),
        httpd.StartServer(host=i.server.http.host, port=i.server.http.port),
    ])))

    http_response = (sources.encoder.response.map(lambda i: httpd.Response(
        data='ok'.encode('utf-8'),
        context=i.id,
    )))
    http = Observable.merge(http_init, http_response)

    # merge sink requests
    file_requests = read_config_file

    return Sink(
        encoder=encoder.Sink(request=encoder_request),
        file=file.Sink(request=file_requests),
        httpd=httpd.Sink(control=http),
    )
Exemple #4
0
def makinage(aio_scheduler, sources):
    """Assemble the maki nage dataflow.

    Reads the configuration, replays the kafka source, and creates the
    configured operators once the first configuration is available.

    Args:
        aio_scheduler: Asyncio scheduler used for the operator subscription.
        sources: Driver sources (argv, file, http, kafka).

    Returns:
        A MakiNageSink with file, http and kafka requests.
    """
    def on_error(e):
        # Propagate kafka source errors instead of silently swallowing them.
        raise e

    config, read_request, http_request = read_config_from_args(
        sources.argv.argv,
        sources.file.response,
        sources.http.response,
        scheduler=aio_scheduler
    )

    # Keep the first configuration alive forever so late subscribers
    # still receive it.
    first_config = rx.concat(config.pipe(ops.take(1)), rx.never())

    kafka_source = sources.kafka.response.pipe(
        trace_observable("kafka source1"),
        ops.replay(),
        ops.ref_count(),
        trace_observable("kafka source2"),
    )
    # Subscribe immediately so the replay buffer starts filling before any
    # downstream operator connects.
    kafka_source.subscribe(on_error=on_error)

    kafka_request = first_config.pipe(
        ops.flat_map(lambda i: create_operators(
            i, config,
            kafka_source,
            sources.kafka.feedback.pipe(ops.share()),
        )),
        ops.subscribe_on(aio_scheduler),
        trace_observable("makinage"),
    )

    return MakiNageSink(
        file=file.Sink(request=read_request),
        http=http.Sink(request=http_request),
        kafka=kafka.Sink(request=kafka_request),
    )
Exemple #5
0
def deepspeech_server(sources):
    """Assemble the deepspeech speech-to-text server dataflow (RxPY 1.x).

    Wires argv parsing, configuration reading, logging setup, deepspeech
    engine initialization, and the http route handling STT requests.

    Args:
        sources: Driver sources (argv, httpd, deepspeech, file, logging).

    Returns:
        A DeepspeechSink with file, logging, deepspeech and httpd requests.
    """
    argv = sources.argv.argv
    stt = sources.httpd.route
    # share() so the error router and the response mapping below use a
    # single underlying subscription.
    stt_response = sources.deepspeech.text.share()
    ds_logs = sources.deepspeech.log
    config_data = sources.file.response

    http_ds_error, route_ds_error = make_error_router()

    args = argparse.argparse(
        argv=argv.skip(1).subscribe_on(aio_scheduler),
        parser=Observable.just(
            argparse.Parser(description="deepspeech server")),
        arguments=Observable.from_([
            argparse.ArgumentDef(name='--config',
                                 help="Path of the server configuration file")
        ]))

    # Translate the '--config' argument into a file read request.
    config_file = (args.filter(lambda i: i.key == 'config').map(
        lambda i: file.Read(id='config', path=i.value)))
    config = parse_config(config_data).subscribe_on(aio_scheduler)

    # Set each configured log level, then signal completion.
    logs_config = (config.flat_map(lambda i: Observable.from_(i.log.level).map(
        lambda i: logging.SetLevel(logger=i.logger, level=i.level)).concat(
            Observable.just(logging.SetLevelDone()))))

    logs = Observable.merge(logs_config, ds_logs)
    log_ready = sources.logging.response.take(1)

    # Forward each http request body to the speech-to-text engine.
    ds_stt = (stt.flat_map(lambda i: i.request).map(
        lambda i: deepspeech.SpeechToText(data=i.data, context=i.context)))

    ds_arg = (
        # config is hot, the combine operator allows to keep its last value
        # until logging is initialized
        log_ready.combine_latest(
            config, lambda _, i: i).map(lambda i: deepspeech.Initialize(
                model=i.deepspeech.model,
                alphabet=i.deepspeech.alphabet,
                lm=i.deepspeech.lm,
                trie=i.deepspeech.trie,
                # features are optional in the configuration file
                features=deepspeech.FeaturesParameters(
                    n_features=i.deepspeech.features.n_features,
                    n_context=i.deepspeech.features.n_context,
                    beam_width=i.deepspeech.features.beam_width,
                    lm_alpha=i.deepspeech.features.lm_alpha,
                    lm_beta=i.deepspeech.features.lm_beta,
                ) if i.deepspeech.features is not None else None)))
    ds = ds_stt.merge(ds_arg)

    # http server initialization: single '/stt' POST route.
    http_init = (config.flat_map(lambda i: Observable.from_([
        httpd.Initialize(request_max_size=i.server.http.request_max_size),
        httpd.AddRoute(
            methods=['POST'],
            path='/stt',
            id='stt',
        ),
        httpd.StartServer(host=i.server.http.host, port=i.server.http.port),
    ])))

    # Map engine errors to http 500 responses, successes to the decoded text.
    http_response = (stt_response.let(lambda x: route_ds_error(
        x,
        error_map=lambda e: httpd.Response(
            data="Speech to text error".encode('utf-8'),
            context=e.args[0].context,
            status=500))).map(lambda i: httpd.Response(
                data=i.text.encode('utf-8'),
                context=i.context,
            )))

    http = Observable.merge(http_init, http_response, http_ds_error)

    return DeepspeechSink(file=file.Sink(request=config_file),
                          logging=logging.Sink(request=logs),
                          deepspeech=deepspeech.Sink(speech=ds),
                          httpd=httpd.Sink(control=http))
Exemple #6
0
def deepspeech_server(aio_scheduler, sources):
    """Assemble the deepspeech speech-to-text server dataflow.

    Wires argv parsing, configuration reading, logging setup, deepspeech
    engine initialization, and the '/stt' http route.

    Args:
        aio_scheduler: Asyncio scheduler used for the file read subscription.
        sources: Driver sources (argv, httpd, deepspeech, file, logging).

    Returns:
        A DeepspeechSink with file, logging, deepspeech and httpd requests.
    """
    argv = sources.argv.argv
    stt = sources.httpd.route
    stt_response = sources.deepspeech.text
    ds_logs = sources.deepspeech.log

    http_ds_error, route_ds_error = make_error_router()

    args = parse_arguments(argv)

    # Translate the '--config' argument into a file read request/response.
    read_request, read_response = args.pipe(
        ops.map(lambda i: file.Read(id='config', path=i.value)),
        file.read(sources.file.response),
    )
    read_request = read_request.pipe(
        ops.subscribe_on(aio_scheduler),
    )
    config = parse_config(read_response)

    # Emit one SetLevel request per configured logger.
    logs_config = config.pipe(
        ops.flat_map(lambda i: rx.from_(i.log.level, scheduler=ImmediateScheduler())),
        ops.map(lambda i: logging.SetLevel(logger=i.logger, level=i.level)),
    )
    logs = rx.merge(logs_config, ds_logs)

    # Forward each http request body to the speech-to-text engine.
    ds_stt = stt.pipe(
        ops.flat_map(lambda i: i.request),
        ops.map(lambda i: deepspeech.SpeechToText(data=i.data, context=i.context)),
    )

    # config is hot, the combine operator allows to keep its last value
    # until logging is initialized
    ds_arg = config.pipe(
        ops.map(lambda i: deepspeech.Initialize(
            model=i.deepspeech.model,
            scorer=deepspeech.Scorer(
                # scorer fields are optional in the configuration file
                scorer=getattr(i.deepspeech, 'scorer', None),
                lm_alpha=getattr(i.deepspeech, 'lm_alpha', None),
                lm_beta=getattr(i.deepspeech, 'lm_beta', None),
            ),
            beam_width=getattr(i.deepspeech, 'beam_width', None),
        )),
    )
    ds = rx.merge(ds_stt, ds_arg)

    # http server initialization: single '/stt' POST route.
    http_init = config.pipe(
        ops.flat_map(lambda i: rx.from_([
            httpd.Initialize(request_max_size=i.server.http.request_max_size),
            httpd.AddRoute(
                methods=['POST'],
                path='/stt',
                id='stt',
                headers=MultiDict([('Content-Type', 'text/plain')]),
            ),
            httpd.StartServer(
                host=i.server.http.host,
                port=i.server.http.port),
        ])),
    )

    # Map engine errors to http 500 responses, successes to the decoded text.
    http_response = stt_response.pipe(
        route_ds_error(
            error_map=lambda e: httpd.Response(
                data="Speech to text error".encode('utf-8'),
                context=e.args[0].context,
                status=500
        )),
        ops.map(lambda i: httpd.Response(
            data=i.text.encode('utf-8'),
            context=i.context,
        )),
    )

    http = rx.merge(http_init, http_response, http_ds_error)

    return DeepspeechSink(
        file=file.Sink(request=read_request),
        logging=logging.Sink(request=logs),
        deepspeech=deepspeech.Sink(speech=ds),
        httpd=httpd.Sink(control=http)
    )
Exemple #7
0
def audio_encoder(sources):
    """Assemble the audio encoder dataflow with s3 storage.

    Parses the configuration, transcodes flac uploads received on the http
    route, stores the encoded files on s3, and acknowledges the http
    requests once the upload completes. The do_action traces log the
    thread used at each scheduling boundary.

    Args:
        sources: Driver sources (argv, file, httpd, encoder, s3).

    Returns:
        A Sink with encoder, s3, file and httpd requests.
    """
    # Parse configuration
    parser = create_arg_parser()

    read_request, read_response = sources.argv.argv.pipe(
        ops.skip(1),  # skip the program name
        argparse.parse(parser),
        ops.filter(lambda i: i.key == 'config'),
        ops.map(lambda i: file.Read(id='config', path=i.value)),
        file.read(sources.file.response),
    )
    config = read_response.pipe(
        ops.filter(lambda i: i.id == "config"),
        ops.flat_map(lambda i: i.data),
        parse_config,
    )

    # Transcode request handling
    encode_init = config.pipe(
        ops.map(
            lambda i: encoder.Initialize(storage_path=i.encode.storage_path)))

    encode_request = sources.httpd.route.pipe(
        ops.filter(lambda i: i.id == 'flac_transcode'),
        ops.flat_map(lambda i: i.request),
        ops.do_action(lambda i: print("[{}]http req: {}".format(
            datetime.datetime.now(), threading.get_ident()))),
        # hop to the encode scheduler. rx.just, not Observable.just: the
        # surrounding code uses the RxPY 3 API.
        ops.flat_map(lambda i: rx.just(i, encode_scheduler)),
        ops.do_action(lambda i: print("[{}]encode req: {}".format(
            datetime.datetime.now(), threading.get_ident()))),
        ops.map(lambda i: encoder.EncodeMp3(
            id=i.context, data=i.data, key=i.match_info['key'])),
    )
    encoder_request = rx.merge(encode_init, encode_request)

    # store encoded file
    store_requests = sources.encoder.response.pipe(
        ops.do_action(lambda i: print("[{}]encode res: {}".format(
            datetime.datetime.now(), threading.get_ident()))),
        ops.observe_on(s3_scheduler),
        ops.do_action(lambda i: print("[{}]s3 req: {}".format(
            datetime.datetime.now(), threading.get_ident()))),
        ops.map(lambda i: s3.UploadObject(
            key=i.key + '.flac',
            data=i.data,
            id=i.id,
        )),
    )

    # acknowledge http request
    http_response = sources.s3.response.pipe(
        ops.do_action(lambda i: print("[{}]s3 res: {}".format(
            datetime.datetime.now(), threading.get_ident()))),
        ops.do_action(
            lambda i: print("httpd res: {}".format(threading.get_ident()))),
        ops.map(lambda i: httpd.Response(
            data='ok'.encode('utf-8'),
            context=i.id,
        )),
    )

    # http server
    http_init = config.pipe(
        ops.flat_map(lambda i: rx.from_([
            httpd.Initialize(request_max_size=0),
            httpd.AddRoute(
                methods=['POST'],
                # raw string: avoids the invalid '\.' escape warning
                path=r'/api/transcode/v1/flac/{key:[a-zA-Z0-9-\._]*}',
                id='flac_transcode',
            ),
            httpd.StartServer(host=i.server.http.host, port=i.server.http.port
                              ),
        ])))
    http = rx.merge(http_init, http_response)

    # s3 database
    s3_init = config.pipe(
        ops.map(lambda i: s3.Configure(
            access_key=i.s3.access_key,
            secret_key=i.s3.secret_key,
            bucket=i.s3.bucket,
            endpoint_url=i.s3.endpoint_url,
            region_name=i.s3.region_name,
        )))

    # merge sink requests
    file_requests = read_request
    s3_requests = rx.merge(s3_init, store_requests)

    return Sink(
        encoder=encoder.Sink(request=encoder_request),
        s3=s3.Sink(request=s3_requests),
        file=file.Sink(request=file_requests),
        httpd=httpd.Sink(control=http),
    )
def audio_encoder(sources):
    """Assemble the audio encoder dataflow with config monitoring (RxPY 1.x).

    Watches the configuration file with inotify and re-reads it on change,
    transcodes flac uploads, stores the result on s3, and maps encode or
    upload errors to http 500 responses via dedicated error routers.

    Args:
        sources: Driver sources (argv, inotify, file, httpd, encoder, s3).

    Returns:
        A Sink with encoder, s3, file, httpd and inotify requests.
    """
    http_s3_error, route_s3_error = make_error_router()
    http_encode_error, route_encode_error = make_error_router()

    # Parse configuration
    parsed_argv = (sources.argv.argv.skip(1).let(
        argparse.argparse,
        parser=Observable.just(
            argparse.Parser(description="audio encode server")),
        arguments=Observable.from_([
            argparse.ArgumentDef(name='--config',
                                 help="Path of the server configuration file")
        ])).filter(lambda i: i.key == 'config').subscribe_on(
            aio_scheduler).share())

    # monitor and parse config file
    monitor_init = (parsed_argv.flat_map(lambda i: Observable.from_([
        inotify.AddWatch(
            id='config', path=i.value, flags=aionotify.Flags.MODIFY),
        inotify.Start(),
    ])))

    # Debounce bursts of modification events; start_with triggers the
    # initial configuration read.
    config_update = (sources.inotify.response.debounce(5000).map(
        lambda i: True).start_with(True))

    read_config_file = (Observable.combine_latest(
        parsed_argv, config_update,
        lambda config, _: file.Read(id='config', path=config.value)))
    config = sources.file.response.let(parse_config)

    # Transcode request handling
    encode_init = (config.map(lambda i: i.encode).distinct_until_changed().map(
        lambda i: encoder.Configure(samplerate=i.samplerate,
                                    bitdepth=i.bitdepth)))

    encode_request = (
        sources.httpd.route.filter(lambda i: i.id == 'flac_transcode').
        flat_map(lambda i: i.request).flat_map(lambda i: Observable.just(
            i, encode_scheduler)).map(lambda i: encoder.EncodeMp3(
                id=i.context, data=i.data, key=i.match_info['key'])))
    encoder_request = Observable.merge(encode_init, encode_request)

    # store encoded file
    store_requests = (sources.encoder.response.let(
        catch_or_flat_map,
        error_router=route_encode_error,
        error_map=lambda i: httpd.Response(data='encode error'.encode('utf-8'),
                                           context=i.args[0].id,
                                           status=500)).
                      observe_on(s3_scheduler).map(lambda i: s3.UploadObject(
                          key=i.key + '.flac',
                          data=i.data,
                          id=i.id,
                      )))

    # acknowledge http request
    http_response = (sources.s3.response.let(
        catch_or_flat_map,
        error_router=route_s3_error,
        error_map=lambda i: httpd.Response(data='upload error'.encode('utf-8'),
                                           context=i.args[0].id,
                                           status=500)).map(
                                               lambda i: httpd.Response(
                                                   data='ok'.encode('utf-8'),
                                                   context=i.id,
                                               )))

    # http server
    http_init = (config.take(1).flat_map(lambda i: Observable.from_([
        httpd.Initialize(request_max_size=0),
        httpd.AddRoute(
            methods=['POST'],
            # raw string: avoids the invalid '\.' escape warning
            path=r'/api/transcode/v1/flac/{key:[a-zA-Z0-9-\._]*}',
            id='flac_transcode',
        ),
        httpd.StartServer(host=i.server.http.host, port=i.server.http.port),
    ])))
    http = Observable.merge(http_init, http_response, http_s3_error,
                            http_encode_error)

    # s3 database
    s3_init = (config.take(1).map(lambda i: s3.Configure(
        access_key=i.s3.access_key,
        secret_key=i.s3.secret_key,
        bucket=i.s3.bucket,
        endpoint_url=i.s3.endpoint_url,
        region_name=i.s3.region_name,
    )))

    # merge sink requests
    file_requests = read_config_file
    s3_requests = Observable.merge(s3_init, store_requests)

    return Sink(
        encoder=encoder.Sink(request=encoder_request),
        s3=s3.Sink(request=s3_requests),
        file=file.Sink(request=file_requests),
        httpd=httpd.Sink(control=http),
        inotify=inotify.Sink(request=monitor_init),
    )
def audio_encoder(sources):
    """Assemble the audio encoder dataflow with config monitoring.

    Watches the configuration file with inotify and re-reads it on change,
    transcodes flac uploads received on the http route, stores the encoded
    files on s3, and acknowledges the http requests after upload.

    Args:
        sources: Driver sources (argv, inotify, file, httpd, encoder, s3).

    Returns:
        A Sink with encoder, s3, file, httpd and inotify requests.
    """
    # Parse configuration
    parser = create_arg_parser()

    parsed_argv = sources.argv.argv.pipe(
        ops.skip(1),  # skip the program name
        argparse.parse(parser),
        ops.filter(lambda i: i.key == 'config'),
        ops.subscribe_on(aio_scheduler),
        ops.share(),
    )

    # monitor and parse config file
    monitor_init = parsed_argv.pipe(
        ops.flat_map(lambda i: rx.from_([
            inotify.AddWatch(
                id='config', path=i.value, flags=aionotify.Flags.MODIFY),
            inotify.Start(),
        ])))

    # Debounce bursts of modification events; start_with triggers the
    # initial configuration read.
    config_update = sources.inotify.response.pipe(
        ops.debounce(5.0, scheduler=aio_scheduler),
        ops.map(lambda i: True),
        ops.start_with(True),
    )

    read_request, read_response = rx.combine_latest(
        parsed_argv, config_update).pipe(
            ops.starmap(
                lambda config, _: file.Read(id='config', path=config.value)),
            file.read(sources.file.response),
        )

    config = read_response.pipe(
        ops.filter(lambda i: i.id == "config"),
        ops.flat_map(lambda i: i.data),
        parse_config,
    )

    # Transcode request handling
    # Re-configure the encoder only when the encode section changes.
    encode_init = config.pipe(
        ops.map(lambda i: i.encode),
        ops.distinct_until_changed(),
        ops.map(lambda i: encoder.Configure(samplerate=i.samplerate,
                                            bitdepth=i.bitdepth)),
    )

    encode_request = sources.httpd.route.pipe(
        ops.filter(lambda i: i.id == 'flac_transcode'),
        ops.flat_map(lambda i: i.request),
        # hop to the encode scheduler
        ops.flat_map(lambda i: rx.just(i, encode_scheduler)),
        ops.map(lambda i: encoder.EncodeMp3(
            id=i.context, data=i.data, key=i.match_info['key'])),
    )
    encoder_request = rx.merge(encode_init, encode_request)

    # store encoded file
    store_requests = sources.encoder.response.pipe(
        ops.observe_on(s3_scheduler),
        ops.map(lambda i: s3.UploadObject(
            key=i.key + '.flac',
            data=i.data,
            id=i.id,
        )),
    )

    # acknowledge http request
    http_response = sources.s3.response.pipe(
        ops.map(lambda i: httpd.Response(
            data='ok'.encode('utf-8'),
            context=i.id,
        )))

    # http server: initialize once from the first configuration.
    http_init = config.pipe(
        ops.take(1),
        ops.flat_map(lambda i: rx.from_([
            httpd.Initialize(request_max_size=0),
            httpd.AddRoute(
                methods=['POST'],
                # raw string: avoids the invalid '\.' escape warning
                path=r'/api/transcode/v1/flac/{key:[a-zA-Z0-9-\._]*}',
                id='flac_transcode',
            ),
            httpd.StartServer(host=i.server.http.host, port=i.server.http.port
                              ),
        ])),
    )
    http = rx.merge(http_init, http_response)

    # s3 database: configure once from the first configuration.
    s3_init = config.pipe(
        ops.take(1),
        ops.map(lambda i: s3.Configure(
            access_key=i.s3.access_key,
            secret_key=i.s3.secret_key,
            bucket=i.s3.bucket,
            endpoint_url=i.s3.endpoint_url,
            region_name=i.s3.region_name,
        )),
    )

    # merge sink requests
    file_requests = read_request
    s3_requests = rx.merge(s3_init, store_requests)

    return Sink(
        encoder=encoder.Sink(request=encoder_request),
        s3=s3.Sink(request=s3_requests),
        file=file.Sink(request=file_requests),
        httpd=httpd.Sink(control=http),
        inotify=inotify.Sink(request=monitor_init),
    )
Exemple #10
0
def extract_features(sources):
    """Assemble the feature extraction dataflow.

    Walks the voxceleb2 dataset, extracts features from each media file,
    splits the files into train/dev/test sets, and writes the resulting
    dataset description as a json file.

    Args:
        sources: Driver sources (file, argv, walk, media_file, feature_file,
            random, dataset_file, logging).

    Returns:
        A Sink with file, logging, walk, stop and random requests.
    """
    aio_scheduler = AsyncIOScheduler()
    file_response = sources.file.response.share()
    config_sink = config.read_configuration(
        config.Source(file_response=file_response,
                      argv=sources.argv.argv.subscribe_on(aio_scheduler)))
    configuration = config_sink.configuration.share()

    walk_adapter = walk.adapter(sources.walk.response)
    #file_adapter = file.adapter(sources.media_file.response)
    #write_feature_request, write_feature_file = router.make_crossroad_router(file_response)
    media_file_request, feature_file_request, process_path = path_processor.make_path_processor(
        sources.media_file.response, sources.feature_file.response)
    random_cross_request, cross_random = router.make_crossroad_router(
        sources.random.response)

    features = (
        configuration.flat_map(
            lambda configuration: walk_adapter.api.walk(configuration.dataset.
                                                        voxceleb2_path)
            # extract features from files
            .let(
                process_path,
                configuration=configuration,
                #file_adapter=file_adapter,
            )
            # create sets: one record per file with its label and set name
            .reduce(lambda acc, i: acc + [{
                'file': i,
                'label': label_from_path(i),
                'set': set_from_path(i),
            }],
                    seed=[])
            # todo: shuffle
            # split, then route the test set through the random driver to
            # shuffle it before the dev/test split.
            .map(train_test_split).flat_map(
                lambda dataset: Observable.just(dataset['test']).map(pair_set)
                # shuffle apn pairs
                .map(lambda i: random.Shuffle(id='dev_test_set', data=i)).let(
                    cross_random).filter(lambda i: i.id == 'dev_test_set').
                map(lambda i: i.data).map(lambda i: test_dev_split(
                    i, configuration.dataset.dev_set_utterance_count,
                    configuration.dataset.test_set_utterance_count)).map(
                        lambda i: {
                            'train': dataset['train'],
                            'dev': i['dev'],
                            'test': i['test'],
                        }))).share())

    # save dataset json file
    write_dataset_request = (features.map(json.dumps).with_latest_from(
        configuration, lambda dataset, configuration: file.Write(
            id='write_dataset',
            path=configuration.dataset.path,
            data=dataset,
            mode='w')).share())

    # random
    # Seed the random driver before forwarding the shuffle requests.
    random_request = Observable.concat(
        configuration.map(lambda i: random.SetSeed(value=i.random_seed)),
        random_cross_request)

    logs = features
    # Stop once the dataset file write has completed.
    exit = sources.dataset_file.response.ignore_elements()

    return Sink(file=file.Sink(request=config_sink.file_request),
                dataset_file=file.Sink(request=write_dataset_request),
                media_file=file.Sink(request=media_file_request),
                feature_file=file.Sink(request=feature_file_request),
                logging=logging.Sink(request=logs),
                walk=walk.Sink(request=walk_adapter.sink),
                stop=stop.Sink(control=exit),
                random=random.Sink(request=random_request))