def model_publisher(scheduler, sources):
    """Wire the model-publisher reactive component.

    Reads configuration from argv/file/http sources, bootstraps the kafka
    driver, and returns a ModelPublisherSink routing requests to the file,
    http and kafka drivers.

    Args:
        scheduler: scheduler forwarded to read_config_from_args.
        sources: driver sources (argv, file, http, kafka attributes are read).

    Returns:
        ModelPublisherSink with file, http and kafka sink requests.
    """
    file_source = sources.file.response.pipe(ops.share())

    # kafka driver bootstrap. fixme
    # replay()+ref_count() makes the kafka response stream hot so that the
    # eager subscribe() below starts it before downstream consumers attach.
    kafka_source = sources.kafka.response.pipe(
        ops.do_action(print),
        ops.replay(),
        ops.ref_count(),
    )
    kafka_source.subscribe()

    config, config_read_request, http_request = read_config_from_args(
        sources.argv.argv,
        file_source,
        sources.http.response,
        scheduler=scheduler)

    # Only the first configuration emission is used.
    config = config.pipe(ops.first())

    # Fixed: ops.map(lambda c: create_model_topics(c)) — the lambda wrapper
    # added nothing; pass the callable directly.
    kafka_request = config.pipe(
        ops.map(create_model_topics),
    )

    return ModelPublisherSink(
        # Fixed: rx.merge() with a single observable is an identity wrapper;
        # the request stream is forwarded as-is.
        file=file.Sink(request=config_read_request),
        http=http.Sink(request=http_request),
        kafka=kafka.Sink(request=kafka_request),
    )
def audio_encoder(sources):
    """Wire the audio-encoder component (no object storage variant).

    Parses the --config argument, reads and parses the configuration file,
    initializes the encoder and HTTP server, and maps transcode HTTP requests
    to encoder requests and encoder responses back to HTTP 'ok' responses.

    Args:
        sources: driver sources (argv, file, httpd, encoder attributes read).

    Returns:
        Sink with encoder, file and httpd requests.
    """
    # Parse configuration
    parser = create_arg_parser()

    read_request, read_response = sources.argv.argv.pipe(
        ops.skip(1),  # skip the program name
        argparse.parse(parser),
        ops.filter(lambda i: i.key == 'config'),
        ops.map(lambda i: file.Read(id='config', path=i.value)),
        file.read(sources.file.response),
    )
    config = read_response.pipe(
        ops.filter(lambda i: i.id == "config"),
        ops.flat_map(lambda i: i.data),
        parse_config,
    )

    # Transcode request handling
    encode_init = config.pipe(
        ops.map(
            lambda i: encoder.Initialize(storage_path=i.encode.storage_path)))

    encode_request = sources.httpd.route.pipe(
        ops.filter(lambda i: i.id == 'flac_transcode'),
        ops.flat_map(lambda i: i.request),
        ops.map(lambda i: encoder.EncodeMp3(
            id=i.context, data=i.data, key=i.match_info['key'])),
    )
    encoder_request = rx.merge(encode_init, encode_request)

    # http server
    http_init = config.pipe(
        ops.flat_map(lambda i: rx.from_([
            httpd.Initialize(request_max_size=0),
            httpd.AddRoute(
                methods=['POST'],
                # Fixed: raw string — '\.' is an invalid escape sequence in a
                # normal string literal (warning on modern CPython); the raw
                # form has the identical runtime value.
                path=r'/api/transcode/v1/flac/{key:[a-zA-Z0-9-\._]*}',
                id='flac_transcode',
            ),
            httpd.StartServer(host=i.server.http.host,
                              port=i.server.http.port),
        ])))

    http_response = sources.encoder.response.pipe(
        ops.map(lambda i: httpd.Response(
            data='ok'.encode('utf-8'),
            context=i.id,
        )))

    http = rx.merge(http_init, http_response)

    # merge sink requests
    file_requests = read_request

    return Sink(
        encoder=encoder.Sink(request=encoder_request),
        file=file.Sink(request=file_requests),
        httpd=httpd.Sink(control=http),
    )
def audio_encoder(sources):
    """Wire the audio-encoder component (RxPY 1.x fluent-API variant).

    Parses the --config argument, reads and parses the configuration file,
    initializes the encoder and HTTP server, and maps transcode HTTP requests
    to encoder requests and encoder responses back to HTTP 'ok' responses.

    Args:
        sources: driver sources (argv, file, httpd, encoder attributes read).

    Returns:
        Sink with encoder, file and httpd requests.
    """
    # Parse configuration
    read_config_file = (sources.argv.argv.skip(1).let(
        argparse.argparse,
        parser=Observable.just(
            argparse.Parser(description="audio encode server")),
        arguments=Observable.from_([
            argparse.ArgumentDef(name='--config',
                                 help="Path of the server configuration file")
        ])).filter(lambda i: i.key == 'config').map(
            lambda i: file.Read(id='config', path=i.value)))
    config = sources.file.response.let(parse_config)

    # Transcode request handling
    encode_init = (config.map(
        lambda i: encoder.Initialize(storage_path=i.encode.storage_path)))

    encode_request = (
        sources.httpd.route.filter(lambda i: i.id == 'flac_transcode').
        flat_map(lambda i: i.request).map(lambda i: encoder.EncodeMp3(
            id=i.context, data=i.data, key=i.match_info['key'])))
    encoder_request = Observable.merge(encode_init, encode_request)

    # http server
    http_init = (config.flat_map(lambda i: Observable.from_([
        httpd.Initialize(request_max_size=0),
        httpd.AddRoute(
            methods=['POST'],
            # Fixed: raw string — '\.' is an invalid escape sequence in a
            # normal string literal (warning on modern CPython); the raw
            # form has the identical runtime value.
            path=r'/api/transcode/v1/flac/{key:[a-zA-Z0-9-\._]*}',
            id='flac_transcode',
        ),
        httpd.StartServer(host=i.server.http.host, port=i.server.http.port),
    ])))
    http_response = (sources.encoder.response.map(lambda i: httpd.Response(
        data='ok'.encode('utf-8'),
        context=i.id,
    )))
    http = Observable.merge(http_init, http_response)

    # merge sink requests
    file_requests = read_config_file

    return Sink(
        encoder=encoder.Sink(request=encoder_request),
        file=file.Sink(request=file_requests),
        httpd=httpd.Sink(control=http),
    )
def makinage(aio_scheduler, sources):
    """Wire the maki-nage application component.

    Reads configuration, bootstraps a hot kafka source stream, and builds
    the kafka request stream by instantiating operators from the first
    configuration emission.

    Args:
        aio_scheduler: scheduler used for config reading and subscribe_on.
        sources: driver sources (argv, file, http, kafka attributes read).

    Returns:
        MakiNageSink with file, http and kafka sink requests.
    """
    def on_error(e):
        # Fail fast: re-raise kafka source errors instead of swallowing them.
        raise e

    config, read_request, http_request = read_config_from_args(
        sources.argv.argv,
        sources.file.response,
        sources.http.response,
        scheduler=aio_scheduler
    )

    # Take the first config then never complete, so downstream combinators
    # that depend on it stay alive.
    first_config = rx.concat(config.pipe(ops.take(1),), rx.never())

    # replay()+ref_count() makes the kafka response stream hot; the eager
    # subscribe below starts it before operators attach.
    kafka_source = sources.kafka.response.pipe(
        trace_observable("kafka source1"),
        ops.replay(),
        ops.ref_count(),
        trace_observable("kafka source2"),
    )
    kafka_source.subscribe(on_error=on_error)

    kafka_request = first_config.pipe(
        ops.flat_map(lambda i: create_operators(
            i, config,
            kafka_source,
            sources.kafka.feedback.pipe(ops.share()),
        )),
        ops.subscribe_on(aio_scheduler),
        trace_observable("makinage"),
    )

    # Fixed: removed a dead commented-out debug block (a bare triple-quoted
    # string statement that subscribed config with print callbacks).

    return MakiNageSink(
        file=file.Sink(request=read_request),
        http=http.Sink(request=http_request),
        kafka=kafka.Sink(request=kafka_request),
    )
def deepspeech_server(sources):
    """Wire the deepspeech STT server component (RxPY 1.x variant).

    Parses the --config argument, configures logging from the config file,
    initializes the deepspeech engine, and routes HTTP /stt POST requests
    to speech-to-text requests and their responses (or errors) back to HTTP.

    Args:
        sources: driver sources (argv, httpd, deepspeech, file, logging
            attributes are read).

    Returns:
        DeepspeechSink with file, logging, deepspeech and httpd requests.
    """
    argv = sources.argv.argv
    stt = sources.httpd.route
    # share(): the STT response stream feeds both the success mapping and
    # the error router below.
    stt_response = sources.deepspeech.text.share()
    ds_logs = sources.deepspeech.log
    config_data = sources.file.response

    # route_ds_error diverts pipeline errors onto http_ds_error so they can
    # be merged into the HTTP control stream.
    http_ds_error, route_ds_error = make_error_router()

    # NOTE(review): aio_scheduler is not defined in this function — presumably
    # a module-level object; confirm against the enclosing module.
    args = argparse.argparse(
        argv=argv.skip(1).subscribe_on(aio_scheduler),
        parser=Observable.just(
            argparse.Parser(description="deepspeech server")),
        arguments=Observable.from_([
            argparse.ArgumentDef(name='--config',
                                 help="Path of the server configuration file")
        ]))

    # File read request for the configuration file path given on argv.
    config_file = (args.filter(lambda i: i.key == 'config').map(
        lambda i: file.Read(id='config', path=i.value)))
    config = parse_config(config_data).subscribe_on(aio_scheduler)

    # Per-logger SetLevel requests, terminated by a SetLevelDone marker.
    logs_config = (config.flat_map(lambda i: Observable.from_(i.log.level).map(
        lambda i: logging.SetLevel(logger=i.logger, level=i.level)).concat(
            Observable.just(logging.SetLevelDone()))))
    logs = Observable.merge(logs_config, ds_logs)
    log_ready = sources.logging.response.take(1)

    # Each HTTP request body becomes a SpeechToText request.
    ds_stt = (stt.flat_map(lambda i: i.request).map(
        lambda i: deepspeech.SpeechToText(data=i.data, context=i.context)))
    ds_arg = (
        # config is hot, the combine operator allows to keep its last value
        # until logging is initialized
        log_ready.combine_latest(
            config, lambda _, i: i).map(lambda i: deepspeech.Initialize(
                model=i.deepspeech.model,
                alphabet=i.deepspeech.alphabet,
                lm=i.deepspeech.lm,
                trie=i.deepspeech.trie,
                # features block is optional in the config file
                features=deepspeech.FeaturesParameters(
                    n_features=i.deepspeech.features.n_features,
                    n_context=i.deepspeech.features.n_context,
                    beam_width=i.deepspeech.features.beam_width,
                    lm_alpha=i.deepspeech.features.lm_alpha,
                    lm_beta=i.deepspeech.features.lm_beta,
                ) if i.deepspeech.features is not None else None)))
    ds = ds_stt.merge(ds_arg)

    # HTTP server bootstrap: init, route registration, then server start.
    http_init = (config.flat_map(lambda i: Observable.from_([
        httpd.Initialize(request_max_size=i.server.http.request_max_size),
        httpd.AddRoute(
            methods=['POST'],
            path='/stt',
            id='stt',
        ),
        httpd.StartServer(host=i.server.http.host, port=i.server.http.port),
    ])))
    # Successful transcriptions become 200 responses; errors are mapped to
    # 500 responses and surface on http_ds_error via the error router.
    http_response = (stt_response.let(lambda x: route_ds_error(
        x,
        error_map=lambda e: httpd.Response(
            data="Speech to text error".encode('utf-8'),
            context=e.args[0].context,
            status=500))).map(lambda i: httpd.Response(
                data=i.text.encode('utf-8'),
                context=i.context,
            )))
    http = Observable.merge(http_init, http_response, http_ds_error)

    return DeepspeechSink(file=file.Sink(request=config_file),
                          logging=logging.Sink(request=logs),
                          deepspeech=deepspeech.Sink(speech=ds),
                          httpd=httpd.Sink(control=http))
def deepspeech_server(aio_scheduler, sources):
    """Wire the deepspeech STT server component (RxPY 3 pipe-API variant).

    Parses arguments, reads and parses the configuration file, configures
    logging, initializes the deepspeech engine (model + optional scorer),
    and routes HTTP /stt POST requests to speech-to-text requests and
    their responses (or errors) back to HTTP responses.

    Args:
        aio_scheduler: scheduler the file-read request is subscribed on.
        sources: driver sources (argv, httpd, deepspeech, file attributes
            are read).

    Returns:
        DeepspeechSink with file, logging, deepspeech and httpd requests.
    """
    argv = sources.argv.argv
    stt = sources.httpd.route
    stt_response = sources.deepspeech.text
    ds_logs = sources.deepspeech.log

    # route_ds_error diverts pipeline errors onto http_ds_error so they can
    # be merged into the HTTP control stream.
    http_ds_error, route_ds_error = make_error_router()

    args = parse_arguments(argv)

    read_request, read_response = args.pipe(
        ops.map(lambda i: file.Read(id='config', path=i.value)),
        file.read(sources.file.response),
    )
    read_request = read_request.pipe(
        ops.subscribe_on(aio_scheduler),
    )
    config = parse_config(read_response)

    # Per-logger SetLevel requests; ImmediateScheduler forces synchronous
    # emission of the level list.
    logs_config = config.pipe(
        ops.flat_map(lambda i: rx.from_(i.log.level,
                                        scheduler=ImmediateScheduler())),
        ops.map(lambda i: logging.SetLevel(logger=i.logger, level=i.level)),
    )
    logs = rx.merge(logs_config, ds_logs)

    # Each HTTP request body becomes a SpeechToText request.
    ds_stt = stt.pipe(
        ops.flat_map(lambda i: i.request),
        ops.map(lambda i: deepspeech.SpeechToText(data=i.data,
                                                  context=i.context)),
    )
    # config is hot, the combine operator allows to keep its last value
    # until logging is initialized
    # getattr defaults make scorer/beam_width fields optional in the config.
    ds_arg = config.pipe(
        ops.map(lambda i: deepspeech.Initialize(
            model=i.deepspeech.model,
            scorer=deepspeech.Scorer(
                scorer=getattr(i.deepspeech, 'scorer', None),
                lm_alpha=getattr(i.deepspeech, 'lm_alpha', None),
                lm_beta=getattr(i.deepspeech, 'lm_beta', None),
            ),
            beam_width=getattr(i.deepspeech, 'beam_width', None),
        )),
    )
    ds = rx.merge(ds_stt, ds_arg)

    # HTTP server bootstrap: init, route registration, then server start.
    http_init = config.pipe(
        ops.flat_map(lambda i: rx.from_([
            httpd.Initialize(request_max_size=i.server.http.request_max_size),
            httpd.AddRoute(
                methods=['POST'],
                path='/stt',
                id='stt',
                headers=MultiDict([('Content-Type', 'text/plain')]),
            ),
            httpd.StartServer(
                host=i.server.http.host,
                port=i.server.http.port),
        ])),
    )
    # Successful transcriptions become 200 responses; errors are mapped to
    # 500 responses and surface on http_ds_error via the error router.
    http_response = stt_response.pipe(
        route_ds_error(
            error_map=lambda e: httpd.Response(
                data="Speech to text error".encode('utf-8'),
                context=e.args[0].context,
                status=500
            )),
        ops.map(lambda i: httpd.Response(
            data=i.text.encode('utf-8'),
            context=i.context,
        )),
    )
    http = rx.merge(http_init, http_response, http_ds_error)

    return DeepspeechSink(
        file=file.Sink(request=read_request),
        logging=logging.Sink(request=logs),
        deepspeech=deepspeech.Sink(speech=ds),
        httpd=httpd.Sink(control=http)
    )
def audio_encoder(sources):
    """Wire the audio-encoder component with S3 upload (debug variant).

    Parses the --config argument, reads the configuration, routes transcode
    HTTP requests to the encoder, uploads encoded files to S3, and
    acknowledges the originating HTTP request once the upload completes.
    The do_action(print) calls trace thread identity across scheduler hops.

    Args:
        sources: driver sources (argv, file, httpd, encoder, s3 attributes
            are read).

    Returns:
        Sink with encoder, s3, file and httpd requests.
    """
    # Parse configuration
    parser = create_arg_parser()

    read_request, read_response = sources.argv.argv.pipe(
        ops.skip(1),  # skip the program name
        argparse.parse(parser),
        ops.filter(lambda i: i.key == 'config'),
        ops.map(lambda i: file.Read(id='config', path=i.value)),
        file.read(sources.file.response),
    )
    config = read_response.pipe(
        ops.filter(lambda i: i.id == "config"),
        ops.flat_map(lambda i: i.data),
        parse_config,
    )

    # Transcode request handling
    encode_init = config.pipe(
        ops.map(
            lambda i: encoder.Initialize(storage_path=i.encode.storage_path)))

    # NOTE(review): encode_scheduler is not defined in this function —
    # presumably module-level; confirm. Mixing Observable.just with the
    # pipe API also looks like a leftover from an API migration — verify
    # which rx version this targets.
    encode_request = sources.httpd.route.pipe(
        ops.filter(lambda i: i.id == 'flac_transcode'),
        ops.flat_map(lambda i: i.request),
        ops.do_action(lambda i: print("[{}]http req: {}".format(
            datetime.datetime.now(), threading.get_ident()))),
        #.observe_on(encode_scheduler)
        ops.flat_map(lambda i: Observable.just(i, encode_scheduler)),
        ops.do_action(lambda i: print("[{}]encode req: {}".format(
            datetime.datetime.now(), threading.get_ident()))),
        ops.map(lambda i: encoder.EncodeMp3(
            id=i.context, data=i.data, key=i.match_info['key'])),
    )
    encoder_request = rx.merge(encode_init, encode_request)

    # store encoded file
    store_requests = sources.encoder.response.pipe(
        ops.do_action(lambda i: print("[{}]encode res: {}".format(
            datetime.datetime.now(), threading.get_ident()))),
        # hop to the S3 scheduler before issuing upload requests
        ops.observe_on(s3_scheduler),
        ops.do_action(lambda i: print("[{}]s3 req: {}".format(
            datetime.datetime.now(), threading.get_ident()))),
        ops.map(lambda i: s3.UploadObject(
            key=i.key + '.flac',
            data=i.data,
            id=i.id,
        )),
    )

    # acknowledge http request
    http_response = sources.s3.response.pipe(
        ops.do_action(lambda i: print("[{}]s3 res: {}".format(
            datetime.datetime.now(), threading.get_ident()))),
        ops.do_action(
            lambda i: print("httpd res: {}".format(threading.get_ident()))),
        ops.map(lambda i: httpd.Response(
            data='ok'.encode('utf-8'),
            context=i.id,
        )),
    )

    # http server
    http_init = config.pipe(
        ops.flat_map(lambda i: rx.from_([
            httpd.Initialize(request_max_size=0),
            # NOTE(review): '\.' is an invalid escape in a non-raw string
            # (equals '\\.' today, but warns on modern CPython) — consider a
            # raw string literal.
            httpd.AddRoute(
                methods=['POST'],
                path='/api/transcode/v1/flac/{key:[a-zA-Z0-9-\._]*}',
                id='flac_transcode',
            ),
            httpd.StartServer(host=i.server.http.host,
                              port=i.server.http.port),
        ])))
    http = rx.merge(http_init, http_response)

    # s3 database
    s3_init = config.pipe(
        ops.map(lambda i: s3.Configure(
            access_key=i.s3.access_key,
            secret_key=i.s3.secret_key,
            bucket=i.s3.bucket,
            endpoint_url=i.s3.endpoint_url,
            region_name=i.s3.region_name,
        )))

    # merge sink requests
    file_requests = read_request
    s3_requests = rx.merge(s3_init, store_requests)

    return Sink(
        encoder=encoder.Sink(request=encoder_request),
        s3=s3.Sink(request=s3_requests),
        file=file.Sink(request=file_requests),
        httpd=httpd.Sink(control=http),
    )
def audio_encoder(sources):
    """Wire the audio-encoder component with S3 upload and config monitoring
    (RxPY 1.x variant).

    Watches the configuration file with inotify and re-reads it on change,
    routes transcode HTTP requests to the encoder, uploads encoded files to
    S3, and acknowledges the originating HTTP request. Encoder and S3
    errors are converted to HTTP 500 responses via error routers.

    Args:
        sources: driver sources (argv, inotify, file, httpd, encoder, s3
            attributes are read).

    Returns:
        Sink with encoder, s3, file, httpd and inotify requests.
    """
    # Error routers divert pipeline errors onto dedicated HTTP response
    # streams merged into the control stream below.
    http_s3_error, route_s3_error = make_error_router()
    http_encode_error, route_encode_error = make_error_router()

    # Parse configuration
    # NOTE(review): aio_scheduler is not defined in this function —
    # presumably module-level; confirm against the enclosing module.
    parsed_argv = (sources.argv.argv.skip(1).let(
        argparse.argparse,
        parser=Observable.just(
            argparse.Parser(description="audio encode server")),
        arguments=Observable.from_([
            argparse.ArgumentDef(name='--config',
                                 help="Path of the server configuration file")
        ])).filter(lambda i: i.key == 'config').subscribe_on(
            aio_scheduler).share())

    # monitor and parse config file
    monitor_init = (parsed_argv.flat_map(lambda i: Observable.from_([
        inotify.AddWatch(
            id='config', path=i.value, flags=aionotify.Flags.MODIFY),
        inotify.Start(),
    ])))

    # Debounce bursts of inotify events; start_with(True) triggers the
    # initial read before any file modification occurs.
    config_update = (sources.inotify.response.debounce(5000).map(
        lambda i: True).start_with(True))

    read_config_file = (Observable.combine_latest(
        parsed_argv, config_update,
        lambda config, _: file.Read(id='config', path=config.value)))
    config = sources.file.response.let(parse_config)

    # Transcode request handling
    # distinct_until_changed: only reconfigure the encoder when the encode
    # section actually changes.
    encode_init = (config.map(lambda i: i.encode).distinct_until_changed().map(
        lambda i: encoder.Configure(samplerate=i.samplerate,
                                    bitdepth=i.bitdepth)))

    encode_request = (
        sources.httpd.route.filter(lambda i: i.id == 'flac_transcode').
        flat_map(lambda i: i.request).flat_map(lambda i: Observable.just(
            i, encode_scheduler)).map(lambda i: encoder.EncodeMp3(
                id=i.context, data=i.data, key=i.match_info['key'])))
    encoder_request = Observable.merge(encode_init, encode_request)

    # store encoded file
    # Encoder errors become HTTP 500 responses on http_encode_error.
    store_requests = (sources.encoder.response.let(
        catch_or_flat_map,
        error_router=route_encode_error,
        error_map=lambda i: httpd.Response(data='encode error'.encode('utf-8'),
                                           context=i.args[0].id,
                                           status=500)).
        observe_on(s3_scheduler).map(lambda i: s3.UploadObject(
            key=i.key + '.flac',
            data=i.data,
            id=i.id,
        )))

    # acknowledge http request
    # S3 errors become HTTP 500 responses on http_s3_error.
    http_response = (sources.s3.response.let(
        catch_or_flat_map,
        error_router=route_s3_error,
        error_map=lambda i: httpd.Response(data='upload error'.encode('utf-8'),
                                           context=i.args[0].id,
                                           status=500)).map(
            lambda i: httpd.Response(
                data='ok'.encode('utf-8'),
                context=i.id,
            )))

    # http server
    # take(1): the server is started once, from the first configuration.
    http_init = (config.take(1).flat_map(lambda i: Observable.from_([
        httpd.Initialize(request_max_size=0),
        httpd.AddRoute(
            methods=['POST'],
            path='/api/transcode/v1/flac/{key:[a-zA-Z0-9-\._]*}',
            id='flac_transcode',
        ),
        httpd.StartServer(host=i.server.http.host, port=i.server.http.port),
    ])))
    http = Observable.merge(http_init, http_response, http_s3_error,
                            http_encode_error)

    # s3 database
    s3_init = (config.take(1).map(lambda i: s3.Configure(
        access_key=i.s3.access_key,
        secret_key=i.s3.secret_key,
        bucket=i.s3.bucket,
        endpoint_url=i.s3.endpoint_url,
        region_name=i.s3.region_name,
    )))

    # merge sink requests
    file_requests = read_config_file
    s3_requests = Observable.merge(s3_init, store_requests)

    return Sink(
        encoder=encoder.Sink(request=encoder_request),
        s3=s3.Sink(request=s3_requests),
        file=file.Sink(request=file_requests),
        httpd=httpd.Sink(control=http),
        inotify=inotify.Sink(request=monitor_init),
    )
def audio_encoder(sources):
    """Wire the audio-encoder component with S3 upload and config monitoring
    (RxPY 3 pipe-API variant).

    Watches the configuration file with inotify and re-reads it on change,
    routes transcode HTTP requests to the encoder, uploads encoded files to
    S3, and acknowledges the originating HTTP request on upload completion.

    Args:
        sources: driver sources (argv, inotify, file, httpd, encoder, s3
            attributes are read).

    Returns:
        Sink with encoder, s3, file, httpd and inotify requests.
    """
    # Parse configuration
    # NOTE(review): aio_scheduler, encode_scheduler and s3_scheduler are not
    # defined in this function — presumably module-level; confirm.
    parser = create_arg_parser()

    parsed_argv = sources.argv.argv.pipe(
        ops.skip(1),  # skip the program name
        argparse.parse(parser),
        ops.filter(lambda i: i.key == 'config'),
        ops.subscribe_on(aio_scheduler),
        ops.share(),
    )

    # monitor and parse config file
    monitor_init = parsed_argv.pipe(
        ops.flat_map(lambda i: rx.from_([
            inotify.AddWatch(
                id='config', path=i.value, flags=aionotify.Flags.MODIFY),
            inotify.Start(),
        ])))

    # Debounce bursts of inotify events; start_with(True) triggers the
    # initial read before any file modification occurs.
    config_update = sources.inotify.response.pipe(
        ops.debounce(5.0, scheduler=aio_scheduler),
        ops.map(lambda i: True),
        ops.start_with(True),
    )

    read_request, read_response = rx.combine_latest(
        parsed_argv, config_update).pipe(
            ops.starmap(
                lambda config, _: file.Read(id='config', path=config.value)),
            file.read(sources.file.response),
        )
    config = read_response.pipe(
        ops.filter(lambda i: i.id == "config"),
        ops.flat_map(lambda i: i.data),
        parse_config,
    )

    # Transcode request handling
    # distinct_until_changed: only reconfigure the encoder when the encode
    # section actually changes.
    encode_init = config.pipe(
        ops.map(lambda i: i.encode),
        ops.distinct_until_changed(),
        ops.map(lambda i: encoder.Configure(samplerate=i.samplerate,
                                            bitdepth=i.bitdepth)),
    )

    encode_request = sources.httpd.route.pipe(
        ops.filter(lambda i: i.id == 'flac_transcode'),
        ops.flat_map(lambda i: i.request),
        ops.flat_map(lambda i: rx.just(i, encode_scheduler)),
        ops.map(lambda i: encoder.EncodeMp3(
            id=i.context, data=i.data, key=i.match_info['key'])),
    )
    encoder_request = rx.merge(encode_init, encode_request)

    # store encoded file
    store_requests = sources.encoder.response.pipe(
        ops.observe_on(s3_scheduler),
        ops.map(lambda i: s3.UploadObject(
            key=i.key + '.flac',
            data=i.data,
            id=i.id,
        )),
    )

    # acknowledge http request
    http_response = sources.s3.response.pipe(
        ops.map(lambda i: httpd.Response(
            data='ok'.encode('utf-8'),
            context=i.id,
        )))

    # http server
    # take(1): the server is started once, from the first configuration.
    http_init = config.pipe(
        ops.take(1),
        ops.flat_map(lambda i: rx.from_([
            httpd.Initialize(request_max_size=0),
            httpd.AddRoute(
                methods=['POST'],
                # Fixed: raw string — '\.' is an invalid escape sequence in a
                # normal string literal (warning on modern CPython); the raw
                # form has the identical runtime value.
                path=r'/api/transcode/v1/flac/{key:[a-zA-Z0-9-\._]*}',
                id='flac_transcode',
            ),
            httpd.StartServer(host=i.server.http.host,
                              port=i.server.http.port),
        ])),
    )
    http = rx.merge(http_init, http_response)

    # s3 database
    s3_init = config.pipe(
        ops.take(1),
        ops.map(lambda i: s3.Configure(
            access_key=i.s3.access_key,
            secret_key=i.s3.secret_key,
            bucket=i.s3.bucket,
            endpoint_url=i.s3.endpoint_url,
            region_name=i.s3.region_name,
        )),
    )

    # merge sink requests
    file_requests = read_request
    s3_requests = rx.merge(s3_init, store_requests)

    return Sink(
        encoder=encoder.Sink(request=encoder_request),
        s3=s3.Sink(request=s3_requests),
        file=file.Sink(request=file_requests),
        httpd=httpd.Sink(control=http),
        inotify=inotify.Sink(request=monitor_init),
    )
def extract_features(sources):
    """Wire the feature-extraction dataset pipeline.

    Walks the voxceleb2 dataset path, extracts features per file, reduces
    the results into labeled train/dev/test sets (shuffling the dev/test
    pairs via the random driver), and writes the resulting dataset as a
    JSON file. Stops when the dataset file write completes.

    Args:
        sources: driver sources (file, argv, walk, media_file, feature_file,
            random, dataset_file attributes are read).

    Returns:
        Sink with file, dataset_file, media_file, feature_file, logging,
        walk, stop and random requests.
    """
    aio_scheduler = AsyncIOScheduler()
    # share(): the file response stream feeds both configuration reading
    # and (commented-out) feature writing.
    file_response = sources.file.response.share()
    config_sink = config.read_configuration(
        config.Source(file_response=file_response,
                      argv=sources.argv.argv.subscribe_on(aio_scheduler)))
    configuration = config_sink.configuration.share()

    walk_adapter = walk.adapter(sources.walk.response)
    #file_adapter = file.adapter(sources.media_file.response)
    #write_feature_request, write_feature_file = router.make_crossroad_router(file_response)
    media_file_request, feature_file_request, process_path = path_processor.make_path_processor(
        sources.media_file.response, sources.feature_file.response)
    # Crossroad router: requests go out via random_cross_request, responses
    # come back through the cross_random operator below.
    random_cross_request, cross_random = router.make_crossroad_router(
        sources.random.response)

    features = (
        configuration.flat_map(
            lambda configuration: walk_adapter.api.walk(configuration.dataset.
                                                        voxceleb2_path)
            # extract features from files
            .let(
                process_path,
                configuration=configuration,
                #file_adapter=file_adapter,
            )
            # create sets
            .reduce(lambda acc, i: acc + [{
                'file': i,
                'label': label_from_path(i),
                'set': set_from_path(i),
            }], seed=[])
            # todo: shuffle
            .map(train_test_split).flat_map(
                lambda dataset: Observable.just(dataset['test']).map(pair_set)
                # shuffle apn pairs
                .map(lambda i: random.Shuffle(id='dev_test_set', data=i)).let(
                    cross_random).filter(lambda i: i.id == 'dev_test_set').
                map(lambda i: i.data).map(lambda i: test_dev_split(
                    i, configuration.dataset.dev_set_utterance_count,
                    configuration.dataset.test_set_utterance_count)).map(
                        lambda i: {
                            'train': dataset['train'],
                            'dev': i['dev'],
                            'test': i['test'],
                        }))).share())

    # save dataset json file
    write_dataset_request = (features.map(json.dumps).with_latest_from(
        configuration, lambda dataset, configuration: file.Write(
            id='write_dataset',
            path=configuration.dataset.path,
            data=dataset,
            mode='w')).share())

    # random
    # Seed the random driver before forwarding shuffle requests.
    random_request = Observable.concat(
        configuration.map(lambda i: random.SetSeed(value=i.random_seed)),
        random_cross_request)

    logs = features
    # Completion of the dataset file write drives application shutdown.
    exit = sources.dataset_file.response.ignore_elements()

    return Sink(file=file.Sink(request=config_sink.file_request),
                dataset_file=file.Sink(request=write_dataset_request),
                media_file=file.Sink(request=media_file_request),
                feature_file=file.Sink(request=feature_file_request),
                logging=logging.Sink(request=logs),
                walk=walk.Sink(request=walk_adapter.sink),
                stop=stop.Sink(control=exit),
                random=random.Sink(request=random_request))