def configure_build_manager(self):
    """Create, configure and register the MyVariant build manager.

    The builder class handed to ``BuilderManager`` is ``MyVariantDataBuilder``
    pre-bound (via ``partial``) with a single ``TagObserved`` mapper named
    ``"observed"``. The configured manager is stored under
    ``self.managers["build_manager"]``.
    """
    observed_mapper = TagObserved(name="observed")
    builder_factory = partial(MyVariantDataBuilder, mappers=[observed_mapper])

    manager = builder.BuilderManager(
        builder_class=builder_factory,
        job_manager=self.managers["job_manager"],
    )
    manager.configure()

    # Publish the manager so the rest of the hub can look it up by name.
    self.managers["build_manager"] = manager
    self.logger.info("Using custom builder %s" % MyVariantDataBuilder)
def configure_build_manager(self):
    """Create, configure, start polling on, and register the build manager.

    The builder class is the stock ``builder.DataBuilder`` pre-bound (via
    ``partial``) with a ``MyGenesetMapper`` named ``"count_genes"``. The
    manager is stored under ``self.managers["build_manager"]``.
    """
    gene_count_mapper = MyGenesetMapper(name="count_genes")
    builder_factory = partial(builder.DataBuilder, mappers=[gene_count_mapper])

    manager = builder.BuilderManager(
        job_manager=self.managers["job_manager"],
        builder_class=builder_factory,
    )
    manager.configure()
    manager.poll()

    # Publish the manager so the rest of the hub can look it up by name.
    self.managers["build_manager"] = manager
def configure_build_manager(self):
    """Create, configure and register the MyGene build manager.

    Two mappers are chained: an ``EntrezRetired2Current`` instance (built
    with ``convert_func=int``) is passed to ``Ensembl2Entrez`` as its
    ``retired2current`` argument; both receive ``mongo.get_src_db`` as their
    ``db_provider``. ``MyGeneDataBuilder`` is pre-bound with the
    ``Ensembl2Entrez`` mapper and handed to ``BuilderManager``. The manager
    is stored under ``self.managers["build_manager"]``.
    """
    retired_mapper = EntrezRetired2Current(
        convert_func=int,
        db_provider=mongo.get_src_db,
    )
    ensembl_mapper = Ensembl2Entrez(
        db_provider=mongo.get_src_db,
        retired2current=retired_mapper,
    )
    builder_factory = partial(MyGeneDataBuilder, mappers=[ensembl_mapper])

    manager = builder.BuilderManager(
        builder_class=builder_factory,
        job_manager=self.managers["job_manager"],
    )
    manager.configure()

    # Publish the manager so the rest of the hub can look it up by name.
    self.managers["build_manager"] = manager
    self.logger.info("Using custom builder %s" % MyGeneDataBuilder)
def configure_build_manager(self):
    """Create, configure, start polling on, and register the build manager.

    The builder class is ``TaxonomyDataBuilder`` pre-bound (via ``partial``)
    with a ``HasGeneMapper`` named ``"has_gene"``. The manager is created
    with ``poll_schedule="* * * * * */10"`` and stored under
    ``self.managers["build_manager"]``.
    """
    hasgene = HasGeneMapper(name="has_gene")
    pbuilder = partial(TaxonomyDataBuilder, mappers=[hasgene])
    build_manager = builder.BuilderManager(
        job_manager=self.managers["job_manager"],
        builder_class=pbuilder,
        poll_schedule="* * * * * */10",
    )
    build_manager.configure()
    # The original called configure() a second time here and never started
    # polling, leaving the poll_schedule given above unused. Configure once,
    # then start polling.
    # NOTE(review): assumes BuilderManager.poll() takes no arguments in the
    # biothings version this hub runs against -- confirm.
    build_manager.poll()
    self.managers["build_manager"] = build_manager
    self.logger.info("Using custom builder %s" % TaxonomyDataBuilder)
def configure_build_manager(self):
    """Create, configure and register the MyVariant build manager.

    Two builder classes are offered to ``BuilderManager``, in this order:
    ``MyVariantDataBuilder`` and ``MyVariantShardedDataBuilder``. Each is
    pre-bound (via ``partial``) with the same two mapper instances,
    ``TagObserved("observed")`` and
    ``TagObservedAndSkipLongId("observed_skipidtoolong")``. The manager is
    stored under ``self.managers["build_manager"]``.
    """
    tag_observed = TagObserved(name="observed")
    tag_observed_skip_long = TagObservedAndSkipLongId(
        name="observed_skipidtoolong")

    # Each partial gets its own list object (the mapper instances themselves
    # are shared), exactly as when the two partials are written out by hand.
    builder_factories = [
        partial(klass, mappers=[tag_observed, tag_observed_skip_long])
        for klass in (MyVariantDataBuilder, MyVariantShardedDataBuilder)
    ]

    manager = builder.BuilderManager(
        builder_class=builder_factories,
        job_manager=self.managers["job_manager"],
    )
    manager.configure()

    # Publish the manager so the rest of the hub can look it up by name.
    self.managers["build_manager"] = manager
    self.logger.info("Using custom builder %s" % MyVariantDataBuilder)
data_plugin_manager=dp_manager, dumper_manager=dmanager, uploader_manager=upload_manager, keylookup=hub.keylookup.MyGeneKeyLookup, job_manager=job_manager) # register available plugin assitant assistant_manager.configure() # load existing plugins assistant_manager.load() retired2current = EntrezRetired2Current(convert_func=int, db_provider=mongo.get_src_db) ensembl2entrez = Ensembl2Entrez(db_provider=mongo.get_src_db, retired2current=retired2current) build_manager = builder.BuilderManager(builder_class=partial( MyGeneDataBuilder, mappers=[ensembl2entrez]), job_manager=job_manager) build_manager.configure() diff_manager = differ.DifferManager(job_manager=job_manager, poll_schedule="* * * * * */10") diff_manager.configure() diff_manager.poll( "diff", lambda doc: shell.launch( partial(diff_manager.diff, "jsondiff-selfcontained", old=None, new=doc["_id"]))) diff_manager.poll( "release_note", lambda doc: shell.launch( partial(diff_manager.release_note, old=None, new=doc["_id"])))
syncer_manager.configure()

# --- dumpers: register every declared source and schedule all of them ------
dmanager = dumper.DumperManager(job_manager=jmanager)
dmanager.register_sources(hub.dataload.__sources__)
dmanager.schedule_all()

# --- uploaders --------------------------------------------------------------
# will check every 10 seconds for sources to upload
umanager = uploader.UploaderManager(
    poll_schedule='* * * * * */10',
    job_manager=jmanager,
)
umanager.register_sources(hub.dataload.__sources__)
umanager.poll(
    'upload',
    lambda doc: umanager.upload_src(doc["_id"]),
)

# --- builder: TaxonomyDataBuilder pre-bound with the "has_gene" mapper ------
hasgene = HasGeneMapper(name="has_gene")
pbuilder = partial(TaxonomyDataBuilder, mappers=[hasgene])
bmanager = builder.BuilderManager(
    job_manager=jmanager,
    builder_class=pbuilder,
    poll_schedule="* * * * * */10",
)
bmanager.configure()
bmanager.poll(
    "build",
    lambda conf: bmanager.merge(conf["_id"]),
)

# --- indexer: TaxonomyIndexer bound to the configured ES host ---------------
pindexer = partial(TaxonomyIndexer, es_host=config.ES_HOST)
index_manager = indexer.IndexerManager(job_manager=jmanager)
index_manager.configure([{"default": pindexer}])

from biothings.utils.hub import schedule, pending, done

# Commands are kept in an OrderedDict, populated in insertion order.
COMMANDS = OrderedDict()
# dump commands
COMMANDS["dump"] = dmanager.dump_src
# upload commands
COMMANDS["upload"] = umanager.upload_src
shell = HubShell(job_manager)

# --- uploaders / dumpers ----------------------------------------------------
# will check every 10 seconds for sources to upload
upload_manager = uploader.UploaderManager(
    poll_schedule='* * * * * */10',
    job_manager=job_manager,
)
dump_manager = dumper.DumperManager(job_manager=job_manager)

# The source manager ties the declared sources to both managers.
sources_path = hub.dataload.__sources_dict__
smanager = source.SourceManager(sources_path, dump_manager, upload_manager)

dump_manager.schedule_all()
# Uploads found by polling are launched through the hub shell.
upload_manager.poll(
    'upload',
    lambda doc: shell.launch(
        partial(upload_manager.upload_src, doc["_id"])),
)

# --- builder ----------------------------------------------------------------
build_manager = builder.BuilderManager(
    builder_class=MyChemDataBuilder,
    job_manager=job_manager,
)
build_manager.configure()

# --- differ -----------------------------------------------------------------
differ_manager = differ.DifferManager(
    job_manager=job_manager,
    poll_schedule="* * * * * */10",
)
differ_manager.configure()
# NOTE(review): unlike the upload poll above, these two callbacks call the
# differ directly rather than going through shell.launch -- confirm intended.
differ_manager.poll(
    "diff",
    lambda doc: differ_manager.diff(
        "jsondiff-selfcontained", old=None, new=doc["_id"]),
)
differ_manager.poll(
    "release_note",
    lambda doc: differ_manager.release_note(old=None, new=doc["_id"]),
)

# --- syncer (test) ----------------------------------------------------------
# test will access localhost ES, no need to throttle
syncer_manager_test = syncer.SyncerManager(job_manager=job_manager)
syncer_manager_test.configure()
# deal with 3rdparty datasources import biothings.hub.dataplugin.assistant as assistant from biothings.hub.dataplugin.manager import DataPluginManager dp_manager = DataPluginManager(job_manager=job_manager) assistant_manager = assistant.AssistantManager(data_plugin_manager=dp_manager, dumper_manager=dmanager, uploader_manager=upload_manager, job_manager=job_manager) # register available plugin assitant assistant_manager.configure() # load existing plugins assistant_manager.load() observed = TagObserved(name="observed") build_manager = builder.BuilderManager(builder_class=partial( MyVariantDataBuilder, mappers=[observed]), job_manager=job_manager) build_manager.configure() differ_manager = differ.DifferManager(job_manager=job_manager) differ_manager.configure( [differ.ColdHotSelfContainedJsonDiffer, differ.SelfContainedJsonDiffer]) inspector = inspector.InspectorManager(upload_manager=upload_manager, build_manager=build_manager, job_manager=job_manager) from biothings.hub.databuild.syncer import ThrottledESColdHotJsonDiffSelfContainedSyncer, ThrottledESJsonDiffSelfContainedSyncer, \ ESColdHotJsonDiffSelfContainedSyncer, ESJsonDiffSelfContainedSyncer syncer_manager = syncer.SyncerManager(job_manager=job_manager) syncer_manager.configure(klasses=[