def configure_job_manager(self):
    import asyncio
    loop = asyncio.get_event_loop()
    from biothings.utils.manager import JobManager
    args = self.mixargs("job", {"num_workers": config.HUB_MAX_WORKERS,
                                "max_memory_usage": config.HUB_MAX_MEM_USAGE})
    job_manager = JobManager(loop, **args)
    self.managers["job_manager"] = job_manager
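# Hypothetical usage sketch (not from the source): configure_job_manager() is a
# method, so it needs a host object providing mixargs() and a managers dict.
# MiniHub and its "<NAME>_MANAGER_ARGS" config lookup below are illustrative
# stand-ins only; in the real codebase the host is the hub server class.
import config

class MiniHub:
    def __init__(self):
        self.managers = {}

    def mixargs(self, name, defaults):
        # stand-in: overlay optional per-manager overrides from config
        overrides = getattr(config, "%s_MANAGER_ARGS" % name.upper(), None) or {}
        args = dict(defaults)
        args.update(overrides)
        return args

    # reuse the module-level function above as a method
    configure_job_manager = configure_job_manager

hub = MiniHub()
hub.configure_job_manager()
assert "job_manager" in hub.managers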
def test():
    import asyncio
    import config
    from biothings.utils.manager import JobManager
    from biothings.hub.dataindex.indexer import IndexManager
    # SnapshotManager import location is an assumption (dataindex.snapshooter)
    from biothings.hub.dataindex.snapshooter import SnapshotManager

    loop = asyncio.get_event_loop()
    job_manager = JobManager(loop)
    index_manager = IndexManager(job_manager=job_manager)
    index_manager.configure(config.INDEX_CONFIG)
    snapshot_manager = SnapshotManager(index_manager=index_manager,
                                       job_manager=job_manager,
                                       poll_schedule="* * * * * */10")
    snapshot_manager.configure(config.SNAPSHOT_CONFIG)
    # snapshot_manager.poll("snapshot", snapshot_manager.snapshot_build)

    async def test_code():
        snapshot_manager.snapshot('prod', 'mynews_202009170234_fjvg7skx', steps="post")

    asyncio.ensure_future(test_code())
    loop.run_forever()
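# Minimal driver for the test above (an assumption, not in the source): test()
# blocks in loop.run_forever(), so it is meant to be launched as a script.
if __name__ == "__main__":
    test()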
import asyncio
import concurrent.futures

import config, biothings
biothings.config_for_app(config)

import logging
# shut some mouths...
logging.getLogger("elasticsearch").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
logging.getLogger("requests").setLevel(logging.ERROR)

from biothings.utils.manager import JobManager
loop = asyncio.get_event_loop()
process_queue = concurrent.futures.ProcessPoolExecutor(max_workers=config.HUB_MAX_WORKERS)
thread_queue = concurrent.futures.ThreadPoolExecutor()
loop.set_default_executor(process_queue)
jmanager = JobManager(loop,
                      process_queue, thread_queue,
                      max_memory_usage=None,
                      )

import dataload
import biothings.dataload.uploader as uploader
import biothings.dataload.dumper as dumper
import biothings.databuild.builder as builder
import biothings.databuild.differ as differ
import biothings.databuild.syncer as syncer
import biothings.dataindex.indexer as indexer
from databuild.mapper import HasGeneMapper
from databuild.builder import TaxonomyDataBuilder
from dataindex.indexer import TaxonomyIndexer

differ_manager = differ.DifferManager(job_manager=jmanager)
differ_manager.configure()
import asyncio
import concurrent.futures

import config, biothings
biothings.config_for_app(config)

from biothings.utils.manager import JobManager
loop = asyncio.get_event_loop()
process_queue = concurrent.futures.ProcessPoolExecutor(max_workers=config.HUB_MAX_WORKERS)
thread_queue = concurrent.futures.ThreadPoolExecutor()
loop.set_default_executor(process_queue)
# an integer setting is interpreted as GiB and converted to bytes
max_mem = (config.HUB_MAX_MEM_USAGE * 1024**3
           if type(config.HUB_MAX_MEM_USAGE) is int
           else config.HUB_MAX_MEM_USAGE)
job_manager = JobManager(loop,
                         process_queue, thread_queue,
                         max_memory_usage=max_mem,
                         )

import dataload
import biothings.dataload.uploader as uploader
import biothings.dataload.dumper as dumper
import biothings.databuild.builder as builder
import biothings.databuild.differ as differ
import biothings.databuild.syncer as syncer
import biothings.dataindex.indexer as indexer
from databuild.builder import MyVariantDataBuilder
from databuild.mapper import TagObserved
from dataindex.indexer import VariantIndexer

# will check every 10 seconds for sources to upload
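# Worked example of the max_mem conversion above, with made-up sample values:
# an integer setting is read as GiB and scaled to bytes, anything else (e.g. a
# string) is passed through to JobManager unchanged.
for setting in (8, "95%"):
    max_mem_demo = setting * 1024**3 if type(setting) is int else setting
    print(setting, "->", max_mem_demo)   # 8 -> 8589934592, '95%' -> '95%'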
logging.getLogger("boto").setLevel(logging.ERROR) logging.getLogger("keylookup").setLevel(logging.INFO) logging.info("Hub DB backend: %s" % biothings.config.HUB_DB_BACKEND) logging.info("Hub database: %s" % biothings.config.DATA_HUB_DB_DATABASE) from biothings.utils.hub import start_server, HubShell from biothings.utils.manager import JobManager loop = asyncio.get_event_loop() process_queue = concurrent.futures.ProcessPoolExecutor( max_workers=config.HUB_MAX_WORKERS) thread_queue = concurrent.futures.ThreadPoolExecutor() loop.set_default_executor(process_queue) job_manager = JobManager(loop, num_workers=config.HUB_MAX_WORKERS, max_memory_usage=config.HUB_MAX_MEM_USAGE) shell = HubShell(job_manager) import hub.dataload import biothings.hub.dataload.uploader as uploader import biothings.hub.dataload.dumper as dumper import biothings.hub.dataload.source as source import biothings.hub.databuild.builder as builder import biothings.hub.databuild.differ as differ import biothings.hub.databuild.syncer as syncer import biothings.hub.dataindex.indexer as indexer import biothings.hub.datainspect.inspector as inspector from biothings.hub.api.manager import APIManager from hub.databuild.builder import MyGeneDataBuilder
import asyncio

import config, biothings
biothings.config_for_app(config)

import logging
# shut some mouths...
logging.getLogger("elasticsearch").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
logging.getLogger("requests").setLevel(logging.ERROR)
logging.info("Hub DB backend: %s" % biothings.config.HUB_DB_BACKEND)
logging.info("Hub database: %s" % biothings.config.DATA_HUB_DB_DATABASE)

from biothings.utils.manager import JobManager
loop = asyncio.get_event_loop()
jmanager = JobManager(loop,
                      num_workers=config.HUB_MAX_WORKERS,
                      max_memory_usage=config.HUB_MAX_MEM_USAGE)

import hub.dataload
import biothings.hub.dataload.uploader as uploader
import biothings.hub.dataload.dumper as dumper
import biothings.hub.databuild.builder as builder
import biothings.hub.databuild.differ as differ
import biothings.hub.databuild.syncer as syncer
import biothings.hub.dataindex.indexer as indexer
from hub.databuild.mapper import HasGeneMapper
from hub.databuild.builder import TaxonomyDataBuilder
from hub.dataindex.indexer import TaxonomyIndexer

differ_manager = differ.DifferManager(job_manager=jmanager,
                                      poll_schedule="* * * * * */10")
import asyncio

import config, biothings
biothings.config_for_app(config)

import logging
# shut some mouths...
logging.getLogger("elasticsearch").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
logging.getLogger("requests").setLevel(logging.ERROR)
logging.getLogger("boto").setLevel(logging.ERROR)
logging.info("Hub DB backend: %s" % biothings.config.HUB_DB_BACKEND)
logging.info("Hub database: %s" % biothings.config.DATA_HUB_DB_DATABASE)

from biothings.utils.manager import JobManager
loop = asyncio.get_event_loop()
job_manager = JobManager(loop,
                         num_workers=config.HUB_MAX_WORKERS,
                         num_threads=config.HUB_MAX_THREADS,
                         max_memory_usage=config.HUB_MAX_MEM_USAGE)

import hub.dataload
import biothings.hub.dataload.uploader as uploader
import biothings.hub.dataload.dumper as dumper
import biothings.hub.dataload.source as source
import biothings.hub.databuild.builder as builder
import biothings.hub.databuild.differ as differ
import biothings.hub.databuild.syncer as syncer
import biothings.hub.dataindex.indexer as indexer
import biothings.hub.datainspect.inspector as inspector
from biothings.hub.api.manager import APIManager
from hub.databuild.builder import MyVariantDataBuilder
from hub.databuild.mapper import TagObserved
from hub.dataindex.indexer import VariantIndexer
import asyncio
import concurrent.futures
import logging

import config, biothings
biothings.config_for_app(config)

# shut some mouths...
logging.getLogger("elasticsearch").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
logging.getLogger("requests").setLevel(logging.ERROR)
logging.getLogger("boto").setLevel(logging.ERROR)
logging.info("Hub DB backend: %s" % biothings.config.HUB_DB_BACKEND)
logging.info("Hub database: %s" % biothings.config.DATA_HUB_DB_DATABASE)

from biothings.utils.manager import JobManager
loop = asyncio.get_event_loop()
process_queue = concurrent.futures.ProcessPoolExecutor(max_workers=config.HUB_MAX_WORKERS)
thread_queue = concurrent.futures.ThreadPoolExecutor()
loop.set_default_executor(process_queue)
job_manager = JobManager(loop,
                         num_workers=config.HUB_MAX_WORKERS,
                         max_memory_usage=config.HUB_MAX_MEM_USAGE)

import hub.dataload
import biothings.hub.dataload.uploader as uploader
import biothings.hub.dataload.dumper as dumper
import biothings.hub.databuild.builder as builder
import biothings.hub.databuild.differ as differ
import biothings.hub.databuild.syncer as syncer
import biothings.hub.dataindex.indexer as indexer
from hub.databuild.builder import MyGeneDataBuilder
from hub.databuild.mapper import EntrezRetired2Current, Ensembl2Entrez
from hub.dataindex.indexer import GeneIndexer
import biothings.utils.mongo as mongo

# will check every 10 seconds for sources to upload
upload_manager = uploader.UploaderManager(poll_schedule='* * * * * */10',
                                          job_manager=job_manager)
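# A sketch of the wiring that typically follows (hedged: the register_classes()/
# poll() usage mirrors other biothings hub scripts; hub.dataload.__sources_dict__
# is assumed to be this app's source registry and is not shown in this excerpt):
upload_manager.register_classes(hub.dataload.__sources_dict__)
# on each poll tick, launch an upload for any source flagged as ready
upload_manager.poll('upload', lambda doc: upload_manager.upload_src(doc["_id"]))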