Esempio n. 1
0
 def configure_job_manager(self):
     """Create a JobManager and register it as ``self.managers["job_manager"]``.

     The worker count and memory limit read from ``config`` are handed to
     ``self.mixargs("job", ...)``; whatever that call returns is used as the
     keyword arguments for ``JobManager``.
     """
     # Imports are local to this method.
     import asyncio
     event_loop = asyncio.get_event_loop()
     from biothings.utils.manager import JobManager

     job_defaults = {
         "num_workers": config.HUB_MAX_WORKERS,
         "max_memory_usage": config.HUB_MAX_MEM_USAGE,
     }
     # NOTE(review): mixargs presumably overlays caller-supplied settings on
     # these defaults -- confirm against its definition.
     job_kwargs = self.mixargs("job", job_defaults)
     self.managers["job_manager"] = JobManager(event_loop, **job_kwargs)
Esempio n. 2
0
def test():
    """Manual smoke test for the snapshot manager.

    Wires a JobManager, an IndexManager and a SnapshotManager together,
    schedules a single ``snapshot`` call on the event loop and then runs the
    loop forever, so this function does not return on its own.
    """
    from biothings.utils.manager import JobManager
    from biothings.hub.dataindex.indexer import IndexManager

    event_loop = asyncio.get_event_loop()
    jobs = JobManager(event_loop)

    indexes = IndexManager(job_manager=jobs)
    indexes.configure(config.INDEX_CONFIG)

    snapshots = SnapshotManager(
        index_manager=indexes,
        job_manager=jobs,
        poll_schedule="* * * * * */10",
    )
    snapshots.configure(config.SNAPSHOT_CONFIG)

    # Alternative entry point, left disabled:
    # snapshots.poll("snapshot", snapshots.snapshot_build)

    async def _trigger_snapshot():
        # Runs only the "post" step of the "prod" snapshot for a fixed index.
        # NOTE(review): the result of snapshot() is not awaited here --
        # confirm that this is intended.
        snapshots.snapshot(
            'prod',
            'mynews_202009170234_fjvg7skx',
            steps="post",
        )

    asyncio.ensure_future(_trigger_snapshot())
    event_loop.run_forever()
Esempio n. 3
0
# Hand the application config to biothings; this happens before the
# biothings submodules further down are imported.
import config
import biothings
biothings.config_for_app(config)

import logging

# Silence chatty third-party loggers: only ERROR and above get through.
logging.getLogger("elasticsearch").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
logging.getLogger("requests").setLevel(logging.ERROR)

from biothings.utils.manager import JobManager

# NOTE(review): asyncio and concurrent.futures are not imported in the
# visible part of this script -- confirm they are imported earlier.
loop = asyncio.get_event_loop()

# One process pool sized by config.HUB_MAX_WORKERS, one thread pool with the
# library's default size.
process_queue = concurrent.futures.ProcessPoolExecutor(
    max_workers=config.HUB_MAX_WORKERS,
)
thread_queue = concurrent.futures.ThreadPoolExecutor()

# The process pool is installed as the loop's default executor.
# NOTE(review): the asyncio documentation states that recent Python versions
# only accept a ThreadPoolExecutor here -- confirm the interpreter version
# this script targets.
loop.set_default_executor(process_queue)

# Both pools are handed to the job manager; no memory limit value is given.
jmanager = JobManager(
    loop,
    process_queue,
    thread_queue,
    max_memory_usage=None,
)

import dataload
import biothings.dataload.uploader as uploader
import biothings.dataload.dumper as dumper
import biothings.databuild.builder as builder
import biothings.databuild.differ as differ
import biothings.databuild.syncer as syncer
import biothings.dataindex.indexer as indexer
from databuild.mapper import HasGeneMapper
from databuild.builder import TaxonomyDataBuilder
from dataindex.indexer import TaxonomyIndexer 

# Build the differ manager on top of the shared job manager, then configure
# it with its defaults (configure() is called without arguments).
differ_manager = differ.DifferManager(
    job_manager=jmanager,
)
differ_manager.configure()
Esempio n. 4
0
# Hand the application config to biothings; this happens before the
# biothings submodules below are imported.
biothings.config_for_app(config)

from biothings.utils.manager import JobManager

loop = asyncio.get_event_loop()

# One process pool sized by config.HUB_MAX_WORKERS, one thread pool with the
# library's default size.
process_queue = concurrent.futures.ProcessPoolExecutor(
    max_workers=config.HUB_MAX_WORKERS)
thread_queue = concurrent.futures.ThreadPoolExecutor()

# The process pool is installed as the loop's default executor.
# NOTE(review): the asyncio documentation states that recent Python versions
# only accept a ThreadPoolExecutor here -- confirm the interpreter version
# this script targets.
loop.set_default_executor(process_queue)

# An integer HUB_MAX_MEM_USAGE is scaled by 1024**3 (NOTE(review): presumably
# a GiB -> bytes conversion -- confirm); any other value, e.g. None, is passed
# through unchanged.
max_mem = (
    config.HUB_MAX_MEM_USAGE * 1024**3
    if isinstance(config.HUB_MAX_MEM_USAGE, int)
    else config.HUB_MAX_MEM_USAGE
)

job_manager = JobManager(
    loop,
    process_queue,
    thread_queue,
    max_memory_usage=max_mem,
)

import dataload
import biothings.dataload.uploader as uploader
import biothings.dataload.dumper as dumper
import biothings.databuild.builder as builder
import biothings.databuild.differ as differ
import biothings.databuild.syncer as syncer
import biothings.dataindex.indexer as indexer
from databuild.builder import MyVariantDataBuilder
from databuild.mapper import TagObserved
from dataindex.indexer import VariantIndexer

# will check every 10 seconds for sources to upload
Esempio n. 5
0
# Keep boto quiet (errors only) and let keylookup log from INFO upwards.
logging.getLogger("boto").setLevel(logging.ERROR)
logging.getLogger("keylookup").setLevel(logging.INFO)

# Report which hub backend and database are configured. The values are passed
# as logging arguments so the logging module does the formatting, and only
# when the record is actually emitted.
logging.info("Hub DB backend: %s", biothings.config.HUB_DB_BACKEND)
logging.info("Hub database: %s", biothings.config.DATA_HUB_DB_DATABASE)

from biothings.utils.hub import start_server, HubShell

from biothings.utils.manager import JobManager

loop = asyncio.get_event_loop()

# A process pool sized by config.HUB_MAX_WORKERS and a default-sized thread
# pool are created here; only the process pool is used below, as the loop's
# default executor.
process_queue = concurrent.futures.ProcessPoolExecutor(
    max_workers=config.HUB_MAX_WORKERS,
)
thread_queue = concurrent.futures.ThreadPoolExecutor()
# NOTE(review): the asyncio documentation states that recent Python versions
# only accept a ThreadPoolExecutor here -- confirm the interpreter version
# this script targets.
loop.set_default_executor(process_queue)

# NOTE(review): neither process_queue nor thread_queue is passed to
# JobManager in this call -- confirm whether the pools above are still needed.
job_manager = JobManager(
    loop,
    num_workers=config.HUB_MAX_WORKERS,
    max_memory_usage=config.HUB_MAX_MEM_USAGE,
)

# The hub shell is built around the job manager.
shell = HubShell(job_manager)

import hub.dataload
import biothings.hub.dataload.uploader as uploader
import biothings.hub.dataload.dumper as dumper
import biothings.hub.dataload.source as source
import biothings.hub.databuild.builder as builder
import biothings.hub.databuild.differ as differ
import biothings.hub.databuild.syncer as syncer
import biothings.hub.dataindex.indexer as indexer
import biothings.hub.datainspect.inspector as inspector
from biothings.hub.api.manager import APIManager
from hub.databuild.builder import MyGeneDataBuilder
Esempio n. 6
0
# Hand the application config to biothings; this happens before the
# biothings submodules further down are imported.
import config
import biothings
biothings.config_for_app(config)

import logging

# Silence chatty third-party loggers: only ERROR and above get through.
logging.getLogger("elasticsearch").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
logging.getLogger("requests").setLevel(logging.ERROR)

# Report which hub backend and database are configured. The values are passed
# as logging arguments so the logging module does the formatting, and only
# when the record is actually emitted.
logging.info("Hub DB backend: %s", biothings.config.HUB_DB_BACKEND)
logging.info("Hub database: %s", biothings.config.DATA_HUB_DB_DATABASE)

from biothings.utils.manager import JobManager

# NOTE(review): asyncio is not imported in the visible part of this script --
# confirm it is imported earlier.
loop = asyncio.get_event_loop()

# The job manager receives its worker count and memory limit straight from
# config; no executors are created in this script.
jmanager = JobManager(
    loop,
    num_workers=config.HUB_MAX_WORKERS,
    max_memory_usage=config.HUB_MAX_MEM_USAGE,
)

import hub.dataload
import biothings.hub.dataload.uploader as uploader
import biothings.hub.dataload.dumper as dumper
import biothings.hub.databuild.builder as builder
import biothings.hub.databuild.differ as differ
import biothings.hub.databuild.syncer as syncer
import biothings.hub.dataindex.indexer as indexer
from hub.databuild.mapper import HasGeneMapper
from hub.databuild.builder import TaxonomyDataBuilder
from hub.dataindex.indexer import TaxonomyIndexer

# Differ manager bound to the shared job manager.
# NOTE(review): poll_schedule is presumably a cron-style expression firing
# every 10 seconds -- confirm against DifferManager.
differ_manager = differ.DifferManager(
    job_manager=jmanager,
    poll_schedule="* * * * * */10",
)
Esempio n. 7
0
# Hand the application config to biothings; this happens before the
# biothings submodules further down are imported.
biothings.config_for_app(config)

import logging

# Silence chatty third-party loggers: only ERROR and above get through.
logging.getLogger("elasticsearch").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
logging.getLogger("requests").setLevel(logging.ERROR)
logging.getLogger("boto").setLevel(logging.ERROR)

# Report which hub backend and database are configured. The values are passed
# as logging arguments so the logging module does the formatting, and only
# when the record is actually emitted.
logging.info("Hub DB backend: %s", biothings.config.HUB_DB_BACKEND)
logging.info("Hub database: %s", biothings.config.DATA_HUB_DB_DATABASE)

from biothings.utils.manager import JobManager

# NOTE(review): asyncio is not imported in the visible part of this script --
# confirm it is imported earlier.
loop = asyncio.get_event_loop()

# Worker count, thread count and memory limit all come straight from config;
# no executors are created in this script.
job_manager = JobManager(
    loop,
    num_workers=config.HUB_MAX_WORKERS,
    num_threads=config.HUB_MAX_THREADS,
    max_memory_usage=config.HUB_MAX_MEM_USAGE,
)

import hub.dataload
import biothings.hub.dataload.uploader as uploader
import biothings.hub.dataload.dumper as dumper
import biothings.hub.dataload.source as source
import biothings.hub.databuild.builder as builder
import biothings.hub.databuild.differ as differ
import biothings.hub.databuild.syncer as syncer
import biothings.hub.dataindex.indexer as indexer
import biothings.hub.datainspect.inspector as inspector
from biothings.hub.api.manager import APIManager
from hub.databuild.builder import MyVariantDataBuilder
from hub.databuild.mapper import TagObserved
from hub.dataindex.indexer import VariantIndexer
Esempio n. 8
0
import logging

# Silence chatty third-party loggers: only ERROR and above get through.
logging.getLogger("elasticsearch").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
logging.getLogger("requests").setLevel(logging.ERROR)
logging.getLogger("boto").setLevel(logging.ERROR)

# Report which hub backend and database are configured. The values are passed
# as logging arguments so the logging module does the formatting, and only
# when the record is actually emitted.
logging.info("Hub DB backend: %s", biothings.config.HUB_DB_BACKEND)
logging.info("Hub database: %s", biothings.config.DATA_HUB_DB_DATABASE)

from biothings.utils.manager import JobManager

loop = asyncio.get_event_loop()

# A process pool sized by config.HUB_MAX_WORKERS and a default-sized thread
# pool are created here; only the process pool is used below, as the loop's
# default executor.
process_queue = concurrent.futures.ProcessPoolExecutor(
    max_workers=config.HUB_MAX_WORKERS,
)
thread_queue = concurrent.futures.ThreadPoolExecutor()
# NOTE(review): the asyncio documentation states that recent Python versions
# only accept a ThreadPoolExecutor here -- confirm the interpreter version
# this script targets.
loop.set_default_executor(process_queue)

# NOTE(review): neither process_queue nor thread_queue is passed to
# JobManager in this call -- confirm whether the pools above are still needed.
job_manager = JobManager(
    loop,
    num_workers=config.HUB_MAX_WORKERS,
    max_memory_usage=config.HUB_MAX_MEM_USAGE,
)

import hub.dataload
import biothings.hub.dataload.uploader as uploader
import biothings.hub.dataload.dumper as dumper
import biothings.hub.databuild.builder as builder
import biothings.hub.databuild.differ as differ
import biothings.hub.databuild.syncer as syncer
import biothings.hub.dataindex.indexer as indexer
from hub.databuild.builder import MyGeneDataBuilder
from hub.databuild.mapper import EntrezRetired2Current, Ensembl2Entrez
from hub.dataindex.indexer import GeneIndexer
import biothings.utils.mongo as mongo

# Upload manager that polls for sources to upload; the schedule below is
# meant to fire every 10 seconds.
upload_manager = uploader.UploaderManager(
    poll_schedule='* * * * * */10',
    job_manager=job_manager,
)