def setUp(self):
    import conf_deep
    conf_deep.HUB_DB_BACKEND = {
        "module": "biothings.utils.mongo",
        "uri": "mongodb://su05:27017"
    }  # mandatory for
    conf_deep.DATA_HUB_DB_DATABASE = "test_config"
    import biothings
    biothings.config_for_app(conf_deep)
    from biothings import config
    self.confobj = config
    self.config = self.confobj.show()["scope"]["config"]
def setUp(self):
    import conf_deep
    conf_deep.HUB_DB_BACKEND = {
        "module": "biothings.utils.sqlite3",
        "sqlite_db_folder": "."
    }  # mandatory for
    conf_deep.DATA_HUB_DB_DATABASE = "hubdb_config"
    import biothings
    biothings.config_for_app(conf_deep)
    from biothings import config
    self.confobj = config
    self.config = self.confobj.show()["scope"]["config"]
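Both setUp() variants above import a local configuration module and patch it before handing it to biothings.config_for_app(). For reference, a minimal sketch of what such a module could contain; every name below other than the ones patched in the tests is an illustrative assumption, not taken from the snippets:

# conf_deep.py / conf_base.py -- hypothetical minimal hub configuration module.
# The tests overwrite HUB_DB_BACKEND and DATA_HUB_DB_DATABASE at runtime,
# so the values here are only placeholders.
HUB_DB_BACKEND = {
    "module": "biothings.utils.sqlite3",  # backend implementation module
    "sqlite_db_folder": ".",              # where the sqlite file is created
}
DATA_HUB_DB_DATABASE = "hubdb"            # internal hub database name
LOG_FOLDER = "/tmp/hub/logs"              # assumed: most hub configs define one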
def main(build_name, **kwargs):
    import biothings, config
    biothings.config_for_app(config)
    import biothings.databuild.builder as builder
    from databuild.builder import MyVariantDataBuilder
    loop = biothings.get_loop()
    bmanager = builder.BuilderManager(builder_class=MyVariantDataBuilder,
                                      event_loop=loop)
    bmanager.sync()  # grab build configs
    job = bmanager.merge(build_name, **kwargs)
    loop.run_until_complete(job)
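A minimal way to call this merge entry point from the command line; the argument handling below is an illustrative assumption, not part of the original script:

if __name__ == "__main__":
    import sys
    # the build name must match a build configuration registered in the hub
    main(sys.argv[1])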
def main(source, **kwargs):
    import asyncio  # needed for asyncio.wait() below
    import biothings, config
    biothings.config_for_app(config)
    from biothings.dataload.dumper import SourceManager
    import dataload
    loop = biothings.get_loop()
    src_manager = SourceManager(loop)
    src_manager.register_source(source)
    jobs = src_manager.dump_src(source, **kwargs)
    loop.run_until_complete(asyncio.wait(jobs))
def main(source, **kwargs):
    import asyncio  # needed for asyncio.wait() below
    import biothings, config
    biothings.config_for_app(config)
    from biothings.dataload.uploader import SourceManager
    import dataload
    loop = biothings.get_loop()
    src_manager = SourceManager(loop)
    src_manager.register_source(source)
    jobs = src_manager.upload_src(source, **kwargs)
    loop.run_until_complete(asyncio.wait(jobs))
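The dump and upload drivers share the same main(source, **kwargs) signature, with extra keyword arguments forwarded verbatim to dump_src()/upload_src(). An illustrative invocation; both the source name and the force flag are assumptions about what the managers accept:

if __name__ == "__main__":
    main("mysource", force=True)  # hypothetical: force a re-dump/re-upload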
def main(source):
    import biothings, config
    biothings.config_for_app(config)
    import dataload
    import biothings.dataload as btdataload
    # assumed import location for the dotted-path class loader used below
    from biothings.utils.common import get_class_from_classpath
    # class path format: package1.module1.Class1
    default_klass = "biothings.dataload.uploader.SourceManager"
    klass_path = getattr(config, "SOURCE_MANAGER_CLASS", default_klass)
    if not klass_path:
        # config may define SOURCE_MANAGER_CLASS but leave it empty/None
        klass_path = default_klass
    klass = get_class_from_classpath(klass_path)
    uploader = klass()
    uploader.register_sources(dataload.__sources_dict__)
    uploader.upload_src(source)
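get_class_from_classpath() resolves a dotted "package1.module1.Class1" string to a class object. For reference, a minimal self-contained sketch of such a loader built on importlib (an illustration, not the biothings implementation):

import importlib

def load_class_from_path(class_path):
    """Hypothetical helper: resolve 'pkg.module.Class' to the class object."""
    module_path, _, class_name = class_path.rpartition(".")
    module = importlib.import_module(module_path)
    return getattr(module, class_name)

# e.g. load_class_from_path("collections.OrderedDict") returns collections.OrderedDict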
def setUp(self):
    import conf_base
    # reload as config manager may delete some params
    import importlib
    importlib.reload(conf_base)
    conf_base.HUB_DB_BACKEND = {
        "module": "biothings.utils.mongo",
        "uri": "mongodb://su05:27017"
    }  # mandatory for
    conf_base.DATA_HUB_DB_DATABASE = "unittest_config"
    # simulate config param set at runtime, not from config files
    conf_base.DYNPARAM = "runtime"
    import biothings
    biothings.config_for_app(conf_base)
    from biothings import config
    self.confobj = config
    self.config = self.confobj.show()["scope"]["config"]
    from biothings.utils.hub_db import get_hub_config
    self.hub_config = get_hub_config()
import biothings
import config
import hub.dataload.sources
from biothings.hub import HubServer
from biothings.utils.version import set_versions
import os
import logging

# silence noisy third-party loggers
logging.getLogger("botocore").setLevel(logging.ERROR)
logging.getLogger("boto3").setLevel(logging.ERROR)
logging.getLogger("s3transfer").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)

app_folder, _src = os.path.split(os.path.split(os.path.abspath(__file__))[0])
set_versions(config, app_folder)
biothings.config_for_app(config)
logging = config.logger  # from here on, "logging" is the hub's configured logger

server = HubServer(hub.dataload.sources, name="BioThings Studio")

# import ptvsd
# Allow other computers to attach to ptvsd at this IP address and port.
# ptvsd.enable_attach(address=('1.2.3.4', 3000), redirect_output=True)
# Pause the program until a remote debugger is attached
# ptvsd.wait_for_attach()

if __name__ == "__main__":
    # vanilla or as a launcher of an API
    from optparse import OptionParser
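The __main__ block above is cut off in this excerpt. A hedged sketch of how such a launcher typically finishes, kept as comments since the original option handling is not shown:

#     (options, args) = OptionParser().parse_args()  # assumed option parsing
#     server.start()  # assumed: HubServer exposes start() to run the hub loop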
#!/usr/bin/env python

import asyncio, asyncssh, sys
import concurrent.futures
from functools import partial

import config, biothings
biothings.config_for_app(config)

import logging
# silence noisy third-party loggers
logging.getLogger("elasticsearch").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
logging.getLogger("requests").setLevel(logging.ERROR)

from biothings.utils.manager import JobManager

loop = asyncio.get_event_loop()
process_queue = concurrent.futures.ProcessPoolExecutor(max_workers=config.HUB_MAX_WORKERS)
thread_queue = concurrent.futures.ThreadPoolExecutor()
loop.set_default_executor(process_queue)
jmanager = JobManager(loop,
                      process_queue, thread_queue,
                      max_memory_usage=None,
                      )

import dataload
import biothings.dataload.uploader as uploader
import biothings.dataload.dumper as dumper
import biothings.databuild.builder as builder
import biothings.databuild.differ as differ
import biothings.databuild.syncer as syncer
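The process pool set as the loop's default executor is where CPU-bound hub jobs end up. A tiny self-contained illustration of that mechanism; heavy_task and demo are hypothetical, not hub code:

import math

def heavy_task(n):
    # CPU-bound work; top-level so the process pool can pickle it
    return sum(math.sqrt(i) for i in range(n))

async def demo():
    # None selects the loop's default executor, i.e. the process pool above
    return await loop.run_in_executor(None, heavy_task, 1_000_000)

# result = loop.run_until_complete(demo())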
'''
Generic BioMart Dumper And Ensembl (Vertebrate) BioMart Dumper
'''
import os
from ftplib import FTP

import requests

import config
from biothings import config_for_app
from biothings.hub.dataload.dumper import DumperException, HTTPDumper
from biothings.utils.common import is_int, safewfile
from biothings.utils.dataload import tab2list

config_for_app(config)

XML_QUERY_TEMPLATE_EXAMPLE = '''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE Query>
<Query virtualSchemaName = "default" formatter = "TSV" header = "0" uniqueRows = "1" count = "" datasetConfigVersion = "0.6" >
    <Dataset name = "hsapiens_gene_ensembl" interface = "default" >
        <Attribute name = "ensembl_gene_id" />
        <Attribute name = "ensembl_transcript_id" />
        <Attribute name = "ensembl_peptide_id" />
    </Dataset>
</Query>
'''

XML_QUERY_TEMPLATE = '''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE Query>
<Query virtualSchemaName = "%(virtual_schema)s" formatter = "TSV" header = "0" uniqueRows = "1" count = "" datasetConfigVersion = "0.6" >
    <Dataset name = "%(dataset)s" interface = "default" >
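XML_QUERY_TEMPLATE (truncated above) is parametrized with old-style % placeholders. An illustrative substitution reusing the values from the example template:

query = XML_QUERY_TEMPLATE % {
    "virtual_schema": "default",
    "dataset": "hsapiens_gene_ensembl",
}
# the resulting XML can then be sent to a BioMart endpoint, e.g. via requests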
import copy
import re

from biothings.hub.datatransform.datatransform import DataTransform
import networkx as nx
from networkx import all_simple_paths
import biothings.utils.mongo as mongo
from biothings.utils.loggers import get_logger
from biothings import config as btconfig
from biothings import config_for_app

# Configuration of collections from biothings config file
config_for_app(btconfig)

# Setup logger and logging level
kl_log = get_logger('keylookup', btconfig.LOG_FOLDER)


class DataTransformSerial(DataTransform):
    # Constants
    DEFAULT_WEIGHT = 1
    default_source = '_id'

    def __init__(self, G, collections, input_types, output_types,
                 skip_on_failure=False, skip_w_regex=None):
        """
        Initialize the keylookup object and precompute paths from the
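The docstring refers to precomputing paths between key types; all_simple_paths() is the networkx primitive for that traversal. A self-contained sketch with a hypothetical graph of identifier types:

import networkx as nx
from networkx import all_simple_paths

g = nx.DiGraph()
g.add_edges_from([("clingen", "rsid"), ("rsid", "_id")])
print(list(all_simple_paths(g, "clingen", "_id")))
# [['clingen', 'rsid', '_id']]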