def setUp(self):
     """Point the hub at a MongoDB-backed hub database named "test_config",
     then expose the loaded config wrapper and its rendered dict on the test
     instance for the assertions that follow.
     """
     import conf_deep
     conf_deep.DATA_HUB_DB_DATABASE = "test_config"
     # hub_db backend setting is mandatory (original comment was truncated)
     conf_deep.HUB_DB_BACKEND = dict(
         module="biothings.utils.mongo",
         uri="mongodb://su05:27017",
     )
     import biothings
     biothings.config_for_app(conf_deep)
     from biothings import config
     self.confobj = config
     self.config = self.confobj.show()["scope"]["config"]
Example #2
0
 def setUp(self):
     """Configure the hub with a local sqlite3 hub-db backend (db files in
     the current directory), then capture the loaded config wrapper and its
     rendered dict on the test instance.
     """
     import conf_deep
     backend = dict(module="biothings.utils.sqlite3",
                    sqlite_db_folder=".")
     conf_deep.HUB_DB_BACKEND = backend  # mandatory (original comment truncated)
     conf_deep.DATA_HUB_DB_DATABASE = "hubdb_config"
     import biothings
     biothings.config_for_app(conf_deep)
     from biothings import config
     self.confobj = config
     self.config = self.confobj.show()["scope"]["config"]
Example #3
0
def main(build_name, **kwargs):
    """Run a data merge for the build named *build_name*.

    Extra keyword arguments are forwarded to BuilderManager.merge().
    """
    import biothings, config
    biothings.config_for_app(config)
    import biothings.databuild.builder as builder
    from databuild.builder import MyVariantDataBuilder

    # a single event loop drives the whole merge
    event_loop = biothings.get_loop()

    manager = builder.BuilderManager(
        builder_class=MyVariantDataBuilder,
        event_loop=event_loop,
    )
    manager.sync()  # grab build configs
    merge_job = manager.merge(build_name, **kwargs)
    event_loop.run_until_complete(merge_job)
Example #4
0
def main(build_name, **kwargs):
    """Merge the build *build_name* with MyVariantDataBuilder.

    Extra keyword arguments are forwarded to BuilderManager.merge().
    """
    import biothings, config
    biothings.config_for_app(config)
    import biothings.databuild.builder as builder
    from databuild.builder import MyVariantDataBuilder

    loop = biothings.get_loop()

    bmanager = builder.BuilderManager(builder_class=MyVariantDataBuilder, event_loop=loop)
    # pick up existing build configurations before launching the merge
    bmanager.sync()
    loop.run_until_complete(bmanager.merge(build_name, **kwargs))
Example #5
0
def main(source, **kwargs):
    """Dump (download) the data source named *source*.

    Extra keyword arguments are forwarded to SourceManager.dump_src().
    Blocks until every dump job submitted for the source has completed.
    """
    # fix: asyncio.wait() is used below but asyncio was never imported in
    # this function's scope (all other imports here are function-local)
    import asyncio

    import biothings, config
    biothings.config_for_app(config)

    from biothings.dataload.dumper import SourceManager
    import dataload  # presumably imported for registration side effects — TODO confirm

    loop = biothings.get_loop()

    src_manager = SourceManager(loop)
    src_manager.register_source(source)
    jobs = src_manager.dump_src(source, **kwargs)

    # wait for all dump jobs before returning
    loop.run_until_complete(asyncio.wait(jobs))
Example #6
0
def main(source, **kwargs):
    """Upload the data source named *source*.

    Extra keyword arguments are forwarded to SourceManager.upload_src().
    Blocks until every upload job submitted for the source has completed.
    """
    # fix: asyncio.wait() is used below but asyncio was never imported in
    # this function's scope (all other imports here are function-local)
    import asyncio

    import biothings, config
    biothings.config_for_app(config)

    from biothings.dataload.uploader import SourceManager
    import dataload  # presumably imported for registration side effects — TODO confirm

    loop = biothings.get_loop()

    src_manager = SourceManager(loop)
    src_manager.register_source(source)
    jobs = src_manager.upload_src(source, **kwargs)

    # wait for all upload jobs before returning
    loop.run_until_complete(asyncio.wait(jobs))
Example #7
0
def main(source):
    """Upload the data source named *source* using the manager class named
    by config.SOURCE_MANAGER_CLASS, falling back to the default uploader
    SourceManager when the parameter is missing or empty.
    """
    import biothings, config
    biothings.config_for_app(config)
    import dataload
    import biothings.dataload as btdataload

    # classpath format: package1.module1.Class1
    default_klass = "biothings.dataload.uploader.SourceManager"
    klass_path = getattr(config, "SOURCE_MANAGER_CLASS", None) or default_klass
    klass = get_class_from_classpath(klass_path)
    uploader = klass()
    uploader.register_sources(dataload.__sources_dict__)
    uploader.upload_src(source)
Example #8
0
 def setUp(self):
     """Reload conf_base, force a MongoDB hub-db backend plus one
     runtime-only parameter, then capture the config wrapper, its rendered
     dict, and the stored hub config for the tests.
     """
     import importlib
     import conf_base
     # reload: the config manager may have deleted some params previously
     importlib.reload(conf_base)
     conf_base.HUB_DB_BACKEND = dict(
         module="biothings.utils.mongo",
         uri="mongodb://su05:27017",
     )  # mandatory (original comment truncated)
     conf_base.DATA_HUB_DB_DATABASE = "unittest_config"
     # simulate a config param set at runtime, not from config files
     conf_base.DYNPARAM = "runtime"
     import biothings
     biothings.config_for_app(conf_base)
     from biothings import config
     self.confobj = config
     self.config = self.confobj.show()["scope"]["config"]
     from biothings.utils.hub_db import get_hub_config
     self.hub_config = get_hub_config()
Example #9
0
# Hub launcher: wires the app config into biothings and builds a HubServer
# over the registered dataload sources. (Excerpt truncated: the
# `if __name__` body continues past this view.)
import biothings
import config
import hub.dataload.sources
from biothings.hub import HubServer
from biothings.utils.version import set_versions
import os
import logging
# shut some mouths
logging.getLogger("botocore").setLevel(logging.ERROR)
logging.getLogger("boto3").setLevel(logging.ERROR)
logging.getLogger("s3transfer").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)

# app_folder: parent directory of the directory containing this file
app_folder, _src = os.path.split(os.path.split(os.path.abspath(__file__))[0])
set_versions(config, app_folder)
biothings.config_for_app(config)
# NOTE(review): rebinds the name `logging` from the stdlib module to the app
# logger — code below this line can no longer reach the logging module itself
logging = config.logger

server = HubServer(hub.dataload.sources, name="BioThings Studio")

# import ptvsd

# Allow other computers to attach to ptvsd at this IP address and port.
# ptvsd.enable_attach(address=('1.2.3.4', 3000), redirect_output=True)

# Pause the program until a remote debugger is attached
# ptvsd.wait_for_attach()

if __name__ == "__main__":
    # vanilla or as a launcher of an API
    from optparse import OptionParser
Example #10
0
#!/usr/bin/env python

# Hub bootstrap: sets up the event loop, process/thread executors and the
# JobManager, then imports the dataload/databuild managers. (Excerpt ends
# mid-file; more setup presumably follows.)
import asyncio, asyncssh, sys
import concurrent.futures
from functools import partial

import config, biothings
biothings.config_for_app(config)

import logging
# shut some mouths...
logging.getLogger("elasticsearch").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
logging.getLogger("requests").setLevel(logging.ERROR)

from biothings.utils.manager import JobManager
loop = asyncio.get_event_loop()
# process pool sized from config; thread pool uses the executor's default size
process_queue = concurrent.futures.ProcessPoolExecutor(max_workers=config.HUB_MAX_WORKERS)
thread_queue = concurrent.futures.ThreadPoolExecutor()
# run_in_executor(None, ...) will use the process pool by default
loop.set_default_executor(process_queue)
# max_memory_usage=None: no memory cap passed to the job manager
jmanager = JobManager(loop,
                      process_queue, thread_queue,
                      max_memory_usage=None,
                      )

import dataload
import biothings.dataload.uploader as uploader
import biothings.dataload.dumper as dumper
import biothings.databuild.builder as builder
import biothings.databuild.differ as differ
import biothings.databuild.syncer as syncer
Example #11
0
''' Generic BioMart Dumper And Ensembl (Vertebrate) BioMart Dumper '''
import os
from ftplib import FTP

import requests

import config
from biothings import config_for_app
from biothings.hub.dataload.dumper import DumperException, HTTPDumper
from biothings.utils.common import is_int, safewfile
from biothings.utils.dataload import tab2list

# wire the app config into the biothings package before anything else runs
config_for_app(config)


# Concrete example of a full BioMart XML query (human genes, 3 attributes);
# not referenced anywhere in this excerpt — appears to serve as documentation
XML_QUERY_TEMPLATE_EXAMPLE = '''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE Query>
<Query  virtualSchemaName = "default" formatter = "TSV" header = "0" uniqueRows = "1" count = "" datasetConfigVersion = "0.6" >

    <Dataset name = "hsapiens_gene_ensembl" interface = "default" >
        <Attribute name = "ensembl_gene_id" />
        <Attribute name = "ensembl_transcript_id" />
        <Attribute name = "ensembl_peptide_id" />
    </Dataset>
</Query>
'''

# Parameterized template: %(virtual_schema)s and %(dataset)s are substituted
# at query time (the string literal continues past this excerpt)
XML_QUERY_TEMPLATE = '''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE Query>
<Query  virtualSchemaName = "%(virtual_schema)s" formatter = "TSV" header = "0" uniqueRows = "1" count = "" datasetConfigVersion = "0.6" >
    <Dataset name = "%(dataset)s" interface = "default" >
Example #12
0
# Module head for the keylookup/datatransform machinery: imports, config
# wiring, and the shared module-level logger.
import copy
import re

from biothings.hub.datatransform.datatransform import DataTransform
from networkx import all_simple_paths, nx
import biothings.utils.mongo as mongo
from biothings.utils.loggers import get_logger
from biothings import config as btconfig
from biothings import config_for_app

# Configuration of collections from biothings config file
config_for_app(btconfig)

# Setup logger and logging level
kl_log = get_logger('keylookup', btconfig.LOG_FOLDER)


class DataTransformSerial(DataTransform):
    # Constants
    DEFAULT_WEIGHT = 1
    default_source = '_id'

    def __init__(self,
                 G,
                 collections,
                 input_types,
                 output_types,
                 skip_on_failure=False,
                 skip_w_regex=None):
        """
        Initialize the keylookup object and precompute paths from the