Example no. 1
0
    def configure(self):
        """
        Wire the hub together: event loop, managers, commands and shell,
        then the optional api/dataupload/websocket/reloader features.
        Statement order matters (eg. API endpoints are configured after the
        shell, since endpoint setup registers additional default commands).
        """
        self.configure_ioloop()
        self.configure_managers()
        self.configure_commands()
        self.configure_extra_commands()
        # setup the shell
        self.shell = HubShell(self.managers["job_manager"])
        self.shell.register_managers(self.managers)
        self.shell.set_commands(self.commands, self.extra_commands)
        self.shell.server = self  # propagate server instance in shell
        # so it's accessible from the console if needed
        # set api -- identity check per PEP8: only an *explicit* False
        # deactivates the feature
        if self.api_config is not False:
            self.configure_api_endpoints(
            )  # after shell setup as it adds some default commands
            # we want to expose through the api
            from biothings.hub.api import generate_api_routes
            self.routes = generate_api_routes(self.shell, self.api_endpoints)

        if self.dataupload_config is not False:
            # this one is not bound to a specific command
            from biothings.hub.api.handlers.upload import UploadHandler
            # tuple type = interpreted as a route handler
            # (raw string: "\w"/"\." are invalid escape sequences otherwise)
            self.routes.append((r"/dataupload/([\w\.-]+)?", UploadHandler,
                                self.dataupload_config))

        if self.websocket_config is not False:
            # add websocket endpoint
            import biothings.hub.api.handlers.ws as ws
            import sockjs.tornado
            from biothings.utils.hub_db import ChangeWatcher
            listener = ws.HubDBListener()
            ChangeWatcher.add(listener)
            ChangeWatcher.publish()
            self.logger.info("Starting SockJS router")
            ws_router = sockjs.tornado.SockJSRouter(
                partial(ws.WebSocketConnection, listener=listener), '/ws')
            self.routes.extend(ws_router.urls)

        if self.reloader_config is not False:
            monitored_folders = self.reloader_config["folders"] or [
                "hub/dataload/sources",
                getattr(config, "DATA_PLUGIN_FOLDER", None)
            ]
            reload_managers = [
                self.managers[m] for m in self.reloader_config["managers"]
                if m in self.managers
            ]
            reload_func = self.reloader_config["reload_func"] or partial(
                self.shell.restart, force=True)
            reloader = HubReloader(monitored_folders,
                                   reload_managers,
                                   reload_func=reload_func)
            reloader.monitor()

        # done
        self.configured = True
Example no. 2
0
    def configure(self):
        """
        Wire the hub together: hooks, event loop, managers, shell, features
        and commands. Order is significant (see inline comments); must run
        once before the server starts.
        """
        self.before_configure()
        self.remaining_features = copy.deepcopy(self.features) # keep track of what's been configured
        self.configure_ioloop()
        self.configure_managers()
        # setup the shell
        self.shell = HubShell(self.managers["job_manager"])
        self.shell.register_managers(self.managers)
        self.shell.server = self # propagate server instance in shell
                                 # so it's accessible from the console if needed
        self.configure_remaining_features()
        self.configure_commands()
        self.configure_extra_commands()
        self.shell.set_commands(self.commands,self.extra_commands)
        # set api (an explicitly-passed False deactivates the feature)
        if self.api_config != False:
            self.configure_api_endpoints() # after shell setup as it adds some default commands
                                           # we want to expose through the api
            from biothings.hub.api import generate_api_routes
            self.routes.extend(generate_api_routes(self.shell, self.api_endpoints))

        # done
        self.configured = True
Example no. 3
0
class HubServer(object):
    """
    Turn-key hub server: instantiates managers, shell, commands and optional
    features (api, websocket, reloader, dataupload, ...) from a list of
    datasources. See __init__ for customization points.
    """

    # all features enabled when none are passed to __init__
    DEFAULT_FEATURES = ["config","job","dump","upload","dataplugin","source",
                        "build","diff","index","snapshot","release","inspect","sync","api",
                        "terminal","reloader","dataupload","ws"]
    # per-feature default manager arguments (merged by mixargs())
    DEFAULT_MANAGERS_ARGS = {"upload" : {"poll_schedule" : "* * * * * */10"}}
    DEFAULT_RELOADER_CONFIG = {"folders": None, # will use default one
                               "managers" : ["source_manager","assistant_manager"],
                               "reload_func" : None} # will use default one
    DEFAULT_DATAUPLOAD_CONFIG = {"upload_root" : getattr(config,"DATA_UPLOAD_FOLDER",None)}
    DEFAULT_WEBSOCKET_CONFIG = {}
    DEFAULT_API_CONFIG = {}

    def __init__(self, source_list, features=None, name="BioThings Hub",
                 managers_custom_args=None, api_config=None, reloader_config=None,
                 dataupload_config=None, websocket_config=None):
        """
        Helper to setup and instantiate common managers usually used in a hub
        (eg. dumper manager, uploader manager, etc...)
        "source_list" is either:
            - a list of string corresponding to paths to datasources modules
            - a package containing sub-folders with datasources modules
        Specific managers can be retrieved adjusting "features" parameter, where
        each feature corresponds to one or more managers. Parameter defaults to
        all possible available. Managers are configured/init in the same order as the list,
        so if a manager (eg. job_manager) is required by all others, it must be the first
        in the list.
        "managers_custom_args" is an optional dict used to pass specific arguments while
        init managers:
            managers_custom_args={"upload" : {"poll_schedule" : "*/5 * * * *"}}
        will set poll schedule to check upload every 5min (instead of default 10s)
        "reloader_config", "dataupload_config" and "websocket_config" can be used to
        customize reloader, dataupload and websocket. If None, default config is used.
        If explicitly False, feature is deactivated.
        """
        self.name = name
        self.source_list = source_list
        self.logger, self.logfile = get_logger("hub")
        self._passed_features = features
        self._passed_managers_custom_args = managers_custom_args
        self.features = self.clean_features(features or self.DEFAULT_FEATURES)
        # None (not a mutable {} default) to avoid sharing a dict between instances
        self.managers_custom_args = managers_custom_args if managers_custom_args is not None else {}
        # fix: use "is None" checks instead of "X or DEFAULT" so an explicit
        # False is preserved and actually deactivates the feature in
        # configure() as documented ("or" silently replaced False with defaults)
        self.reloader_config = reloader_config if reloader_config is not None else self.DEFAULT_RELOADER_CONFIG
        self.dataupload_config = dataupload_config if dataupload_config is not None else self.DEFAULT_DATAUPLOAD_CONFIG
        self.websocket_config = websocket_config if websocket_config is not None else self.DEFAULT_WEBSOCKET_CONFIG
        self.ws_listeners = [] # collect listeners that should be connected (push data through) to websocket
        self.api_config = api_config if api_config is not None else self.DEFAULT_API_CONFIG
        # set during configure()
        self.managers = None
        self.api_endpoints = None
        self.shell = None
        self.commands = None
        self.extra_commands = None
        self.routes = []
        # flag "do we need to configure?"
        self.configured = False

    def clean_features(self, features):
        """
        Deduplicate a feature list while keeping the original ordering
        (a plain set() would lose the configuration order, which matters).
        """
        seen = OrderedDict()
        for feature in features:
            seen.setdefault(feature, None)
        return list(seen)

    def before_configure(self):
        """
        Hook triggered before configure(), used eg. to adjust features list.
        Default implementation is a no-op; subclasses may override it.
        """

    def configure(self):
        """
        Wire the hub together: hooks, event loop, managers, shell, features
        and commands. Order is significant (see inline comments); must be
        called once before start().
        """
        self.before_configure()
        self.remaining_features = copy.deepcopy(self.features) # keep track of what's been configured
        self.configure_ioloop()
        self.configure_managers()
        # setup the shell
        self.shell = HubShell(self.managers["job_manager"])
        self.shell.register_managers(self.managers)
        self.shell.server = self # propagate server instance in shell
                                 # so it's accessible from the console if needed
        self.configure_remaining_features()
        self.configure_commands()
        self.configure_extra_commands()
        self.shell.set_commands(self.commands,self.extra_commands)
        # set api (an explicitly-passed False deactivates the feature)
        if self.api_config != False:
            self.configure_api_endpoints() # after shell setup as it adds some default commands
                                           # we want to expose through the api
            from biothings.hub.api import generate_api_routes
            self.routes.extend(generate_api_routes(self.shell, self.api_endpoints))

        # done
        self.configured = True

    def configure_ioloop(self):
        """
        Bridge tornado onto the asyncio event loop so both share the same loop.
        NOTE(review): AsyncIOMainLoop().install() is deprecated in modern
        tornado (>=5 runs on asyncio by default) — confirm the pinned
        tornado version before changing this.
        """
        import tornado.platform.asyncio
        tornado.platform.asyncio.AsyncIOMainLoop().install()

    def before_start(self):
        """Hook called just before the server starts; subclasses may override."""

    def start(self):
        """
        Configure the hub if needed, then start it: optional tornado-based
        API server (when routes exist), SSH server, then run the event loop
        forever. Exits the process if the SSH server fails to start.
        """
        if not self.configured:
            self.configure()
        self.logger.info("Starting server '%s'" % self.name)
        # can't use asyncio.get_event_loop() if python < 3.5.3 as it would return
        # another instance of aio loop, take it from job_manager to make sure
        # we share the same one
        loop = self.managers["job_manager"].loop
        if self.routes:
            self.logger.info(self.routes)
            self.logger.info("Starting Hub API server")
            import tornado.web
            # register app into current event loop
            api = tornado.web.Application(self.routes)
            self.extra_commands["api"] = api
            from biothings.hub.api import start_api
            # NOTE(review): return value unused; presumably start_api registers
            # the server on the loop as a side effect — confirm
            api_server = start_api(api,config.HUB_API_PORT,settings=getattr(config,"TORNADO_SETTINGS",{}))
        else:
            self.logger.info("No route defined, API server won't start")
        # at this point, everything is ready/set, last call for customizations
        self.before_start()
        self.ssh_server = start_ssh_server(loop,self.name,passwords=config.HUB_PASSWD,
                              port=config.HUB_SSH_PORT,shell=self.shell)
        try:
            loop.run_until_complete(self.ssh_server)
        except (OSError, asyncssh.Error) as exc:
            sys.exit('Error starting server: ' + str(exc))
        loop.run_forever()

    def mixargs(self, feat, params=None):
        """
        Merge default parameters ("params") with user-provided
        managers_custom_args for the given feature; custom args win.
        Note: matched custom args are pop()'ed, ie. consumed from
        self.managers_custom_args[feat] (preserved original behavior).
        """
        # fix: no mutable default argument; None stands for "no defaults"
        params = params or {}
        args = {}
        custom = self.managers_custom_args.get(feat, {})
        for p in params:
            # fix: honor falsy custom values (0, "", False); only fall back
            # to the default when the arg was not customized at all
            # (previous "pop(...) or params[p]" silently discarded them)
            val = custom.pop(p, None)
            args[p] = params[p] if val is None else val
        # mix remaining custom args not listed in params
        args.update(custom)
        return args

    def configure_job_manager(self):
        """Create the JobManager driving all asynchronous jobs in the hub."""
        import asyncio
        from biothings.utils.manager import JobManager
        defaults = {"num_workers": config.HUB_MAX_WORKERS,
                    "max_memory_usage": config.HUB_MAX_MEM_USAGE}
        kwargs = self.mixargs("job", defaults)
        self.managers["job_manager"] = JobManager(asyncio.get_event_loop(), **kwargs)

    def configure_dump_manager(self):
        """Create the DumperManager in charge of downloading datasources."""
        from biothings.hub.dataload.dumper import DumperManager
        kwargs = self.mixargs("dump")
        self.managers["dump_manager"] = DumperManager(
            job_manager=self.managers["job_manager"], **kwargs)

    def configure_upload_manager(self):
        """
        Create the UploaderManager in charge of storing datasources' data.
        Consistency fix: default poll schedule comes from
        DEFAULT_MANAGERS_ARGS (previously duplicated/hard-coded here while
        the class constant was left unused).
        """
        from biothings.hub.dataload.uploader import UploaderManager
        args = self.mixargs("upload", self.DEFAULT_MANAGERS_ARGS["upload"])
        upload_manager = UploaderManager(job_manager=self.managers["job_manager"],**args)
        self.managers["upload_manager"] = upload_manager

    def configure_dataplugin_manager(self):
        """Create the DataPluginManager and its AssistantManager for data plugins."""
        from biothings.hub.dataplugin.manager import DataPluginManager
        from biothings.hub.dataplugin.assistant import AssistantManager
        plugin_manager = DataPluginManager(job_manager=self.managers["job_manager"])
        self.managers["dataplugin_manager"] = plugin_manager
        kwargs = self.mixargs("dataplugin")
        self.managers["assistant_manager"] = AssistantManager(
            data_plugin_manager=plugin_manager,
            dumper_manager=self.managers["dump_manager"],
            uploader_manager=self.managers["upload_manager"],
            job_manager=self.managers["job_manager"],
            **kwargs)

    def configure_build_manager(self):
        """Create the BuilderManager merging datasources into builds."""
        from biothings.hub.databuild.builder import BuilderManager
        kwargs = self.mixargs("build")
        manager = BuilderManager(job_manager=self.managers["job_manager"], **kwargs)
        manager.configure()
        self.managers["build_manager"] = manager

    def configure_diff_manager(self):
        """Create the DifferManager computing diffs, with automatic polling."""
        from biothings.hub.databuild.differ import DifferManager, SelfContainedJsonDiffer
        kwargs = self.mixargs("diff")
        manager = DifferManager(job_manager=self.managers["job_manager"],
                                poll_schedule="* * * * * */10", **kwargs)
        manager.configure([SelfContainedJsonDiffer])
        # register a poller triggering a self-contained json diff per document
        manager.poll("diff",
                     lambda doc: manager.diff("jsondiff-selfcontained",
                                              old=None, new=doc["_id"]))
        self.managers["diff_manager"] = manager

    def configure_index_manager(self):
        """Create the IndexManager, configured from config.INDEX_CONFIG."""
        from biothings.hub.dataindex.indexer import IndexManager
        kwargs = self.mixargs("index")
        manager = IndexManager(job_manager=self.managers["job_manager"], **kwargs)
        manager.configure(config.INDEX_CONFIG)
        self.managers["index_manager"] = manager

    def configure_snapshot_manager(self):
        """Create the SnapshotManager (requires the 'index' feature)."""
        assert "index" in self.features, "'snapshot' feature requires 'index'"
        from biothings.hub.dataindex.snapshooter import SnapshotManager
        kwargs = self.mixargs("snapshot")
        manager = SnapshotManager(index_manager=self.managers["index_manager"],
                                  job_manager=self.managers["job_manager"],
                                  **kwargs)
        manager.configure(config.SNAPSHOT_CONFIG)
        # TODO: automatic snapshot polling not enabled yet (snapshot env to determine)
        self.managers["snapshot_manager"] = manager

    def configure_release_manager(self):
        """Create the ReleaseManager (requires 'diff' and 'snapshot' features)."""
        assert "diff" in self.features, "'release' feature requires 'diff'"
        assert "snapshot" in self.features, "'release' feature requires 'snapshot'"
        from biothings.hub.datarelease.publisher import ReleaseManager
        kwargs = self.mixargs("release")
        manager = ReleaseManager(diff_manager=self.managers["diff_manager"],
                                 snapshot_manager=self.managers["snapshot_manager"],
                                 job_manager=self.managers["job_manager"],
                                 poll_schedule="* * * * * */10",
                                 **kwargs)
        manager.configure(config.RELEASE_CONFIG)
        # register a poller creating a release note per document
        manager.poll("release_note",
                     lambda doc: manager.create_release_note(old=None, new=doc["_id"]))
        self.managers["release_manager"] = manager

    def configure_sync_manager(self):
        """Create the SyncerManager."""
        from biothings.hub.databuild.syncer import SyncerManager
        kwargs = self.mixargs("sync")
        manager = SyncerManager(job_manager=self.managers["job_manager"], **kwargs)
        manager.configure()
        self.managers["sync_manager"] = manager

    def configure_inspect_manager(self):
        """Create the InspectorManager (requires 'upload' and 'build' features)."""
        assert "upload" in self.features, "'inspect' feature requires 'upload'"
        assert "build" in self.features, "'inspect' feature requires 'build'"
        from biothings.hub.datainspect.inspector import InspectorManager
        kwargs = self.mixargs("inspect")
        self.managers["inspect_manager"] = InspectorManager(
            upload_manager=self.managers["upload_manager"],
            build_manager=self.managers["build_manager"],
            job_manager=self.managers["job_manager"],
            **kwargs)

    def configure_api_manager(self):
        """Create the APIManager (requires the 'index' feature)."""
        assert "index" in self.features, "'api' feature requires 'index'"
        from biothings.hub.api.manager import APIManager
        kwargs = self.mixargs("api")
        self.managers["api_manager"] = APIManager(**kwargs)

    def configure_source_manager(self):
        """
        Create the SourceManager aggregating dump/upload/dataplugin managers,
        then initialize data plugins and schedule dumpers / poll uploaders.
        """
        if "dump" in self.features or "upload" in self.features:
            args = self.mixargs("source")
            from biothings.hub.dataload.source import SourceManager
            source_manager = SourceManager(
                    source_list=self.source_list,
                    dump_manager=self.managers["dump_manager"],
                    upload_manager=self.managers["upload_manager"],
                    data_plugin_manager=self.managers.get("dataplugin_manager"),
                    **args)  # fix: custom "source" args were computed but never passed
            self.managers["source_manager"] = source_manager
        # init data plugin once source_manager has been set (it inits dumper and uploader
        # managers, if assistant_manager is configured/loaded before, datasources won't appear
        # in dumper/uploader managers as they were not ready yet)
        if "dataplugin" in self.features:
            self.managers["assistant_manager"].configure()
            self.managers["assistant_manager"].load()

        # now that we have the source manager setup, we can schedule and poll
        if "dump" in self.features and not getattr(config,"SKIP_DUMPER_SCHEDULE",False):
            self.managers["dump_manager"].schedule_all()
        if "upload" in self.features and not getattr(config,"SKIP_UPLOADER_POLL",False):
            self.managers["upload_manager"].poll('upload',lambda doc:
                    self.shell.launch(partial(self.managers["upload_manager"].upload_src,doc["_id"])))

    def configure_managers(self):
        """
        Instantiate a manager for each active feature, in declaration order
        (job_manager must come first as all others depend on it).

        Raises:
            Exception: if managers were already configured.
            AttributeError: if a feature has no matching
                configure_<feat>_{manager|feature} method.
        """
        # idiom fix: "is not None" instead of "not ... is None"
        if self.managers is not None:
            raise Exception("Managers have already been configured")
        self.managers = {}

        self.logger.info("Setting up managers for following features: %s" % self.features)
        assert "job" in self.features, "'job' feature is mandatory"
        if "source" in self.features:
            assert "dump" in self.features and "upload" in self.features, \
                    "'source' feature requires both 'dump' and 'upload' features"
        if "dataplugin" in self.features:
            assert "source" in self.features, "'dataplugin' feature requires 'source' feature"

        # specific order, eg. job_manager is used by all managers
        for feat in self.features:
            if hasattr(self, "configure_%s_manager" % feat):
                getattr(self, "configure_%s_manager" % feat)()
                self.remaining_features.remove(feat)
            elif hasattr(self, "configure_%s_feature" % feat):
                # handled later by configure_remaining_features(), not an error
                pass
            else:
                raise AttributeError("Feature '%s' listed but no 'configure_%s_{manager|feature}' method found" % (feat,feat))

        self.logger.info("Active manager(s): %s" % pformat(self.managers))

    def configure_config_feature(self):
        """
        No extra setup needed for the 'config' feature (its commands are
        registered by configure_commands()).
        """

    def configure_ws_feature(self):
        """
        Setup the websocket endpoint (/ws) through SockJS: pushes hub DB
        changes and log statements to the webapp. Listeners previously queued
        in self.ws_listeners (eg. by 'terminal') are wired in as well.
        """
        # add websocket endpoint
        import biothings.hub.api.handlers.ws as ws
        import sockjs.tornado
        from biothings.utils.hub_db import ChangeWatcher
        # monitor change in database to report activity in webapp
        self.db_listener = ws.HubDBListener()
        ChangeWatcher.add(self.db_listener)
        ChangeWatcher.publish()
        self.log_listener = ws.LogListener()
        # push log statements to the webapp
        root_logger = logging.getLogger() # careful, asyncio logger will trigger log statement while in the handler
                                          # (ie. infinite loop), root logger not recommended)
        root_logger.addHandler(WSLogHandler(self.log_listener))
        self.ws_listeners.extend([self.db_listener,self.log_listener])

        ws_router = sockjs.tornado.SockJSRouter(
                partial(ws.WebSocketConnection,
                    listeners=self.ws_listeners),
                '/ws')
        self.routes.extend(ws_router.urls)

    def configure_terminal_feature(self):
        """
        Expose the hub shell to the webapp: shell log output goes to a
        websocket listener and a /shell HTTP endpoint is registered.
        Must be configured before the 'ws' feature so the listener is
        included in the websocket router.
        """
        assert "ws" in self.features, "'terminal' feature requires 'ws'"
        # fix: assertion message grammar ("should configured" -> "should be configured")
        assert "ws" in self.remaining_features, "'terminal' feature should be configured before 'ws'"
        # shell logger/listener to communicate between webapp and hub ssh console
        import biothings.hub.api.handlers.ws as ws
        shell_listener = ws.LogListener()
        shell_logger = logging.getLogger("shell")
        assert isinstance(shell_logger, ShellLogger), "shell_logger isn't properly set"
        shell_logger.addHandler(WSShellHandler(shell_listener))
        self.ws_listeners.append(shell_listener)
        # webapp terminal to hub shell connection through /shell endpoint
        from biothings.hub.api.handlers.shell import ShellHandler
        shell_endpoint = ("/shell", ShellHandler,
                          {"shell": self.shell, "shellog": shell_logger})
        self.routes.append(shell_endpoint)

    def configure_dataupload_feature(self):
        """
        Register the /dataupload endpoint. Must be configured before the
        'ws' feature (hence the remaining_features assertion).
        """
        assert "ws" in self.features, "'dataupload' feature requires 'ws'"
        # fix: assertion message grammar ("should configured" -> "should be configured")
        assert "ws" in self.remaining_features, "'dataupload' feature should be configured before 'ws'"
        # this one is not bound to a specific command
        from biothings.hub.api.handlers.upload import UploadHandler
        # tuple type = interpreted as a route handler
        # fix: raw string, "\w"/"\." are invalid escape sequences otherwise
        self.routes.append((r"/dataupload/([\w\.-]+)?", UploadHandler, self.dataupload_config))

    def configure_reloader_feature(self):
        """Watch datasource code folders and restart the hub when they change."""
        folders = self.reloader_config["folders"] or [
            "hub/dataload/sources",
            getattr(config, "DATA_PLUGIN_FOLDER", None),
        ]
        managers = [self.managers[name]
                    for name in self.reloader_config["managers"]
                    if name in self.managers]
        on_reload = self.reloader_config["reload_func"] or partial(self.shell.restart, force=True)
        HubReloader(folders, managers, reload_func=on_reload).monitor()

    def configure_remaining_features(self):
        """
        Configure features not backed by a manager (ws, terminal, dataupload,
        ...), ie. whatever is left in self.remaining_features after
        configure_managers() ran.

        Raises:
            AttributeError: if a feature has no configure_<feat>_feature method.
        """
        self.logger.info("Setting up remaining features: %s" % self.remaining_features)
        # iterate over a copy since we remove items while looping
        # (fix: shallow copy is enough for a list of strings, deepcopy was overkill)
        for feat in list(self.remaining_features):
            if hasattr(self, "configure_%s_feature" % feat):
                getattr(self, "configure_%s_feature" % feat)()
                self.remaining_features.remove(feat)
                # (fix: removed dead "pass" statement left over from copy-paste)
            else:
                raise AttributeError("Feature '%s' listed but no 'configure_%s_feature' method found" % (feat,feat))
        
    def configure_commands(self):
        """
        Configure hub commands according to available managers.
        Commands registered here are publicly exposed in the hub shell
        (see configure_extra_commands() for the private ones). Each entry
        maps a command name to a manager method or CommandDefinition.
        """
        assert self.managers, "No managers configured"
        self.commands = HubCommands()
        self.commands["status"] = CommandDefinition(command=partial(status,self.managers),tracked=False)
        if "config" in self.features:
            self.commands["config"] = CommandDefinition(command=config.show,tracked=False)
            self.commands["setconf"] = config.store_value_to_db
            self.commands["resetconf"] = config.reset
        # getting info
        if self.managers.get("source_manager"):
            self.commands["source_info"] = CommandDefinition(command=self.managers["source_manager"].get_source,tracked=False)
            self.commands["source_reset"] = CommandDefinition(command=self.managers["source_manager"].reset,tracked=True)
        # dump commands
        if self.managers.get("dump_manager"):
            self.commands["dump"] = self.managers["dump_manager"].dump_src
            self.commands["dump_all"] = self.managers["dump_manager"].dump_all
        # upload commands
        if self.managers.get("upload_manager"):
            self.commands["upload"] = self.managers["upload_manager"].upload_src
            self.commands["upload_all"] = self.managers["upload_manager"].upload_all
        # building/merging
        if self.managers.get("build_manager"):
            self.commands["whatsnew"] = CommandDefinition(command=self.managers["build_manager"].whatsnew,tracked=False)
            self.commands["lsmerge"] = self.managers["build_manager"].list_merge
            self.commands["rmmerge"] = self.managers["build_manager"].delete_merge
            self.commands["merge"] = self.managers["build_manager"].merge
            self.commands["archive"] = self.managers["build_manager"].archive_merge
        # expose raw *_CONFIG values as commands when defined in config
        if hasattr(config,"INDEX_CONFIG"):
            self.commands["index_config"] = config.INDEX_CONFIG
        if hasattr(config,"SNAPSHOT_CONFIG"):
            self.commands["snapshot_config"] = config.SNAPSHOT_CONFIG
        if hasattr(config,"PUBLISH_CONFIG"):
            self.commands["publish_config"] = config.PUBLISH_CONFIG
        # diff
        if self.managers.get("diff_manager"):
            self.commands["diff"] = self.managers["diff_manager"].diff
            self.commands["report"] = self.managers["diff_manager"].diff_report
        # indexing commands
        if self.managers.get("index_manager"):
            self.commands["index"] = self.managers["index_manager"].index
        if self.managers.get("snapshot_manager"):
            self.commands["snapshot"] = self.managers["snapshot_manager"].snapshot
        # data release commands
        if self.managers.get("release_manager"):
            self.commands["create_release_note"] = self.managers["release_manager"].create_release_note
            self.commands["get_release_note"] = CommandDefinition(command=self.managers["release_manager"].get_release_note,tracked=False)
            self.commands["publish"] = self.managers["release_manager"].publish
            self.commands["publish_diff"] = self.managers["release_manager"].publish_diff
            self.commands["publish_snapshot"] = self.managers["release_manager"].publish_snapshot
        if self.managers.get("sync_manager"):
            self.commands["sync"] = CommandDefinition(command=self.managers["sync_manager"].sync)
        # inspector
        if self.managers.get("inspect_manager"):
            self.commands["inspect"] = self.managers["inspect_manager"].inspect
        # data plugins
        if self.managers.get("assistant_manager"):
            self.commands["register_url"] = partial(self.managers["assistant_manager"].register_url)
            self.commands["unregister_url"] = partial(self.managers["assistant_manager"].unregister_url)
            self.commands["export_plugin"] = partial(self.managers["assistant_manager"].export)
        if self.managers.get("dataplugin_manager"):
            self.commands["dump_plugin"] = self.managers["dataplugin_manager"].dump_src

        logging.info("Registered commands: %s" % list(self.commands.keys()))

    def configure_extra_commands(self):
        """
        Same as configure_commands() but commands are not exposed publicly in the shell
        (they are shortcuts or commands for API endpoints, supporting commands, etc...).
        Mostly short aliases giving direct access to manager instances and
        their introspection methods.
        """
        assert self.managers, "No managers configured"
        self.extra_commands = {} # unordered since not exposed, we don't care
        # prefer the job_manager's loop so everything shares the same loop
        loop = self.managers.get("job_manager") and self.managers["job_manager"].loop or asyncio.get_event_loop()
        self.extra_commands["g"] = CommandDefinition(command=globals(),tracked=False)
        self.extra_commands["sch"] = CommandDefinition(command=partial(schedule,loop),tracked=False)
        # expose constant so no need to put quotes (eg. top(pending) instead of top("pending"))
        self.extra_commands["pending"] = CommandDefinition(command=pending,tracked=False)
        self.extra_commands["loop"] = CommandDefinition(command=loop,tracked=False)

        if self.managers.get("job_manager"):
            self.extra_commands["pqueue"] = CommandDefinition(command=self.managers["job_manager"].process_queue,tracked=False)
            self.extra_commands["tqueue"] = CommandDefinition(command=self.managers["job_manager"].thread_queue,tracked=False)
            self.extra_commands["jm"] = CommandDefinition(command=self.managers["job_manager"],tracked=False)
            self.extra_commands["top"] = CommandDefinition(command=self.managers["job_manager"].top,tracked=False)
            self.extra_commands["job_info"] = CommandDefinition(command=self.managers["job_manager"].job_info,tracked=False)
        if self.managers.get("source_manager"):
            self.extra_commands["sm"] = CommandDefinition(command=self.managers["source_manager"],tracked=False)
            self.extra_commands["sources"] = CommandDefinition(command=self.managers["source_manager"].get_sources,tracked=False)
            self.extra_commands["source_save_mapping"] = CommandDefinition(command=self.managers["source_manager"].save_mapping)
        if self.managers.get("dump_manager"):
            self.extra_commands["dm"] = CommandDefinition(command=self.managers["dump_manager"],tracked=False)
            self.extra_commands["dump_info"] = CommandDefinition(command=self.managers["dump_manager"].dump_info,tracked=False)
        if self.managers.get("dataplugin_manager"):
            self.extra_commands["dpm"] = CommandDefinition(command=self.managers["dataplugin_manager"],tracked=False)
        if self.managers.get("assistant_manager"):
            self.extra_commands["am"] = CommandDefinition(command=self.managers["assistant_manager"],tracked=False)
        if self.managers.get("upload_manager"):
            self.extra_commands["um"] = CommandDefinition(command=self.managers["upload_manager"],tracked=False)
            self.extra_commands["upload_info"] = CommandDefinition(command=self.managers["upload_manager"].upload_info,tracked=False)
        if self.managers.get("build_manager"):
            self.extra_commands["bm"] = CommandDefinition(command=self.managers["build_manager"],tracked=False)
            self.extra_commands["builds"] = CommandDefinition(command=self.managers["build_manager"].build_info,tracked=False)
            self.extra_commands["build"] = CommandDefinition(command=lambda id: self.managers["build_manager"].build_info(id=id),tracked=False)
            self.extra_commands["build_config_info"] = CommandDefinition(command=self.managers["build_manager"].build_config_info,tracked=False)
            self.extra_commands["build_save_mapping"] = CommandDefinition(command=self.managers["build_manager"].save_mapping)
            self.extra_commands["create_build_conf"] = CommandDefinition(command=self.managers["build_manager"].create_build_configuration)
            self.extra_commands["update_build_conf"] = CommandDefinition(command=self.managers["build_manager"].update_build_configuration)
            self.extra_commands["delete_build_conf"] = CommandDefinition(command=self.managers["build_manager"].delete_build_configuration)
        if self.managers.get("diff_manager"):
            self.extra_commands["dim"] = CommandDefinition(command=self.managers["diff_manager"],tracked=False)
            self.extra_commands["diff_info"] = CommandDefinition(command=self.managers["diff_manager"].diff_info,tracked=False)
            self.extra_commands["jsondiff"] = CommandDefinition(command=jsondiff,tracked=False)
        if self.managers.get("sync_manager"):
            self.extra_commands["sym"] = CommandDefinition(command=self.managers["sync_manager"],tracked=False)
        if self.managers.get("index_manager"):
            self.extra_commands["im"] = CommandDefinition(command=self.managers["index_manager"],tracked=False)
            self.extra_commands["index_info"] = CommandDefinition(command=self.managers["index_manager"].index_info,tracked=False)
            self.extra_commands["validate_mapping"] = CommandDefinition(command=self.managers["index_manager"].validate_mapping)
        if self.managers.get("snapshot_manager"):
            self.extra_commands["ssm"] = CommandDefinition(command=self.managers["snapshot_manager"],tracked=False)
            self.extra_commands["snapshot_info"] = CommandDefinition(command=self.managers["snapshot_manager"].snapshot_info,tracked=False)
        if self.managers.get("release_manager"):
            self.extra_commands["rm"] = CommandDefinition(command=self.managers["release_manager"],tracked=False)
            self.extra_commands["release_info"] = CommandDefinition(command=self.managers["release_manager"].release_info,tracked=False)
            self.extra_commands["reset_synced"] = CommandDefinition(command=self.managers["release_manager"].reset_synced,tracked=True)
        if self.managers.get("inspect_manager"):
            self.extra_commands["ism"] = CommandDefinition(command=self.managers["inspect_manager"],tracked=False)
        if self.managers.get("api_manager"):
            self.extra_commands["api"] = CommandDefinition(command=self.managers["api_manager"],tracked=False)
            self.extra_commands["get_apis"] = CommandDefinition(command=self.managers["api_manager"].get_apis,tracked=False)
            self.extra_commands["delete_api"] = CommandDefinition(command=self.managers["api_manager"].delete_api)
            self.extra_commands["create_api"] = CommandDefinition(command=self.managers["api_manager"].create_api)
            self.extra_commands["start_api"] = CommandDefinition(command=self.managers["api_manager"].start_api)
            self.extra_commands["stop_api"] = self.managers["api_manager"].stop_api

        logging.debug("Registered extra (private) commands: %s" % list(self.extra_commands.keys()))

    def configure_api_endpoints(self):
        """Build ``self.api_endpoints``, mapping URL path fragments to
        :class:`EndpointDefinition` objects (or lists of them, when one path
        serves several HTTP methods).

        Only commands actually registered in ``self.commands`` /
        ``self.extra_commands`` get an endpoint: each registration below is
        gated on its own command name, so hubs configured without a given
        manager simply don't expose the corresponding route. Paths that
        would end up with an empty list are removed entirely.
        """
        cmdnames = list(self.commands.keys())
        if self.extra_commands:
            cmdnames.extend(list(self.extra_commands.keys()))
        from biothings.hub.api import EndpointDefinition
        self.api_endpoints = {}
        # --- hub configuration ---
        self.api_endpoints["config"] = []
        if "config" in cmdnames:
            self.api_endpoints["config"].append(EndpointDefinition(name="config",method="get"))
            self.api_endpoints["config"].append(EndpointDefinition(name="setconf",method="put",force_bodyargs=True))
            self.api_endpoints["config"].append(EndpointDefinition(name="resetconf",method="delete",force_bodyargs=True))
        if not self.api_endpoints["config"]:
            self.api_endpoints.pop("config")
        # --- builds ---
        if "builds" in cmdnames: self.api_endpoints["builds"] = EndpointDefinition(name="builds",method="get")
        self.api_endpoints["build"] = []
        if "build" in cmdnames: self.api_endpoints["build"].append(EndpointDefinition(method="get",name="build"))
        if "archive" in cmdnames: self.api_endpoints["build"].append(EndpointDefinition(method="post",name="archive",suffix="archive"))
        if "rmmerge" in cmdnames: self.api_endpoints["build"].append(EndpointDefinition(method="delete",name="rmmerge"))
        if "merge" in cmdnames: self.api_endpoints["build"].append(EndpointDefinition(name="merge",method="put",suffix="new"))
        if "build_save_mapping" in cmdnames: self.api_endpoints["build"].append(EndpointDefinition(name="build_save_mapping",method="put",suffix="mapping"))
        if not self.api_endpoints["build"]:
            self.api_endpoints.pop("build")
        # --- release publishing ---
        self.api_endpoints["publish"] = []
        if "publish_diff" in cmdnames: self.api_endpoints["publish"].append(EndpointDefinition(name="publish_diff",method="post",suffix="incremental",force_bodyargs=True))
        if "publish_snapshot" in cmdnames: self.api_endpoints["publish"].append(EndpointDefinition(name="publish_snapshot",method="post",suffix="full",force_bodyargs=True))
        if not self.api_endpoints["publish"]:
            self.api_endpoints.pop("publish")
        if "diff" in cmdnames: self.api_endpoints["diff"] = EndpointDefinition(name="diff",method="put",force_bodyargs=True)
        # --- manager introspection (read-only "info" endpoints) ---
        if "job_info" in cmdnames: self.api_endpoints["job_manager"] = EndpointDefinition(name="job_info",method="get")
        if "dump_info" in cmdnames: self.api_endpoints["dump_manager"] = EndpointDefinition(name="dump_info", method="get")
        if "upload_info" in cmdnames: self.api_endpoints["upload_manager"] = EndpointDefinition(name="upload_info",method="get")
        if "build_config_info" in cmdnames: self.api_endpoints["build_manager"] = EndpointDefinition(name="build_config_info",method="get")
        if "index_info" in cmdnames: self.api_endpoints["index_manager"] = EndpointDefinition(name="index_info",method="get")
        if "snapshot_info" in cmdnames: self.api_endpoints["snapshot_manager"] = EndpointDefinition(name="snapshot_info",method="get")
        if "release_info" in cmdnames: self.api_endpoints["release_manager"] = EndpointDefinition(name="release_info",method="get")
        if "reset_synced" in cmdnames: self.api_endpoints["release_manager/reset_synced"] = EndpointDefinition(name="reset_synced",method="put")
        if "diff_info" in cmdnames: self.api_endpoints["diff_manager"] = EndpointDefinition(name="diff_info",method="get")
        if "commands" in cmdnames: self.api_endpoints["commands"] = EndpointDefinition(name="commands",method="get")
        if "command" in cmdnames: self.api_endpoints["command"] = EndpointDefinition(name="command",method="get")
        # --- data sources ---
        if "sources" in cmdnames: self.api_endpoints["sources"] = EndpointDefinition(name="sources",method="get")
        self.api_endpoints["source"] = []
        if "source_info" in cmdnames: self.api_endpoints["source"].append(EndpointDefinition(name="source_info",method="get"))
        if "source_reset" in cmdnames: self.api_endpoints["source"].append(EndpointDefinition(name="source_reset",method="post",suffix="reset"))
        if "dump" in cmdnames: self.api_endpoints["source"].append(EndpointDefinition(name="dump",method="put",suffix="dump"))
        if "upload" in cmdnames: self.api_endpoints["source"].append(EndpointDefinition(name="upload",method="put",suffix="upload"))
        if "source_save_mapping" in cmdnames: self.api_endpoints["source"].append(EndpointDefinition(name="source_save_mapping",method="put",suffix="mapping"))
        if not self.api_endpoints["source"]:
            self.api_endpoints.pop("source")
        if "inspect" in cmdnames: self.api_endpoints["inspect"] = EndpointDefinition(name="inspect",method="put",force_bodyargs=True)
        # --- data plugins ---
        if "register_url" in cmdnames: self.api_endpoints["dataplugin/register_url"] = EndpointDefinition(name="register_url",method="post",force_bodyargs=True)
        if "unregister_url" in cmdnames: self.api_endpoints["dataplugin/unregister_url"] = EndpointDefinition(name="unregister_url",method="delete",force_bodyargs=True)
        self.api_endpoints["dataplugin"] = []
        if "dump_plugin" in cmdnames: self.api_endpoints["dataplugin"].append(EndpointDefinition(name="dump_plugin",method="put",suffix="dump"))
        if "export_plugin" in cmdnames: self.api_endpoints["dataplugin"].append(EndpointDefinition(name="export_plugin",method="put",suffix="export"))
        if not self.api_endpoints["dataplugin"]:
            self.api_endpoints.pop("dataplugin")
        if "jsondiff" in cmdnames: self.api_endpoints["jsondiff"] = EndpointDefinition(name="jsondiff",method="post",force_bodyargs=True)
        if "validate_mapping" in cmdnames: self.api_endpoints["mapping/validate"] = EndpointDefinition(name="validate_mapping",method="post",force_bodyargs=True)
        # --- build configurations ---
        self.api_endpoints["buildconf"] = []
        if "create_build_conf" in cmdnames:
            self.api_endpoints["buildconf"].append(EndpointDefinition(name="create_build_conf",method="post",force_bodyargs=True))
        # bug fix: "update_build_conf" was previously registered whenever
        # "create_build_conf" existed; gate it on its own command name like
        # every other endpoint in this method, so a hub exposing only one of
        # the two commands doesn't advertise a dead route.
        if "update_build_conf" in cmdnames:
            self.api_endpoints["buildconf"].append(EndpointDefinition(name="update_build_conf",method="put",force_bodyargs=True))
        if "delete_build_conf" in cmdnames: self.api_endpoints["buildconf"].append(EndpointDefinition(name="delete_build_conf",method="delete",force_bodyargs=True))
        if not self.api_endpoints["buildconf"]:
            self.api_endpoints.pop("buildconf")
        # --- indexing / snapshot / sync ---
        if "index" in cmdnames: self.api_endpoints["index"] = EndpointDefinition(name="index",method="put",force_bodyargs=True)
        if "snapshot" in cmdnames: self.api_endpoints["snapshot"] = EndpointDefinition(name="snapshot",method="put",force_bodyargs=True)
        if "sync" in cmdnames: self.api_endpoints["sync"] = EndpointDefinition(name="sync",method="post",force_bodyargs=True)
        if "whatsnew" in cmdnames: self.api_endpoints["whatsnew"] = EndpointDefinition(name="whatsnew",method="get")
        if "status" in cmdnames: self.api_endpoints["status"] = EndpointDefinition(name="status",method="get")
        # --- release notes ---
        self.api_endpoints["release_note"] = []
        if "create_release_note" in cmdnames:
            self.api_endpoints["release_note"].append(EndpointDefinition(name="create_release_note",method="put",suffix="create",force_bodyargs=True))
        if "get_release_note" in cmdnames:
            self.api_endpoints["release_note"].append(EndpointDefinition(name="get_release_note",method="get",force_bodyargs=True))
        if not self.api_endpoints["release_note"]:
            self.api_endpoints.pop("release_note")
        # --- standalone API management ---
        self.api_endpoints["api"] = []
        if "start_api" in cmdnames: self.api_endpoints["api"].append(EndpointDefinition(name="start_api",method="put",suffix="start"))
        if "stop_api" in cmdnames: self.api_endpoints["api"].append(EndpointDefinition(name="stop_api",method="put",suffix="stop"))
        if "delete_api" in cmdnames: self.api_endpoints["api"].append(EndpointDefinition(name="delete_api",method="delete",force_bodyargs=True))
        if "create_api" in cmdnames: self.api_endpoints["api"].append(EndpointDefinition(name="create_api",method="post",force_bodyargs=True))
        if not self.api_endpoints["api"]:
            self.api_endpoints.pop("api")
        if "get_apis" in cmdnames: self.api_endpoints["api/list"] = EndpointDefinition(name="get_apis",method="get")
        # --- hub process control ---
        if "stop" in cmdnames: self.api_endpoints["stop"] = EndpointDefinition(name="stop",method="put")
        if "restart" in cmdnames: self.api_endpoints["restart"] = EndpointDefinition(name="restart",method="put")
# ---- Esempio n. 4 (scraped example separator; original score: 0) ----
# Log which hub_db backend and database this hub instance is wired to
# (useful when several hub configs coexist on one host).
logging.info("Hub DB backend: %s" % biothings.config.HUB_DB_BACKEND)
logging.info("Hub database: %s" % biothings.config.DATA_HUB_DB_DATABASE)

from biothings.utils.hub import start_server, HubShell

from biothings.utils.manager import JobManager
# One asyncio loop shared by the whole hub. Heavy jobs go to a process
# pool (sized by config), lighter tasks to a thread pool.
loop = asyncio.get_event_loop()
process_queue = concurrent.futures.ProcessPoolExecutor(
    max_workers=config.HUB_MAX_WORKERS)
thread_queue = concurrent.futures.ThreadPoolExecutor()
# Make the process pool the loop's default executor, so
# run_in_executor(None, ...) dispatches to it.
loop.set_default_executor(process_queue)
# JobManager coordinates job execution on the loop, bounded by worker
# count and a max-memory threshold.
job_manager = JobManager(loop,
                         num_workers=config.HUB_MAX_WORKERS,
                         max_memory_usage=config.HUB_MAX_MEM_USAGE)

# Interactive hub shell, bound to the job manager so console commands
# can launch managed jobs.
shell = HubShell(job_manager)

import hub.dataload
import biothings.hub.dataload.uploader as uploader
import biothings.hub.dataload.dumper as dumper
import biothings.hub.dataload.source as source
import biothings.hub.databuild.builder as builder
import biothings.hub.databuild.differ as differ
import biothings.hub.databuild.syncer as syncer
import biothings.hub.dataindex.indexer as indexer
import biothings.hub.datainspect.inspector as inspector
from biothings.hub.api.manager import APIManager
from hub.databuild.builder import MyGeneDataBuilder
from hub.databuild.mapper import EntrezRetired2Current, Ensembl2Entrez
import biothings.utils.mongo as mongo
# ---- Esempio n. 5 (scraped example separator; original score: 0) ----
                         max_memory_usage=config.HUB_MAX_MEM_USAGE)

import hub.dataload
from biothings.utils.hub import schedule, pending, done, start_server, \
                                HubShell
import biothings.hub.dataload.uploader as uploader
import biothings.hub.dataload.dumper as dumper
import biothings.hub.dataload.source as source
import biothings.hub.databuild.builder as builder
import biothings.hub.databuild.differ as differ
import biothings.hub.databuild.syncer as syncer
import biothings.hub.dataindex.indexer as indexer
from hub.databuild.builder import MyChemDataBuilder
from hub.dataindex.indexer import DrugIndexer

# Interactive hub shell bound to the shared job manager.
shell = HubShell(job_manager)

# will check every 10 seconds for sources to upload
upload_manager = uploader.UploaderManager(poll_schedule='* * * * * */10',
                                          job_manager=job_manager)
dump_manager = dumper.DumperManager(job_manager=job_manager)
# Source registry declared by the hub package; feeds the source manager.
sources_path = hub.dataload.__sources_dict__
smanager = source.SourceManager(sources_path, dump_manager, upload_manager)

# Activate each dumper's declared schedule.
dump_manager.schedule_all()
# When a source is flagged for upload, launch the upload through the
# shell so it runs as a tracked hub job.
upload_manager.poll(
    'upload',
    lambda doc: shell.launch(partial(upload_manager.upload_src, doc["_id"])))

# Builder manager using the drug-specific builder class for merges.
build_manager = builder.BuilderManager(builder_class=MyChemDataBuilder,
                                       job_manager=job_manager)
# ---- Esempio n. 6 (scraped example separator; original score: 0) ----
import biothings.hub.dataload.uploader as uploader
import biothings.hub.dataload.dumper as dumper
import biothings.hub.dataload.source as source
import biothings.hub.databuild.builder as builder
import biothings.hub.databuild.differ as differ
import biothings.hub.databuild.syncer as syncer
import biothings.hub.dataindex.indexer as indexer
import biothings.hub.datainspect.inspector as inspector
from biothings.hub.api.manager import APIManager
from hub.databuild.builder import MyVariantDataBuilder
from hub.databuild.mapper import TagObserved
from hub.dataindex.indexer import VariantIndexer
from biothings.utils.hub import schedule, pending, done, CompositeCommand, \
                                start_server, HubShell, CommandDefinition

# Interactive hub shell bound to the shared job manager.
shell = HubShell(job_manager)

# will check every 10 seconds for sources to upload
upload_manager = uploader.UploaderManager(poll_schedule='* * * * * */10',
                                          job_manager=job_manager)
dmanager = dumper.DumperManager(job_manager=job_manager)
sources_path = hub.dataload.__sources_dict__  #"hub/dataload/sources"
smanager = source.SourceManager(sources_path, dmanager, upload_manager)

# NOTE(review): dump scheduling is left disabled in this hub, unlike the
# other examples — confirm this is intentional.
#dmanager.schedule_all()
# When a source is flagged for upload, launch the upload through the
# shell so it runs as a tracked hub job.
upload_manager.poll(
    'upload',
    lambda doc: shell.launch(partial(upload_manager.upload_src, doc["_id"])))

# deal with 3rdparty datasources
import biothings.hub.dataplugin.assistant as assistant