def _main():  # pragma: no cover
    """
    This is the entrypoint of the girder-sftpd program. It should not be
    called from python code.
    """
    import argparse

    parser = argparse.ArgumentParser(
        prog='girder-sftpd', description='Run the Girder SFTP service.')
    parser.add_argument(
        '-i', '--identity-file', required=False,
        help='path to identity (private key) file')
    parser.add_argument('-p', '--port', required=False, default=DEFAULT_PORT, type=int)
    parser.add_argument('-H', '--host', required=False, default='localhost')

    args = parser.parse_args()

    keyFile = args.identity_file or os.path.expanduser(os.path.join('~', '.ssh', 'id_rsa'))

    try:
        hostKey = paramiko.RSAKey.from_private_key_file(keyFile)
    except paramiko.ssh_exception.PasswordRequiredException:
        logprint.error(
            'Error: encrypted key files are not supported (%s).' % keyFile,
            file=sys.stderr)
        sys.exit(1)

    server = SftpServer((args.host, args.port), hostKey)
    logprint.info('Girder SFTP service listening on %s:%d.' % (args.host, args.port))

    try:
        server.serve_forever()
    except (SystemExit, KeyboardInterrupt):
        server.server_close()

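# Example invocations (hypothetical key path, port, and host), using only the
# flags defined by the parser above; with no -i option the key falls back to
# ~/.ssh/id_rsa:
#
#   girder-sftpd
#   girder-sftpd -i /path/to/id_rsa -p 8022 -H 0.0.0.0
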
def getCache(self, numItems=None):
    curConfig = self.getConfig()
    # memcached is the fallback default, if available.
    cacheBackend = curConfig.get('cache_backend', 'memcached')
    if cacheBackend:
        cacheBackend = str(cacheBackend).lower()
    if cacheBackend == 'memcached' and MemCache and numItems is None:
        # lock needed because pylibmc (memcached client) is not threadsafe
        cacheLock = threading.Lock()

        # check if credentials and location exist for girder, otherwise
        # assume location is 127.0.0.1 (localhost) with no password
        url = curConfig.get('cache_memcached_url')
        if not url:
            url = '127.0.0.1'
        memcachedUsername = curConfig.get('cache_memcached_username')
        if not memcachedUsername:
            memcachedUsername = None
        memcachedPassword = curConfig.get('cache_memcached_password')
        if not memcachedPassword:
            memcachedPassword = None

        cache = MemCache(url, memcachedUsername, memcachedPassword)
    else:
        # fallback backend
        cacheBackend = 'python'
        cache = LRUCache(self.getCacheSize(numItems))
        cacheLock = None

    if numItems is None and not CacheFactory.logged:
        logprint.info('Using %s for large_image caching' % cacheBackend)
        CacheFactory.logged = True
    return cache, cacheLock

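# Minimal usage sketch (not part of the source above; the key/value strings are
# made up): callers receive a (cache, lock) pair and should guard access with
# the lock when one is returned, since the memcached client is not threadsafe.
factory = CacheFactory()
cache, cacheLock = factory.getCache()
if cacheLock:
    with cacheLock:
        cache['example-key'] = 'example-value'
else:
    cache['example-key'] = 'example-value'
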
def loadPlugins(plugins, root, appconf, apiRoot=None, curConfig=None, buildDag=True):
    """
    Loads a set of plugins into the application.

    :param plugins: The set of plugins to load, by directory name.
    :type plugins: list
    :param root: The root node of the server tree.
    :type root: object
    :param appconf: The server's cherrypy configuration object.
    :type appconf: dict
    :param apiRoot: The cherrypy api root object.
    :type apiRoot: object or None
    :param curConfig: A girder config object to use.
    :type curConfig: dict or None
    :param buildDag: If the ``plugins`` parameter is already a topo-sorted list
        with all dependencies resolved, set this to False and it will skip
        rebuilding the DAG. Otherwise the dependency resolution and sorting
        will occur within this method.
    :type buildDag: bool
    :returns: A 3-tuple containing the modified root, config, and apiRoot
        objects.
    :rtype: tuple
    """
    # Register a pseudo-package for the root of all plugins. This must be
    # present in the system module list in order to avoid import warnings.
    if curConfig is None:
        curConfig = _config.getConfig()

    if 'plugins' in curConfig and 'plugin_directory' in curConfig['plugins']:
        logprint.warning(
            'Warning: the plugin_directory setting is deprecated. Please use '
            'the `girder-install plugin` command and remove this setting from '
            'your config file.')

    if ROOT_PLUGINS_PACKAGE not in sys.modules:
        module = imp.new_module(ROOT_PLUGINS_PACKAGE)
        girder.plugins = module
        sys.modules[ROOT_PLUGINS_PACKAGE] = module

    logprint.info('Resolving plugin dependencies...')

    if buildDag:
        plugins = getToposortedPlugins(plugins, curConfig, ignoreMissing=True)

    for plugin in plugins:
        try:
            root, appconf, apiRoot = loadPlugin(
                plugin, root, appconf, apiRoot, curConfig=curConfig)
            logprint.success('Loaded plugin "%s"' % plugin)
        except Exception:
            logprint.exception(
                'ERROR: Failed to load plugin "%s":' % plugin)

    return root, appconf, apiRoot

def finalizeJob(self, jobId):
    """
    Finalize a job after completing the workflow.

    :param jobId: Job identifier.
    :type jobId: str
    """
    with self._lock:
        logprint.info(
            'DanesfieldWorkflowManager.finalizeJob Job={}'.format(jobId))

        self._jobData.pop(jobId, None)

def stepFailed(self, jobId, stepName):
    """
    Call when a step fails or is canceled.
    """
    with self._lock:
        logprint.info('DanesfieldWorkflowManager.stepFailed Job={} '
                      'StepName={}'.format(jobId, stepName))

        jobData = self._getJobData(jobId)

        # Record that step failed
        jobData['runningSteps'].remove(stepName)
        jobData['failedSteps'].add(stepName)

        if not jobData['runningSteps']:
            self.finalizeJob(jobId)

def addStandardOutput(self, jobId, stepName, output):
    """
    Record standard output from a step.

    :param jobId: Identifier of the job.
    :type jobId: str
    :param stepName: The name of the step to which the output belongs.
    :type stepName: str (DanesfieldStep)
    :param output: Standard output.
    :type output: list[str]
    """
    with self._lock:
        logprint.info('DanesfieldWorkflowManager.addStandardOutput Job={} '
                      'StepName={}'.format(jobId, stepName))

        jobData = self._getJobData(jobId)
        jobData['standardOutput'][stepName] = output

def testLogPrint(tempLog):
    tempLog = configureLogging({'log_max_info_level': 'INFO'})

    infoSize1 = os.path.getsize(tempLog['info_log_file'])
    errorSize1 = os.path.getsize(tempLog['error_log_file'])
    logprint.info(INFO_MSG)
    infoSize2 = os.path.getsize(tempLog['info_log_file'])
    errorSize2 = os.path.getsize(tempLog['error_log_file'])
    assert infoSize2 > infoSize1
    assert errorSize2 == errorSize1

    logprint.error(ERROR_MSG)
    infoSize3 = os.path.getsize(tempLog['info_log_file'])
    errorSize3 = os.path.getsize(tempLog['error_log_file'])
    # logprint sends to stdout, which we capture except when sent via
    # logprint, so we shouldn't see any additional data on the info log.
    assert infoSize3 == infoSize2
    assert errorSize3 > errorSize2

def testLogPrint(self):
    self.configureLogging({'log_max_info_level': 'INFO'})

    infoSize1 = os.path.getsize(self.infoFile)
    errorSize1 = os.path.getsize(self.errorFile)
    logprint.info(self.infoMessage)
    infoSize2 = os.path.getsize(self.infoFile)
    errorSize2 = os.path.getsize(self.errorFile)
    self.assertGreater(infoSize2, infoSize1)
    self.assertEqual(errorSize2, errorSize1)

    logprint.error(self.errorMessage)
    infoSize3 = os.path.getsize(self.infoFile)
    errorSize3 = os.path.getsize(self.errorFile)
    # logprint sends to stdout, which we capture except when sent via
    # logprint, so we shouldn't see any additional data on the info log.
    self.assertEqual(infoSize3, infoSize2)
    self.assertGreater(errorSize3, errorSize2)

def addFile(self, jobId, stepName, file):
    """
    Record a file created by a step.

    :param jobId: Identifier of job that created the file.
    :type jobId: str
    :param stepName: The name of the step that created the file.
    :type stepName: str (DanesfieldStep)
    :param file: File document.
    :type file: dict
    """
    with self._lock:
        logprint.info(
            'DanesfieldWorkflowManager.addFile Job={} StepName={} File={}'.format(
                jobId, stepName, file['_id']))

        jobData = self._getJobData(jobId)
        jobData['files'].setdefault(stepName, []).append(file)

def loadPlugins(plugins, root, appconf, apiRoot=None, buildDag=True):
    """
    Loads a set of plugins into the application.

    :param plugins: The set of plugins to load, by directory name.
    :type plugins: list
    :param root: The root node of the server tree.
    :type root: object
    :param appconf: The server's cherrypy configuration object.
    :type appconf: dict
    :param apiRoot: The cherrypy api root object.
    :type apiRoot: object or None
    :param buildDag: If the ``plugins`` parameter is already a topo-sorted list
        with all dependencies resolved, set this to False and it will skip
        rebuilding the DAG. Otherwise the dependency resolution and sorting
        will occur within this method.
    :type buildDag: bool
    :returns: A 3-tuple containing the modified root, config, and apiRoot
        objects.
    :rtype: tuple
    """
    # Register a pseudo-package for the root of all plugins. This must be
    # present in the system module list in order to avoid import warnings.
    if ROOT_PLUGINS_PACKAGE not in sys.modules:
        module = imp.new_module(ROOT_PLUGINS_PACKAGE)
        girder.plugins = module
        sys.modules[ROOT_PLUGINS_PACKAGE] = module

    logprint.info('Resolving plugin dependencies...')

    if buildDag:
        plugins = getToposortedPlugins(plugins, ignoreMissing=True)

    for plugin in plugins:
        try:
            root, appconf, apiRoot = loadPlugin(plugin, root, appconf, apiRoot)
            _clearPluginFailureInfo(plugin=plugin)
            logprint.success('Loaded plugin "%s"' % plugin)
        except Exception:
            _recordPluginFailureInfo(plugin=plugin, traceback=traceback.format_exc())
            logprint.exception('ERROR: Failed to load plugin "%s":' % plugin)

    return root, appconf, apiRoot

def main(identity_file, port, host):
    """
    This is the entrypoint of the girder sftpd program. It should not be
    called from python code.
    """
    try:
        hostKey = paramiko.RSAKey.from_private_key_file(identity_file)
    except paramiko.ssh_exception.PasswordRequiredException:
        logprint.error(
            'Error: encrypted key files are not supported (%s).' % identity_file,
            file=sys.stderr)
        sys.exit(1)

    server = SftpServer((host, port), hostKey)
    logprint.info('Girder SFTP service listening on %s:%d.' % (host, port))

    try:
        server.serve_forever()
    except (SystemExit, KeyboardInterrupt):
        server.server_close()

def getCache(self):
    defaultConfig = {}
    if config:
        curConfig = config.getConfig().get('large_image', defaultConfig)
    else:
        curConfig = defaultConfig
    # memcached is the fallback default, if available.
    cacheBackend = curConfig.get('cache_backend', 'memcached')
    if cacheBackend:
        cacheBackend = str(cacheBackend).lower()
    if cacheBackend == 'memcached' and MemCache:
        # lock needed because pylibmc (memcached client) is not threadsafe
        tileCacheLock = threading.Lock()

        # check if credentials and location exist for girder, otherwise
        # assume location is 127.0.0.1 (localhost) with no password
        url = curConfig.get('cache_memcached_url')
        if not url:
            url = '127.0.0.1'
        memcachedUsername = curConfig.get('cache_memcached_username')
        if not memcachedUsername:
            memcachedUsername = None
        memcachedPassword = curConfig.get('cache_memcached_password')
        if not memcachedPassword:
            memcachedPassword = None

        tileCache = MemCache(url, memcachedUsername, memcachedPassword)
    else:
        # fallback backend
        cacheBackend = 'python'
        try:
            portion = int(curConfig.get('cache_python_memory_portion', 8))
            if portion < 3:
                portion = 3
        except ValueError:
            portion = 16
        tileCache = LRUCache(pickAvailableCache(256**2 * 4, portion))
        tileCacheLock = None

    logprint.info('Using %s for large_image caching' % cacheBackend)
    return tileCache, tileCacheLock

def stepSucceeded(self, jobId, stepName):
    """
    Call when a step completes successfully.
    """
    with self._lock:
        logprint.info(
            'DanesfieldWorkflowManager.stepSucceeded Job={} StepName={}'.format(
                jobId, stepName))

        jobData = self._getJobData(jobId)

        # Record that step completed
        jobData['runningSteps'].remove(stepName)
        jobData['completedSteps'].add(stepName)

        # Create working set containing files created by step
        files = jobData['files'].get(stepName)
        workingSet = None
        if files:
            initialWorkingSet = jobData['workingSets'][DanesfieldStep.INIT]
            workingSetName = '{}: {}'.format(initialWorkingSet['name'], stepName)
            datasetIds = [file['itemId'] for file in files]
            workingSet = WorkingSet().createWorkingSet(
                name=workingSetName,
                parentWorkingSet=initialWorkingSet,
                datasetIds=datasetIds)
            jobData['workingSets'][stepName] = workingSet

        # Remove data applicable only while step is running
        jobData['files'].pop(stepName, None)
        jobData['groupResult'].pop(stepName, None)

        logprint.info(
            'DanesfieldWorkflowManager.createdWorkingSet Job={} '
            'StepName={} WorkingSet={}'.format(
                jobId, stepName,
                workingSet['_id'] if workingSet is not None else None))

def configureServer(test=False, plugins=None, curConfig=None):
    """
    Function to setup the cherrypy server. It configures it, but does not
    actually start it.

    :param test: Set to True when running in the tests.
    :type test: bool
    :param plugins: If you wish to start the server with a custom set of
        plugins, pass this as a list of plugins to load. Otherwise, will use
        the PLUGINS_ENABLED setting value from the db.
    :param curConfig: The configuration dictionary to update.
    """
    if curConfig is None:
        curConfig = config.getConfig()

    routeTable = loadRouteTable()

    appconf = {
        '/': {
            'request.dispatch': cherrypy.dispatch.MethodDispatcher(),
            'request.show_tracebacks': test,
            'request.methods_with_bodies': ('POST', 'PUT', 'PATCH'),
            'response.headers.server': 'Girder %s' % __version__,
            'error_page.default': _errorDefault
        }
    }
    # Add MIME types for serving Fontello files from staticdir;
    # these may be missing or incorrect in the OS
    mimetypes.add_type('application/vnd.ms-fontobject', '.eot')
    mimetypes.add_type('application/x-font-ttf', '.ttf')
    mimetypes.add_type('application/font-woff', '.woff')

    if test:
        appconf['/src'] = {
            'tools.staticdir.on': True,
            'tools.staticdir.root': constants.STATIC_ROOT_DIR,
            'tools.staticdir.dir': 'clients/web/src',
        }
        appconf['/test'] = {
            'tools.staticdir.on': True,
            'tools.staticdir.root': constants.STATIC_ROOT_DIR,
            'tools.staticdir.dir': 'clients/web/test',
        }
        appconf['/clients'] = {
            'tools.staticdir.on': True,
            'tools.staticdir.root': constants.STATIC_ROOT_DIR,
            'tools.staticdir.dir': 'clients'
        }
        appconf['/plugins'] = {
            'tools.staticdir.on': True,
            'tools.staticdir.root': constants.STATIC_ROOT_DIR,
            'tools.staticdir.dir': 'plugins',
        }

    curConfig.update(appconf)

    if test:
        # Force some config params in testing mode
        curConfig.update({
            'server': {
                'mode': 'testing',
                'api_root': 'api/v1',
                'static_root': 'static',
                'api_static_root': '../static'
            }
        })

    mode = curConfig['server']['mode'].lower()
    logprint.info('Running in mode: ' + mode)
    cherrypy.config['engine.autoreload.on'] = mode == 'development'

    # Don't import this until after the configs have been read; some module
    # initialization code requires the configuration to be set up.
    from girder.api import api_main

    root = webroot.Webroot()
    api_main.addApiToNode(root)

    cherrypy.engine.subscribe('start', girder.events.daemon.start)
    cherrypy.engine.subscribe('stop', girder.events.daemon.stop)

    if plugins is None:
        settings = model_importer.ModelImporter().model('setting')
        plugins = settings.get(constants.SettingKey.PLUGINS_ENABLED, default=())

    plugins = list(
        plugin_utilities.getToposortedPlugins(plugins, ignoreMissing=True))

    root.updateHtmlVars({
        'apiRoot': curConfig['server']['api_root'],
        'staticRoot': os.path.relpath(routeTable[constants.GIRDER_STATIC_ROUTE_ID],
                                      routeTable[constants.GIRDER_ROUTE_ID]),
        'plugins': plugins
    })

    # Make the staticRoot relative to the api_root, if possible. The api_root
    # could be relative or absolute, but it needs to be in an absolute form for
    # relpath to behave as expected. We always expect the api_root to
    # contain at least two components, but the reference from static needs to
    # be from only the first component.
    apiRootBase = os.path.split(
        os.path.join('/', curConfig['server']['api_root']))[0]

    root.api.v1.updateHtmlVars({
        'apiRoot': curConfig['server']['api_root'],
        'staticRoot': os.path.relpath(routeTable[constants.GIRDER_STATIC_ROUTE_ID],
                                      apiRootBase)
    })

    root, appconf, _ = plugin_utilities.loadPlugins(
        plugins, root, appconf, root.api.v1, buildDag=False)

    return root, appconf

def mountServerFuse(name, path, level=AccessType.ADMIN, user=None, force=False):
    """
    Mount a FUSE at a specific path with authorization for a given user.

    :param name: a key for this mount.  Each mount point must have a distinct
        key.
    :param path: the location where this mount will be in the local
        filesystem.  This should be an empty directory.
    :param level: access level used when checking which resources are
        available within the FUSE.  This is ignored currently, but could be
        used if non-readonly access is ever implemented.
    :param user: the user used for authorizing resource access.
    :param force: if True, all resources are available without checking the
        user or level.
    :returns: True if successful.  'present' if the mount is already present.
        None on failure.
    """
    with _fuseMountsLock:
        if name in _fuseMounts:
            if (_fuseMounts[name]['level'] == level and
                    _fuseMounts[name]['user'] == user and
                    _fuseMounts[name]['force'] == force):
                return 'present'
            unmountServerFuse(name)
        entry = {
            'level': level,
            'user': user,
            'force': force,
            'path': path,
            'stat': dict((key, getattr(os.stat(path), key)) for key in (
                'st_atime', 'st_ctime', 'st_gid', 'st_mode', 'st_mtime',
                'st_nlink', 'st_size', 'st_uid')),
            'thread': None
        }
        try:
            # We run the file system in a thread, but as a foreground process.
            # This allows multiple mounted fuses to play well together and stop
            # when the program is stopped.
            opClass = ServerFuse(level=level, user=user, force=force,
                                 stat=os.stat(path))
            options = {
                # Running in a thread in the foreground makes it easier to
                # clean up the process when we need to shut it down.
                'foreground': True,
                # Automatically unmount when we try to mount again
                'auto_unmount': True,
                # Cache files if their size and timestamp haven't changed.
                # This lets the OS buffer files efficiently.
                'auto_cache': True,
                # We aren't specifying our own inos
                'use_ino': False,
                # read-only file system
                'ro': True,
            }
            if sys.platform == 'darwin':
                del options['auto_unmount']
            fuseThread = threading.Thread(
                target=FUSELogError,
                args=(name, handleFuseMountFailure, opClass, path),
                kwargs=options)
            entry['thread'] = fuseThread
            _fuseMounts[name] = entry
            fuseThread.daemon = True
            fuseThread.start()
            logprint.info('Mounted %s at %s' % (name, path))
            events.trigger('server_fuse.mount', {'name': name})
            return True
        except Exception:
            logger.exception('Failed to mount %s at %s' % (name, path))

def configureServer(test=False, plugins=None, curConfig=None):
    """
    Function to setup the cherrypy server. It configures it, but does not
    actually start it.

    :param test: Set to True when running in the tests.
    :type test: bool
    :param plugins: If you wish to start the server with a custom set of
        plugins, pass this as a list of plugins to load. Otherwise, will use
        the PLUGINS_ENABLED setting value from the db.
    :param curConfig: The configuration dictionary to update.
    """
    if curConfig is None:
        curConfig = config.getConfig()

    appconf = {
        '/': {
            'request.dispatch': cherrypy.dispatch.MethodDispatcher(),
            'request.show_tracebacks': test,
            'request.methods_with_bodies': ('POST', 'PUT', 'PATCH'),
            'response.headers.server': 'Girder %s' % __version__,
            'error_page.default': _errorDefault
        }
    }
    # Add MIME types for serving Fontello files from staticdir;
    # these may be missing or incorrect in the OS
    mimetypes.add_type('application/vnd.ms-fontobject', '.eot')
    mimetypes.add_type('application/x-font-ttf', '.ttf')
    mimetypes.add_type('application/font-woff', '.woff')

    curConfig.update(appconf)
    if test:
        # Force some config params in testing mode
        curConfig.update({'server': {
            'mode': 'testing',
            'api_root': 'api/v1',
            'static_root': 'static',
            'api_static_root': '../static',
            'cherrypy_server': True
        }})

    mode = curConfig['server']['mode'].lower()
    logprint.info('Running in mode: ' + mode)
    cherrypy.config['engine.autoreload.on'] = mode == 'development'

    _setupCache()

    # Don't import this until after the configs have been read; some module
    # initialization code requires the configuration to be set up.
    from girder.api import api_main

    root = webroot.Webroot()
    api_main.addApiToNode(root)

    girder.events.setupDaemon()
    cherrypy.engine.subscribe('start', girder.events.daemon.start)
    cherrypy.engine.subscribe('stop', girder.events.daemon.stop)

    if plugins is None:
        plugins = getPlugins()

    routeTable = loadRouteTable()
    info = {
        'config': appconf,
        'serverRoot': root,
        'serverRootPath': routeTable[constants.GIRDER_ROUTE_ID],
        'apiRoot': root.api.v1,
        'staticRoot': routeTable[constants.GIRDER_STATIC_ROUTE_ID]
    }

    plugin._loadPlugins(plugins, info)
    root, appconf = info['serverRoot'], info['config']

    return root, appconf

def configureServer(test=False, plugins=None, curConfig=None):
    """
    Function to setup the cherrypy server. It configures it, but does not
    actually start it.

    :param test: Set to True when running in the tests.
    :type test: bool
    :param plugins: If you wish to start the server with a custom set of
        plugins, pass this as a list of plugins to load. Otherwise, will use
        the PLUGINS_ENABLED setting value from the db.
    :param curConfig: The configuration dictionary to update.
    """
    if curConfig is None:
        curConfig = config.getConfig()

    appconf = {
        '/': {
            'request.dispatch': cherrypy.dispatch.MethodDispatcher(),
            'request.show_tracebacks': test,
            'request.methods_with_bodies': ('POST', 'PUT', 'PATCH'),
            'response.headers.server': 'Girder %s' % __version__,
            'error_page.default': _errorDefault
        }
    }
    # Add MIME types for serving Fontello files from staticdir;
    # these may be missing or incorrect in the OS
    mimetypes.add_type('application/vnd.ms-fontobject', '.eot')
    mimetypes.add_type('application/x-font-ttf', '.ttf')
    mimetypes.add_type('application/font-woff', '.woff')

    if test:
        appconf['/src'] = {
            'tools.staticdir.on': True,
            'tools.staticdir.root': constants.STATIC_ROOT_DIR,
            'tools.staticdir.dir': 'clients/web/src',
        }
        appconf['/test'] = {
            'tools.staticdir.on': True,
            'tools.staticdir.root': constants.STATIC_ROOT_DIR,
            'tools.staticdir.dir': 'clients/web/test',
        }
        appconf['/clients'] = {
            'tools.staticdir.on': True,
            'tools.staticdir.root': constants.STATIC_ROOT_DIR,
            'tools.staticdir.dir': 'clients'
        }
        appconf['/plugins'] = {
            'tools.staticdir.on': True,
            'tools.staticdir.root': constants.STATIC_ROOT_DIR,
            'tools.staticdir.dir': 'plugins',
        }

    curConfig.update(appconf)

    if test:
        # Force some config params in testing mode
        curConfig.update({'server': {
            'mode': 'testing',
            'api_root': 'api/v1',
            'static_root': 'static',
            'api_static_root': '../static',
            'cherrypy_server': True
        }})

    mode = curConfig['server']['mode'].lower()
    logprint.info('Running in mode: ' + mode)
    cherrypy.config['engine.autoreload.on'] = mode == 'development'

    # Don't import this until after the configs have been read; some module
    # initialization code requires the configuration to be set up.
    from girder.api import api_main

    root = webroot.Webroot()
    api_main.addApiToNode(root)

    cherrypy.engine.subscribe('start', girder.events.daemon.start)
    cherrypy.engine.subscribe('stop', girder.events.daemon.stop)

    if plugins is None:
        settings = model_importer.ModelImporter().model('setting')
        plugins = settings.get(constants.SettingKey.PLUGINS_ENABLED, default=())

    plugins = list(plugin_utilities.getToposortedPlugins(plugins, ignoreMissing=True))

    _configureStaticRoutes(root, plugins)

    girder.events.bind('model.setting.save.after', '_updateStaticRoutesIfModified',
                       functools.partial(_configureStaticRoutes, root, plugins))

    root, appconf, _ = plugin_utilities.loadPlugins(
        plugins, root, appconf, root.api.v1, buildDag=False)

    return root, appconf

def mountServerFuse(name, path, level=AccessType.ADMIN, user=None, force=False):
    """
    Mount a FUSE at a specific path with authorization for a given user.

    :param name: a key for this mount.  Each mount point must have a distinct
        key.
    :param path: the location where this mount will be in the local
        filesystem.  This should be an empty directory.
    :param level: access level used when checking which resources are
        available within the FUSE.  This is ignored currently, but could be
        used if non-readonly access is ever implemented.
    :param user: the user used for authorizing resource access.
    :param force: if True, all resources are available without checking the
        user or level.
    :returns: True if successful.  'present' if the mount is already present.
        None on failure.
    """
    with _fuseMountsLock:
        if name in _fuseMounts:
            if (_fuseMounts[name]['level'] == level and
                    _fuseMounts[name]['user'] == user and
                    _fuseMounts[name]['force'] == force):
                return 'present'
            unmountServerFuse(name)
        entry = {
            'level': level,
            'user': user,
            'force': force,
            'path': path,
            'stat': dict((key, getattr(os.stat(path), key)) for key in (
                'st_atime', 'st_ctime', 'st_gid', 'st_mode', 'st_mtime',
                'st_nlink', 'st_size', 'st_uid')),
            'thread': None
        }
        try:
            # We run the file system in a thread, but as a foreground process.
            # This allows multiple mounted fuses to play well together and stop
            # when the program is stopped.
            opClass = ServerFuse(level=level, user=user, force=force,
                                 stat=os.stat(path))
            options = {
                # Running in a thread in the foreground makes it easier to
                # clean up the process when we need to shut it down.
                'foreground': True,
                # Automatically unmount when we try to mount again
                'auto_unmount': True,
                # Cache files if their size and timestamp haven't changed.
                # This lets the OS buffer files efficiently.
                'auto_cache': True,
                # We aren't specifying our own inos
                'use_ino': False,
                # read-only file system
                'ro': True,
            }
            if sys.platform == 'darwin':
                del options['auto_unmount']
            fuseThread = threading.Thread(
                target=FUSELogError, args=(opClass, path), kwargs=options)
            fuseThread.daemon = True
            fuseThread.start()
            entry['thread'] = fuseThread
            _fuseMounts[name] = entry
            logprint.info('Mounted %s at %s' % (name, path))
            events.trigger('server_fuse.mount', {'name': name})
            return True
        except Exception:
            logger.exception('Failed to mount %s at %s' % (name, path))

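# Hypothetical usage (the mount name, path, and user object are assumptions,
# not taken from the source above): mount the read-only FUSE for an admin user
# and check the result, which is True, 'present', or None per the docstring.
result = mountServerFuse('core', '/mnt/girder', level=AccessType.ADMIN, user=adminUser)
if result not in (True, 'present'):
    logger.error('Girder FUSE mount failed')
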
def orthorectify(initWorkingSetName, stepName, requestInfo, jobId, outputFolder,
                 imageFiles, dsmFile, dtmFile, rpcFiles,
                 occlusionThreshold=None, denoiseRadius=None):
    """
    Run Girder Worker jobs to orthorectify source images.

    Requirements:
    - Danesfield Docker image is available on host

    :param initWorkingSetName: The name of the top-level working set.
    :type initWorkingSetName: str
    :param stepName: The name of the step.
    :type stepName: str (DanesfieldStep)
    :param requestInfo: HTTP request and authorization info.
    :type requestInfo: RequestInfo
    :param jobId: Job ID.
    :type jobId: str
    :param outputFolder: Output folder document.
    :type outputFolder: dict
    :param imageFiles: List of image files.
    :type imageFiles: list[dict]
    :param dsmFile: DSM file document.
    :type dsmFile: dict
    :param dtmFile: DTM file document.
    :type dtmFile: dict
    :param rpcFiles: List of RPC files.
    :type rpcFiles: list[dict]
    :param occlusionThreshold:
    :type occlusionThreshold: float
    :param denoiseRadius:
    :type denoiseRadius: float
    :returns: None
    """
    gc = createGirderClient(requestInfo)

    def createOrthorectifyTask(imageFile, rpcFile):
        # Set output file name based on input file name
        orthoName = os.path.splitext(imageFile['name'])[0] + '_ortho.tif'
        outputVolumePath = VolumePath(orthoName)

        # Docker container arguments
        containerArgs = [
            'danesfield/tools/orthorectify.py',
            # Source image
            GirderFileIdToVolume(imageFile['_id'], gc=gc),
            # DSM
            GirderFileIdToVolume(dsmFile['_id'], gc=gc),
            # Destination image
            outputVolumePath,
            '--dtm', GirderFileIdToVolume(dtmFile['_id'], gc=gc),
            '--raytheon-rpc', GirderFileIdToVolume(rpcFile['_id'], gc=gc),
        ]
        if occlusionThreshold is not None:
            containerArgs.extend(['--occlusion-thresh', str(occlusionThreshold)])
        if denoiseRadius is not None:
            containerArgs.extend(['--denoise-radius', str(denoiseRadius)])

        # Result hooks
        # - Upload output files to output folder
        # - Provide upload metadata
        upload_kwargs = createUploadMetadata(jobId, stepName)
        resultHooks = [
            GirderUploadVolumePathToFolder(
                outputVolumePath,
                outputFolder['_id'],
                upload_kwargs=upload_kwargs,
                gc=gc)
        ]

        return docker_run.s(
            **createDockerRunArguments(
                image=DockerImage.DANESFIELD,
                containerArgs=containerArgs,
                jobTitle=('[%s] Orthorectify: %s' %
                          (initWorkingSetName, imageFile['name'])),
                jobType=stepName,
                user=requestInfo.user,
                resultHooks=resultHooks))

    # Find RPC file corresponding to each image, or None
    correspondingRpcFiles = [
        next((rpcFile for rpcFile in rpcFiles
              if rpcFileMatchesImageFile(rpcFile, imageFile)), None)
        for imageFile in imageFiles
    ]

    # For some images, it seems that we're not getting RPC files from
    # the P3D step.  Deciding to simply skip those images and log a
    # warning instead of raising an exception for now.
    imagesMissingRpcFiles = [
        imageFile['name']
        for imageFile, rpcFile in zip(imageFiles, correspondingRpcFiles)
        if not rpcFile
    ]
    if imagesMissingRpcFiles:
        logprint.info(
            'Step: {} -- Warning: Missing RPC files for images: {}'.format(
                stepName, imagesMissingRpcFiles))
        # raise DanesfieldWorkflowException(
        #     'Missing RPC files for images: {}'.format(imagesMissingRpcFiles),
        #     step=stepName)

    # Run tasks in parallel using a group; skip if we have no rpcFile
    # for the given image
    tasks = [
        createOrthorectifyTask(imageFile, rpcFile)
        for imageFile, rpcFile in zip(imageFiles, correspondingRpcFiles)
        if rpcFile is not None
    ]
    groupResult = group(tasks).delay()

    DanesfieldWorkflowManager.instance().setGroupResult(jobId, stepName, groupResult)

    # Add info for job event listeners
    for result in groupResult.results:
        addJobInfo(result.job, jobId=jobId, stepName=stepName)

def advance(self, jobId):
    """
    Advance the workflow.
    Runs all remaining steps that have their dependencies met.
    Finalizes the job if all steps are complete.

    :param jobId: Identifier of the job running the workflow.
    :type jobId: str
    """
    with self._lock:
        logprint.info(
            'DanesfieldWorkflowManager.advance Job={}'.format(jobId))

        jobData = self._getJobData(jobId)

        incompleteSteps = [
            step
            for step in self.workflow.steps
            if (step.name not in jobData['completedSteps'] and
                step.name not in jobData['failedSteps'])
        ]

        # Skip run-metrics if the AOI is unknown
        # model = jobData['options'].get('classify-materials', {}).get(
        #     'model')
        # if model is None or model == 'STANDARD':
        #     try:
        #         incompleteSteps.remove(RunMetricsStep)
        #     except ValueError as e:
        #         pass

        logprint.info(
            'DanesfieldWorkflowManager.advance IncompleteSteps={}'.format(
                [step.name for step in incompleteSteps]))

        runningSteps = [
            step
            for step in self.workflow.steps
            if step.name in jobData['runningSteps']
        ]
        logprint.info(
            'DanesfieldWorkflowManager.advance RunningSteps={}'.format(
                [step.name for step in runningSteps]))

        # Finalize job if either:
        # - All steps have completed, or
        # - A previous step failed and no steps are running
        # Note that it's possible that future steps could run
        # successfully if they don't depend on the failed step;
        # that's not currently handled.
        if not runningSteps and \
           (not incompleteSteps or jobData['failedSteps']):
            self.finalizeJob(jobId)
            return

        readySteps = [
            step
            for step in incompleteSteps
            if step.name not in jobData['runningSteps'] and
            step.dependencies.issubset(jobData['completedSteps'])
        ]
        logprint.info(
            'DanesfieldWorkflowManager.advance ReadySteps={}'.format(
                [step.name for step in readySteps]))

        if not runningSteps and not readySteps and incompleteSteps:
            logprint.error(
                'DanesfieldWorkflowManager.advance StuckSteps={}'.format(
                    [step.name for step in incompleteSteps]))
            # TODO: More error notification/handling/clean up
            return

        jobInfo = JobInfo(
            jobId=jobId,
            requestInfo=jobData['requestInfo'],
            workingSets=jobData['workingSets'],
            standardOutput=jobData['standardOutput'],
            outputFolder=jobData['outputFolder'],
            options=jobData['options'])

        if readySteps:
            adminUser = User().getAdmins().next()
            for step in readySteps:
                # Create output directory for step
                outputFolder = Folder().createFolder(
                    parent=jobInfo.outputFolder,
                    name=step.name,
                    parentType='folder',
                    public=False,
                    creator=adminUser,
                    reuseExisting=True)

                jobData['runningSteps'].add(step.name)
                step.run(jobInfo, outputFolder)

def configureServer(mode=None, plugins=None, curConfig=None):
    """
    Function to setup the cherrypy server. It configures it, but does not
    actually start it.

    :param mode: The server mode to start in.
    :type mode: string
    :param plugins: If you wish to start the server with a custom set of
        plugins, pass this as a list of plugins to load. Otherwise, all
        installed plugins will be loaded.
    :param curConfig: The configuration dictionary to update.
    """
    if curConfig is None:
        curConfig = config.getConfig()

    appconf = {
        '/': {
            'request.dispatch': cherrypy.dispatch.MethodDispatcher(),
            'request.show_tracebacks': mode == ServerMode.TESTING,
            'request.methods_with_bodies': ('POST', 'PUT', 'PATCH'),
            'response.headers.server': 'Girder %s' % __version__,
            'error_page.default': _errorDefault
        }
    }
    # Add MIME types for serving Fontello files from staticdir;
    # these may be missing or incorrect in the OS
    mimetypes.add_type('application/vnd.ms-fontobject', '.eot')
    mimetypes.add_type('application/x-font-ttf', '.ttf')
    mimetypes.add_type('application/font-woff', '.woff')

    curConfig.update(appconf)
    if mode:
        curConfig['server']['mode'] = mode
    logprint.info('Running in mode: ' + curConfig['server']['mode'])
    cherrypy.config['engine.autoreload.on'] = mode == ServerMode.DEVELOPMENT

    _setupCache()

    # Don't import this until after the configs have been read; some module
    # initialization code requires the configuration to be set up.
    from girder.api import api_main

    root = webroot.Webroot()
    api_main.addApiToNode(root)

    girder.events.setupDaemon()
    cherrypy.engine.subscribe('start', girder.events.daemon.start)
    cherrypy.engine.subscribe('stop', girder.events.daemon.stop)

    routeTable = loadRouteTable()
    info = {
        'config': appconf,
        'serverRoot': root,
        'serverRootPath': routeTable[constants.GIRDER_ROUTE_ID],
        'apiRoot': root.api.v1,
    }

    plugin._loadPlugins(info, plugins)
    root, appconf = info['serverRoot'], info['config']

    return root, appconf

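# Hedged sketch of how the returned (root, appconf) pair might be used: mount
# it on cherrypy's tree before starting the engine. ServerMode.PRODUCTION and
# the '/' mount path are assumptions, not taken from the source above.
root, appconf = configureServer(mode=ServerMode.PRODUCTION)
cherrypy.tree.mount(root, '/', appconf)
cherrypy.engine.start()
cherrypy.engine.block()
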
def getDbConnection(uri=None, replicaSet=None, autoRetry=True, **kwargs):
    """
    Get a MongoClient object that is connected to the configured database.
    We lazy-instantiate a module-level singleton, the MongoClient objects
    manage their own connection pools internally. Any extra kwargs you pass to
    this method will be passed through to the MongoClient.

    :param uri: if specified, connect to this mongo db rather than the one in
        the config.
    :param replicaSet: if uri is specified, use this replica set.
    :param autoRetry: if this connection should automatically retry operations
        in the event of an AutoReconnect exception. If you're testing the
        connection, set this to False. If disabled, this also will not cache
        the mongo client, so make sure to only disable if you're testing a
        connection.
    :type autoRetry: bool
    """
    global _dbClients

    origKey = (uri, replicaSet)
    if origKey in _dbClients:
        return _dbClients[origKey]

    if uri is None or uri == '':
        dbConf = getDbConfig()
        uri = dbConf.get('uri')
        replicaSet = dbConf.get('replica_set')

    clientOptions = {
        # This is the maximum time between when we fetch data from a cursor.
        # If it times out, the cursor is lost and we can't reconnect.  If it
        # isn't set, we have issues with replica sets when the primary goes
        # down.  This value can be overridden in the mongodb uri connection
        # string with the socketTimeoutMS.
        'socketTimeoutMS': 60000,
        'connectTimeoutMS': 20000,
        'serverSelectionTimeoutMS': 20000,
        'read_preference': ReadPreference.SECONDARY_PREFERRED,
        'replicaSet': replicaSet
    }
    clientOptions.update(kwargs)

    if uri is None:
        dbUriRedacted = 'mongodb://unknown'
        raise Exception('Could not connect to the database.')
    else:
        parts = uri.split('@')
        if len(parts) == 2:
            dbUriRedacted = 'mongodb://' + parts[1]
        else:
            dbUriRedacted = uri

    client = pymongo.MongoClient(uri, **clientOptions)

    # Make sure we can connect to the mongo server at startup
    client.server_info()

    if autoRetry:
        client = MongoProxy(client, logger=logger)
        _dbClients[origKey] = _dbClients[(uri, replicaSet)] = client

    desc = ''
    if replicaSet:
        desc += ', replica set: %s' % replicaSet
    logprint.info('Connected to MongoDB: %s%s' % (dbUriRedacted, desc))
    return client

def initJob(self, requestInfo, workingSet, outputFolder, options,
            previousWorkingSet=None):
    """
    Initialize a new job to run the workflow.

    :param requestInfo: HTTP request and authorization info.
    :type requestInfo: RequestInfo
    :param workingSet: Source image working set.
    :type workingSet: dict
    :param outputFolder: Output folder document.
    :type outputFolder: dict
    :param options: Processing options.
    :type options: dict
    :returns: Job identifier.
    """
    with self._lock:
        if not self.workflow:
            raise DanesfieldWorkflowException('Workflow not configured')

        jobId = self._createJobId()

        # TODO: Improve job data storage
        jobData = {
            # Running steps
            'runningSteps': set(),
            # Completed steps
            'completedSteps': set(),
            # Failed steps
            'failedSteps': set(),
            # Request info
            'requestInfo': requestInfo,
            # Working sets indexed by step name
            'workingSets': {
                DanesfieldStep.INIT: workingSet
            },
            # Files indexed by step name
            'files': {},
            # Standard output indexed by step name
            'standardOutput': {},
            # Output folder
            'outputFolder': outputFolder,
            # Options
            'options': options if options is not None else {},
            # For composite steps, list of [Celery GroupResult,
            # number of jobs remaining], indexed by step name
            'groupResult': {}
        }

        logprint.info(
            'DanesfieldWorkflowManager.initJob Job={} WorkingSet={}'.format(
                jobId, workingSet['_id']))

        # If a workingSet exists for a given step, we include that
        # working set in the current jobData and flag it as being
        # complete (the step will not be re-run)
        step_name_re = re.compile(".*:\\s(.*)")
        for ws in WorkingSet().find(
                {'parentWorkingSetId': workingSet['_id']}):
            match = re.match(step_name_re, ws['name'])
            if match:
                stepName = match.group(1)
                jobData['workingSets'][stepName] = ws

                # Set the skipped job as completed
                jobData['completedSteps'].add(stepName)
                logprint.info(
                    'DanesfieldWorkflowManager.skippingStep Job={} '
                    'StepName={}'.format(jobId, stepName))
            else:
                logprint.warning(
                    'DanesfieldWorkflowManager.unableToParseStepName '
                    'Job={} WorkingSetName={}'.format(jobId, ws['name']))

        self._jobData[jobId] = jobData

        return jobId

}, {
    'moduleName': '.test',
    'className': 'TestTileSource'
}, {
    'moduleName': '.dummy',
    'className': 'DummyTileSource'
}]

for source in sourceList:
    try:
        # Don't try to load girder sources if we couldn't import girder
        if not girder and source.get('girder'):
            continue
        # For each of our sources, try to import the named class from the
        # source module
        className = source['className']
        sourceModule = __import__(
            source['moduleName'].lstrip('.'), globals(), locals(), [className],
            len(source['moduleName']) - len(source['moduleName'].lstrip('.')))
        sourceClass = getattr(sourceModule, className)
        # Add the source class to the locals name so that it can be reached by
        # importing the tilesource module
        locals().update({className: sourceClass})
        # add it to our list of exports
        __all__.append(className)
        # add it to our dictionary of available sources if it has a name
        if getattr(sourceClass, 'name', None):
            AvailableTileSources[sourceClass.name] = sourceClass
    except (ImportError, OSError):
        logprint.info('Notice: Could not import %s' % className)

def getDbConnection(uri=None, replicaSet=None, autoRetry=True, quiet=False, **kwargs):
    """
    Get a MongoClient object that is connected to the configured database.
    We lazy-instantiate a module-level singleton, the MongoClient objects
    manage their own connection pools internally. Any extra kwargs you pass to
    this method will be passed through to the MongoClient.

    :param uri: if specified, connect to this mongo db rather than the one in
        the config.
    :param replicaSet: if uri is specified, use this replica set.
    :param autoRetry: if this connection should automatically retry operations
        in the event of an AutoReconnect exception. If you're testing the
        connection, set this to False. If disabled, this also will not cache
        the mongo client, so make sure to only disable if you're testing a
        connection.
    :type autoRetry: bool
    :param quiet: if true, don't logprint warnings and success.
    :type quiet: bool
    """
    global _dbClients

    origKey = (uri, replicaSet)
    if origKey in _dbClients:
        return _dbClients[origKey]

    dbConf = getDbConfig()

    if uri is None or uri == '':
        uri = dbConf.get('uri')
        replicaSet = dbConf.get('replica_set')

    clientOptions = {
        # This is the maximum time between when we fetch data from a cursor.
        # If it times out, the cursor is lost and we can't reconnect.  If it
        # isn't set, we have issues with replica sets when the primary goes
        # down.  This value can be overridden in the mongodb uri connection
        # string with the socketTimeoutMS.
        'socketTimeoutMS': 60000,
        'connectTimeoutMS': 20000,
        'serverSelectionTimeoutMS': 20000,
        'readPreference': 'secondaryPreferred',
        'replicaSet': replicaSet,
        'w': 'majority'
    }

    # All other options in the [database] section will be passed directly as
    # options to the mongo client
    for opt, val in six.viewitems(dict(dbConf)):
        if opt not in {'uri', 'replica_set'}:
            clientOptions[opt] = val

    # Finally, kwargs take precedence
    clientOptions.update(kwargs)

    # if the connection URI overrides any option, honor it above our own
    # settings.
    uriParams = urllib.parse.parse_qs(urllib.parse.urlparse(uri).query)
    for key in uriParams:
        if key in clientOptions:
            del clientOptions[key]

    if uri is None:
        dbUriRedacted = 'mongodb://unknown'
        raise Exception('Could not connect to the database.')
    else:
        parts = uri.split('@')
        if len(parts) == 2:
            dbUriRedacted = 'mongodb://' + parts[1]
        else:
            dbUriRedacted = uri

    client = pymongo.MongoClient(uri, **clientOptions)

    if not quiet:
        desc = ''
        if replicaSet:
            desc += ', replica set: %s' % replicaSet
        logprint.info('Connecting to MongoDB: %s%s' % (dbUriRedacted, desc))

    # Make sure we can connect to the mongo server at startup
    client.server_info()

    if autoRetry:
        client = MongoProxy(client, logger=logger)
        _dbClients[origKey] = _dbClients[(uri, replicaSet)] = client

    return client

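# Minimal usage sketch (assumed): repeated calls with the same URI and replica
# set return the cached client, so this is cheap to call wherever a client is
# needed; server_info() raises if the server is unreachable.
client = getDbConnection()
client.server_info()
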
def pansharpen(initWorkingSetName, stepName, requestInfo, jobId, outputFolder,
               imageFiles):
    """
    Run Girder Worker jobs to pansharpen orthorectified images.

    Requirements:
    - Danesfield Docker image is available on host

    :param initWorkingSetName: The name of the top-level working set.
    :type initWorkingSetName: str
    :param stepName: The name of the step.
    :type stepName: str (DanesfieldStep)
    :param requestInfo: HTTP request and authorization info.
    :type requestInfo: RequestInfo
    :param jobId: Job ID.
    :type jobId: str
    :param outputFolder: Output folder document.
    :type outputFolder: dict
    :param imageFiles: List of orthorectified image files.
    :type imageFiles: list[dict]
    :returns: None
    """
    gc = createGirderClient(requestInfo)

    def createPansharpenTask(prefix, panImageFile, msiImageFile):
        # Set output file name based on prefix
        outputName = prefix + '_ortho_pansharpened.tif'
        outputVolumePath = VolumePath(outputName)

        # Docker container arguments
        containerArgs = [
            'gdal_pansharpen.py',
            # PAN image
            GirderFileIdToVolume(panImageFile['_id'], gc=gc),
            # MSI image
            GirderFileIdToVolume(msiImageFile['_id'], gc=gc),
            # Output image
            outputVolumePath
        ]

        # Result hooks
        # - Upload output files to output folder
        # - Provide upload metadata
        upload_kwargs = createUploadMetadata(jobId, stepName)
        resultHooks = [
            GirderUploadVolumePathToFolder(
                outputVolumePath,
                outputFolder['_id'],
                upload_kwargs=upload_kwargs,
                gc=gc)
        ]

        return docker_run.s(
            **createDockerRunArguments(
                image=DockerImage.DANESFIELD,
                containerArgs=containerArgs,
                jobTitle='[%s] Pansharpen: %s' % (initWorkingSetName, prefix),
                jobType=stepName,
                user=requestInfo.user,
                resultHooks=resultHooks
            )
        )

    # Group pairs of PAN and MSI images by prefix
    pairs = {}
    for imageFile in imageFiles:
        prefix = getPrefix(imageFile['name'])
        if prefix is None:
            raise DanesfieldWorkflowException(
                'Invalid orthorectified image file name: {}'.format(imageFile['name']),
                step=stepName)
        pairs.setdefault(prefix, {'pan': None, 'msi': None})
        if isPanImage(imageFile):
            pairs[prefix]['pan'] = imageFile
        elif isMsiImage(imageFile):
            pairs[prefix]['msi'] = imageFile
        else:
            raise DanesfieldWorkflowException(
                'Unrecognized image: {}'.format(imageFile['name']), step=stepName)

    # Ensure that both types of images exist for each prefix
    # Logging a warning for now and skipping rather than treating this
    # as an exception
    for prefix, files in pairs.items():
        panFile = files['pan']
        msiFile = files['msi']
        if not panFile or not msiFile:
            logprint.info(
                "Step: {} -- Warning: Don't have both PAN and MSI "
                "images for: {}".format(stepName, prefix))
            # raise DanesfieldWorkflowException(
            #     'Corresponding PAN and MSI orthorectified images not found')

    # Run tasks in parallel using a group
    tasks = [
        createPansharpenTask(imagePrefix, files['pan'], files['msi'])
        for imagePrefix, files in pairs.items()
        if files['pan'] and files['msi']
    ]
    groupResult = group(tasks).delay()

    DanesfieldWorkflowManager.instance().setGroupResult(jobId, stepName, groupResult)

    # Add info for job event listeners
    for result in groupResult.results:
        addJobInfo(result.job, jobId=jobId, stepName=stepName)
