def __init__(self, client_address: str = None, request_port: int = None, response_port: int = None):
    super(EDASapp, self).__init__(
        get_or_else(client_address, EdasEnv.get("wps.server.address", "*")),
        get_or_else(request_port, EdasEnv.get("request.port", 4556)),
        get_or_else(response_port, EdasEnv.get("response.port", 4557)))
    self.process = "edas"
    self.processManager = None
    atexit.register(self.term, "ShutdownHook Called")
    self.logger.info("STARTUP CLUSTER")
    self.processManager = ProcessManager.initManager(EdasEnv.parms)
    self.scheduler_info = self.processManager.client.scheduler_info()
    workers: Dict = self.scheduler_info.pop("workers")
    self.logger.info(" @@@@@@@ SCHEDULER INFO: " + str(self.scheduler_info))
    self.logger.info(f" N Workers: {len(workers)} ")
    for addr, specs in workers.items():
        self.logger.info(f" -----> Worker {addr}: {specs}")
    log_metrics = EdasEnv.parms.get("log.cwt.metrics", True)
    if log_metrics:
        self.metricsThread = Thread(target=self.trackCwtMetrics)
        self.metricsThread.start()
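# Illustrative app.conf entries read by this constructor (values shown are the
# defaults applied above when a parameter is absent):
#
#   wps.server.address = *
#   request.port       = 4556
#   response.port      = 4557
#   log.cwt.metrics    = True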
def getSession(self, dataSource: DataSource) -> Session:
    session: Session = None
    if dataSource.auth == "esgf":
        from pydap.cas.esgf import setup_session
        openid = EdasEnv.get("esgf.openid", "")
        password = EdasEnv.get("esgf.password", "")
        username = EdasEnv.get("esgf.username", openid.split("/")[-1])
        session = setup_session(openid, password, username, check_url=dataSource.address)
    elif dataSource.auth == "urs":
        from pydap.cas.urs import setup_session
        username = EdasEnv.get("urs.username", "")
        password = EdasEnv.get("urs.password", "")
        session = setup_session(username, password, check_url=dataSource.address)
    elif dataSource.auth == "cookie":
        from pydap.cas.get_cookies import setup_session
        username = EdasEnv.get("auth.username", "")
        password = EdasEnv.get("auth.password", "")
        auth_url = EdasEnv.get("auth.url", "")
        session = setup_session(auth_url, username, password)
    elif dataSource.auth is not None:
        raise Exception("Unknown authentication method: " + dataSource.auth)
    return session
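# Usage sketch (illustrative only, not part of this module): the returned session
# can be handed to xarray's pydap backend. `kernel` and `source` are hypothetical
# stand-ins for an instance of the enclosing kernel class and a DataSource with
# auth="urs"; PydapDataStore is standard xarray API.
#
#   import xarray as xr
#   from xarray.backends import PydapDataStore
#
#   session = kernel.getSession(source)
#   store = PydapDataStore.open(source.address, session=session)
#   dset = xr.open_dataset(store, chunks={"time": 8})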
def buildWorkflow(self, request: TaskRequest, node: WorkflowNode, inputs: EDASDatasetCollection) -> EDASDatasetCollection:
    snode: SourceNode = node
    results = EDASDatasetCollection("InputKernel.build-" + node.name)
    t0 = time.time()
    dset = self.getCachedDataset(snode)
    if dset is not None:
        self.importToDatasetCollection(results, request, snode, dset.xr)
        self.logger.info("Access input data from cache: " + dset.id)
    else:
        dataSource: DataSource = snode.varSource.dataSource
        if dataSource.type == SourceType.collection:
            from edas.collection.agg import Axis as AggAxis, File as AggFile
            collection = Collection.new(dataSource.address)
            self.logger.info("Input collection: " + dataSource.address)
            aggs = collection.sortVarsByAgg(snode.varSource.vids)
            domain = request.operationManager.domains.getDomain(snode.domain)
            if domain is not None:
                timeBounds = domain.findAxisBounds(Axis.T)
                startDate = None if (domain is None or timeBounds is None) else TimeConversions.parseDate(timeBounds.start)
                endDate = None if (domain is None or timeBounds is None) else TimeConversions.parseDate(timeBounds.end)
            else:
                startDate = endDate = None
            for (aggId, vars) in aggs.items():
                use_chunks = True
                pathList = collection.pathList(aggId) if startDate is None else collection.periodPathList(aggId, startDate, endDate)
                assert len(pathList) > 0, f"No files found in aggregation {aggId} for date range {startDate} - {endDate}"
                nFiles = len(pathList)
                if use_chunks:
                    nReadPartitions = int(EdasEnv.get("mfdataset.npartitions", 250))
                    agg = collection.getAggregation(aggId)
                    nchunks, fileSize = agg.getChunkSize(nReadPartitions, nFiles)
                    chunk_kwargs = {} if nchunks is None else dict(chunks={"time": nchunks})
                    self.logger.info(f"Open mfdataset: vars={vars}, NFILES={nFiles}, FileSize={fileSize}, FILES[0]={pathList[0]}, chunk_kwargs={chunk_kwargs}, startDate={startDate}, endDate={endDate}, domain={domain}")
                else:
                    chunk_kwargs = {}
                    self.logger.info(f"Open mfdataset: vars={vars}, NFILES={nFiles}, FILES[0]={pathList[0]}")
                dset = xr.open_mfdataset(pathList, engine='netcdf4', data_vars=vars, parallel=True, **chunk_kwargs)
                self.logger.info("Import to collection")
                self.importToDatasetCollection(results, request, snode, dset)
                self.logger.info("Collection import complete.")
        elif dataSource.type == SourceType.file:
            self.logger.info("Reading data from address: " + dataSource.address)
            files = glob.glob(dataSource.address)
            parallel = len(files) > 1
            assert len(files) > 0, f"No files matching path {dataSource.address}"
            dset = xr.open_mfdataset(dataSource.address, engine='netcdf4', data_vars=snode.varSource.ids, parallel=parallel)
            self.importToDatasetCollection(results, request, snode, dset)
        elif dataSource.type == SourceType.archive:
            self.logger.info("Reading data from archive: " + dataSource.address)
            dataPath = request.archivePath(dataSource.address)
            dset = xr.open_mfdataset([dataPath])
            self.importToDatasetCollection(results, request, snode, dset)
        elif dataSource.type == SourceType.dap:
            nchunks = request.runargs.get("ncores", 8)
            self.logger.info(f" --------------->>> Reading data from address: {dataSource.address}, nchunks = {nchunks}")
            # dset = xr.open_mfdataset( [dataSource.address], engine="netcdf4", data_vars=snode.varSource.ids, chunks={"time":nchunks} )
            dset = xr.open_dataset(dataSource.address, engine="netcdf4", chunks={"time": nchunks})
            self.importToDatasetCollection(results, request, snode, dset)
        self.logger.info(f"Access input data source {dataSource.address}, time = {time.time() - t0} sec")
    self.logger.info("@L: LOCATION=> host: {}, thread: {}, proc: {}".format(socket.gethostname(), threading.get_ident(), os.getpid()))
    return results
def __init__(self, nthreads=0, nprocs=1, nohost=False, remote_python=None, memory_limit=None, worker_port=None, nanny_port=None):
    Thread.__init__(self)
    self.logger = EDASLogger.getLogger()
    self.nthreads = nthreads
    self.nprocs = nprocs
    self.worker_addrs = self.getHosts()
    self.ssh_username = os.environ.get('USER', None)
    self.ssh_port = 22
    self.ssh_private_key = get_private_key()
    self.scheduler_addr = getHost()
    self.scheduler_port = int(EdasEnv.get("scheduler.port", 8786))
    self.logdir = os.path.expanduser("~/.edas/logs")
    self.active = False
    self.nohost = nohost
    self.remote_python = remote_python
    self.memory_limit = memory_limit
    self.worker_port = worker_port
    self.nanny_port = nanny_port
    # Keep track of all running threads
    self.threads = []
def startup_scheduler(self):
    os.environ["PATH"] = ":".join([self.EDAS_BIN_DIR, os.environ["PATH"]])
    if not EdasEnv.getBool("edas.manage.scheduler", True):
        return None
    # os.environ["PKEY_OPTS"] = "--ssh-private-key=" + get_private_key()
    bokeh_port = int(EdasEnv.get("dashboard.port", 8787))
    self.logger.info("Starting up scheduler using script {} with host {} and port {}".format(
        self.SCHEDULER_SCRIPT, self.scheduler_host, self.scheduler_port))
    args = [sys.executable, self.SCHEDULER_SCRIPT,
            "--host", self.scheduler_host,
            "--port", str(self.scheduler_port),
            "--bokeh-port", str(bokeh_port)]
    return subprocess.Popen(args, stderr=subprocess.PIPE)
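# For reference, with the defaults above the launched command resolves to roughly:
#
#   <sys.executable> <SCHEDULER_SCRIPT> --host <scheduler_host> --port 8786 --bokeh-port 8787
#
# (SCHEDULER_SCRIPT and EDAS_BIN_DIR are attributes referenced here but defined
# elsewhere in this class.)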
def __init__(self):
    Cluster.__init__(self)
    self.logger = EDASLogger.getLogger()
    self.scheduler_host = getHost()
    self.scheduler_port = int(EdasEnv.get("scheduler.port", 8786))
    self.schedulerProcess = self.startup_scheduler()
    time.sleep(14)
    self.clusterThread = self.startup_cluster()
def __init__(self, _proj: str, _exp: str, appConf: Dict[str, str] = None):
    super(DistributedTestManager, self).__init__(_proj, _exp)
    EdasEnv.update(appConf)
    # Guard against the appConf=None default before reading options from it.
    log_metrics = appConf.get("log_metrics", False) if appConf else False
    self.processManager = ProcessManager.initManager(EdasEnv.parms)
    time.sleep(10)
    self.processing = False
    self.scheduler_info = self.processManager.client.scheduler_info()
    self.workers: Dict = self.scheduler_info.pop("workers")
    self.logger.info(" @@@@@@@ SCHEDULER INFO: " + str(self.scheduler_info))
    self.logger.info(f" N Workers: {len(self.workers)} ")
    for addr, specs in self.workers.items():
        self.logger.info(f" -----> Worker {addr}: {specs}")
    if log_metrics:
        self.metricsThread = Thread(target=self.trackCwtMetrics)
        self.metricsThread.start()
def getHosts(self):
    hostfile = EdasEnv.get("hostfile.path", os.path.expanduser("~/.edas/conf/hosts"))
    assert os.path.isfile(hostfile), "Error, the EDAS hosts file '{}' does not exist. Copy edas/resourses/hosts.template to '{}' and edit.".format(hostfile, hostfile)
    with open(hostfile) as f:
        return f.read().split()
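# Illustrative hosts file (default path ~/.edas/conf/hosts): getHosts() simply
# whitespace-splits the file contents, so one worker address per line works.
# The node names below are placeholders:
#
#   node001
#   node002
#   node003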
def validate(cls, _address: str, stype: SourceType = SourceType.uri):
    allowed_sources = [r.strip() for r in EdasEnv.get("sources.allowed", "collection,https").split(",")]
    toks = _address.split(":")
    scheme = toks[0].lower()
    if (stype.name.lower() == "uri") and (scheme in allowed_sources):
        if scheme == "https":
            trusted_servers = [r.strip() for r in EdasEnv.get("trusted.dap.servers", "").split(",")]
            for trusted_server in trusted_servers:
                if trusted_server in _address:
                    return scheme, toks[1]
            raise Exception(f"Attempt to access untrusted dap server: {_address}\n\t Trusted servers: {trusted_servers}\n\t Use parameter 'trusted.dap.servers' in app.conf to list trusted addresses, e.g. 'trusted.dap.servers=https://aims3.llnl.gov/thredds/dodsC/'")
        else:
            return scheme, toks[1]
    else:
        raise Exception("Disallowed scheme '{}' in url: {}".format(scheme, _address))
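# Illustrative behaviour (SourceSpec is a hypothetical stand-in for the class that
# owns this method; the collection id and file path are placeholders):
#
#   SourceSpec.validate("collection://my_collection:")
#       # -> ("collection", "//my_collection")
#   SourceSpec.validate("https://aims3.llnl.gov/thredds/dodsC/some_dataset.nc")
#       # returns only if 'trusted.dap.servers' contains a matching prefix, else raises
#   SourceSpec.validate("file:///tmp/data.nc")
#       # raises: 'file' is not in the default 'sources.allowed' list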
variable = "t" domains = [{"name": "d0"}] variables = [{ "uri": f"collection://{collection}:", "name": f"{variable}:v0", "domain": "d0" }] operations = [{"name": "xarray.ave", "input": "v0", "axes": "t"}] local = True scheduler = "127.0.0.1:8786" if __name__ == '__main__': print(f"Running test") appConf = {"sources.allowed": "collection,https", "log.metrics": "true"} EdasEnv.update(appConf) if local: print(f"Initializing Local Dask cluster") client = Client() else: if scheduler is None: cluster = EDASCluster() print( "Initializing Dask-distributed cluster with scheduler address: " + cluster.scheduler_address) client = Client(cluster.scheduler_address, timeout=64) time.sleep(20) else: print("Initializing client with existing scheduler at: " + scheduler)
def __init__(self):
    self.arrayCache: Dict[str, EDASArray] = OrderedDict()
    self.maxSize = SizeParser.parse(EdasEnv.get("cache.size.max", "500M"))
    self.currentSize = 0
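# Illustrative app.conf entry: the cache limit is read from 'cache.size.max';
# "500M" is the default shown above (support for other unit suffixes is an
# assumption about SizeParser.parse):
#
#   cache.size.max = 500M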
def startup_cluster(self):
    if not EdasEnv.getBool("edas.manage.cluster", True):
        return None
    clusterThread = EDASKClusterThread()
    clusterThread.start()
    return clusterThread
def __init__(self, _proj: str, _exp: str, appConf: Dict[str, str] = None):
    super(LocalTestManager, self).__init__(_proj, _exp)
    EdasEnv.update(appConf)
    self.processManager = ProcessManager.initManager(EdasEnv.parms)
def main(host, port, bokeh_port, show, _bokeh, bokeh_whitelist, bokeh_prefix, use_xheaders,
         pid_file, scheduler_file, interface, local_directory, preload, preload_argv,
         tls_ca_file, tls_cert, tls_key):
    logger = SchedulerLogger.getLogger()
    enable_proctitle_on_current()
    enable_proctitle_on_children()

    log_metrics = EdasEnv.getBool("log.metrics", False)
    logger.info(f"Log Metrics: {log_metrics}")
    plugins = [EDASSchedulerPlugin(logger)] if log_metrics else []

    sec = Security(
        tls_ca_file=tls_ca_file,
        tls_scheduler_cert=tls_cert,
        tls_scheduler_key=tls_key,
    )

    if not host and (tls_ca_file or tls_cert or tls_key):
        host = 'tls://'

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)
        atexit.register(del_pid_file)

    local_directory_created = False
    if local_directory:
        if not os.path.exists(local_directory):
            os.mkdir(local_directory)
            local_directory_created = True
    else:
        local_directory = tempfile.mkdtemp(prefix='scheduler-')
        local_directory_created = True
    if local_directory not in sys.path:
        sys.path.insert(0, local_directory)

    if sys.platform.startswith('linux'):
        import resource  # module fails importing on Windows
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    services = {}
    if _bokeh:
        try:
            from distributed.bokeh.scheduler import BokehScheduler
            services[('bokeh', bokeh_port)] = (BokehScheduler, {'prefix': bokeh_prefix})
        except ImportError as error:
            if str(error).startswith('No module named'):
                logger.info('Web dashboard not loaded. Unable to import bokeh')
            else:
                logger.info('Unable to import bokeh: %s' % str(error))

    scheduler = Scheduler(loop=loop, services=services, scheduler_file=scheduler_file, security=sec)
    for plugin in plugins:
        logger.info(f"@SP: Adding scheduler plugin: {plugin}")
        scheduler.add_plugin(plugin)
    scheduler.start(addr)
    comm = Comm(scheduler)
    comm.start()

    if not preload:
        preload = dask.config.get('distributed.scheduler.preload', {})
    if not preload_argv:
        preload_argv = dask.config.get('distributed.scheduler.preload-argv', {})
    preload_modules(preload, parameter=scheduler, file_dir=local_directory, argv=preload_argv)

    logger.info('Local Directory: %26s', local_directory)
    logger.info('-' * 47)

    install_signal_handlers(loop)

    def shutdown_scheduler():
        comm.terminate()
        scheduler.stop()
        if local_directory_created:
            shutil.rmtree(local_directory)
        logger.info("End scheduler at %r", addr)

    def close_loop():
        loop.stop()
        loop.close()
        shutdown_scheduler()
    atexit.register(close_loop)

    try:
        loop.start()
        loop.close()
    finally:
        shutdown_scheduler()
def __init__(self, _proj: str, _exp: str, appConf: Dict[str, str] = None):
    super(LocalTestManager, self).__init__(_proj, _exp)
    EdasEnv.update(appConf)