Example #1
 def __init__(self,
              client_address: str = None,
              request_port: int = None,
              response_port: int = None):
     super(EDASapp, self).__init__(
         get_or_else(client_address, EdasEnv.get("wps.server.address",
                                                 "*")),
         get_or_else(request_port, EdasEnv.get("request.port", 4556)),
         get_or_else(response_port, EdasEnv.get("response.port", 4557)))
     self.process = "edas"
     self.processManager = None
     atexit.register(self.term, "ShutdownHook Called")
     self.logger.info("STARTUP CLUSTER")
     self.processManager = ProcessManager.initManager(EdasEnv.parms)
     self.scheduler_info = self.processManager.client.scheduler_info()
     workers: Dict = self.scheduler_info.pop("workers")
     self.logger.info(" @@@@@@@ SCHEDULER INFO: " +
                      str(self.scheduler_info))
     self.logger.info(f" N Workers: {len(workers)} ")
     for addr, specs in workers.items():
         self.logger.info(f"  -----> Worker {addr}: {specs}")
     log_metrics = EdasEnv.parms.get("log.cwt.metrics", True)
     if log_metrics:
         self.metricsThread = Thread(target=self.trackCwtMetrics)
         self.metricsThread.start()
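
Usage note: the constructor above relies on dask.distributed's Client.scheduler_info(), which returns a dict whose "workers" entry maps each worker address to its spec. A minimal standalone sketch against a throwaway local cluster (the logging format is illustrative, not the EDAS one):

from distributed import Client

client = Client(processes=False)   # in-process cluster, for illustration only
info = client.scheduler_info()     # scheduler metadata, including a "workers" mapping
workers = info.pop("workers")      # worker address -> spec dict
print(f"N Workers: {len(workers)}")
for addr, specs in workers.items():
    print(f"Worker {addr}: {specs}")
client.close()
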
Example #2
 def getSession(self, dataSource: DataSource) -> Session:
     session: Session = None
     if dataSource.auth == "esgf":
         from pydap.cas.esgf import setup_session
         openid = EdasEnv.get("esgf.openid", "")
         password = EdasEnv.get("esgf.password", "")
         username = EdasEnv.get("esgf.username", openid.split("/")[-1])
         session = setup_session(openid,
                                 password,
                                 username,
                                 check_url=dataSource.address)
     elif dataSource.auth == "urs":
         from pydap.cas.urs import setup_session
         username = EdasEnv.get("urs.username", "")
         password = EdasEnv.get("urs.password", "")
         session = setup_session(username,
                                 password,
                                 check_url=dataSource.address)
     elif dataSource.auth == "cookie":
         from pydap.cas.get_cookies import setup_session
         username = EdasEnv.get("auth.username", "")
         password = EdasEnv.get("auth.password", "")
         auth_url = EdasEnv.get("auth.url", "")
         session = setup_session(auth_url, username, password)
     elif dataSource.auth is not None:
         raise Exception("Unknown authentication method: " +
                         dataSource.auth)
     return session
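
Usage note: the Session returned by getSession can be handed to xarray's pydap backend to open an authenticated OPeNDAP dataset. A minimal sketch for the "urs" case, assuming placeholder credentials and a hypothetical dataset URL:

import xarray as xr
from pydap.cas.urs import setup_session

username = "my_urs_user"                                  # placeholder credentials
password = "my_urs_password"
url = "https://opendap.example.gov/opendap/sample.nc4"    # hypothetical OPeNDAP endpoint

session = setup_session(username, password, check_url=url)
store = xr.backends.PydapDataStore.open(url, session=session)  # reuse the authenticated session
dset = xr.open_dataset(store)
print(dset)
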
Example #3
    def buildWorkflow(self, request: TaskRequest, node: WorkflowNode, inputs: EDASDatasetCollection) -> EDASDatasetCollection:
        snode: SourceNode = node
        results = EDASDatasetCollection( "InputKernel.build-" + node.name )
        t0 = time.time()
        dset = self.getCachedDataset( snode )
        if dset is not None:
            self.importToDatasetCollection(results, request, snode, dset.xr )
            self.logger.info( "Access input data from cache: " + dset.id )
        else:
            dataSource: DataSource = snode.varSource.dataSource
            if dataSource.type == SourceType.collection:
                from edas.collection.agg import Axis as AggAxis, File as AggFile
                collection = Collection.new( dataSource.address )
                self.logger.info("Input collection: " + dataSource.address )
                aggs = collection.sortVarsByAgg( snode.varSource.vids )
                domain = request.operationManager.domains.getDomain( snode.domain )
                if domain is not None:
                    timeBounds = domain.findAxisBounds(Axis.T)
                    startDate = None if timeBounds is None else TimeConversions.parseDate(timeBounds.start)
                    endDate = None if timeBounds is None else TimeConversions.parseDate(timeBounds.end)
                else:
                    startDate = endDate = None
                for ( aggId, vars ) in aggs.items():
                    use_chunks = True
                    pathList = collection.pathList(aggId) if startDate is None else collection.periodPathList(aggId,startDate,endDate)
                    assert len(pathList) > 0, f"No files found in aggregation {aggId} for date range {startDate} - {endDate} "
                    nFiles = len(pathList)
                    if use_chunks:
                        nReadPartitions = int( EdasEnv.get( "mfdataset.npartitions", 250 ) )
                        agg = collection.getAggregation(aggId)
                        nchunks, fileSize = agg.getChunkSize( nReadPartitions, nFiles )
                        chunk_kwargs = {} if nchunks is None else dict(chunks={"time": nchunks})
                        self.logger.info( f"Open mfdataset: vars={vars}, NFILES={nFiles}, FileSize={fileSize}, FILES[0]={pathList[0]}, chunk_kwargs={chunk_kwargs}, startDate={startDate}, endDate={endDate}, domain={domain}" )
                    else:
                        chunk_kwargs = {}
                        self.logger.info( f"Open mfdataset: vars={vars},  NFILES={nFiles}, FILES[0]={pathList[0]}" )
                    dset = xr.open_mfdataset( pathList, engine='netcdf4', data_vars=vars, parallel=True, **chunk_kwargs )
                    self.logger.info(f"Import to collection")
                    self.importToDatasetCollection( results, request, snode, dset )
                    self.logger.info(f"Collection import complete.")
            elif dataSource.type == SourceType.file:
                self.logger.info( "Reading data from address: " + dataSource.address )
                files = glob.glob( dataSource.address )
                parallel = len(files) > 1
                assert len(files) > 0, f"No files matching path {dataSource.address}"
                dset = xr.open_mfdataset(dataSource.address, engine='netcdf4', data_vars=snode.varSource.ids, parallel=parallel )
                self.importToDatasetCollection(results, request, snode, dset)
            elif dataSource.type == SourceType.archive:
                self.logger.info( "Reading data from archive: " + dataSource.address )
                dataPath =  request.archivePath( dataSource.address )
                dset = xr.open_mfdataset( [dataPath] )
                self.importToDatasetCollection(results, request, snode, dset)
            elif dataSource.type == SourceType.dap:
                nchunks = request.runargs.get( "ncores", 8 )
                self.logger.info( f" --------------->>> Reading data from address: {dataSource.address}, nchunks = {nchunks}" )
#                dset = xr.open_mfdataset( [dataSource.address], engine="netcdf4", data_vars=snode.varSource.ids, chunks={"time":nchunks} )
                dset = xr.open_dataset( dataSource.address, engine="netcdf4", chunks={"time":nchunks} )
                self.importToDatasetCollection( results, request, snode, dset )
            self.logger.info( f"Access input data source {dataSource.address}, time = {time.time() - t0} sec" )
            self.logger.info( "@L: LOCATION=> host: {}, thread: {}, proc: {}".format( socket.gethostname(), threading.get_ident(), os.getpid() ) )
        return results
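
The collection branch above boils down to a chunked xr.open_mfdataset call. A minimal standalone sketch of that pattern, with hypothetical file paths, variable name, and time chunk length:

import glob
import xarray as xr

paths = sorted(glob.glob("/data/agg/tas_*.nc"))   # hypothetical aggregation files
chunk_len = 12                                    # hypothetical chunk length along the time axis
chunk_kwargs = {"chunks": {"time": chunk_len}} if chunk_len else {}

dset = xr.open_mfdataset(paths, engine="netcdf4", data_vars=["tas"],
                         parallel=True, **chunk_kwargs)
print(dset)
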
Example #4
    def __init__(self,
                 nthreads=0,
                 nprocs=1,
                 nohost=False,
                 remote_python=None,
                 memory_limit=None,
                 worker_port=None,
                 nanny_port=None):
        Thread.__init__(self)
        self.logger = EDASLogger.getLogger()
        self.nthreads = nthreads
        self.nprocs = nprocs
        self.worker_addrs = self.getHosts()

        self.ssh_username = os.environ.get('USER', None)
        self.ssh_port = 22
        self.ssh_private_key = get_private_key()
        self.scheduler_addr = getHost()
        self.scheduler_port = int(EdasEnv.get("scheduler.port", 8786))
        self.logdir = os.path.expanduser("~/.edas/logs")
        self.active = False

        self.nohost = nohost
        self.remote_python = remote_python
        self.memory_limit = memory_limit
        self.worker_port = worker_port
        self.nanny_port = nanny_port

        # Keep track of all running threads
        self.threads = []
Example #5
 def startup_scheduler(self):
     os.environ["PATH"] = ":".join([self.EDAS_BIN_DIR, os.environ["PATH"]])
     if not EdasEnv.getBool("edas.manage.scheduler", True): return None
     #        os.environ["PKEY_OPTS"]  = "--ssh-private-key=" + get_private_key()
     bokeh_port = int(EdasEnv.get("dashboard.port", 8787))
     self.logger.info(
         "Starting up scheduler using script {} with host {} and port {}".
         format(self.SCHEDULER_SCRIPT, self.scheduler_host,
                self.scheduler_port))
     args = [
         sys.executable, self.SCHEDULER_SCRIPT, "--host",
         self.scheduler_host, "--port",
         str(self.scheduler_port), "--bokeh-port",
         str(bokeh_port)
     ]
     return subprocess.Popen(args, stderr=subprocess.PIPE)
Example #6
 def __init__(self):
     Cluster.__init__(self)
     self.logger = EDASLogger.getLogger()
     self.scheduler_host = getHost()
     self.scheduler_port = int(EdasEnv.get("scheduler.port", 8786))
     self.schedulerProcess = self.startup_scheduler()
     time.sleep(14)
     self.clusterThread = self.startup_cluster()
Example #7
 def __init__(self, _proj: str, _exp: str, appConf: Dict[str, str] = None):
     super(DistributedTestManager, self).__init__(_proj, _exp)
     EdasEnv.update(appConf)
     log_metrics = appConf.get("log_metrics", False)
     self.processManager = ProcessManager.initManager(EdasEnv.parms)
     time.sleep(10)
     self.processing = False
     self.scheduler_info = self.processManager.client.scheduler_info()
     self.workers: Dict = self.scheduler_info.pop("workers")
     self.logger.info(" @@@@@@@ SCHEDULER INFO: " +
                      str(self.scheduler_info))
     self.logger.info(f" N Workers: {len(self.workers)} ")
     for addr, specs in self.workers.items():
         self.logger.info(f"  -----> Worker {addr}: {specs}")
     if log_metrics:
         self.metricsThread = Thread(target=self.trackCwtMetrics)
         self.metricsThread.start()
Example #8
 def getHosts(self):
     hostfile = EdasEnv.get("hostfile.path",
                            os.path.expanduser("~/.edas/conf/hosts"))
     assert os.path.isfile(hostfile), (
         "Error, the EDAS hosts file '{}' does not exist.  Copy edas/resourses/hosts.template "
         "to '{}' and edit.".format(hostfile, hostfile))
     with open(hostfile) as f:
         return f.read().split()
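
Since the file content is only split on whitespace, one hostname per line is enough. A minimal sketch that writes such a hosts file (the hostnames are hypothetical):

from pathlib import Path

hosts = ["node001.cluster.local", "node002.cluster.local"]   # hypothetical worker hostnames
hostfile = Path("~/.edas/conf/hosts").expanduser()
hostfile.parent.mkdir(parents=True, exist_ok=True)
hostfile.write_text("\n".join(hosts) + "\n")
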
Example #9
 def validate(cls, _address: str, stype: SourceType = SourceType.uri):
     allowed_sources = [
         r.strip() for r in EdasEnv.get("sources.allowed",
                                        "collection,https").split(",")
     ]
     toks = _address.split(":")
     scheme = toks[0].lower()
     if (stype.name.lower() == "uri") and (scheme in allowed_sources):
         if scheme == "https":
             trusted_servers = [
                 r.strip()
                 for r in EdasEnv.get("trusted.dap.servers", "").split(",")
             ]
             for trusted_server in trusted_servers:
                 if trusted_server in _address: return scheme, toks[1]
             raise Exception(
                 f"Attempt to access untrusted dap server: {_address}\n\t Trusted servers: {trusted_servers}\n\t Use parameter 'trusted.dap.servers' in app.conf to list trusted addresses, e.g. 'trusted.dap.servers=https://aims3.llnl.gov/thredds/dodsC/'"
             )
         else:
             return scheme, toks[1]
     else:
         raise Exception("Unallowed scheme '{}' in url: {}".format(
             scheme, _address))
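
The validation above is driven by two configuration parameters. A hypothetical configuration, supplied the same way the other examples call EdasEnv.update, that lets collection:// URIs and the LLNL DAP server through:

appConf = {
    "sources.allowed": "collection,https",
    "trusted.dap.servers": "https://aims3.llnl.gov/thredds/dodsC/",
}
EdasEnv.update(appConf)
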
Example #10
variable = "t"

domains = [{"name": "d0"}]
variables = [{
    "uri": f"collection://{collection}:",
    "name": f"{variable}:v0",
    "domain": "d0"
}]
operations = [{"name": "xarray.ave", "input": "v0", "axes": "t"}]
local = True
scheduler = "127.0.0.1:8786"

if __name__ == '__main__':
    print(f"Running test")
    appConf = {"sources.allowed": "collection,https", "log.metrics": "true"}
    EdasEnv.update(appConf)

    if local:
        print(f"Initializing Local Dask cluster")
        client = Client()
    else:
        if scheduler is None:
            cluster = EDASCluster()
            print(
                "Initializing Dask-distributed cluster with scheduler address: "
                + cluster.scheduler_address)
            client = Client(cluster.scheduler_address, timeout=64)
            time.sleep(20)
        else:
            print("Initializing client with existing scheduler at: " +
                  scheduler)
Example #11
 def __init__(self):
     self.arrayCache: Dict[str, EDASArray] = OrderedDict()
     self.maxSize = SizeParser.parse(EdasEnv.get("cache.size.max", "500M"))
     self.currentSize = 0
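
SizeParser.parse is assumed to turn a size string such as "500M" into a byte count. A minimal sketch of that kind of parser (the real EDAS implementation may differ):

def parse_size(spec: str) -> int:
    # "500M" -> 500 * 1024**2 bytes; bare numbers are taken as bytes
    units = {"K": 1024, "M": 1024 ** 2, "G": 1024 ** 3, "T": 1024 ** 4}
    spec = spec.strip().upper()
    if spec and spec[-1] in units:
        return int(float(spec[:-1]) * units[spec[-1]])
    return int(spec)

print(parse_size("500M"))  # 524288000
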
Example #12
 def startup_cluster(self):
     if not EdasEnv.getBool("edas.manage.cluster", True): return None
     clusterThread = EDASKClusterThread()
     clusterThread.start()
     return clusterThread
Example #13
 def __init__(self, _proj: str, _exp: str, appConf: Dict[str, str] = None):
     super(LocalTestManager, self).__init__(_proj, _exp)
     EdasEnv.update(appConf)
     self.processManager = ProcessManager.initManager(EdasEnv.parms)
Example #14
def main(host, port, bokeh_port, show, _bokeh, bokeh_whitelist, bokeh_prefix,
         use_xheaders, pid_file, scheduler_file, interface, local_directory,
         preload, preload_argv, tls_ca_file, tls_cert, tls_key):
    logger = SchedulerLogger.getLogger()
    enable_proctitle_on_current()
    enable_proctitle_on_children()
    log_metrics = EdasEnv.getBool("log.metrics", False)
    logger.info(f"Log Metrics: {log_metrics}")
    plugins = [EDASSchedulerPlugin(logger)] if log_metrics else []

    sec = Security(
        tls_ca_file=tls_ca_file,
        tls_scheduler_cert=tls_cert,
        tls_scheduler_key=tls_key,
    )

    if not host and (tls_ca_file or tls_cert or tls_key):
        host = 'tls://'

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    local_directory_created = False
    if local_directory:
        if not os.path.exists(local_directory):
            os.mkdir(local_directory)
            local_directory_created = True
    else:
        local_directory = tempfile.mkdtemp(prefix='scheduler-')
        local_directory_created = True
    if local_directory not in sys.path:
        sys.path.insert(0, local_directory)

    if sys.platform.startswith('linux'):
        import resource  # module fails importing on Windows
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    services = {}
    if _bokeh:
        try:
            from distributed.bokeh.scheduler import BokehScheduler
            services[('bokeh', bokeh_port)] = (BokehScheduler, {
                'prefix': bokeh_prefix
            })
        except ImportError as error:
            if str(error).startswith('No module named'):
                logger.info(
                    'Web dashboard not loaded.  Unable to import bokeh')
            else:
                logger.info('Unable to import bokeh: %s' % str(error))

    scheduler = Scheduler(loop=loop,
                          services=services,
                          scheduler_file=scheduler_file,
                          security=sec)

    for plugin in plugins:
        logger.info(f"@SP: Adding scheduler plugin: {plugin}")
        scheduler.add_plugin(plugin)
    scheduler.start(addr)
    comm = Comm(scheduler)
    comm.start()
    if not preload:
        preload = dask.config.get('distributed.scheduler.preload', {})
    if not preload_argv:
        preload_argv = dask.config.get('distributed.scheduler.preload-argv',
                                       {})
    preload_modules(preload,
                    parameter=scheduler,
                    file_dir=local_directory,
                    argv=preload_argv)

    logger.info('Local Directory: %26s', local_directory)
    logger.info('-' * 47)
    install_signal_handlers(loop)

    def shutdown_scheduler():
        comm.terminate()
        scheduler.stop()
        if local_directory_created:
            shutil.rmtree(local_directory)
        logger.info("End scheduler at %r", addr)

    def close_loop():
        loop.stop()
        loop.close()
        shutdown_scheduler()

    atexit.register(close_loop)

    try:
        loop.start()
        loop.close()
    finally:
        shutdown_scheduler()
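
The plugins list in main() expects objects implementing dask.distributed's SchedulerPlugin interface. A minimal sketch of such a plugin, standing in for EDASSchedulerPlugin (whose implementation is not shown here):

from distributed.diagnostics.plugin import SchedulerPlugin

class TaskCountingPlugin(SchedulerPlugin):
    def __init__(self, logger):
        self.logger = logger
        self.counts = {}

    def transition(self, key, start, finish, *args, **kwargs):
        # Invoked by the scheduler on every task state transition, e.g. "processing" -> "memory"
        self.counts[finish] = self.counts.get(finish, 0) + 1
        if finish == "erred":
            self.logger.warning(f"Task {key} failed")

# Registered the same way main() registers its plugins: scheduler.add_plugin(TaskCountingPlugin(logger))
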
Example #15
 def __init__(self, _proj: str, _exp: str, appConf: Dict[str, str] = None):
     super(LocalTestManager, self).__init__(_proj, _exp)
     EdasEnv.update(appConf)