Example #1
 def __init__(self,
              client_address: Optional[str] = None,
              request_port: Optional[int] = None,
              response_port: Optional[int] = None):
     super(EDASapp, self).__init__(
         get_or_else(client_address, EdasEnv.get("wps.server.address",
                                                 "*")),
         get_or_else(request_port, EdasEnv.get("request.port", 4556)),
         get_or_else(response_port, EdasEnv.get("response.port", 4557)))
     self.process = "edas"
     self.processManager = None
     atexit.register(self.term, "ShutdownHook Called")
     self.logger.info("STARTUP CLUSTER")
     self.processManager = ProcessManager.initManager(EdasEnv.parms)
     self.scheduler_info = self.processManager.client.scheduler_info()
     workers: Dict = self.scheduler_info.pop("workers")
     self.logger.info(" @@@@@@@ SCHEDULER INFO: " +
                      str(self.scheduler_info))
     self.logger.info(f" N Workers: {len(workers)} ")
     for addr, specs in workers.items():
         self.logger.info(f"  -----> Worker {addr}: {specs}")
     log_metrics = EdasEnv.parms.get("log.cwt.metrics", True)
     if log_metrics:
         self.metricsThread = Thread(target=self.trackCwtMetrics)
         self.metricsThread.start()
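
The constructor above leans on a get_or_else helper that is not part of this listing. A minimal sketch of what it presumably does, i.e. prefer the explicit argument and fall back to the configured default (a reconstruction, not the actual EDAS code):

from typing import Optional, TypeVar

T = TypeVar("T")

def get_or_else(value: Optional[T], fallback: T) -> T:
    # Hypothetical reconstruction of the helper used above:
    # return the explicit value unless it is None.
    return value if value is not None else fallback
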
Example #2
 def getSession(self, dataSource: DataSource) -> Session:
     session: Optional[Session] = None
     if dataSource.auth == "esgf":
         from pydap.cas.esgf import setup_session
         openid = EdasEnv.get("esgf.openid", "")
         password = EdasEnv.get("esgf.password", "")
         username = EdasEnv.get("esgf.username", openid.split("/")[-1])
         session = setup_session(openid,
                                 password,
                                 username,
                                 check_url=dataSource.address)
     elif dataSource.auth == "urs":
         from pydap.cas.urs import setup_session
         username = EdasEnv.get("urs.username", "")
         password = EdasEnv.get("urs.password", "")
         session = setup_session(username,
                                 password,
                                 check_url=dataSource.address)
     elif dataSource.auth == "cookie":
         from pydap.cas.get_cookies import setup_session
         username = EdasEnv.get("auth.username", "")
         password = EdasEnv.get("auth.password", "")
         auth_url = EdasEnv.get("auth.url", "")
         session = setup_session(auth_url, username, password)
     elif dataSource.auth is not None:
         raise Exception("Unknown authentication method: " +
                         dataSource.auth)
     return session
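
The returned Session is what pydap-based readers need to fetch data from protected OPeNDAP endpoints. A minimal usage sketch with xarray's pydap data store (URL and credentials are placeholders):

import xarray as xr
from pydap.cas.urs import setup_session

# Placeholder endpoint and credentials for a URS-protected dataset.
url = "https://opendap.example.gov/some/dataset"
session = setup_session("my_user", "my_password", check_url=url)

# Hand the authenticated session to xarray via its pydap data store.
store = xr.backends.PydapDataStore.open(url, session=session)
ds = xr.open_dataset(store)
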
Example #3
    def __init__(self,
                 nthreads=0,
                 nprocs=1,
                 nohost=False,
                 remote_python=None,
                 memory_limit=None,
                 worker_port=None,
                 nanny_port=None):
        Thread.__init__(self)
        self.logger = EDASLogger.getLogger()
        self.nthreads = nthreads
        self.nprocs = nprocs
        self.worker_addrs = self.getHosts()

        self.ssh_username = os.environ.get('USER', None)
        self.ssh_port = 22
        self.ssh_private_key = get_private_key()
        self.scheduler_addr = getHost()
        self.scheduler_port = int(EdasEnv.get("scheduler.port", 8786))
        self.logdir = os.path.expanduser("~/.edas/logs")
        self.active = False

        self.nohost = nohost
        self.remote_python = remote_python
        self.memory_limit = memory_limit
        self.worker_port = worker_port
        self.nanny_port = nanny_port

        # Keep track of all running threads
        self.threads = []
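
This constructor also calls getHosts (shown in Example #6 below), getHost, and get_private_key, the latter two not included in this listing. Plausible minimal versions, offered only as assumptions about their contracts:

import os
import socket

def getHost() -> str:
    # Assumed behavior: return an address workers can use to reach this machine.
    return socket.gethostbyname(socket.gethostname())

def get_private_key() -> str:
    # Assumed behavior: path to the SSH key used for worker connections,
    # defaulting to the usual location (env var name is hypothetical).
    return os.environ.get("EDAS_SSH_PKEY", os.path.expanduser("~/.ssh/id_rsa"))
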
Example #4
    def buildWorkflow(self, request: TaskRequest, node: WorkflowNode, inputs: EDASDatasetCollection) -> EDASDatasetCollection:
        snode: SourceNode = node
        results = EDASDatasetCollection( "InputKernel.build-" + node.name )
        t0 = time.time()
        dset = self.getCachedDataset( snode )
        if dset is not None:
            self.importToDatasetCollection(results, request, snode, dset.xr )
            self.logger.info( "Access input data from cache: " + dset.id )
        else:
            dataSource: DataSource = snode.varSource.dataSource
            if dataSource.type == SourceType.collection:
                from edas.collection.agg import Axis as AggAxis, File as AggFile
                collection = Collection.new( dataSource.address )
                self.logger.info("Input collection: " + dataSource.address )
                aggs = collection.sortVarsByAgg( snode.varSource.vids )
                domain = request.operationManager.domains.getDomain( snode.domain )
                if domain is not None:
                    timeBounds = domain.findAxisBounds(Axis.T)
                    startDate = None if timeBounds is None else TimeConversions.parseDate(timeBounds.start)
                    endDate   = None if timeBounds is None else TimeConversions.parseDate(timeBounds.end)
                else: startDate = endDate = None
                for ( aggId, vars ) in aggs.items():
                    use_chunks = True
                    pathList = collection.pathList(aggId) if startDate is None else collection.periodPathList(aggId,startDate,endDate)
                    assert len(pathList) > 0, f"No files found in aggregation {aggId} for date range {startDate} - {endDate} "
                    nFiles = len(pathList)
                    if use_chunks:
                        nReadPartitions = int( EdasEnv.get( "mfdataset.npartitions", 250 ) )
                        agg = collection.getAggregation(aggId)
                        nchunks, fileSize = agg.getChunkSize( nReadPartitions, nFiles )
                        chunk_kwargs = {} if nchunks is None else dict(chunks={"time": nchunks})
                        self.logger.info( f"Open mfdataset: vars={vars}, NFILES={nFiles}, FileSize={fileSize}, FILES[0]={pathList[0]}, chunk_kwargs={chunk_kwargs}, startDate={startDate}, endDate={endDate}, domain={domain}" )
                    else:
                        chunk_kwargs = {}
                        self.logger.info( f"Open mfdataset: vars={vars},  NFILES={nFiles}, FILES[0]={pathList[0]}" )
                    dset = xr.open_mfdataset( pathList, engine='netcdf4', data_vars=vars, parallel=True, **chunk_kwargs )
                    self.logger.info("Import to collection")
                    self.importToDatasetCollection( results, request, snode, dset )
                    self.logger.info("Collection import complete.")
            elif dataSource.type == SourceType.file:
                self.logger.info( "Reading data from address: " + dataSource.address )
                files = glob.glob( dataSource.address )
                parallel = len(files) > 1
                assert len(files) > 0, f"No files matching path {dataSource.address}"
                dset = xr.open_mfdataset(dataSource.address, engine='netcdf4', data_vars=snode.varSource.ids, parallel=parallel )
                self.importToDatasetCollection(results, request, snode, dset)
            elif dataSource.type == SourceType.archive:
                self.logger.info( "Reading data from archive: " + dataSource.address )
                dataPath =  request.archivePath( dataSource.address )
                dset = xr.open_mfdataset( [dataPath] )
                self.importToDatasetCollection(results, request, snode, dset)
            elif dataSource.type == SourceType.dap:
                nchunks = request.runargs.get( "ncores", 8 )
                self.logger.info( f" --------------->>> Reading data from address: {dataSource.address}, nchunks = {nchunks}" )
#                dset = xr.open_mfdataset( [dataSource.address], engine="netcdf4", data_vars=snode.varSource.ids, chunks={"time":nchunks} )
                dset = xr.open_dataset( dataSource.address, engine="netcdf4", chunks={"time":nchunks} )
                self.importToDatasetCollection( results, request, snode, dset )
            self.logger.info( f"Access input data source {dataSource.address}, time = {time.time() - t0} sec" )
            self.logger.info( "@L: LOCATION=> host: {}, thread: {}, proc: {}".format( socket.gethostname(), threading.get_ident(), os.getpid() ) )
        return results
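
Stripped of the EDAS plumbing, the I/O pattern at the heart of the collection branch is a lazily chunked multi-file open, so that downstream kernels execute as parallel dask tasks. An illustrative reduction (paths, variable name, and chunk size are placeholders):

import xarray as xr

# Placeholder file list and variable; chunking along "time" keeps the
# open lazy and turns later computations into parallel dask tasks.
paths = ["/data/tas_2000.nc", "/data/tas_2001.nc"]
dset = xr.open_mfdataset(paths, engine="netcdf4", data_vars=["tas"],
                         parallel=True, chunks={"time": 50})
print(dset["tas"].chunks)
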
Example #5
 def __init__(self):
     Cluster.__init__(self)
     self.logger = EDASLogger.getLogger()
     self.scheduler_host = getHost()
     self.scheduler_port = int(EdasEnv.get("scheduler.port", 8786))
     self.schedulerProcess = self.startup_scheduler()
     time.sleep(14)  # give the scheduler process time to come up before starting workers
     self.clusterThread = self.startup_cluster()
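
The fixed time.sleep(14) only gives the scheduler process a head start; a more robust variant would poll the scheduler port until it accepts connections. A sketch of such a readiness check (a suggestion, not part of EDAS):

import socket
import time

def wait_for_port(host: str, port: int, timeout: float = 30.0) -> bool:
    # Poll until a TCP connection succeeds or the timeout elapses.
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with socket.create_connection((host, port), timeout=1.0):
                return True
        except OSError:
            time.sleep(0.5)
    return False
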
Example #6
 def getHosts(self):
     hostfile = EdasEnv.get("hostfile.path",
                            os.path.expanduser("~/.edas/conf/hosts"))
     assert os.path.isfile(hostfile), (
         "Error, the EDAS hosts file '{}' does not exist. "
         "Copy edas/resourses/hosts.template to '{}' and edit.".format(hostfile, hostfile))
     with open(hostfile) as f:
         return f.read().split()
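
Since getHosts simply splits the file contents on whitespace, the hosts file is just a whitespace-separated list of worker addresses, e.g. (hostnames are placeholders):

node01.cluster.local
node02.cluster.local
node03.cluster.local
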
Example #7
 @classmethod
 def validate(cls, _address: str, stype: SourceType = SourceType.uri):
     allowed_sources = [
         r.strip() for r in EdasEnv.get("sources.allowed",
                                        "collection,https").split(",")
     ]
     toks = _address.split(":", 1)
     scheme = toks[0].lower()
     if (stype.name.lower() == "uri") and (scheme in allowed_sources):
         if scheme == "https":
             trusted_servers = [
                 r.strip()
                 for r in EdasEnv.get("trusted.dap.servers", "").split(",")
             ]
             for trusted_server in trusted_servers:
                 if trusted_server in _address: return scheme, toks[1]
             raise Exception(
                 f"Attempt to access untrusted dap server: {_address}\n\t Trusted servers: {trusted_servers}\n\t Use parameter 'trusted.dap.servers' in app.conf to list trusted addresses, e.g. 'trusted.dap.servers=https://aims3.llnl.gov/thredds/dodsC/'"
             )
         else:
             return scheme, toks[1]
     else:
         raise Exception("Unallowed scheme '{}' in url: {}".format(
             scheme, _address))
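
For example, with sources.allowed=collection,https and a trusted server configured, the check behaves roughly as follows (addresses are illustrative, and validate is assumed to be exposed on the data-source class):

# Illustrative calls, assuming the method lives on DataSource:
DataSource.validate("collection:merra2")
#   -> ("collection", "merra2")
DataSource.validate("https://aims3.llnl.gov/thredds/dodsC/some/dataset")
#   -> ("https", "//aims3.llnl.gov/thredds/dodsC/some/dataset") if the server is trusted
DataSource.validate("file:///etc/passwd")
#   raises Exception: disallowed scheme
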
Example #8
 def startup_scheduler(self):
     os.environ["PATH"] = ":".join([self.EDAS_BIN_DIR, os.environ["PATH"]])
     if not EdasEnv.getBool("edas.manage.scheduler", True): return None
     #        os.environ["PKEY_OPTS"]  = "--ssh-private-key=" + get_private_key()
     os.environ["PATH"] = ":".join([self.EDAS_BIN_DIR, os.environ["PATH"]])
     bokeh_port = int(EdasEnv.get("dashboard.port", 8787))
     self.logger.info(
         "Starting up scheduler using script {} with host {} and port {}".
         format(self.SCHEDULER_SCRIPT, self.scheduler_host,
                self.scheduler_port))
     args = [
         sys.executable, self.SCHEDULER_SCRIPT, "--host",
         self.scheduler_host, "--port",
         str(self.scheduler_port), "--bokeh-port",
         str(bokeh_port)
     ]
     return subprocess.Popen(args, stderr=subprocess.PIPE)
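
One caveat with stderr=subprocess.PIPE here: nothing in the snippet reads the pipe, so a chatty scheduler can eventually fill the OS pipe buffer and block. A small drain helper (a suggestion, not EDAS code) avoids that:

import subprocess
import threading

def drain_stderr(proc: subprocess.Popen, log) -> threading.Thread:
    # Continuously forward the child's stderr so the pipe buffer never fills.
    def pump():
        for line in iter(proc.stderr.readline, b""):
            log.info(line.decode(errors="replace").rstrip())
    t = threading.Thread(target=pump, daemon=True)
    t.start()
    return t
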
Example #9
 def __init__(self):
     self.arrayCache: Dict[str, EDASArray] = OrderedDict()
     self.maxSize = SizeParser.parse(EdasEnv.get("cache.size.max", "500M"))
     self.currentSize = 0
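
SizeParser.parse is not included in this listing; presumably it converts strings like "500M" into the byte count that maxSize is compared against. A minimal sketch of such a parser (a guess at the contract, not the EDAS implementation):

import re

_UNITS = {"": 1, "K": 1024, "M": 1024 ** 2, "G": 1024 ** 3, "T": 1024 ** 4}

def parse_size(text: str) -> int:
    # Convert strings like "500M" or "2G" to a number of bytes.
    m = re.fullmatch(r"\s*(\d+(?:\.\d+)?)\s*([KMGT]?)B?\s*", text.upper())
    if m is None:
        raise ValueError(f"Unparseable size: {text!r}")
    return int(float(m.group(1)) * _UNITS[m.group(2)])
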