Beispiel #1
0
    def __init__(self,
                 nthreads=0,
                 nprocs=1,
                 nohost=False,
                 remote_python=None,
                 memory_limit=None,
                 worker_port=None,
                 nanny_port=None):
        """Initialise the thread and record the cluster launch settings.

        Each keyword argument is stored unchanged on the instance under its
        own name.  Worker addresses come from ``self.getHosts()``, the
        scheduler host from ``getHost()``, the scheduler port from
        ``EdasEnv.get("scheduler.port", 8786)`` and the SSH user name from
        the ``USER`` environment variable (``None`` when it is unset).
        """
        Thread.__init__(self)
        self.logger = EDASLogger.getLogger()

        # Worker sizing is stored before getHosts() is consulted.
        self.nthreads, self.nprocs = nthreads, nprocs
        self.worker_addrs = self.getHosts()

        # SSH connection settings.
        self.ssh_username = os.environ.get('USER')
        self.ssh_port = 22
        self.ssh_private_key = get_private_key()

        # Scheduler endpoint and log directory.
        self.scheduler_addr = getHost()
        self.scheduler_port = int(EdasEnv.get("scheduler.port", 8786))
        self.logdir = os.path.expanduser("~/.edas/logs")

        # Remaining launch options, copied straight from the arguments.
        self.nanny_port = nanny_port
        self.worker_port = worker_port
        self.memory_limit = memory_limit
        self.remote_python = remote_python
        self.nohost = nohost

        # Start out inactive with no threads being tracked.
        self.active = False
        self.threads = []
Beispiel #2
0
 def __init__(self, _agg: "Aggregation", *args ):
    """Build a file entry from three positional string fields.

    ``args[0]`` is parsed as a float start time, ``args[1]`` as an integer
    size, and ``args[2]`` is kept as the relative path; each field is
    stripped of surrounding whitespace first.  ``date`` is the UTC datetime
    for ``start_time * 60`` seconds (presumably the start time is given in
    minutes since the epoch -- confirm against the file format).
    """
    self.agg = _agg
    self.logger = EDASLogger.getLogger()
    start_value = float(args[0].strip())
    self.start_time = start_value
    self.size = int(args[1].strip())
    self.relpath = args[2].strip()
    epoch_seconds = start_value * 60
    self.date = datetime.fromtimestamp( epoch_seconds, tz=timezone.utc )
Beispiel #3
0
 def __init__(self, _name, _spec_file ):
     """Record the name and spec-file path, then parse the spec file.

     ``_spec_file`` has a leading ``~`` expanded before it is stored.  The
     ``aggs`` and ``parms`` dictionaries start empty before
     ``_parseSpecFile()`` is invoked.
     """
     self.logger = EDASLogger.getLogger()
     self.name = _name
     spec_path = os.path.expanduser( _spec_file )
     self.spec = spec_path
     # Two distinct empty dicts, created before parsing starts.
     self.aggs, self.parms = {}, {}
     self._parseSpecFile()
Beispiel #4
0
 def __init__(self, serverConfiguration: Dict[str, str]):
     """Connect a Dask client as directed by the server configuration.

     When ``scheduler.address`` is configured, a client is connected to
     that scheduler with a 60 second timeout.  Otherwise a ``LocalCluster``
     is started with ``dask.nworkers`` workers (default: the CPU count),
     its scheduler address is recorded, and a call to
     ``edasOpManager.buildIndices`` is submitted to it.  The client's core
     counts and worker info are then stored and logged, and a metrics
     thread is started when ``log.scheduler.metrics`` is truthy.
     """
     self.logger = EDASLogger.getLogger()
     self.config = serverConfiguration
     self.active = True
     self.submitters = []
     self.num_wps_requests = 0
     self.scheduler_address = serverConfiguration.get(
         "scheduler.address", None)

     if self.scheduler_address is None:
         # No external scheduler configured: run a local cluster instead.
         nWorkers = int(
             self.config.get("dask.nworkers", multiprocessing.cpu_count()))
         local_cluster = LocalCluster(n_workers=nWorkers)
         self.client = Client(local_cluster)
         self.scheduler_address = self.client.scheduler.address
         self.logger.info(
             f"Initializing Local Dask cluster with {nWorkers} workers,  scheduler address = {self.scheduler_address}"
         )
         self.client.submit(lambda x: edasOpManager.buildIndices(x),
                            nWorkers)
     else:
         self.logger.info(
             "Initializing Dask-distributed cluster with scheduler address: "
             + self.scheduler_address)
         self.client = Client(self.scheduler_address, timeout=60)

     # Snapshot what the scheduler reports about cores and workers.
     self.ncores = self.client.ncores()
     self.logger.info(f" ncores: {self.ncores}")
     self.scheduler_info = self.client.scheduler_info()
     self.workers: Dict = self.scheduler_info.pop("workers")
     self.logger.info(f" workers: {self.workers}")

     # NOTE(review): the configuration is typed Dict[str, str], so any
     # non-empty value (even "false") enables metrics here -- confirm this
     # is intended.
     if serverConfiguration.get("log.scheduler.metrics", False):
         self.metricsThread = Thread(target=self.trackMetrics)
         self.metricsThread.start()
Beispiel #5
0
 def __init__(self, host, request_port, response_port):
     """Initialise the thread and remember the connection settings.

     Args:
         host: Stored as ``_host``.
         request_port: Stored as ``_request_port``.
         response_port: Stored as ``_response_port``.

     ``process`` starts out as ``None`` and the thread is marked as a
     daemon thread.
     """
     Thread.__init__(self)
     self.logger = EDASLogger.getLogger()
     self._response_port = response_port
     self._request_port = request_port
     self._host = host
     self.process = None
     # Thread.setDaemon() is deprecated since Python 3.10; assigning the
     # `daemon` attribute is the supported, equivalent spelling.
     self.daemon = True
Beispiel #6
0
 def __init__( self, name: Optional[str], _domId: str, data: Union[xa.DataArray,DataArrayGroupBy] ):
     """Wrap ``data`` under ``name`` and register its domain id.

     ``domId`` is stored as an empty string when ``_domId`` is ``None``;
     ``addDomain`` still receives ``_domId`` exactly as it was passed in.
     """
     self.logger = EDASLogger.getLogger()
     self.alwaysPersist = False
     self.loaded_data = None
     self.name = name
     self._data = data
     self.domId = "" if _domId is None else _domId
     self.addDomain( _domId )
Beispiel #7
0
 def __init__( self, spec: KernelSpec ):
     """Store the kernel spec and derive a unique-looking id from it.

     The id is the spec name, a dash, and five characters drawn with
     ``random.choice`` from ASCII letters and digits.
     """
     self.logger = EDASLogger.getLogger()
     self._spec: KernelSpec = spec
     self.parent: Optional[str] = None
     self.requiredOptions = []
     # Accepted range for the number of inputs.
     self._minInputs, self._maxInputs = 1, 100000
     alphabet = string.ascii_letters + string.digits
     suffix = ''.join( random.choice( alphabet ) for _ in range(5) )
     self._id: str = self._spec.name + "-" + suffix
Beispiel #8
0
 def __init__(self, **kwargs):
     """Initialise the endpoint state and register a shutdown hook.

     ``kwargs`` is accepted but not used here.  ``self.shutdown`` is
     registered with ``atexit`` and will be called with the message
     ``"ShutdownHook Called"`` at interpreter exit.
     """
     super(EDASEndpoint, self).__init__()
     self.logger = EDASLogger.getLogger()
     self.processManager = None
     self.handlers = {}
     self.process = "edas"
     self._epas = ["edas*", "xarray*"]
     atexit.register(self.shutdown, "ShutdownHook Called")
Beispiel #9
0
 def __init__(self, startup_delay: float = 14):
     """Launch the scheduler, pause, then launch the cluster.

     Args:
         startup_delay: Seconds to block between ``startup_scheduler()``
             and ``startup_cluster()``.  Defaults to 14, the pause that
             was previously hard-coded, so existing callers are unaffected.
     """
     Cluster.__init__(self)
     self.logger = EDASLogger.getLogger()
     self.scheduler_host = getHost()
     self.scheduler_port = int(EdasEnv.get("scheduler.port", 8786))
     self.schedulerProcess = self.startup_scheduler()
     # Presumably gives the scheduler time to come up before the cluster
     # is started -- confirm; a readiness check would be more robust.
     time.sleep(startup_delay)
     self.clusterThread = self.startup_cluster()
Beispiel #10
0
 def __init__(self, clientId: str, jobId: str, **kwargs):
     """Record the client/job ids and choose a cache file path.

     Recognised keyword options are ``cache`` (cache directory, default
     ``"/tmp"``) and ``workers`` (default ``1``).  ``filePath`` is the
     cache directory joined with ``"/"`` to a six-character random name
     from ``Job.randomStr`` with an ``.nc`` suffix.
     """
     self.logger = EDASLogger.getLogger()
     self.clientId, self.jobId = clientId, jobId
     self.cacheDir = kwargs.get("cache", "/tmp")
     self.workers = kwargs.get("workers", 1)
     self.start_time = time.time()
     file_name = Job.randomStr(6) + ".nc"
     self.filePath = self.cacheDir + "/" + file_name
Beispiel #11
0
 def __init__(self, _name, _agg_file ):
     """Record the name and aggregation file, then parse that file.

     All containers (``parms``, ``files``, ``axes``, ``dims``, ``vars``)
     start out empty before ``_parseAggFile()`` is invoked.
     """
     self.logger = EDASLogger.getLogger()
     self.name, self.spec = _name, _agg_file
     # Empty containers; `files` keeps insertion order.
     self.files: Dict[str,File] = OrderedDict()
     self.axes: Dict[str,Axis] = {}
     self.vars = {}
     self.dims = {}
     self.parms = {}
     self._parseAggFile()
Beispiel #12
0
 def __init__(self, _context: zmq.Context, _client_address: str,
              _response_port: int):
     """Store the connection settings and initialise the socket.

     The job, status-report and client collections start out empty;
     ``initSocket()`` is called once everything is stored.
     """
     super(Responder, self).__init__()
     self.logger = EDASLogger.getLogger()

     # Connection settings supplied by the caller.
     self.context: zmq.Context = _context
     self.client_address = _client_address
     self.response_port = _response_port

     # Bookkeeping containers.
     self.clients: Set[str] = set()
     self.status_reports: Dict[str, str] = {}
     self.executing_jobs: Dict[str, Response] = {}

     self.initSocket()
Beispiel #13
0
 def __init__(self):
     """Locate and load the EDAS configuration and prepare its directories.

     The configuration directory is ``$EDAS_CONFIG_DIR`` or, when unset,
     ``~/.edas/conf``; it must contain ``app.conf``.  The transients
     directory is ``$EDAS_CACHE_DIR``, else the ``edas.transients.dir``
     setting, else ``edas.cache.dir``, else ``/tmp``.  The collections
     directory is the ``edas.coll.dir`` setting, else ``~/.edas``.  Both
     directories are created when they do not exist yet.

     Raises:
         AssertionError: If the configuration directory or ``app.conf``
             is missing.
     """
     self.logger =  EDASLogger.getLogger()
     self.EDAS_CONFIG_DIR = os.environ.get('EDAS_CONFIG_DIR',os.path.expanduser("~/.edas/conf" ) )
     # NOTE(review): assert statements are stripped under `python -O`, so
     # these checks vanish in optimised runs; kept so that the exception
     # type seen by callers does not change.
     assert os.path.isdir( self.EDAS_CONFIG_DIR ), f"Error, the EDAS configuration directory '{self.EDAS_CONFIG_DIR}' does not exist"
     self.path = os.path.expanduser( os.path.join( self.EDAS_CONFIG_DIR, "app.conf" ) )
     assert os.path.isfile( self.path ), f"Error, the EDAS configuration file '{self.path}' does not exist.  Copy edas/resourses/app.conf.template to '{self.path}' and edit."
     aliases = { "wps.server.address": "client.address", "scheduler.address": "dask.scheduler" }
     self._parms: Dict[str,str] = self.getAppConfiguration( aliases )
     transients_dir = os.environ.get('EDAS_CACHE_DIR', self._parms.get( "edas.transients.dir", self._parms.get( "edas.cache.dir",  "/tmp" ) ) )
     collections_dir = self._parms.get("edas.coll.dir", "~/.edas" )
     # Expand "~" before use: os.makedirs() does not, so the unexpanded
     # "~/.edas" default would create a literal "~" directory under the
     # current working directory.
     self.TRANSIENTS_DIR = os.path.expanduser( transients_dir )
     self.COLLECTIONS_DIR = os.path.expanduser( collections_dir )
     for cpath in (self.TRANSIENTS_DIR, self.COLLECTIONS_DIR):
         # exist_ok avoids a crash when another process creates the
         # directory between the check and the call.
         if not os.path.exists(cpath):
             os.makedirs(cpath, exist_ok=True)
Beispiel #14
0
 def init(cls, project: str, experiment: str, requestId: str,
          identifier: str, dataInputs: Dict[str, List[Dict[str, Any]]]):
     """Build a ``TaskRequest`` from already-parsed data inputs.

     The ``domain``, ``variable`` and ``operation`` entries of
     ``dataInputs`` are handed to their managers; the resulting request
     is created with an empty run-arguments dict.
     """
     logger = EDASLogger.getLogger()
     message = "TaskRequest>-> process_name: {}, requestId: {}, datainputs: {}".format(
         identifier, requestId, str(dataInputs))
     logger.info(message)

     request_uid = UID(requestId)
     domains = DomainManager.new(dataInputs.get("domain"))
     variables = VariableManager.new(dataInputs.get("variable"))
     operations = OperationManager.new(dataInputs.get("operation"),
                                       domains, variables)
     return TaskRequest(request_uid, project, experiment, identifier,
                        operations, {})
Beispiel #15
0
 def __init__(self, cid: str, _job: Job, **kwargs):
     """Initialise the handler for ``_job`` on behalf of client ``cid``.

     The parent initialiser receives ``rid`` (the job's request id) and
     ``cid`` plus any extra keyword arguments; a ``rid`` or ``cid`` given
     in ``kwargs`` overrides the derived value.  The handler starts in
     ``Status.IDLE`` with an empty result queue and no exception.
     """
     parent_args = {"rid": _job.requestId, "cid": cid}
     parent_args.update(kwargs)
     super(TaskExecHandler, self).__init__(**parent_args)

     self.logger = EDASLogger.getLogger()
     self.job = _job
     self.sthread = None
     self.results = queue.Queue()
     self._processResults = True
     self._exception = None
     self._status = Status.IDLE
     self.start_time = time.time()
Beispiel #16
0
 def __init__(self, requestId: str, project: str, experiment: str,
              process: str, datainputs: Dict[str, List[Dict[str, Any]]],
              inputs: List[TaskResult], runargs: Dict[str, str],
              priority: float):
     """Store the description of a job and log its run arguments.

     Every argument is kept on the instance (``datainputs`` under the
     attribute name ``dataInputs``); ``workerIndex`` starts at 0.
     """
     self.logger = EDASLogger.getLogger()

     # Identity of the request.
     self.requestId = requestId
     self.project, self.experiment = project, experiment
     self.process = process

     # Payload and execution settings.
     self.dataInputs = datainputs
     self.inputs = inputs
     self.runargs = runargs
     self.priority = priority
     self.workerIndex = 0

     self.logger.info(f"Create job, runargs = {runargs}")
Beispiel #17
0
    def new(cls, dataset: xa.Dataset, varMap: Dict[str,str]=None, idMap=None ):
        """Wrap the variables of ``dataset`` in an ``EDASDataset``.

        The dataset is first passed through ``cls.rename`` with ``idMap``
        (an empty dict when omitted).  With a non-empty ``varMap`` one
        ``EDASArray`` is built per ``(variable id, domain id)`` entry;
        otherwise every variable in the dataset is wrapped with a ``None``
        domain id.  The dataset attributes are passed along unchanged.
        """
        EDASLogger.getLogger()  # call kept from the original; result unused
        var_domains = {} if varMap is None else varMap
        renames = {} if idMap is None else idMap
        dataset = cls.rename( dataset, renames )
        if var_domains:
            arrays = OrderedDict(
                ( vid, EDASArray( vid, domId, dataset[vid] ) )
                for vid, domId in var_domains.items() )
        else:
            arrays = OrderedDict(
                ( vid, EDASArray( vid, None, dataset[vid] ) )
                for vid in dataset.variables.keys() )
        return EDASDataset( arrays, dataset.attrs )
Beispiel #18
0
 def new(cls, job: Job):
     """Build a ``TaskRequest`` from a ``Job``.

     Variables are read from the job's ``variable`` data input, falling
     back to ``input`` when that key is absent, and are combined with the
     job's ``inputs``.  The job's run arguments are passed to the request.
     """
     logger = EDASLogger.getLogger()
     message = "TaskRequest--> process_name: {}, requestId: {}, datainputs: {}".format(
         job.process, job.requestId, str(job.dataInputs))
     logger.info(message)

     request_uid = UID(job.requestId)
     domains = DomainManager.new(job.dataInputs.get("domain"))
     variable_specs = job.dataInputs.get("variable",
                                         job.dataInputs.get("input"))
     variables = VariableManager.new(variable_specs, job.inputs)
     operations = OperationManager.new(job.dataInputs.get("operation"),
                                       domains, variables)
     return TaskRequest(request_uid, job.project, job.experiment,
                        job.process, operations, job.runargs)
Beispiel #19
0
 def __init__(self,
              _name: str,
              _start: Union[float, int, str],
              _end: Union[float, int, str],
              _step: Union[float, int, str],
              _system: str,
              _metadata: Dict,
              timeDelta: Optional[relativedelta] = None):
     """Store the bounds of one axis.

     ``type`` is derived from the name via ``Axis.parse``; every other
     argument is stored as given (``timeDelta`` under ``_timeDelta``).
     """
     self.logger = EDASLogger.getLogger()
     self.name = _name
     self.type = Axis.parse(_name)

     # Extent of the axis and the system its bounds are expressed in.
     self.start, self.end, self.step = _start, _end, _step
     self.system = _system

     self.metadata = _metadata
     self._timeDelta = timeDelta
Beispiel #20
0
    def __init__(self, client_address: str, request_port: int,
                 response_port: int):
        """Create the request socket and responder for this portal.

        Args:
            client_address: Passed to the ``Responder`` and to
                ``initSocket``.
            request_port: Stored as ``request_port`` and passed to
                ``initSocket``.
            response_port: Passed to the ``Responder``.

        Any exception raised during set-up is logged and swallowed, so
        construction always returns; the attributes assigned after the
        failing statement are then missing from the instance.
        """
        self.logger = EDASLogger.getLogger()
        self.active = True

        try:
            self.request_port = request_port
            self.zmqContext: zmq.Context = zmq.Context()
            # REP socket created from the context made on the line above.
            self.request_socket: zmq.Socket = self.zmqContext.socket(zmq.REP)
            self.responder = Responder(self.zmqContext, client_address,
                                       response_port)
            self.handlers = {}
            self.initSocket(client_address, request_port)

        except Exception as err:
            # NOTE(review): `active` stays True after a failed set-up and
            # the error is not re-raised -- confirm this is intended.
            self.logger.error(
                "@@Portal:  ------------------------------- EDAS Init error: {} ------------------------------- "
                .format(err))
Beispiel #21
0
 def __init__(self, context: zmq.Context, clientId: str, host: str,
              port: int, **kwargs):
     """Initialise the response thread and its result caches.

     Args:
         context: Stored as ``context``.
         clientId: Stored as ``clientId``.
         host: Stored as ``host``.
         port: Stored as ``port``.
         **kwargs: ``diag`` is read (default ``False``) and stored as a
             bool.

     The thread is named ``'EDAS Response Thread'``, marked as a daemon,
     and starts in ``MessageState.RESULT`` with empty caches.  The cache
     directory is taken from ``EdasEnv.CACHE_DIR``.
     """
     from edas.config import EdasEnv
     Thread.__init__(self)
     self.context = context
     self.logger = EDASLogger.getLogger()
     self.host = host
     self.port = port
     self.clientId = clientId
     self.active = True
     self.mstate = MessageState.RESULT
     # Thread.setName()/setDaemon() are deprecated since Python 3.10; the
     # `name` and `daemon` attributes are the supported equivalents.
     self.name = 'EDAS Response Thread'
     self.cached_results = {}
     self.cached_arrays = {}
     self.filePaths = {}
     self.diag = bool(kwargs.get("diag", False))
     self.daemon = True
     self.cacheDir = EdasEnv.CACHE_DIR
     self.log("Created RM, cache dir = " + self.cacheDir)
Beispiel #22
0
    def __init__(self,
                 host: str = "127.0.0.1",
                 request_port: int = 4556,
                 response_port: int = 4557,
                 **kwargs):
        """Connect the request socket and start the response manager.

        Args:
            host: Server host used for the request socket and passed to
                the ``ResponseManager``.
            request_port: Port requested for the request socket; the port
                returned by ``ConnectionMode.connectSocket`` is stored as
                ``request_port``.
            response_port: Passed to the ``ResponseManager``.
            **kwargs: Forwarded to the ``ResponseManager``.

        Any exception raised during set-up is logged, printed together
        with its traceback, and followed by ``self.shutdown()``; it is
        not re-raised.
        """
        # Bound before the try block so the error handler can always log,
        # even when one of the first set-up calls fails.
        self.logger = EDASLogger.getLogger()
        try:
            self.active = True
            self.app_host = host
            self.application_thread = None
            self.clientID = UID.randomId(6)
            self.context = zmq.Context()
            self.request_socket = self.context.socket(zmq.REQ)

            # The request socket connects to the server; this constructor
            # has no bind mode.
            self.request_port = ConnectionMode.connectSocket(
                self.request_socket, self.app_host, request_port)
            self.log("[1]Connected request socket to server {0} on port: {1}".
                     format(self.app_host, self.request_port))

            self.response_manager = ResponseManager(self.context,
                                                    self.clientID, host,
                                                    response_port, **kwargs)
            self.response_manager.start()

        except Exception as err:
            err_msg = "\n-------------------------------\nWorker Init error: {0}\n{1}-------------------------------\n".format(
                err, traceback.format_exc())
            self.logger.error(err_msg)
            print(err_msg)
            # NOTE(review): shutdown() runs on a partially initialised
            # instance here -- confirm it tolerates missing attributes.
            self.shutdown()
Beispiel #23
0
import dask
from dask.distributed import Client
from typing import List, Dict, Sequence, Mapping, Any
import xarray as xa
import time, traceback, logging
import numpy as np
from edas.workflow.modules.edas import *
from edas.util.logging import EDASLogger
from edas.workflow.module import edasOpManager
from edas.portal.parsers import WpsCwtParser

if __name__ == '__main__':
    logger = EDASLogger.getLogger()
    logger.info("STARTUP")
    dataset_path = '/Users/tpmaxwel/Dropbox/Tom/Data/GISS/CMIP5/E2H/r1i1p1/*.nc'
    dataset_ncml = '/Users/tpmaxwel/.edas/cache/collection/agg/giss_r1i1p1-tas_Amon_GISS-E2-H_historical_r1i1p1_1.ncml'

    testStr = '[ domain=[ {"name":"d0",   \n   "lat":{"start":0.0,  "end":20.0, "system":"values" }, "lon":{ "start":0.0,"end":20.0, "system":"values" }, "time":{ "start":0,"end":20, "system":"indices" } } ], ' \
              'variable=[{ "collection":"cip_merra2_mon_1980-2015", "name":"tas:v0", "domain":"d0" } ], ' \
              'operation=[{ "name":"edas.ave", "input":"v0", "domain":"d0","axes":"xy"}] ]'

    try:
        tstart = time.time()
        client = Client()

        tdefine = time.time()
        logger.info("Defining workflow")

        def get_results() -> List[xa.Dataset]:
            dataInputs = WpsCwtParser.parseDatainputs(testStr)
            request: TaskRequest = TaskRequest.new("requestId", "jobId",
Beispiel #24
0
 def __init__(self, _project: str, _experiment: str,
              appConfiguration: Dict[str, str]):
     """Store the project/experiment names and create a ProcessManager.

     ``appConfiguration`` is passed straight to ``ProcessManager``.
     """
     self.logger = EDASLogger.getLogger()
     self.project, self.experiment = _project, _experiment
     self.processManager = ProcessManager(appConfiguration)
Beispiel #25
0
 def __init__(self, job: Job, processResults, processFailure):
     """Initialise the thread with a job and its two callbacks.

     ``processResults`` and ``processFailure`` are stored as given; how
     they are invoked is decided outside this initialiser.
     """
     Thread.__init__(self)
     self.logger = EDASLogger.getLogger()
     self.job = job
     self.processResults, self.processFailure = processResults, processFailure
Beispiel #26
0
 def __init__(self, name, kernels: Dict[str, Callable[[], Kernel]]):
     """Store the kernel factories, then run the base-class initialiser.

     ``_instances`` starts empty.
     NOTE(review): the parameter is annotated as zero-argument callables
     while ``_kernels`` is annotated as taking a ``str`` -- confirm which
     signature the factories really have.
     """
     self._instances: Dict[str, Kernel] = {}
     self._kernels: Dict[str, Callable[[str], Kernel]] = kernels
     self.logger = EDASLogger.getLogger()
     OperationModule.__init__(self, name)
Beispiel #27
0
class WpsCwtParser:
    """Grammar fragments and helpers for parsing WPS-style request strings.

    The class attributes are reusable grammar elements (built from what
    look like pyparsing combinators; the imports are outside this view)
    that the ``get*Parser`` factories combine into complete parsers.
    """
    logger = EDASLogger.getLogger()

    # NOTE: `float` here and the `list` classmethod below shadow builtins
    # inside the class namespace; the names are kept because they are part
    # of the class's public surface.
    integer = (Optional("-") + Word(nums)).setParseAction(str2int)
    float = (Optional("-") + Word(nums + ".") +
             Optional(CaselessLiteral("E") + Optional("-") +
                      Word(nums))).setParseAction(str2float)
    numval = integer ^ float
    key = QuotedString('"')
    name = Word(alphanums + "-")
    token = key ^ numval
    delim = Word(",") ^ Word(";")
    vsep = Word("|") ^ Word(":")

    @classmethod
    def getDatainputsParser(cls):
        """Return the parser for a ``[ name=[ {...}, ... ], ... ]`` string."""
        keyval_map = cls.keymap(cls.key, cls.token)
        spec = cls.keymap(cls.key,
                          keyval_map ^ cls.token ^ cls.list(cls.token))
        return cls.keymap(cls.name, cls.list(spec), "[]", "=")

    @classmethod
    def getOpConnectionsParser(cls):
        """Return the parser for operation connections.

        Each item is a sequence of input names optionally followed by a
        ``|`` or ``:`` separator and an output name.
        """
        output_name = Suppress(cls.vsep) + cls.name
        input_names = cls.seq(cls.name)
        item = input_names + Optional(Group(output_name))
        return cls.seq(Group(item))

    @classmethod
    def strToDatetime(cls, dTime: str) -> datetime:
        """Parse ``dTime`` with ``dateparser`` using the UTC timezone setting.

        Returns whatever ``dateparser.parse`` produces (presumably ``None``
        for unparseable input -- confirm against the dateparser docs).
        """
        import dateparser
        dt = dateparser.parse(dTime, settings={'TIMEZONE': 'UTC'})
        # Return the parsed value; the previous code returned the
        # `datetime` type itself instead of the result.
        return dt

    @classmethod
    def isoDateStr(cls, dTime: str) -> str:
        """Return ``dTime`` re-rendered as an ISO string with second precision.

        Microseconds are dropped and everything from the first ``+`` on
        (a positive UTC offset) is cut off.
        """
        import dateparser
        dt = dateparser.parse(dTime)  # , settings={'TIMEZONE': 'UTC'} )
        return dt.replace(microsecond=0).isoformat(
            "T", timespec="seconds").split("+")[0]

    @classmethod
    def parseDatainputs(cls, datainputs) -> Dict[str, List[Dict[str, Any]]]:
        """Parse a datainputs string and return the first parse result.

        Raises:
            ParseException: Re-raised after the error column and line
                have been logged.
        """
        try:
            return cls.getDatainputsParser().parseString(datainputs)[0]
        except ParseException as err:
            cls.logger.error(
                "\n\n -----> Error parsing input at col {}: '{}'\n".format(
                    err.col, err.line))
            raise

    @classmethod
    def parseOpConnections(cls, opConnections) -> List[List[List[str]]]:
        """Parse operation connections given as a string or an iterable.

        A non-string iterable is joined with ``","`` before parsing.

        Raises:
            ParseException: Re-raised after the error column and line
                have been logged.
        """
        try:
            is_collection = (hasattr(opConnections, '__iter__')
                             and not isinstance(opConnections, str))
            opCon = ",".join(opConnections) if is_collection else opConnections
            return cls.getOpConnectionsParser().parseString(str(opCon))[0]
        except ParseException as err:
            cls.logger.error(
                "\n\n -----> Error parsing input at col {}: '{}'\n".format(
                    err.col, err.line))
            raise

    @classmethod
    def keymap(cls, key: Token, value: Token, enclosing: str = "{}", sep=":"):
        """Return a parser for one or more ``key<sep>value`` pairs.

        The pairs are wrapped in the two ``enclosing`` characters, may be
        followed by delimiters, and are converted with ``list2dict``.
        """
        elem = (key + Suppress(sep) + value + Suppress(ZeroOrMore(cls.delim)))
        return (Suppress(enclosing[0]) + OneOrMore(Group(elem)) +
                Suppress(enclosing[1])).setParseAction(list2dict)

    @classmethod
    def list(cls, item, enclosing: str = "[]"):
        """Return a parser for one or more ``item`` wrapped in ``enclosing``."""
        elem = item + Suppress(ZeroOrMore(cls.delim))
        return (Suppress(enclosing[0]) + Group(OneOrMore(elem)) +
                Suppress(enclosing[1]))

    @classmethod
    def seq(cls, item):
        """Return a parser for one or more ``item`` with no enclosing marks."""
        elem = item + Suppress(ZeroOrMore(cls.delim))
        return Group(OneOrMore(elem))

    @classmethod
    def postProcessResult(
        cls, result: Dict[str, List[Dict[str, Any]]]
    ) -> Dict[str, List[Dict[str, Any]]]:
        """Return ``result`` unchanged (currently a pass-through hook)."""
        return result

    @staticmethod
    def get(altKeys: List[str], spec: Dict[str, Any]) -> Any:
        """Return the first non-``None`` value in ``spec`` among ``altKeys``.

        Returns ``None`` when no key yields a value.
        """
        for key in altKeys:
            value = spec.get(key, None)
            if value is not None:
                return value
        return None

    @staticmethod
    def split(sepKeys: List[str], value: str) -> List[str]:
        """Split ``value`` on the first separator in ``sepKeys`` it contains.

        Returns ``[value]`` when none of the separators occurs.
        """
        for sep in sepKeys:
            if sep in value:
                return value.split(sep)
        return [value]

    @staticmethod
    def randomStr(length) -> str:
        """Return ``length`` random ASCII letters/digits from ``SystemRandom``."""
        tokens = string.ascii_uppercase + string.ascii_lowercase + string.digits
        return ''.join(random.SystemRandom().choice(tokens)
                       for _ in range(length))
Beispiel #28
0
 def __init__(self):
     """Create the empty module registry and run ``build()``.

     ``utilNodes`` starts out as a set holding only ``"edas.metrics"``.
     """
     self.logger = EDASLogger.getLogger()
     self.utilNodes = {"edas.metrics"}
     self.operation_modules: Dict[str, KernelModule] = {}
     self.build()
Beispiel #29
0
 def __init__(self):
     """Acquire the logger and create the test manager.

     The manager is built as ``LocalTestManager("PlotTESTS", "demo")``
     (presumably project and experiment names -- confirm against
     ``LocalTestManager``).
     """
     self.logger = EDASLogger.getLogger()
     self.mgr = LocalTestManager("PlotTESTS", "demo")
Beispiel #30
0
 def __init__(self, name: str):
     """Store ``name`` as ``_name`` and acquire the logger."""
     self._name = name
     self.logger = EDASLogger.getLogger()