def convertDart2Python(cls, results, deviceName):
    """! Convert a DART results payload into the result dict of a single device.
    @param results DART payload of the form {'results': [...], 'job': {...}}
    @param deviceName name of the device whose result should be extracted
    @return device_result dict {'duration': ..., 'result': ...} and the result id
    """
    device_result = {'duration': None, 'result': None}
    resultID = None
    logger = LogServer(__name__)
    for result in results['results']:
        # the logical device name is the part of the worker id before the first "-"
        workerName = result['worker'].split("-", 1)[0]
        if 'success' in result.keys() and deviceName == workerName:
            device_result['duration'] = result['duration']
            device_result['result'] = cls.unpackBackMessage(result['success'])
            resultID = result['id']
        if 'error' in result.keys() and deviceName == workerName:
            device_result['duration'] = result['duration']
            device_result['result'] = {"error": result['error']}
            resultID = result['id']
    logstring = ""
    if device_result['result'] is not None:
        logstring = logstring + str(device_result['duration']) + " "
        logstring = logstring + str(resultID) + " "
        for key, value in device_result['result'].items():
            logstring = logstring + str(key) + " "
            logstring = logstring + str(value)
    logger.log().debug("MessageTranslator.convertDart2Python " + logstring)
    return device_result, resultID
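# Example (hedged): the payload below is a made-up illustration of the 'results'
# structure this converter expects (compare DartRuntime.get_TaskResult further
# below). Only the worker-name matching is shown here, since unpackBackMessage
# needs a real packed message.
example_results = {
    'results': [
        {'id': '4d045be3-fb57-44f4-902f-b93abab3d830',
         'job': 'task_one',
         'worker': 'device_one-PSL188-1',
         'start_time': '1611135657000',
         'duration': '6.07332611',
         'success': '<packed result string>'},
    ],
    'job': {'id': 'task_one', 'status': '1'},
}
for entry in example_results['results']:
    # only the part before the first "-" identifies the logical device
    print(entry['worker'].split("-", 1)[0] == "device_one")   # True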
def convertPython2Dart(cls, list_client, list_params):
    """convert default message to dart format and return feasible format"""
    task_list = []
    logger = LogServer(__name__)
    #TODO: serialize parameters
    logstring = ""
    for client, params in zip(list_client, list_params):
        dict_client = {'location': client, 'parameter': cls.packMessage(params)}
        logstring = logstring + " " + str({'location': client, 'parameter': params})
        task_list.append(dict_client)
    logger.log().debug("MessageTranslator.convertPython2Dart " + logstring)
    return task_list
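# Example (hedged): the same zip-and-wrap logic with a stand-in for packMessage,
# to show the DART task-list shape; the device names and parameters are placeholders.
def _pack_stub(params):
    return str(params)  # stand-in for the real packMessage encoding

example_clients = ["device_one", "device_two"]
example_params = [{"param1": 0, "param2": 1}, {"param1": 10, "param2": 5}]
example_task_list = [{'location': c, 'parameter': _pack_stub(p)}
                     for c, p in zip(example_clients, example_params)]
# -> [{'location': 'device_one', 'parameter': "{'param1': 0, 'param2': 1}"}, ...]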
def __init__(self, devices, task, maxSizeDeviceHolder=10, maxNumDeviceHolder=5,
             maxNumChildAggregators=5, logServer=None):
    """!
    @param task instance of a task
    @param deviceHolders list of deviceHolders
    @param childAggregators list of childAggregators
    @param maxSizeDeviceHolder maximum number of devices for deviceHolder
    @param maxNumDeviceHolder maximum number of deviceHolders per aggregator/childaggregator
    @param maxNumChildAggregators maximum number of allowed childAggregators
    @param aggregatedResult aggregated result of local task results
    @param logServer storage of the results and/or aggregated result
    """
    self._task = task
    self._maxSizeDeviceHolder = maxSizeDeviceHolder
    self._maxNumDeviceHolder = maxNumDeviceHolder
    self._maxNumChildAggregators = maxNumChildAggregators
    self._deviceHolders = []
    self._childAggregators = []
    numDevices = task.numDevices
    if maxNumChildAggregators > 0:
        amount_childAggregators = self.compute_required_childAggregators_count(numDevices)
        self._childAggregators = [
            DeviceAggregator(
                [],  # init at first without devices; devices will be added at the end of constructor
                task,
                maxSizeDeviceHolder=maxSizeDeviceHolder,
                maxNumDeviceHolder=maxNumDeviceHolder,
                maxNumChildAggregators=0,
                logServer=logServer)
            for _ in range(amount_childAggregators)
        ]
    self._aggregatedResult = None
    self._logServer = logServer
    self._instantiateDeviceHolders()
    for device in devices:
        self.addSingleDevice(device)  # add here task to devices
    self.logger = LogServer(__name__)
    self.logger.log().info("Aggregator initiated")
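# Usage sketch (hedged): an aggregator built for one task over a list of already
# registered DeviceSingle instances (empty here to keep the sketch self-contained);
# DefaultTask is the task class defined further below, and the file path is made up.
# With maxNumChildAggregators=0 the aggregator presumably holds at most
# maxNumDeviceHolder * maxSizeDeviceHolder devices itself.
example_task = DefaultTask(taskName="round_one", parameterlists={"epochs": 1},
                           filePath="tasks/train.py", numDevices=0)
aggregator = DeviceAggregator(
    devices=[],                     # DeviceSingle instances would be passed here
    task=example_task,
    maxSizeDeviceHolder=10,
    maxNumDeviceHolder=5,
    maxNumChildAggregators=0,       # > 0 adds a layer of child aggregators
    logServer=None,
)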
def __init__(self, testMode=False, errorProbability=0, logLevel=3,
             maxSizeDeviceHolder=10, maximalNumberOpenJobs=10):
    """!
    @param testMode if True, use the simulated DART client instead of a live server
    @param errorProbability probability of simulated request errors in test mode
    @param logLevel console log level: 0=DEBUG, 1=INFO, 3=ERROR, otherwise FATAL
    @param maxSizeDeviceHolder maximal size for deviceHolders
    @param maximalNumberOpenJobs maximal number of allowed open jobs
    @todo can runtime and maxSizeDeviceHolder be moved to the selector? Or first
          create the selector and then add the runtime to it? Better if we have
          multiple servers?
    """
    self._runtime = None
    self._selector = None
    self._maxSizeDeviceHolder = maxSizeDeviceHolder
    self._maximalNumberOpenJobs = maximalNumberOpenJobs
    self._initTask = None
    self._testMode = testMode
    self._currentDeviceNames = []
    self._errorProbability = int(errorProbability)

    # map the integer log level to a LogServer console level
    loglevel = LogServer.ERROR
    if int(logLevel) == 0:
        loglevel = LogServer.DEBUG
    elif int(logLevel) == 1:
        loglevel = LogServer.INFO
    elif int(logLevel) == 3:
        loglevel = LogServer.ERROR
    else:
        loglevel = LogServer.FATAL
    self.logger = LogServer(__name__, console_level=loglevel,
                            file_level=LogServer.DEBUG)
    self.logger.log().info("Workflow manager initiated")
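# Usage sketch (hedged): the enclosing workflow-manager class is not named in this
# snippet, so WorkflowManager is a placeholder; the argument values are examples.
manager = WorkflowManager(
    testMode=True,          # use the simulated DART client instead of a live server
    errorProbability=0,     # stored as int, see the constructor above
    logLevel=1,             # INFO on the console; the log file always gets DEBUG
    maxSizeDeviceHolder=10,
    maximalNumberOpenJobs=10,
)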
class Client: ## # Initializes the client # @param server the server addr, e.g., "https://127.0.0.1:7777" # @param client_key the key of the client for identification (unused atm) def __init__(self, server, client_key, probability_error=0, testmode=False): self.server = server self.key = client_key self.logger = LogServer(__name__) self.logger.log().info("dart client initialized") self.testmode = testmode if self.testmode: self.probability_error = probability_error self.worker_list = [] self.job_list = [] def getJob(self, jobName): """! Get the job instance by name - used in testmode """ self.logger.log().info("Client.getJob: " + jobName) for job in self.job_list: if job.name == jobName: return job self.logger.log().info("Client.getJob: " + jobName + " not found") return None ## # Stop the servers def stop_servers(self): """! testmode: simulate false request codes """ self.logger.log().info("Client.stop_servers") if self.testmode: if random.uniform(0, 1) < self.probability_error: raise Exception('response not ok') return r = requests.delete(self.server + "/server/", json={'key': self.key}, verify=False) if r.status_code != requests.codes.ok: raise Exception('response not ok') ## # Gets information about the servers # # The return type has the following structure # { # 'servers' : [ # {'host' : '<host_name>', 'port' : '<port_name>'} # ] # } def get_server_information(self): if self.testmode: raise NotImplementedError("not implemented yet!") else: r = requests.get(self.server + "/server/", json={'key': self.key}, verify=False) if r.status_code != requests.codes.ok: raise Exception('response not ok') return json.loads(r.content) ## # Adds workers # # @param hosts list of all the hosts # @param workers_per_host the amount of workers to add per host # @param name the name of the worker # @param capabilities list of the capabilities of the workers # @param shm_size shared memory size # @param ssh_options an object with the following attributes # { "username": "******", "port": "...", "public-key": "...", "private-key": "..." } def add_worker(self, hosts, workers_per_host, worker_name, capabilities, shm_size, ssh={}): self.logger.log().debug("Client.add_worker " + str(locals())) if self.testmode: #capabilities, hosts is a list, unzip it for host, capability in zip(hosts, capabilities): worker = Worker(self.key, host, workers_per_host, worker_name, capability, shm_size) self.worker_list.append(worker) if random.uniform(0, 1) < self.probability_error: self.logger.log().error( "Client.add_worker: could not add worker " + str(locals())) raise Exception('response not ok') else: r = requests.post(self.server + "/worker/", json={ 'key': self.key, 'name': worker_name, 'hosts': hosts, 'workers_per_host': workers_per_host, 'capabilities': capabilities, 'shm_size': shm_size, 'ssh': ssh }, verify=False) if r.status_code != requests.codes.ok: self.logger.log().error( "Client.add_worker: could not add worker " + str(locals())) raise Exception('response not ok') ## # Removes workers from the specified hosts # # @param hosts a list of hosts # @param ssh_options an object with the following attributes { "username": "******", "port": "...", "public-key": "...", "private-key": "..." 
} def remove_workers(self, hosts, ssh={}): self.logger.log().debug("Client.remove_workers " + str(locals())) if self.testmode: for worker in self.worker_list: if worker.hosts == hosts: self.worker_list.remove(worker) if random.uniform(0, 1) < self.probability_error: self.logger.log().error( "Client.remove_workers: could not remove workers " + str(locals())) raise Exception('response not ok') else: r = requests.delete(self.server + "/worker/", json={ 'key': self.key, 'hosts': hosts, 'ssh': ssh }, verify=False) if r.status_code != requests.codes.ok: self.logger.log().error( "Client.remove_workers: could not remove workers " + str(locals())) raise Exception('response not ok') ## # Get workers # # @param hosts list of all the hosts # @param workers_per_host the amount of workers to add per host # @param name the name of the worker # @param capabilities list of the capabilities of the workers # @param shm_size shared memory size def get_workers(self): self.logger.log().debug("Client.get_workers " + str(locals())) if self.testmode: list_worker = [] for worker in self.worker_list: dict_worker = {} dict_worker['name'] = worker.worker_name dict_worker['count'] = 1 dict_worker['capabilities'] = '' list_worker.append(dict_worker) self.logger.log().debug("Client.get_workers: " + str(list_worker)) return {'workers': list_worker} else: r = requests.get(self.server + "/worker/", json={'key': self.key}, verify=False) if r.status_code != requests.codes.ok: self.logger.log().error("Client.get_workers: " + str(locals())) raise Exception('response not ok') return json.loads(r.content) ## # Adds a job definition # # @param name the name of the job # @param module_path the path to the module on the clients # @param method the method from the module to execute def add_job(self, name, module_path, method): self.logger.log().debug("Client.add_job " + str(locals())) if self.testmode: job = Job(name, module_path, method) self.job_list.append(job) if random.uniform(0, 1) < self.probability_error: self.logger.log().error("Client.add_job: could not add job. " + str(locals())) raise Exception('response not ok') else: r = requests.post(self.server + "/job/", json={ 'key': self.key, 'name': name, 'module_path': module_path, 'method': method }, verify=False) if r.status_code != requests.codes.ok: self.logger.log().error("Client.add_job: could not add job. " + str(locals())) raise Exception('response not ok') ## # Adds tasks to a specific job # # @param jobName the name of the job # @param location_and_parameters [ { 'location' : '...', 'parameter' : ' ...'}, ...] list def add_tasks(self, jobName, location_and_parameters): self.logger.log().debug("Client.add_tasks " + str(locals())) if self.testmode: rightJob = self.getJob(jobName) for task in location_and_parameters: workerName = task['location'] taskParameter = task['parameter'] for worker in self.worker_list: if worker.worker_name == workerName: task = Task(worker, taskParameter) rightJob.task_list.append(task) if random.uniform(0, 1) < self.probability_error: self.logger.log().error( "Client.add_tasks: could not add tasks. " + str(locals())) raise Exception('response not ok') else: rightJob.start_computation() else: r = requests.post(self.server + "/job/" + jobName + "/tasks/", json={ 'key': self.key, 'location_and_parameters': location_and_parameters }, verify=False) if r.status_code != requests.codes.ok: self.logger.log().error( "Client.add_tasks: could not add tasks. 
" + str(locals())) raise Exception('response not ok') ## # Gets information about a job # # The return type has the following structure # { # 'job' : { # 'id' : '....', # 'status' : '...', # 'config' : { # 'python_home' : '...', # 'output_directory' : '...', # 'module' : '...', # 'is_module_path' : '...', # 'method' : '...' # } # } # } # # @param job the name of the job def get_job_info(self, jobName): self.logger.log().debug("Client.get_job_info " + str(locals())) if self.testmode: self.logger.log().error("Client.get_job_info: not implemented.") raise NotImplementedError("not implemented yet!") else: r = requests.get(self.server + "/job/" + jobName + "/", json={'key': self.key}, verify=False) if r.status_code != requests.codes.ok: self.logger.log().error("Client.get_job_info: " + str(locals())) raise Exception('response not ok') return json.loads(r.content) ## # Stops a job # # @param job the job name def stop_job(self, jobName): self.logger.log().debug("Client.stop_job " + str(locals())) if self.testmode: rightJob = self.getJob(jobName) self.job_list.remove(rightJob) if random.uniform(0, 1) < self.probability_error: self.logger.log().error("Client.stop_job failed. " + str(locals())) raise Exception('response not ok') else: r = requests.delete(self.server + "/job/" + jobName + "/", json={'key': self.key}, verify=False) if r.status_code != requests.codes.ok: self.logger.log().error("Client.stop_job failed. " + str(locals())) raise Exception('response not ok') ## # Gets the job status of a job # # @return the job status def get_job_status(self, jobName): self.logger.log().debug("Client.get_job_status " + str(locals())) if self.testmode: job_exists = False for job in self.job_list: if job.name == jobName: job_exists = True if job_exists == False: return job_status(0) if random.uniform(0, 1) < self.probability_error: self.logger.log().error("Client.get_job_status " + str(locals())) raise Exception('response not ok') else: return job_status(1) else: r = requests.get(self.server + "/job/" + jobName + "/status/", json={'key': self.key}, verify=False) if r.status_code == requests.codes.not_found: return job_status.unknown if r.status_code != requests.codes.ok: self.logger.log().error("Client.get_job_status " + str(locals())) raise Exception('response not ok') response = json.loads(r.content) return job_status(int(response['job']['status'])) ## # Gets results of the specified job # # Gets at most 'amount' different job results. Note that # this function does not delete the results from the # server. Hence, successive calls will return the same # results. # # The return type has the following structure # { # 'results' : [ # { # 'id' : '...', # 'job' : '...', # 'worker' : '...', # 'start_time' : '...', # 'duration' : '...', # 'success' : '...' or 'error' : '...' # }, # { # ... # }, # ... # ], # 'job' : { 'id' : '...', 'status' : '...'} # } # # @param job the job name # @param amount the maximal amounts of jobs to get # @param worker_regex a regex that the worker of the result has to match. Empty regex matches everything. 
def get_job_results(self, jobName, amount, worker_regex=""): self.logger.log().debug("Client.get_job_results " + str(locals())) if self.testmode: rightJob = self.getJob(jobName) if random.uniform(0, 1) < self.probability_error: self.logger.log().error("Client.get_job_results " + str(locals())) raise Exception('response not ok') if rightJob: return rightJob.resultDict else: self.logger.log().info( "Client.get_job_results: no such job running on server: " + jobName) return {'results': [], 'job': {}} else: r = requests.get(self.server + "/job/" + jobName + "/results/", json={ 'key': self.key, 'amount': amount, 'worker_regex': worker_regex }, verify=False) if r.status_code != requests.codes.ok: self.logger.log().error("Client.get_job_results " + str(locals())) raise Exception('response not ok') return json.loads(r.content) ## # Removes a job result from the server # # @param job the name of the job # @param result the id of the result def delete_job_result(self, jobName, resultID): self.logger.log().debug("Client.delete_job_result " + str(locals())) if self.testmode: rightJob = self.getJob(jobName) rightJob.delete(resultID) if random.uniform(0, 1) < self.probability_error: self.logger.log().error("Client.delete_job_result " + str(locals())) raise Exception('response not ok') else: r = requests.delete(self.server + "/job/" + jobName + "/results/" + resultID + "/", json={'key': self.key}, verify=False) if r.status_code != requests.codes.ok: self.logger.log().error("Client.delete_job_result " + str(locals())) raise Exception('response not ok')
class DefaultTask(TaskBase): """! DefaultTask is a subclass of TaskBase, and therefore a specific category of tasks. As default task, we define to execute a given task with the same parameter settings (training, etc) on all available devices (possibly with a max number) that fulfill the (optional) hardware requirements. """ def __init__(self, taskName=None, parameterlists={}, model=None, hardwareRequirements={}, filePath=None, configFile=None, numDevices=-1): """! Instantiates a DefaultTask. Information about the configuration have to be either provided in the function call or via a configFile. @param taskName (optional) Name of the task to be stored in the LogServer @param parameterlists List of parameters to be used in training @param model (optional) Model to be executed @param hardwareRequirements (optional) mandatory hardware requirements @param filePath the path to the file to be called for execution on the device @param configFile (optional) the configuration file of this task @param numDevices the total amount of devices on which the task shall to be executed """ self._parameterlists = parameterlists self._model = model self._hardwareRequirements = hardwareRequirements self._numDevices = numDevices self._configFile = configFile self._filePath = filePath self._taskName = taskName self.logger = LogServer(__name__) self.logger.log().info('DefaultTask initiated') self.checkConfig() @property def numDevices(self): return self._numDevices @property def parameterlists(self): """! property: parameterlists. Implements the getter. @todo: define the format of parameter lists """ return self._parameterlists @parameterlists.setter def parameterlists(self, new_parameterlist): """! property: parameterlists. Implements the setter. @param new_parameterlist the new list of parameters """ self._parameterlists = new_parameterlist @property def taskName(self): """! property: taskName. Implements the getter. The taskName identifies the task (in combination with a timestamp) in the LogServer. """ return self._taskName @taskName.setter def taskName(self, new_taskName): """! property: taskName. Implements the setter. @param new_taskName the new name of the task """ self._taskName = new_taskName @property def hardwareRequirements(self): """! property: hardwareRequirements. Implements the getter. Hardware requirements define the (optional) mandatory requirements for the devices. @todo: define the format of hardware requirements """ return self._hardwareRequirements @hardwareRequirements.setter def hardwareRequirements(self, new_hardwareRequirements): """! property: hardwareRequirements. Implements the setter. @param new_hardwareRequirements the new mandatory hardware requirements """ self._hardwareRequirements = new_hardwareRequirements @property def model(self): """! property: model. Implements the getter. A model is an optional parameter for a task. In case a model is provided, the model is sent to the devices where it will be trained/used for inference. In case no model is provided, the task executes some given functions on the device. """ return self._model @model.setter def model(self, new_model): """! property: model. Implements the setter. @param new_model the new model to be used """ self._model = new_model @property def filePath(self): """! property: filePath. Implements the getter. A model is an optional parameter for a task. In case a model is provided, the model is sent to the devices where it will be trained/used for inference. 
In case no model is provided, the task executes some given functions on the device. """ return self._filePath @filePath.setter def filePath(self, new_filePath): """! property: filePath. Implements the setter. @param new_filePath the new file path """ self._filePath = new_filePath @property def configFile(self): """! property: configFile. Implements the getter. The configFile is an optional file provided at instantiation to describe the task configuration, i.e. it contains the parameters for training, (optionally) the model to be executed, which hardwareRequirements are mandatory, the name of the task, as well as the path to the python script to be executed. @todo define the structure of the configFile """ return self._configFile def loadConfigFile(self): """! @todo implement this function """ return def writeConfig(self, taskName, parameterlists, hardwareRequirements, model, filePath): """! @todo implement Writes the configuration of the task to disk. @param taskName Name of the task @param parameterlists List of parameters @param hardwareRequirements mandatory hardware requirements @param model model to be trained @param filePath path to the file to be executed on the device """ pass def checkConfig(self): """! This method ensures that all necessary parameters for the given task are provided: either by given parameters in the instantiation of the subclass or by a given filepath to a configuration file. @todo define configfile """ config = {} valid = False if self._parameterlists is None: if self._configFile is None: raise ValueError("No configuration provided") else: config = self.loadConfigFile() else: config['parameters'] = self._parameterlists config['hardwareRequirements'] = self._hardwareRequirements config['model'] = self._model config['taskName'] = self._taskName config['filePath'] = self._filePath # check the configuration if self._parameterlists is None: self.writeConfig(config['taskName'], config['parameters'], config['hardwareRequirements'], config['model'], config['filePath']) self.logger.log().info("check config: " + str(locals())) return valid
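# Usage sketch (hedged): a DefaultTask that runs the same script with the same
# parameters on up to four devices; the file path and parameters are made up,
# and numDevices=-1 appears to mean "no explicit limit".
task = DefaultTask(
    taskName="round_one_training",
    parameterlists={"epochs": 5, "learning_rate": 0.01},
    hardwareRequirements={},          # no mandatory constraints
    filePath="tasks/train.py",        # executed on every selected device
    numDevices=4,
)
print(task.numDevices)                # 4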
class DeviceSingle(AbstractDeviceBase): """! DeviceSingle is the interface to the real pyhsical device. @param name name given by the user or runtime @param ip_address ip address of physical device @param physical_name name by which the device connted hisself to the server @param hardwareConfig hardware properties of physical device @param openTaskDict dictionary of tasks, which are/will be running on the device format: {"task_name": {'param1': value , 'param2': value}} @param finishedTaskDict dictionary of tasks, which have already a result format: {'duration': value, 'result': {'result_0': value, 'result_1': value}} @param dartRuntime runtime for connection to physical device @param initialized boolean , if device already has received the init task """ def __init__( self , name , ipAdress , port , dartRuntime = None , physicalName = None , hardwareConfig = None , taskDict = {} , initTask = None ): self.name = name if physicalName == None: self.phyiscalName = self.name self.ipAdress = ipAdress self.port = port self._hardwareConfig = hardwareConfig self._openTaskDict = taskDict self._finishedTaskDict = {} self._dartRuntime = dartRuntime self._initTask = initTask if initTask is not None: self._initialized = False else: self._initialized = True self.logger = LogServer(__name__) self.logger.log().info("DeviceSingle " + name + " instantiated") def __str__(self): return self.name @property def hardwareConfig(self): """! property: hardwareConfig. Implements the getter """ return self._hardwareConfig @hardwareConfig.setter def hardwareConfig(self, newHardwareConfig): """! property: hardwareConfig. Implements the setter @param newHardwareConfig the new hardware config """ self._hardwareConfig = newHardwareConfig return @property def openTaskDict(self): """! property: openTaskDict. Implements the getter """ return self._openTaskDict @openTaskDict.setter def openTaskDict(self, newDict): """! property: openTaskDict. Implements the setter @param newDict the new open task dict """ self._openTaskDict = newDict @property def finishedTaskDict(self): """! property: finishedTaskDict. Implements the getter """ return self._finishedTaskDict @finishedTaskDict.setter def finishedTaskDict(self, newDict): """! property: finishedTaskDict. Implements the setter @param newDict the new finished task dict """ self._finishedTaskDict = newDict @property def dartRuntime(self): """! property: dartRuntime. Implements the getter """ return self._dartRuntime @dartRuntime.setter def dartRuntime(self, newRuntime): """! property: dartRuntime. Implements the setter @param newRuntime the new runtime """ self._dartRuntime = newRuntime @property def initTask(self): """! property: initTask. Implements the getter """ return self._initTask @initTask.setter def initTask(self, newInitTask): """! property: newInitTask. Implements the setter. Update all devices directly with new init task @param newInitTask instance of class task @todo: check if new init task result is returned """ self._initTask = newInitTask @property def initialized(self): """! property: initialized. Implements the getter """ if self._initialized == False: initTaskName = self.initTask.taskName if self.has_taskResult(initTaskName): init_result = self.get_taskResult(initTaskName) if init_result.resultList[0] is None: self._initialized = True return self._initialized @initialized.setter def initialized(self, boolInit): """! property: initialized. Implements the setter @param boolInt a boolean """ self._initialized = boolInit def isOpenTask(self, taskName): """! 
Check if device has an open task with such a name. @param taskName string with task name @return boolean """ if taskName in self._openTaskDict.keys(): return True else: return False def removeOpenTask(self, taskName): """! Check if device has an open task with such a name and remove it. In the other case through a KeyError @param taskName string with task name """ if taskName in self._openTaskDict.keys(): del self._openTaskDict[taskName] else: raise KeyError return def get_number_openTasks(self): """! Determine the number of open tasks of the device. @return int """ return len(self.openTaskDict) def is_online(self): """! Check if the device is currently reachable. @return boolean @todo implement the check """ return True def getOpenTaskParameter(self, taskName): """! Return the parameter of an open task. Raise an error when a task with such a name is not in openTaskDict @param taskName string with task name @return dict with format {'param1': value , 'param2': value} """ if taskName in self.openTaskDict.keys(): return self.openTaskDict[taskName] else: raise KeyError("Open task with name", taskName, "doesn't exist!") def _getFinishedTaskResult(self, taskName): """! Return the parameter of an already finished task. Raise an error when a task with such a name is not in finishedTaskDict @param taskName string with task name @return dict with format {'duration': value, 'result': {'result_0': value, 'result_1': value}} """ if taskName in self.finishedTaskDict.keys(): return self.finishedTaskDict[taskName] else: raise KeyError("Finished task with name", taskName, "doesn't exist!") def getLog(self, taskName): """! Get the log of the device results for this task. In the moment we get these results form the finishedTaskDict. In the future there can be a more advanced way like a database. @param task instance of class task """ if taskName in self._finishedTaskDict.keys(): return self._getFinishedTaskResult(taskName) else: return False def get_taskResult(self, taskName): """! Check if the taskName is known from current or old tasks. If true we check if the result is already logged in finishedTaskDict. If not get the result from runtime and check with has_taskResult is the result is only a place holder for an incoming result. If not remove the task from the dict of open task @param taskName name of the task @return instance of taskResult """ if self.hasTask(taskName): if taskName in self._finishedTaskDict.keys(): return self._getFinishedTaskResult(taskName) else: result, resultID = self.dartRuntime.get_TaskResult(taskName, self.name) taskResult = TaskResult( self.name , result["duration"] , result["result"] ) if self.has_taskResult(taskName): self.removeOpenTask(taskName) self._addFinishedTask(taskName, taskResult) self.dartRuntime.remove_result_from_server(taskName, resultID) return taskResult else: raise KeyError("No task with name " + taskName) def startTask(self, task): """! Before starting a task, the user must it add to the device. To start a task the runtime must have already a job with the taskName. Add this job if necessary. Afterwards broadcast a list with only one device entry to runtime. 
@param task instance of task """ taskName = task.taskName if not self.hasTask(taskName): raise ValueError("Add the task >>" + taskName + "<< to device >>" + self.name + "<< before starting the task!") #return 0 means unknown if self.dartRuntime.get_job_status(taskName) == 0: self.dartRuntime.add_job( taskName , task.filePath , task.executeFunction ) self.dartRuntime.broadcastTaskToDevices( taskName , [self.name] , [self.getOpenTaskParameter(taskName)] ) def has_taskResult(self, taskName): """! Check the taskResult. If the result has the key duration with value None than the device hasn't anything returned yet. @param taskName name of the task @return boolean True or False @todo atm hacky because of REST API. To check if result is there, we must get it and check the components of the result. """ if self.hasTask(taskName): if taskName in self._finishedTaskDict.keys(): return True else: result, resultID = self.dartRuntime.get_TaskResult(taskName, self.name) if result['duration'] == None: return False else: return True else: raise KeyError("No task with name " + taskName) def addTask(self, taskName, taskParameter): """! Add a new open task with name and parameters to openTaskDict @param taskName string of task name @param taskParamerer dict in format {'param1': value , 'param2': value} """ if taskName in self._openTaskDict.keys(): raise KeyError(taskName + " already in openTaskDict!") tasks = self._openTaskDict tasks[taskName] = taskParameter self.openTaskDict = tasks def _addFinishedTask(self, taskName, taskResult): """! Add a new finished task with name and results to finishedTaskDict @param taskName string of task name @param taskResult dict with format {'duration': value , 'result': {'result_0': value, 'result_1': value} } """ if taskName in self._finishedTaskDict.keys(): raise KeyError(taskName + " already in finishedTaskDict!") tasks = self._finishedTaskDict tasks[taskName] = taskResult self.finishedTaskDict = tasks def hasTask(self, taskName): """! Check if the device has a open task with such a name @param taskName name of task @return boolean True/False """ if taskName in self._openTaskDict.keys() or taskName in self._finishedTaskDict.keys(): return True else: return False
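# Usage sketch (hedged): local bookkeeping of open tasks on a single device.
# No runtime is attached, so only the methods that do not talk to the DART server
# are exercised; name, address and port are placeholders.
device = DeviceSingle(name="device_one", ipAdress="192.168.0.17", port=2883)

device.addTask("task_one", {"param1": 0, "param2": 1})
print(device.isOpenTask("task_one"))               # True
print(device.getOpenTaskParameter("task_one"))     # {'param1': 0, 'param2': 1}
print(device.get_number_openTasks())               # 1

device.removeOpenTask("task_one")
print(device.hasTask("task_one"))                  # False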
class SpecificParameterTask(TaskBase):
    """!
    SpecificParameterTask is a subclass of TaskBase. Its parameterlists dict is keyed
    by device name, so each listed device receives its own parameter set; the keys
    also define the devices the task addresses (see specificDevices).
    """

    def __init__(self, taskName, parameterlists={}, model=None,
                 hardwareRequirements={}, filePath=None, configFile=None):
        self._parameterlists = parameterlists
        self._model = model
        self._hardwareRequirements = hardwareRequirements
        self._configFile = configFile
        self._filePath = filePath
        self._taskName = taskName
        self.logger = LogServer(__name__)
        self.logger.log().info('SpecificParameterTask initiated')
        self.checkConfig()

    @property
    def parameterlists(self):
        return self._parameterlists

    @parameterlists.setter
    def parameterlists(self, new_parameterlist):
        self._parameterlists = new_parameterlist

    @property
    def model(self):
        return self._model

    @model.setter
    def model(self, new_model):
        self._model = new_model

    @property
    def specificDevices(self):
        return list(self._parameterlists.keys())

    @property
    def configFile(self):
        return self._configFile

    def writeConfig(self, taskName, parameterlists, hardwareRequirements, model, filePath):
        # signature aligned with the call in checkConfig (and with DefaultTask.writeConfig)
        raise NotImplementedError("not implemented yet")

    def checkConfig(self):
        config = {}
        valid = False
        if self._parameterlists is None:
            if self._configFile is None:
                raise ValueError("No configuration provided")
            else:
                config = self.loadConfigFile()
        else:
            config['parameters'] = self._parameterlists
            config['hardwareRequirements'] = self._hardwareRequirements
            config['model'] = self._model
            config['taskName'] = self._taskName
            config['filePath'] = self._filePath
        # check the configuration
        # todo: define configfile
        if self._parameterlists is None:
            self.writeConfig(config['taskName'], config['parameters'],
                             config['hardwareRequirements'], config['model'],
                             config['filePath'])
        return valid

    def loadConfigFile(self):
        raise NotImplementedError("not implemented yet")
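# Usage sketch (hedged): per-device parameters keyed by device name; the keys
# double as the list of target devices. Names, values and the file path are placeholders.
task = SpecificParameterTask(
    taskName="personalised_training",
    parameterlists={
        "device_one": {"epochs": 5, "learning_rate": 0.01},
        "device_two": {"epochs": 3, "learning_rate": 0.05},
    },
    filePath="tasks/train.py",
)
print(task.specificDevices)    # ['device_one', 'device_two']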
class Selector(): """! Selector has the knowledge about all connected devices. The Selector is responsible to shedule devices to the device holder based on (optional) hardware requirements. """ """! @param _maxNumDeviceHolder maximal number of deviceHolders per aggregator/childaggregator @param _maxNumChildAggregators maximal number of allowed childAggregators """ _maxNumDeviceHolder = 2 _maxNumChildAggregators = 2 def __init__(self, runtime=None, maxSizeDeviceHolder=-1, initTask=None): """! Instantiate a Selector (singleton). @param runtime The runtime for the connections to physical devices @param maxSizeDeviceHolder Maximal amount of devices in a device holder @param devices List of connected devices @param aggregators List of aggregators @param device_holders List of device_holders @param taskQueue List of tasks in queue @param initTask task which must be executed at each device firstly """ self._runtime = runtime self._maxSizeDeviceHolder = maxSizeDeviceHolder self._devices = [] self._aggregators = [] self._device_holders = [] self._taskQueue = [] self._initTask = initTask self.logger = LogServer(__name__) self.logger.log().info('Selector initiated') @property def runtime(self): """! property: runtime. Implements the getter """ return self._runtime @property def devices(self): """! property: devices. Implements the getter for the registeredDevices. Get the registeredDevices directly from runtime. For new devices send initTask directly out. """ self._devices = self._runtime.registeredDevices if self._initTask: self.send_initTask_to_newDevices(self._devices) return self._devices @property def deviceNames(self): """! property: name of devices. Implements the getter @todo: is this property necessary? """ self.logger.log().debug("selector.deviceNames: " + str([device.name for device in self.devices])) return [device.name for device in self.devices] @property def device_hardwareConfigs(self): """! property: device_hardwareConfigs. Implements the getter @todo: is this property necessary ? """ self.logger.log().debug( "selector.device_hardwareConfigs: " + str([device.hardwareConfig for device in self.devices])) return [device.hardwareConfig for device in self.devices] @property def device_holders(self): """! property: device_holders. Implements the getter """ return self._device_holders @device_holders.setter def device_holders(self, newDeviceHolders): """! property: device_holders. Implements the setter @param newDevice_holders the new list of device_holders """ self.logger.log().debug("selector. set new deviceholder") self._device_holders = newDeviceHolders @property def initTask(self): """! property: initTask. Implements the getter """ return self._initTask @initTask.setter def initTask(self, newInitTask): """! property: newInitTask. Implements the setter. Update all devices directly with new init task @param newInitTask instance of class task @todo: check if new init task result is returned """ self.logger.log().debug("selector. set new initTask") if not isinstance(newInitTask, InitTask): raise ValueError("object is no instance of InitTask") self._initTask = newInitTask self.send_initTask_to_newDevices(self.devices) @property def aggregators(self): """! property: aggregators. Implements the getter """ return self._aggregators @aggregators.setter def aggregators(self, newAggregators): """! property: aggregators. Implements the setter @param newAggregators the new list of aggregators """ self._aggregators = newAggregators @property def maximal_size_device_holder(self): """! 
property: maxSizeDeviceHolder. Implements the getter """ self.logger.log().debug("selector. _maxSizeDeviceHolder " + str(self._maxSizeDeviceHolder)) return self._maxSizeDeviceHolder @maximal_size_device_holder.setter def maximal_size_device_holder(self, newSize): """! property: maxSizeDeviceHolder. Implements the setter @param newSize new maximal number of allowd device holders. """ self.logger.log().debug("selector. maximal_size_device_holder " + str(newSize)) self._maxSizeDeviceHolder = newSize #-------------- functions for device related aspects---------------------- def send_initTask_to_newDevices(self, deviceList): """! In the case that a device has connected on their own we must send the init task to them before sending another tasks. """ self.logger.log().debug( "selector. send_initTask_to_newDevices. deviceList " + str(deviceList)) initializationDevices = [] for device in deviceList: if device.hasTask(self.initTask.taskName) == False: initializationDevices.append(device) number_not_inializedDevices = len(initializationDevices) if number_not_inializedDevices > 0: deviceHolder = DeviceHolder(maxSize=number_not_inializedDevices) for device in initializationDevices: deviceHolder.addDevice(device, self.initTask.taskName, self.initTask.parameterDict) deviceHolder.broadcastTask(self.initTask) def addSingleDevice(self, deviceName, ipAdress, port, hardwareConfig): """! Add a single device to runtime @param deviceName string with device name @param ipAdress string with IP adress @param port int with device port @param hardwareConfig dict with devices hardware config """ self.logger.log().debug('addSingleDevice:' + "deviceName " + deviceName + ",ipAdress " + str(ipAdress) + ",port " + str(port) + ",hardwareConfig " + str(hardwareConfig)) initTask = self.initTask self.runtime.generate_and_add_SingleDevice(deviceName, ipAdress, port, hardwareConfig, initTask) def removeDevice(self, deviceName): """! Remove a device from the runtime. A possible reason for that could be that this device is corrupted. @param deviceName string with Name of device """ if deviceName in self.deviceNames: self.runtime.removeDevice(deviceName) else: self.logger.log().error("There is no device with name " + deviceName) raise ValueError("There is no device with name " + deviceName) def requestTaskAcceptance(self, task): """! Decide if enough devices fullfil the hardware requirements of the incoming task. In a first step the selector determines the devices, which are currently available for computation. In a second step the possible devices are checked from the task, if the fullfill the task criteria. Based on this criteria accept or reject the task. @param task instance of task @return task_acceptance boolean """ initializedDevices = [] for device in self.devices: if device.initialized: initializedDevices.append(device) task_acceptance = task.checkConstraints(initializedDevices) return task_acceptance #------------functions for aggregator related aspects -------- def get_aggregator_of_task(self, taskName): """! Iterate over all known aggregators to get the aggregator of the specific task. Raise a ValueError if the aggregator doesn't exist. @param taskName string with task name @return aggregator instance of Aggregator """ self.logger.log().debug( "Selector. 
get_aggregator_of_task: search aggregator for task " + taskName) for aggregator in self.aggregators: if aggregator.task.taskName == taskName: self.logger.log().debug( "Selector.get_aggregator_of_task: aggregator for " + taskName + " identified") return aggregator raise ValueError("There is no aggregator that handles task " + taskName) def addAggregator(self, newAggregator): """! Add a new aggregator to the aggregtor list. @param newAggregator instance of aggregator """ aggregators = self.aggregators self.logger.log().debug("Selector.addAggregator: old " + str(len(self.aggregators))) aggregators = aggregators + [newAggregator] self.aggregators = aggregators self.logger.log().debug("Selector.addAggregator: new " + str(len(self.aggregators))) def deleteAggregatorAndTask(self, taskName): """! Get the aggregator of the task, stop the task on the DART-Server and remove the task from the open task dict on each device. Afterwards the aggregator is deleted and new tasks are uploaded to the DART-Server @param taskName string with task name """ try: aggregator = self.get_aggregator_of_task(taskName) except ValueError: self.logger.log().error( "There is no aggregator that handles task " + taskName) aggregator.stopTask() self.deleteAggregator(aggregator) self.addTasks2Runtime() def deleteAggregator(self, aggregator): """! Remove a aggregator from the aggregator list and delete the aggregator. @param aggregator instance of aggregator """ if aggregator in self.aggregators: self._aggregators.remove(aggregator) del aggregator else: raise ValueError("aggregator is not in selector") def instantiateAggregator(self, task): """! Instantiate DeviceAggregator. Check in create_needed_childAggregators if the aggregator has enough capacity for the amount of Device, if not create childAggregators recursively. Add task and devices to aggregator @param numDevices amount of devices """ choosen_devices = self.getDevicesForAggregator(task) aggregator = DeviceAggregator( devices=choosen_devices, task=task, maxSizeDeviceHolder=self._maxSizeDeviceHolder, maxNumDeviceHolder=self._maxNumDeviceHolder, maxNumChildAggregators=self._maxNumChildAggregators, logServer=None) self.logger.log().info("max # devices in aggregator: " + str(aggregator.get_max_number_devices())) self.addAggregator(aggregator) return aggregator def getDevicesForAggregator(self, task): """! Check which devices fullfill the requirements of the task and if they are available. @param task instance of task """ #TODO: only implemented for specificDeviceTask if not self.devices: raise ValueError("selector: no devices at all") suitable_devices = [] for device in self.devices: # add task to device if right if not nothing #TODO check if all specificDevices are in self.devices if device.name in task.specificDevices: suitable_devices.append(device) # return the list if len(suitable_devices) == 0: raise ValueError("selector: no devices") else: return suitable_devices #------------functions related to queue aspect--------------- def taskInQueue(self, taskName): """! Check if task is in the task queue from selector. @param taskName string with task name @return booleanQueue boolean """ booleanQueue = False for task in self._taskQueue: if task.taskName == taskName: booleanQueue = True break return booleanQueue def deleteTaskInQueue(self, taskName): """! Delete task from selectors task queue. Afterwards check if the DART-Server has capabilities for new tasks. @param taskName string with task name. 
""" if self.taskInQueue(taskName): for task in self._taskQueue: if task.taskName == taskName: self._taskQueue.remove(task) self.addTasks2Runtime() def addTask2Queue(self, task, priority=False): """! Add a new task to the queue. Task is already checked for feasibility @param task the task to be scheduled """ self.logger.log().info("selector. add task to queue") if task in self._taskQueue: self.logger.log().error("selector. task already scheduled") raise KeyError("Task already scheduled") # add task to queue if priority: self._taskQueue.insert(0, task) else: self._taskQueue.append(task) self.addTasks2Runtime() def addTasks2Runtime(self): """! Check if the DART-Server has capabilities to schedule new task. Iterate over all task in the queue and check which task can be executed at the moment. If yes instantiate aggregator for task and remove task from queue """ capacitynewTasks = self.runtime.get_Capacity_for_newTasks() for task in self._taskQueue: if capacitynewTasks <= 0: break if self.requestTaskAcceptance(task): aggregator = self.instantiateAggregator(task) aggregator.sendTask() capacitynewTasks -= 1 self.deleteTaskInQueue(task.taskName)
class DartRuntime: def __init__( self , server , client_key , testMode , errorProbability , maximal_number_devices = -1 , maximalNumberOpenJobs = 10 , **kwargs ): """! @param maximal_number_devices maximal number of devices for this runtime @param maximalNumberOpenJobs maximal number of allowed maximal jobs @param registeredDevices dict with key device name and value instance of class device @param messageTranslator translator between Python and DART format @param selector instance of selector @param server the server addr, e.g., "https://127.0.0.1:7777" @param client_key the key of the client for identification (unused atm) @param counterJobs int number of open jobs on server """ if testMode: self._restAPIClient = Client(server, client_key, probability_error = errorProbability, testmode = True) else: self._restAPIClient = Client(server, client_key) self._maximal_number_devices = maximal_number_devices self._maximalNumberOpenJobs = maximalNumberOpenJobs self._registeredDevices = {} self._messageTranslator = MessageTranslator() self._selector = None self._counterJobs = 0 #in our case Jobs = Task self.logger = LogServer(__name__) self.logger.log().info("DartRuntime initiated") @property def restAPIClient(self): """! property: runtime. Implements the getter """ return self._restAPIClient @property def registeredDevices(self): """! property: registeredDevices. Implements the getter """ self.updateRegisteredDevices() return list(self._registeredDevices.values()) @property def registeredDevicesbyName(self): """! property: registeredDevices. Implements the getter """ self.updateRegisteredDevices() return list(self._registeredDevices.keys()) @registeredDevices.setter def registeredDevices(self, newRegisteredDevices): """! property: registeredDevices. Implements the setter @param newRegisteredDevices the new list of registered devices """ self._registeredDevices = newRegisteredDevices @property def selector(self): """! property: registeredDevices. Implements the getter """ return self._selector @property def maximal_number_devices(self): """! property: maximal_number_devices. Implements the getter """ return self._maximal_number_devices @maximal_number_devices.setter def maximal_number_devices(self, new_maximal_number_devices): """! property: maximal_number_devices. Implements the setter @param new_maximal_number_devices the new maximal number devices """ self._maximal_number_devices = new_maximal_number_devices @property def maximalNumberOpenJobs(self): """! property: maximalNumberOpenJobs. Implements the getter """ return self._maximalNumberOpenJobs @property def counterJobs(self): """! property: counterJobs. Implements the getter """ return self._counterJobs def updateRegisteredDevices(self): """! Fetch from the DART-server the currently connected devices. If needed, create new virtual devices or delete them. 
""" oldRegisteredDevicesbyName = deepcopy(list(self._registeredDevices.keys())) newRegisteredDevices = self.restAPIClient.get_workers() newRegisteredDevices = newRegisteredDevices["workers"] newRegisteredDevicesbyName = [] for device in newRegisteredDevices: newRegisteredDevicesbyName.append(device["name"]) for newDevice in newRegisteredDevicesbyName: if newDevice not in oldRegisteredDevicesbyName: #add new Devices initTask = None if self.selector is not None: initTask = self.selector.initTask device = DeviceSingle( name = newDevice , ipAdress = None , port = None , dartRuntime = self , physicalName = None , hardwareConfig = {} , taskDict = {} , initTask = initTask ) self._registeredDevices[newDevice] = device for oldDevice in oldRegisteredDevicesbyName: if oldDevice not in newRegisteredDevicesbyName: del self._registeredDevices[oldDevice] def getServerInformation(self): """! Gets information about the servers. The return type has the following structure {'servers': [{'host': '<host_name>', 'port': '<port_name>'}]} """ return self.restAPIClient.get_server_information() def get_Capacity_for_newTasks(self): """! The server have a maximal amount of simultaneously open jobs. Determine the difference between the maximal and curent amount. """ return self.maximalNumberOpenJobs - self._counterJobs def get_TaskStatus(self, taskName): """! Get the status of the job from runtime. In our case is job the same as a task. @param taskName string with name of the task @return int 0 (unknown), 1 (running), 2 (stopped) """ return self.restAPIClient.get_job_status(taskName) def remove_result_from_server(self, taskName, resultID): """! Remove a result with his ID from a job. @param taskName string with name of the task @resultID unique identifier for the task result of a specifif device """ self.restAPIClient.delete_job_result(taskName, resultID) def get_TaskResult(self, taskName, deviceName): """! Send a regex pattern with the device name to the server and get maximal so many results as devives are available. Extract the result and ID from the server message like { 'results': [{ 'id': '4d045be3-fb57-44f4-902f-b93abab3d830', 'job': 'task_one' , 'worker': 'device_one-PSL188-1', 'start_time': '1611135657000' , 'duration': '6.07332611', 'success': 'gASVHwAAAAAAAAB9lCiMCHJlc3VsdF8wlEsBjAhyZXN1bHRfMZRLCnUu\n' } ] , 'job': {'id': 'task_one', 'status': '1'} } with the messageTranslator. @param taskName string with task name @param deviceName string with devie name @return resultDevice dict with format {'duration': '6.07465839', 'result': {'result_0': 1, 'result_1': 10}} @return resultID string with format '5ad55670-3ad4-4bb9-99cc-2b82b85bd8c2' """ maxNumberResults = len(self.registeredDevices) taskResult = self.restAPIClient.get_job_results(taskName, maxNumberResults, deviceName + ".*") resultDevice, resultID = self._messageTranslator.convertDart2Python(taskResult, deviceName) self.logger.log().debug("DartRuntime.get_TaskResult: " + str(locals())) return resultDevice, resultID def instantiateSelector(self, max_size_deviceHolder): """! Create the Selector after starting the runtime @param max_size_deviceHolder maximal amount of device per deviceholder @param self._selector instance of class selector """ self._selector = Selector(self, max_size_deviceHolder) return self._selector def add_SingleDevice( self, device): """! Add an already existing single device (one worker per device) to runtime. send directly the initTask to device. 
@param device device to be registered """ self.logger.log().debug("dartRuntime.add_SingleDevice " + str(locals())) if device.name in self._registeredDevices.keys(): self.logger.log().error("device name already in list: " + device.name) raise KeyError("device name already in list") self._registeredDevices[device.name] = device #add workers is blocking! self.restAPIClient.add_worker( [device.ipAdress], 1, device.name, [""],0,{}) if device.initTask is not None: device.startTask(device.initTask) #TODO Luca: where to specify port ?! self.logger.log().info("dartRuntime.add_SingleDevice " + device.name + " registered") def generate_and_add_SingleDevice( self , deviceName , deviceIp , port , hardwareConfig , initTask ): """! Add a single device (one worker per device) to runtime. Therefore also create an instane of DeviceSingle. Afterwards send directly the initTask to device. @param deviceName string with device name @param deviceIp ip address of real physical device @param port port of real physical device @param hardware_config hardware properties like processor type, memory connection bandwith and so on @todo: specify hardwareConfig @param initTask instance of class initTask """ self.logger.log().debug("dartRuntime.generate_and_add_SingleDevice " + str(locals())) if deviceName in self._registeredDevices.keys(): self.logger.log().error("device name already in list: " + deviceName) raise KeyError("device name already in list") device = DeviceSingle( name = deviceName , ipAdress = deviceIp , port = port , dartRuntime = self , physicalName = None , hardwareConfig = hardwareConfig , taskDict = {} , initTask = initTask ) self._registeredDevices[deviceName] = device #add workers is blocking! self.restAPIClient.add_worker( [deviceIp], 1, deviceName, [""],0,{}) if initTask is not None: device.addTask(initTask.taskName, initTask.parameterDict) device.startTask(initTask) #TODO Luca: where to specify port ?! self.logger.log().info("dartRuntime.generate_and_add_SingleDevice " + deviceName + " registered") def removeDevice(self, deviceName): """! Remove device from runtime and registeredDevice list. @param deviceName string with name of device @param device instance of device @todo good idea to destroy device ? """ if deviceName not in self.registeredDevicesbyName: self.logger.log().error("device name not in list: " + deviceName) raise KeyError("device name is not in list") device = self.getDevice(deviceName) self.restAPIClient.remove_workers(device.ipAdress) del device #TODO: good idea to destroy device del self._registeredDevices[deviceName] def getDevice(self, deviceName): """! Get the instance of device by name. @param deviceName string with deviceName @return instance of device """ if deviceName not in self.registeredDevicesbyName: self.logger.log().error("device name not in list: " + deviceName) raise KeyError("device name is not in list") return self._registeredDevices[deviceName] def get_job_status(self, jobName): """! The job status can have the values unknown, stopped or running. The status is translated into an int. @param jobName name of job. Equal to taskName @return int 0,1 or 2. @todo atm hacky. """ jobStatus = self.restAPIClient.get_job_status(jobName) #TODO: ask Luca why return is job_status.unknown if jobStatus == job_status.unknown: return 0 elif jobStatus == job_status.stopped: return 2 else: return 1 def add_job(self, name, module_path, method): """! 
In our implementation we start a separate job for every task to have a clear separation between different federated learning rounds. @param name string with job/task name @param module_path relative path to the file, based on the default path in worker.json @param method method which should be executed in the file """ self.restAPIClient.add_job(name, module_path, method) self._counterJobs += 1 self.logger.log().info("added job: " + module_path + " " + str(method) + " new #jobs: " + str(self._counterJobs)) return def add_tasks(self, jobName, location_and_parameters): """! Add tasks to a job. We have a new job for every task. We specify the parameters for each device individually. @param jobName string with job/task name @param location_and_parameters list of form [ { 'location' : '...', 'parameter' : ' ...'}, ...] @todo this function can be removed ?! """ self.restAPIClient.add_tasks(jobName, location_and_parameters) self.logger.log().debug("added task: " + jobName + " " + str(location_and_parameters)) return def broadcastTaskToDevices(self, taskName, deviceNamesList, parameterList): """! Send a task to the specified physical devices at the same time. @param taskName string of task name @param deviceNamesList list of device names like ['device_one', 'device_two'] @param parameterList specifies parameters for devices like [{'param1': 0, 'param2': 1}, {'param1': 10, 'param2': 5}] """ self.logger.log().debug("broadcastTaskToDevices") for deviceName in deviceNamesList: if deviceName not in self.registeredDevicesbyName: self.logger.log().error("broadcastTaskToDevices: " + deviceName + " is not known!") raise ValueError("Device with name " + deviceName + " is not known!") parameterDARTformat = self._messageTranslator.convertPython2Dart(deviceNamesList, parameterList) self.restAPIClient.add_tasks(taskName, parameterDARTformat) def get_ServerInformation(self): """! Return server information. @todo error messages at the moment """ serverInformation = self.restAPIClient.get_server_information() self.logger.log().debug("get_ServerInformation " + str(serverInformation)) return serverInformation def stopTask(self, taskName): """! Stop a task/job on the server. Therefore also decrease the counter of jobs on the server. @param taskName string with task name """ self._counterJobs -= 1 self.restAPIClient.stop_job(taskName) self.logger.log().debug("stopTask " + taskName + " new #jobs: " + str(self._counterJobs)) def stopRuntime(self): """! Stop the server. """ self.restAPIClient.stop_servers()
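# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the library): how a DartRuntime
# could be driven directly. The server address, client key, device name, IP,
# port, file path and function name below are placeholders, and the sketch
# assumes the (test-mode) REST client accepts these calls without a real
# DART server.
if __name__ == "__main__":
    runtime = DartRuntime("https://127.0.0.1:7777", "client_key",
                          testMode=True, errorProbability=0)
    # register a physical device; no init task in this sketch
    runtime.generate_and_add_SingleDevice("device_one", "192.168.0.10", 2883,
                                          hardwareConfig={}, initTask=None)
    # one job per task: point the job to the file/function to run on the device
    runtime.add_job("task_one", "examples/train.py", "train")
    # send device-specific parameters and poll for the result
    runtime.broadcastTaskToDevices("task_one", ["device_one"], [{"param1": 0}])
    if runtime.get_job_status("task_one") == 2:  # 0 unknown, 1 running, 2 stopped
        resultDevice, resultID = runtime.get_TaskResult("task_one", "device_one")
    runtime.stopTask("task_one")
    runtime.stopRuntime()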
class SpecificDeviceTask(TaskBase): """ As SpecificDeviceTask, we define to execute a given task with (possibly) different parameter settings (training, etc) on the specific devices that fulfill the (optional) hardware requirements. """ def __init__(self, taskName=None, parameterDict={}, model=None, hardwareRequirements={}, filePath=None, executeFunction=None, configFile=None): """! Instantiates a SpecificDeviceTask. Information about the configuration have to be either provided in the function call or via a configFile. @param taskName (optional) Name of the task to be stored in the LogServer @param parameterDict Dict of device names and associated parameters to be used in training @param model (optional) Model to be executed @param hardwareRequirements (optional) mandatory hardware requirements @param filePath the path to the file to be called for execution on the device @param executeFunction name of function, which shoule be executed in filePath @param configFile (optional) """ self._parameterDict = parameterDict self._model = model self._filePath = filePath self._hardwareRequirements = hardwareRequirements self._configFile = configFile self._executeFunction = executeFunction self._taskName = taskName self.logger = LogServer(__name__) self.logger.log().info("SpecificDeviceTask initiated") self.checkConfig() @property def filePath(self): return self._filePath @property def taskName(self): return self._taskName @property def numDevices(self): return len(self.specificDevices) @property def executeFunction(self): return self._executeFunction @property def parameterDict(self): """! property: parameterDict. Implements the getter. @return format {"device_name": deviceParameterDict} """ return self._parameterDict @parameterDict.setter def parameterDict(self, new_parameterDict): """! property: parameterDict. Implements the setter. @param new_parameterDict the new parameterDict """ self._parameterDict = new_parameterDict @property def model(self): """! property: model. Implements the getter. A model is an optional parameter for a task. In case a model is provided, the model is sent to the devices where it will be trained/used for inference. In case no model is provided, the task executes some given functions on the device (filePath has to be defined). """ return self._model @model.setter def model(self, new_model): """! property: model. Implements the setter. @param new_model the new model to be trained """ self._model = new_model @property def specificDevices(self): """! Returns a list of devices on which the task is to be executed. """ return list(self._parameterDict.keys()) @property def configFile(self): """! property: configFile. Implements the getter. The configFile is an optional file provided at instantiation to describe the task configuration, i.e. it contains the parameters for training, (optionally) the model to be executed, which hardwareRequirements are mandatory, the name of the task, as well as the path to the python script to be executed. @todo define the structure of the configFile """ return self._configFile def writeConfig(self, taskName, parameterlists, hardwareRequirements, model, filePath): """! @todo implement @param taskName Name of the task @param parameterlists List of parameters @param hardwareRequirements mandatory hardware requirements @param model model to be trained @param filePath path to the file to be executed on the device """ return def checkConfig(self): """! 
This method ensures that all necessary parameters for the given task are provided: either via the parameters given at instantiation of the subclass or via a given file path to a configuration file. """ config = {} valid = False if self._parameterDict is None: if self._configFile is None: self.logger.log().error( "specificDeviceTask.checkConfig: no config provided") raise ValueError("No configuration provided") else: config = self.loadConfigFile() else: config['parameters'] = self._parameterDict config['hardwareRequirements'] = self._hardwareRequirements config['model'] = self._model config['taskName'] = self._taskName config['filePath'] = self._filePath valid = True self.logger.log().debug("specificDeviceTask.checkConfig: " + str(locals())) # check the configuration # todo: define configfile # why do we need load and write of the config? - might be too large if self._parameterDict is None: self.writeConfig(config['taskName'], config['parameters'], config['hardwareRequirements'], config['model'], config['filePath']) return valid def getDeviceParameterDict(self, deviceName): """! Return the parameter dict of a specific device. @param deviceName string with device name @return dict with the parameters for this device """ if deviceName not in self.specificDevices: self.logger.log().error( "specificDeviceTask.getDeviceParameterDict: " + deviceName + " does not apply for this task") raise KeyError("Device with name " + deviceName + " not included in task") else: return self.parameterDict[deviceName] def checkConstraints(self, listDevices): """! Check if all devices which are specified by name in the task are in listDevices and if they fulfill the hardware requirements. @param listDevices currently available devices @todo implement check of hardware requirements if necessary """ devicesSuited = True listDeviceNames = [device.name for device in listDevices] for task_neededDevice in self.specificDevices: if task_neededDevice not in listDeviceNames: devicesSuited = False return devicesSuited def loadConfigFile(self): """! @todo implement this function """ return
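# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): building a SpecificDeviceTask whose
# parameterDict is keyed by device name. The task name, device names, file
# path and function name are placeholders.
if __name__ == "__main__":
    task = SpecificDeviceTask(taskName="task_one",
                              parameterDict={"device_one": {"epochs": 1},
                                             "device_two": {"epochs": 5}},
                              filePath="examples/train.py",
                              executeFunction="train")
    print(task.specificDevices)                       # ['device_one', 'device_two']
    print(task.getDeviceParameterDict("device_one"))  # {'epochs': 1}
    print(task.numDevices)                            # 2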
class DeviceAggregator(AggregatorBase): """! DeviceAggregator is responsible for the data aspect of a task """ def __init__(self, devices, task, maxSizeDeviceHolder=10, maxNumDeviceHolder=5, maxNumChildAggregators=5, logServer=None): """! @param task instance of a task @param deviceHolders list of deviceHolders @param childAggregators list of childAggregators @param maxSizeDeviceHolder maximum number of devices for deviceHolder @param maxNumDeviceHolder maximum number of deviceHolders per aggregator/childaggregator @param maxNumChildAggregators maximum number of allowed childAggregators @param aggregatedResult aggregated result of local task results @param logServer storage of the results and/or aggregated result """ self._task = task self._maxSizeDeviceHolder = maxSizeDeviceHolder self._maxNumDeviceHolder = maxNumDeviceHolder self._maxNumChildAggregators = maxNumChildAggregators self._deviceHolders = [] self._childAggregators = [] numDevices = task.numDevices if maxNumChildAggregators > 0: amount_childAggregators = self.compute_required_childAggregators_count( numDevices) self._childAggregators = [ DeviceAggregator( [ ] #init at first without devices; devices will be added at the end of constructor , task, maxSizeDeviceHolder=maxSizeDeviceHolder, maxNumDeviceHolder=maxNumDeviceHolder, maxNumChildAggregators=0, logServer=logServer) for _ in range(amount_childAggregators) ] self._aggregatedResult = None self._logServer = logServer self._instantiateDeviceHolders() for device in devices: self.addSingleDevice(device) #add here task to devices self.logger = LogServer(__name__) self.logger.log().info("Aggregator initiated") #-------------------------------------------- @property def maxNumDeviceHolder(self): """! property: maxNumDeviceHolder. Implements the getter """ return self._maxNumDeviceHolder #-------------------------------------------- @property def maxNumChildAggregators(self): """! property: maxNumChildAggregators. Implements the getter """ return self._maxNumChildAggregators #-------------------------------------------- @property def maxSizeDeviceHolder(self): """! property: maxSizeDeviceHolder. Implements the getter """ return self._maxSizeDeviceHolder #---------------------------------- @property def logServer(self): """! property: logServer. Implements the getter """ return self._logServer @logServer.setter def logServer(self, newLogServer): """! property: logServer. Implements the setter @param newLogServer the new logServer """ self._logServer = newLogServer #---------------------------------- @property def task(self): """! property: task. Implements the getter """ return self._task @task.setter def task(self, newTask): """! property: deviceHolders. Implements the setter @param newTask instance of task """ self._task = newTask if self.childAggregators: for child in self.childAggregators: child.task = newTask #---------------------------------- @property def deviceHolders(self): """! property: deviceHolders. Implements the getter """ return self._deviceHolders @deviceHolders.setter def deviceHolders(self, newDeviceHolders): """! property: deviceHolders. Implements the setter @param newDeviceHolders list of deviceHolders """ if self._childAggregators: raise ValueError("Child aggregators exist!") self._deviceHolders = newDeviceHolders #------------------------------------ @property def allDevices(self): """! property: currentDevices. 
Implements the getter """ allDevices = [] for dHolder in self.deviceHolders: allDevices.extend(dHolder.devices) if self.childAggregators: for child in self.childAggregators: for device_holder in child.deviceHolders: allDevices.extend(device_holder.devices) return allDevices #------------------------------------- @property def childAggregators(self): """! property: childAggregators. Implements the getter """ return self._childAggregators @childAggregators.setter def childAggregators(self, newChildAggregators): """! property: childAggregators. Implements the setter @param newChildAggregators list of childAggregators """ self._childAggregators = newChildAggregators #------------------------------------- @property def aggregatedResult(self): """! property: aggregatedResult. Implements the getter """ return self._aggregatedResult @aggregatedResult.setter def aggregatedResult(self, newAggregatedResult): """! property: aggregatedResult. Implements the setter @param newAggregatedResult """ self._aggregatedResult = newAggregatedResult def get_max_number_devices(self): """! Get total number of devices from deviceAggregator or in case of childAggregators iterate over all child aggregators. @return total int number of maximal amount of devices """ if not self.childAggregators: return self._maxNumDeviceHolder * self._maxSizeDeviceHolder else: total = 0 for child in self.childAggregators: total += child.get_max_number_devices() return total def get_OnlineDevices(self): """! Get a list of devices, which are online. In case of childAggregators iterate over childaggregatos and get from them the online devices @return deviceList """ deviceList = [] if self.childAggregators: for aggregator in self.childAggregators: deviceList.extend(aggregator.get_OnlineDevices()) return deviceList for dHolder in self.deviceHolders: deviceList.extend(dHolder.getOnlineDevices()) return deviceList #-------------functions for setting up device aggregator------------------ def compute_required_childAggregators_count(self, numberDevices): """! Check if child aggregators are required and compute the amount @param numberDevices int amount of required devices """ amount_needed_childAggregators = 0 maxDevices = self._maxNumDeviceHolder * self._maxSizeDeviceHolder if maxDevices < numberDevices: # get necessary devices per childAggregators devicesPerChild = math.ceil(numberDevices / self._maxNumChildAggregators) if devicesPerChild > self._maxSizeDeviceHolder: raise ValueError( "More devices are required than allowed per child aggregator!" ) amount_needed_childAggregators = math.ceil(numberDevices / maxDevices) return amount_needed_childAggregators def _instantiateDeviceHolders(self): """! Instantiate devicHolders and append to list of DeviceHolders """ if len(self.deviceHolders) > 0: raise ValueError("device holder already instantiated") elif len(self.childAggregators) > 0: pass else: for i in range(self._maxNumDeviceHolder): deviceHolder = DeviceHolder(self.maxSizeDeviceHolder) self.deviceHolders.append(deviceHolder) def addSingleDevice(self, device): """! Add single device to aggregator. If we have child aggregators iterate over child aggregators and add it to the first one, who have capacity. 
@param device instance of class deviceSingle """ if not isinstance(device, DeviceSingle): raise Exception("Device is not an instance of DeviceSingle !") if self.childAggregators: for aggregator in self.childAggregators: if aggregator.addSingleDevice(device) == True: return raise ValueError("Device holders are completely full!") else: for deviceHolder in self.deviceHolders: if device in deviceHolder.devices: raise Exception("Device is already in deviceHolder!") if deviceHolder.check_full() == False: deviceName = device.name deviceParameterDict = self.task.getDeviceParameterDict( deviceName) taskName = self.task.taskName deviceHolder.addDevice(device, taskName, deviceParameterDict) return True if self.maxNumChildAggregators > 0: raise ValueError("Device holders are completely full!") #-------------functions regarding task status----------------------- def sendTask(self): """! Send task to device. Each Aggregator iterate over the device holders, which broadcast the task to the runtime. """ if self.task is None: raise ValueError("There is no task assigned!") if self.childAggregators: for aggregator in self.childAggregators: aggregator.sendTask() else: for device_holder in self.deviceHolders: if not device_holder.check_empty(): device_holder.broadcastTask(self.task) def isTaskFinished(self): """! Return the status of the task. If there is no child aggregators iterate over all deviceHolders and check the task status. To look at each deviceHolder is necessary, because the devices in different deviceHolders can have different servers. Each deviceHolder iterates over his devices and get the device task status. Based on this number we decide over the task status. @return string "in progress" or "finished" """ if self.task is None: raise ValueError("There is no task assigned!") taskFinished = True taskName = self.task.taskName if not self.childAggregators: for device_holder in self.deviceHolders: dhFinished = device_holder.devicesFinished(taskName) if not dhFinished: taskFinished = False break for aggregator in self.childAggregators: childAggregatorTaskFinished = aggregator.isTaskFinished() if not childAggregatorTaskFinished: taskFinished = False break return taskFinished def stopTask(self): """! Remove the task from each device over the device holders from the aggregator. Afterwards delete the deviceHolders. Already finished devices has logged their results on their own """ if self.task is None: raise ValueError("There is no task assigned!") taskName = self.task.taskName if self.childAggregators: for childAggregator in self.childAggregators: childAggregator.stopTask() else: for deviceHolder in self.deviceHolders: deviceHolder.stopTask(taskName) del deviceHolder #-----------------functions for result aggregation----------------- def aggregate_devicesResults(self): """! Collect results from device, which are finished, and aggregate them optional. @return list with instances of taskResult """ # check all deviceholders if not self.deviceHolders: self.logger.log().info('no device holders available') # get all devices from the deviceholders taskName = self.task.taskName intermediateResults = [] for dHolder in self.deviceHolders: intermediateResults.extend(dHolder.get_finishedTasks(taskName)) return intermediateResults def requestAggregation(self): """! In case of an aggregator with childaggregators, this function triggers the aggregations by the child aggregators. Otherwise, it aggregates the results provided by all finished devices in its deviceHolders. 
@return list with instances of taskResult """ if not self.childAggregators: self.logger.log().info( 'deviceAggregator.requestAggregation: collect results from devices' ) aggregatedResult = self.aggregate_devicesResults() return aggregatedResult #TODO: at the moment sequential, parallelize it result = [] for aggregator in self.childAggregators: self.logger.log().info( 'deviceAggregator.requestAggregation: trigger aggregation by child aggregators' ) result.extend(aggregator.requestAggregation()) return result #--------------functions for writing log------------------------ def sendLog(self, task): """! If there are no childAggregators, iterate over the device holders and their devices and write the logs to the logServer. If there are childAggregators, iterate over them first. @param task instance of class task """ taskName = task.taskName # get devices from device holders if there are no child aggregators if not self.childAggregators: for deviceHolder in self.deviceHolders: for device in deviceHolder.devices: logs = device.getLog(task) # getLog returns False when the device does not have this specific task if logs and self.logServer: self.logServer.writeLog(logs) # if there are child aggregators, trigger them first to send the log else: for aggregator in self.childAggregators: aggregator.sendLog(task)
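# ---------------------------------------------------------------------------
# Capacity sketch (illustrative only): with maxNumDeviceHolder = 1 and
# maxSizeDeviceHolder = 2 a single aggregator holds at most 1 * 2 = 2 devices,
# so a task addressing 3 devices leads to ceil(3 / 2) = 2 child aggregators.
# The task below reuses the hypothetical device names and file path from the
# sketches above; no real devices are attached.
if __name__ == "__main__":
    task = SpecificDeviceTask(taskName="task_one",
                              parameterDict={"device_one": {}, "device_two": {},
                                             "device_three": {}},
                              filePath="examples/train.py",
                              executeFunction="train")
    aggregator = DeviceAggregator([], task,
                                  maxSizeDeviceHolder=2,
                                  maxNumDeviceHolder=1,
                                  maxNumChildAggregators=5)
    print(len(aggregator.childAggregators))     # 2
    print(aggregator.get_max_number_devices())  # 2 child aggregators * 2 = 4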
class WorkflowManager: TASK_STATUS_IN_PROGRESS = "in progress" TASK_STATUS_IN_QUEUE = "in queue" TASK_STATUS_FINISHED = "finished" taskID = 0 def __init__(self, testMode=False, errorProbability=0, logLevel=3, maxSizeDeviceHolder=10, maximalNumberOpenJobs=10): """! @param testMode if True, the runtime is started in test mode @param errorProbability error probability used in test mode @param logLevel console log level: 0 = DEBUG, 1 = INFO, 3 = ERROR, otherwise FATAL @param maxSizeDeviceHolder maximal size for deviceHolders @param maximalNumberOpenJobs maximal number of allowed open jobs @todo: can runtime and maxSizeDeviceHolder be moved to the selector? Or first create the selector and then add the runtime to it? Better if we have multiple servers? """ self._runtime = None self._selector = None self._maxSizeDeviceHolder = maxSizeDeviceHolder self._maximalNumberOpenJobs = maximalNumberOpenJobs self._initTask = None self._testMode = testMode self._currentDeviceNames = [] self._errorProbability = int(errorProbability) loglevel = LogServer.ERROR if int(logLevel) == 0: loglevel = LogServer.DEBUG elif int(logLevel) == 1: loglevel = LogServer.INFO elif int(logLevel) == 3: loglevel = LogServer.ERROR else: loglevel = LogServer.FATAL self.logger = LogServer(__name__, console_level=loglevel, file_level=LogServer.DEBUG) self.logger.log().info("Workflow manager initiated") @property def maxSizeDeviceHolder(self): self.logger.log().debug("_maxSizeDeviceHolder " + str(self._maxSizeDeviceHolder)) return self._maxSizeDeviceHolder @property def config(self): return self._args @property def runtime(self): return self._runtime @property def selector(self): return self._selector @property def maximalNumberOpenJobs(self): self.logger.log().debug("_maximalNumberOpenJobs " + str(self._maximalNumberOpenJobs)) return self._maximalNumberOpenJobs def startFedDART(self, runtimeFile=None, deviceFile=None, maximal_numberDevices=-1): """! @param runtimeFile path to the json file with the server address and client key @param deviceFile path to a json file with already known devices @param maximal_numberDevices maximal number of devices for the runtime @todo specify deviceFile """ self.logger.log().debug('start feddart server, config: ' + str(locals())) with open(runtimeFile) as runtimeFile: runtime = json.load(runtimeFile) self._runtime = DartRuntime(runtime["server"], runtime["client_key"], self._testMode, self._errorProbability, maximal_numberDevices, self.maximalNumberOpenJobs) self._selector = self.runtime.instantiateSelector( self._maxSizeDeviceHolder) if self._initTask: self.selector.initTask = self._initTask if deviceFile is not None: with open(deviceFile) as deviceFile: deviceFile = json.load(deviceFile) for deviceName in deviceFile: self.selector.addSingleDevice( deviceName, deviceFile[deviceName]["ipAdress"], deviceFile[deviceName]["port"], deviceFile[deviceName]["hardware_config"]) def removeDevice(self, deviceName): self.logger.log().debug("remove device. deviceName " + str(deviceName)) self.selector.removeDevice(deviceName) def _sendTaskRequest(self, task): """! Send the task to the selector. Based on the hardware requirements and the needed devices, the selector decides to accept or reject the task. @param task instance of task """ self.logger.log().info("requestTaskAcceptance") self.logger.log().debug(str(locals())) return self.selector.requestTaskAcceptance(task) def getTaskStatus(self, taskName): """! Ask the selector for the aggregator of the task. The aggregator knows the status of the task. @param taskName name of the task @return string "in queue", "in progress" or "finished" """ self.logger.log().debug("getTaskStatus. task " + str(taskName))
if self.selector.taskInQueue(taskName): return self.TASK_STATUS_IN_QUEUE else: try: aggregator = self.selector.get_aggregator_of_task(taskName) taskFinished = aggregator.isTaskFinished() if taskFinished: self.logger.log().debug("TaskStatus " + str(self.TASK_STATUS_FINISHED)) return self.TASK_STATUS_FINISHED else: self.logger.log().debug("TaskStatus " + str(self.TASK_STATUS_IN_PROGRESS)) return self.TASK_STATUS_IN_PROGRESS except ValueError: self.logger.log().error( "workflowManager.getTaskStatus. there is no aggregator that handles task " + taskName) self.logger.log().debug(str(locals())) def getServerInformation(self): serverinfo = self.selector.runtime.get_ServerInformation() self.logger.log().debug("WorkflowManager.get_ServerInformation: " + str(locals())) return serverinfo def getTaskResult(self, taskName): """! Get the aggregator of the task and trigger the collection of the results. @param taskName name of the task @return taskResult aggregated result or collected results from devices """ self.logger.log().info("WorkflowManager.getTaskResult. taskName " + str(taskName)) if self.selector.taskInQueue(taskName): self.logger.log().debug(taskName + " still in queue.") return [] else: try: aggregator = self.selector.get_aggregator_of_task(taskName) taskResult = aggregator.requestAggregation() if self.getTaskStatus(taskName) == self.TASK_STATUS_FINISHED: self.stopTask(taskName) except ValueError: self.logger.log().error( "workflowManager.getTaskResult. there is no aggregator that handles task " + taskName) self.logger.log().debug(str(locals())) return [] return taskResult def getAllDeviceNames(self): """! Return all known devices with name to the end user. @return: list of device names """ deviceNames = self.selector.deviceNames self.logger.log().debug("getAllDeviceNames. deviceNames " + str(deviceNames)) if self._currentDeviceNames == []: self._currentDeviceNames = deviceNames return deviceNames def getAvailableDeviceNames(self, listDeviceNames): """! Return the subset of listDeviceNames that is currently known to the selector. @param listDeviceNames list of device names @return: list of available device names """ currentDeviceNames = self.selector.deviceNames availableDeviceNames = [] for deviceName in currentDeviceNames: if deviceName in listDeviceNames: availableDeviceNames.append(deviceName) return availableDeviceNames def getNewDeviceNames(self): """! Return the names of devices that were not yet known when getAllDeviceNames was called. @return: list of new device names """ oldDeviceNames = self._currentDeviceNames currentDeviceNames = self.selector.deviceNames newDeviceNames = [] for deviceName in currentDeviceNames: if deviceName not in oldDeviceNames: newDeviceNames.append(deviceName) self.logger.log().debug("getNewDeviceNames. deviceNames " + str(newDeviceNames)) return newDeviceNames def createCollection(self, deviceNames): """! Cluster the devices to a group. @param deviceNames: list of device names @return: collection """ return Collection(self, deviceNames) def stopTask(self, taskName): """! The task with the associated aggregator and deviceHolders is destroyed. This should be done when the task isn't needed anymore. @todo: add it as an option to get_TaskResult ?! """ self.logger.log().info("stopTask. taskName " + str(taskName))
if self.selector.taskInQueue(taskName): self.selector.deleteTaskInQueue(taskName) else: self.selector.deleteAggregatorAndTask(taskName) def stopFedDART(self): self.logger.log().info("stopFedDART") self.runtime.stopRuntime() def createInitTask(self, parameterDict={}, model=None, hardwareRequirements={}, filePath=None, executeFunction=None, configFile=None): self.logger.log().debug("createInitTask. " + str(locals())) task = InitTask(parameterDict, model, hardwareRequirements, filePath, executeFunction, configFile) self._initTask = task def startTask(self, taskType=0, parameterDict={}, model=None, hardwareRequirements={}, filePath=None, executeFunction=None, configFile=None, priority=False, numDevices=-1, cluster=None): """! @param taskType 0 for a DefaultTask, 1 for a SpecificDeviceTask @param parameterDict dict of parameters; for a SpecificDeviceTask keyed by device name @param executeFunction name of function, which should be executed in filePath @param priority whether the task is added to the queue with priority @param numDevices number of devices for a DefaultTask @return taskName name of the accepted task or None if the task was rejected """ # defaultTask taskName = "task_" + str(WorkflowManager.taskID) + "_" + datetime.datetime.now().isoformat() WorkflowManager.taskID += 1 if taskType == 0: task = DefaultTask(taskName, parameterDict, model, hardwareRequirements, filePath, executeFunction, configFile, numDevices) if taskType == 1: task = SpecificDeviceTask(taskName, parameterDict, model, hardwareRequirements, filePath, executeFunction, configFile) # request possibility from selector if task is feasible request_status = self._sendTaskRequest(task) # task accepted if request_status: self.selector.addTask2Queue(task, priority) self.logger.log().info("task accepted") # task rejected else: taskName = None self.logger.log().info( "task was not accepted - change your constraints?") self.logger.log().debug("start task." + str(locals())) return taskName
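# ---------------------------------------------------------------------------
# End-to-end sketch (illustrative only): driving the framework through the
# WorkflowManager. The JSON file paths, device name, training script and
# function name are placeholders; "runtime.json" is assumed to contain the
# "server" and "client_key" entries read by startFedDART, and "devices.json"
# is assumed to map device names to "ipAdress", "port" and "hardware_config".
if __name__ == "__main__":
    import time

    manager = WorkflowManager(testMode=True, errorProbability=0)
    manager.startFedDART(runtimeFile="runtime.json", deviceFile="devices.json")
    # taskType 1 = SpecificDeviceTask with per-device parameters
    taskName = manager.startTask(taskType=1,
                                 parameterDict={"device_one": {"epochs": 1}},
                                 filePath="examples/train.py",
                                 executeFunction="train")
    if taskName is not None:
        # poll until the task is finished, then collect the device results
        while manager.getTaskStatus(taskName) != manager.TASK_STATUS_FINISHED:
            time.sleep(1)
        results = manager.getTaskResult(taskName)
    manager.stopFedDART()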