def convertDart2Python(cls, results, deviceName):
    """! Convert a DART results payload into the result dict of a single device.
    @param results DART payload of the form {'results': [...], 'job': {...}}
    @param deviceName name of the device whose result should be extracted
    @return device_result dict {'duration': ..., 'result': ...} and the result id
    """
    device_result = {'duration': None, 'result': None}
    resultID = None
    logger = LogServer(__name__)
    for result in results['results']:
        # the logical device name is the part of the worker id before the first "-"
        workerName = result['worker'].split("-", 1)[0]
        if 'success' in result.keys() and deviceName == workerName:
            device_result['duration'] = result['duration']
            device_result['result'] = cls.unpackBackMessage(result['success'])
            resultID = result['id']
        if 'error' in result.keys() and deviceName == workerName:
            device_result['duration'] = result['duration']
            device_result['result'] = {"error": result['error']}
            resultID = result['id']
    logstring = ""
    if device_result['result'] is not None:
        logstring = logstring + str(device_result['duration']) + " "
        logstring = logstring + str(resultID) + " "
        for key, value in device_result['result'].items():
            logstring = logstring + str(key) + " "
            logstring = logstring + str(value)
    logger.log().debug("MessageTranslator.convertDart2Python " + logstring)
    return device_result, resultID
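# Example (hedged): the payload below is a made-up illustration of the 'results'
# structure this converter expects (compare DartRuntime.get_TaskResult further
# below). Only the worker-name matching is shown here, since unpackBackMessage
# needs a real packed message.
example_results = {
    'results': [
        {'id': '4d045be3-fb57-44f4-902f-b93abab3d830',
         'job': 'task_one',
         'worker': 'device_one-PSL188-1',
         'start_time': '1611135657000',
         'duration': '6.07332611',
         'success': '<packed result string>'},
    ],
    'job': {'id': 'task_one', 'status': '1'},
}
for entry in example_results['results']:
    # only the part before the first "-" identifies the logical device
    print(entry['worker'].split("-", 1)[0] == "device_one")   # True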
def convertPython2Dart(cls, list_client, list_params):
    """convert default message to dart format and return feasible format"""
    task_list = []
    logger = LogServer(__name__)
    #TODO: serialize parameters
    logstring = ""
    for client, params in zip(list_client, list_params):
        dict_client = {'location': client, 'parameter': cls.packMessage(params)}
        logstring = logstring + " " + str({'location': client, 'parameter': params})
        task_list.append(dict_client)
    logger.log().debug("MessageTranslator.convertPython2Dart " + logstring)
    return task_list
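# Example (hedged): the same zip-and-wrap logic with a stand-in for packMessage,
# to show the DART task-list shape; the device names and parameters are placeholders.
def _pack_stub(params):
    return str(params)  # stand-in for the real packMessage encoding

example_clients = ["device_one", "device_two"]
example_params = [{"param1": 0, "param2": 1}, {"param1": 10, "param2": 5}]
example_task_list = [{'location': c, 'parameter': _pack_stub(p)}
                     for c, p in zip(example_clients, example_params)]
# -> [{'location': 'device_one', 'parameter': "{'param1': 0, 'param2': 1}"}, ...]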
def __init__(self, devices, task, maxSizeDeviceHolder=10, maxNumDeviceHolder=5,
             maxNumChildAggregators=5, logServer=None):
    """!
    @param task instance of a task
    @param deviceHolders list of deviceHolders
    @param childAggregators list of childAggregators
    @param maxSizeDeviceHolder maximum number of devices for deviceHolder
    @param maxNumDeviceHolder maximum number of deviceHolders per aggregator/childaggregator
    @param maxNumChildAggregators maximum number of allowed childAggregators
    @param aggregatedResult aggregated result of local task results
    @param logServer storage of the results and/or aggregated result
    """
    self._task = task
    self._maxSizeDeviceHolder = maxSizeDeviceHolder
    self._maxNumDeviceHolder = maxNumDeviceHolder
    self._maxNumChildAggregators = maxNumChildAggregators
    self._deviceHolders = []
    self._childAggregators = []
    numDevices = task.numDevices
    if maxNumChildAggregators > 0:
        amount_childAggregators = self.compute_required_childAggregators_count(numDevices)
        self._childAggregators = [
            DeviceAggregator(
                [],  # init at first without devices; devices will be added at the end of constructor
                task,
                maxSizeDeviceHolder=maxSizeDeviceHolder,
                maxNumDeviceHolder=maxNumDeviceHolder,
                maxNumChildAggregators=0,
                logServer=logServer)
            for _ in range(amount_childAggregators)
        ]
    self._aggregatedResult = None
    self._logServer = logServer
    self._instantiateDeviceHolders()
    for device in devices:
        self.addSingleDevice(device)  # add here task to devices
    self.logger = LogServer(__name__)
    self.logger.log().info("Aggregator initiated")
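# Usage sketch (hedged): an aggregator built for one task over a list of already
# registered DeviceSingle instances (empty here to keep the sketch self-contained);
# DefaultTask is the task class defined further below, and the file path is made up.
# With maxNumChildAggregators=0 the aggregator presumably holds at most
# maxNumDeviceHolder * maxSizeDeviceHolder devices itself.
example_task = DefaultTask(taskName="round_one", parameterlists={"epochs": 1},
                           filePath="tasks/train.py", numDevices=0)
aggregator = DeviceAggregator(
    devices=[],                     # DeviceSingle instances would be passed here
    task=example_task,
    maxSizeDeviceHolder=10,
    maxNumDeviceHolder=5,
    maxNumChildAggregators=0,       # > 0 adds a layer of child aggregators
    logServer=None,
)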
def __init__(self, testMode=False, errorProbability=0, logLevel=3,
             maxSizeDeviceHolder=10, maximalNumberOpenJobs=10):
    """!
    @param testMode if True, use the simulated DART client instead of a live server
    @param errorProbability probability of simulated request errors in test mode
    @param logLevel console log level: 0=DEBUG, 1=INFO, 3=ERROR, otherwise FATAL
    @param maxSizeDeviceHolder maximal size for deviceHolders
    @param maximalNumberOpenJobs maximal number of allowed open jobs
    @todo can runtime and maxSizeDeviceHolder be moved to the selector? Or first
          create the selector and then add the runtime to it? Better if we have
          multiple servers?
    """
    self._runtime = None
    self._selector = None
    self._maxSizeDeviceHolder = maxSizeDeviceHolder
    self._maximalNumberOpenJobs = maximalNumberOpenJobs
    self._initTask = None
    self._testMode = testMode
    self._currentDeviceNames = []
    self._errorProbability = int(errorProbability)

    # map the integer log level to a LogServer console level
    loglevel = LogServer.ERROR
    if int(logLevel) == 0:
        loglevel = LogServer.DEBUG
    elif int(logLevel) == 1:
        loglevel = LogServer.INFO
    elif int(logLevel) == 3:
        loglevel = LogServer.ERROR
    else:
        loglevel = LogServer.FATAL
    self.logger = LogServer(__name__, console_level=loglevel,
                            file_level=LogServer.DEBUG)
    self.logger.log().info("Workflow manager initiated")
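# Usage sketch (hedged): the enclosing workflow-manager class is not named in this
# snippet, so WorkflowManager is a placeholder; the argument values are examples.
manager = WorkflowManager(
    testMode=True,          # use the simulated DART client instead of a live server
    errorProbability=0,     # stored as int, see the constructor above
    logLevel=1,             # INFO on the console; the log file always gets DEBUG
    maxSizeDeviceHolder=10,
    maximalNumberOpenJobs=10,
)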
class Client: ## # Initializes the client # @param server the server addr, e.g., "https://127.0.0.1:7777" # @param client_key the key of the client for identification (unused atm) def __init__(self, server, client_key, probability_error=0, testmode=False): self.server = server self.key = client_key self.logger = LogServer(__name__) self.logger.log().info("dart client initialized") self.testmode = testmode if self.testmode: self.probability_error = probability_error self.worker_list = [] self.job_list = [] def getJob(self, jobName): """! Get the job instance by name - used in testmode """ self.logger.log().info("Client.getJob: " + jobName) for job in self.job_list: if job.name == jobName: return job self.logger.log().info("Client.getJob: " + jobName + " not found") return None ## # Stop the servers def stop_servers(self): """! testmode: simulate false request codes """ self.logger.log().info("Client.stop_servers") if self.testmode: if random.uniform(0, 1) < self.probability_error: raise Exception('response not ok') return r = requests.delete(self.server + "/server/", json={'key': self.key}, verify=False) if r.status_code != requests.codes.ok: raise Exception('response not ok') ## # Gets information about the servers # # The return type has the following structure # { # 'servers' : [ # {'host' : '<host_name>', 'port' : '<port_name>'} # ] # } def get_server_information(self): if self.testmode: raise NotImplementedError("not implemented yet!") else: r = requests.get(self.server + "/server/", json={'key': self.key}, verify=False) if r.status_code != requests.codes.ok: raise Exception('response not ok') return json.loads(r.content) ## # Adds workers # # @param hosts list of all the hosts # @param workers_per_host the amount of workers to add per host # @param name the name of the worker # @param capabilities list of the capabilities of the workers # @param shm_size shared memory size # @param ssh_options an object with the following attributes # { "username": "******", "port": "...", "public-key": "...", "private-key": "..." } def add_worker(self, hosts, workers_per_host, worker_name, capabilities, shm_size, ssh={}): self.logger.log().debug("Client.add_worker " + str(locals())) if self.testmode: #capabilities, hosts is a list, unzip it for host, capability in zip(hosts, capabilities): worker = Worker(self.key, host, workers_per_host, worker_name, capability, shm_size) self.worker_list.append(worker) if random.uniform(0, 1) < self.probability_error: self.logger.log().error( "Client.add_worker: could not add worker " + str(locals())) raise Exception('response not ok') else: r = requests.post(self.server + "/worker/", json={ 'key': self.key, 'name': worker_name, 'hosts': hosts, 'workers_per_host': workers_per_host, 'capabilities': capabilities, 'shm_size': shm_size, 'ssh': ssh }, verify=False) if r.status_code != requests.codes.ok: self.logger.log().error( "Client.add_worker: could not add worker " + str(locals())) raise Exception('response not ok') ## # Removes workers from the specified hosts # # @param hosts a list of hosts # @param ssh_options an object with the following attributes { "username": "******", "port": "...", "public-key": "...", "private-key": "..." 
} def remove_workers(self, hosts, ssh={}): self.logger.log().debug("Client.remove_workers " + str(locals())) if self.testmode: for worker in self.worker_list: if worker.hosts == hosts: self.worker_list.remove(worker) if random.uniform(0, 1) < self.probability_error: self.logger.log().error( "Client.remove_workers: could not remove workers " + str(locals())) raise Exception('response not ok') else: r = requests.delete(self.server + "/worker/", json={ 'key': self.key, 'hosts': hosts, 'ssh': ssh }, verify=False) if r.status_code != requests.codes.ok: self.logger.log().error( "Client.remove_workers: could not remove workers " + str(locals())) raise Exception('response not ok') ## # Get workers # # @param hosts list of all the hosts # @param workers_per_host the amount of workers to add per host # @param name the name of the worker # @param capabilities list of the capabilities of the workers # @param shm_size shared memory size def get_workers(self): self.logger.log().debug("Client.get_workers " + str(locals())) if self.testmode: list_worker = [] for worker in self.worker_list: dict_worker = {} dict_worker['name'] = worker.worker_name dict_worker['count'] = 1 dict_worker['capabilities'] = '' list_worker.append(dict_worker) self.logger.log().debug("Client.get_workers: " + str(list_worker)) return {'workers': list_worker} else: r = requests.get(self.server + "/worker/", json={'key': self.key}, verify=False) if r.status_code != requests.codes.ok: self.logger.log().error("Client.get_workers: " + str(locals())) raise Exception('response not ok') return json.loads(r.content) ## # Adds a job definition # # @param name the name of the job # @param module_path the path to the module on the clients # @param method the method from the module to execute def add_job(self, name, module_path, method): self.logger.log().debug("Client.add_job " + str(locals())) if self.testmode: job = Job(name, module_path, method) self.job_list.append(job) if random.uniform(0, 1) < self.probability_error: self.logger.log().error("Client.add_job: could not add job. " + str(locals())) raise Exception('response not ok') else: r = requests.post(self.server + "/job/", json={ 'key': self.key, 'name': name, 'module_path': module_path, 'method': method }, verify=False) if r.status_code != requests.codes.ok: self.logger.log().error("Client.add_job: could not add job. " + str(locals())) raise Exception('response not ok') ## # Adds tasks to a specific job # # @param jobName the name of the job # @param location_and_parameters [ { 'location' : '...', 'parameter' : ' ...'}, ...] list def add_tasks(self, jobName, location_and_parameters): self.logger.log().debug("Client.add_tasks " + str(locals())) if self.testmode: rightJob = self.getJob(jobName) for task in location_and_parameters: workerName = task['location'] taskParameter = task['parameter'] for worker in self.worker_list: if worker.worker_name == workerName: task = Task(worker, taskParameter) rightJob.task_list.append(task) if random.uniform(0, 1) < self.probability_error: self.logger.log().error( "Client.add_tasks: could not add tasks. " + str(locals())) raise Exception('response not ok') else: rightJob.start_computation() else: r = requests.post(self.server + "/job/" + jobName + "/tasks/", json={ 'key': self.key, 'location_and_parameters': location_and_parameters }, verify=False) if r.status_code != requests.codes.ok: self.logger.log().error( "Client.add_tasks: could not add tasks. 
" + str(locals())) raise Exception('response not ok') ## # Gets information about a job # # The return type has the following structure # { # 'job' : { # 'id' : '....', # 'status' : '...', # 'config' : { # 'python_home' : '...', # 'output_directory' : '...', # 'module' : '...', # 'is_module_path' : '...', # 'method' : '...' # } # } # } # # @param job the name of the job def get_job_info(self, jobName): self.logger.log().debug("Client.get_job_info " + str(locals())) if self.testmode: self.logger.log().error("Client.get_job_info: not implemented.") raise NotImplementedError("not implemented yet!") else: r = requests.get(self.server + "/job/" + jobName + "/", json={'key': self.key}, verify=False) if r.status_code != requests.codes.ok: self.logger.log().error("Client.get_job_info: " + str(locals())) raise Exception('response not ok') return json.loads(r.content) ## # Stops a job # # @param job the job name def stop_job(self, jobName): self.logger.log().debug("Client.stop_job " + str(locals())) if self.testmode: rightJob = self.getJob(jobName) self.job_list.remove(rightJob) if random.uniform(0, 1) < self.probability_error: self.logger.log().error("Client.stop_job failed. " + str(locals())) raise Exception('response not ok') else: r = requests.delete(self.server + "/job/" + jobName + "/", json={'key': self.key}, verify=False) if r.status_code != requests.codes.ok: self.logger.log().error("Client.stop_job failed. " + str(locals())) raise Exception('response not ok') ## # Gets the job status of a job # # @return the job status def get_job_status(self, jobName): self.logger.log().debug("Client.get_job_status " + str(locals())) if self.testmode: job_exists = False for job in self.job_list: if job.name == jobName: job_exists = True if job_exists == False: return job_status(0) if random.uniform(0, 1) < self.probability_error: self.logger.log().error("Client.get_job_status " + str(locals())) raise Exception('response not ok') else: return job_status(1) else: r = requests.get(self.server + "/job/" + jobName + "/status/", json={'key': self.key}, verify=False) if r.status_code == requests.codes.not_found: return job_status.unknown if r.status_code != requests.codes.ok: self.logger.log().error("Client.get_job_status " + str(locals())) raise Exception('response not ok') response = json.loads(r.content) return job_status(int(response['job']['status'])) ## # Gets results of the specified job # # Gets at most 'amount' different job results. Note that # this function does not delete the results from the # server. Hence, successive calls will return the same # results. # # The return type has the following structure # { # 'results' : [ # { # 'id' : '...', # 'job' : '...', # 'worker' : '...', # 'start_time' : '...', # 'duration' : '...', # 'success' : '...' or 'error' : '...' # }, # { # ... # }, # ... # ], # 'job' : { 'id' : '...', 'status' : '...'} # } # # @param job the job name # @param amount the maximal amounts of jobs to get # @param worker_regex a regex that the worker of the result has to match. Empty regex matches everything. 
def get_job_results(self, jobName, amount, worker_regex=""): self.logger.log().debug("Client.get_job_results " + str(locals())) if self.testmode: rightJob = self.getJob(jobName) if random.uniform(0, 1) < self.probability_error: self.logger.log().error("Client.get_job_results " + str(locals())) raise Exception('response not ok') if rightJob: return rightJob.resultDict else: self.logger.log().info( "Client.get_job_results: no such job running on server: " + jobName) return {'results': [], 'job': {}} else: r = requests.get(self.server + "/job/" + jobName + "/results/", json={ 'key': self.key, 'amount': amount, 'worker_regex': worker_regex }, verify=False) if r.status_code != requests.codes.ok: self.logger.log().error("Client.get_job_results " + str(locals())) raise Exception('response not ok') return json.loads(r.content) ## # Removes a job result from the server # # @param job the name of the job # @param result the id of the result def delete_job_result(self, jobName, resultID): self.logger.log().debug("Client.delete_job_result " + str(locals())) if self.testmode: rightJob = self.getJob(jobName) rightJob.delete(resultID) if random.uniform(0, 1) < self.probability_error: self.logger.log().error("Client.delete_job_result " + str(locals())) raise Exception('response not ok') else: r = requests.delete(self.server + "/job/" + jobName + "/results/" + resultID + "/", json={'key': self.key}, verify=False) if r.status_code != requests.codes.ok: self.logger.log().error("Client.delete_job_result " + str(locals())) raise Exception('response not ok')
class DefaultTask(TaskBase): """! DefaultTask is a subclass of TaskBase, and therefore a specific category of tasks. As default task, we define to execute a given task with the same parameter settings (training, etc) on all available devices (possibly with a max number) that fulfill the (optional) hardware requirements. """ def __init__(self, taskName=None, parameterlists={}, model=None, hardwareRequirements={}, filePath=None, configFile=None, numDevices=-1): """! Instantiates a DefaultTask. Information about the configuration have to be either provided in the function call or via a configFile. @param taskName (optional) Name of the task to be stored in the LogServer @param parameterlists List of parameters to be used in training @param model (optional) Model to be executed @param hardwareRequirements (optional) mandatory hardware requirements @param filePath the path to the file to be called for execution on the device @param configFile (optional) the configuration file of this task @param numDevices the total amount of devices on which the task shall to be executed """ self._parameterlists = parameterlists self._model = model self._hardwareRequirements = hardwareRequirements self._numDevices = numDevices self._configFile = configFile self._filePath = filePath self._taskName = taskName self.logger = LogServer(__name__) self.logger.log().info('DefaultTask initiated') self.checkConfig() @property def numDevices(self): return self._numDevices @property def parameterlists(self): """! property: parameterlists. Implements the getter. @todo: define the format of parameter lists """ return self._parameterlists @parameterlists.setter def parameterlists(self, new_parameterlist): """! property: parameterlists. Implements the setter. @param new_parameterlist the new list of parameters """ self._parameterlists = new_parameterlist @property def taskName(self): """! property: taskName. Implements the getter. The taskName identifies the task (in combination with a timestamp) in the LogServer. """ return self._taskName @taskName.setter def taskName(self, new_taskName): """! property: taskName. Implements the setter. @param new_taskName the new name of the task """ self._taskName = new_taskName @property def hardwareRequirements(self): """! property: hardwareRequirements. Implements the getter. Hardware requirements define the (optional) mandatory requirements for the devices. @todo: define the format of hardware requirements """ return self._hardwareRequirements @hardwareRequirements.setter def hardwareRequirements(self, new_hardwareRequirements): """! property: hardwareRequirements. Implements the setter. @param new_hardwareRequirements the new mandatory hardware requirements """ self._hardwareRequirements = new_hardwareRequirements @property def model(self): """! property: model. Implements the getter. A model is an optional parameter for a task. In case a model is provided, the model is sent to the devices where it will be trained/used for inference. In case no model is provided, the task executes some given functions on the device. """ return self._model @model.setter def model(self, new_model): """! property: model. Implements the setter. @param new_model the new model to be used """ self._model = new_model @property def filePath(self): """! property: filePath. Implements the getter. A model is an optional parameter for a task. In case a model is provided, the model is sent to the devices where it will be trained/used for inference. 
In case no model is provided, the task executes some given functions on the device. """ return self._filePath @filePath.setter def filePath(self, new_filePath): """! property: filePath. Implements the setter. @param new_filePath the new file path """ self._filePath = new_filePath @property def configFile(self): """! property: configFile. Implements the getter. The configFile is an optional file provided at instantiation to describe the task configuration, i.e. it contains the parameters for training, (optionally) the model to be executed, which hardwareRequirements are mandatory, the name of the task, as well as the path to the python script to be executed. @todo define the structure of the configFile """ return self._configFile def loadConfigFile(self): """! @todo implement this function """ return def writeConfig(self, taskName, parameterlists, hardwareRequirements, model, filePath): """! @todo implement Writes the configuration of the task to disk. @param taskName Name of the task @param parameterlists List of parameters @param hardwareRequirements mandatory hardware requirements @param model model to be trained @param filePath path to the file to be executed on the device """ pass def checkConfig(self): """! This method ensures that all necessary parameters for the given task are provided: either by given parameters in the instantiation of the subclass or by a given filepath to a configuration file. @todo define configfile """ config = {} valid = False if self._parameterlists is None: if self._configFile is None: raise ValueError("No configuration provided") else: config = self.loadConfigFile() else: config['parameters'] = self._parameterlists config['hardwareRequirements'] = self._hardwareRequirements config['model'] = self._model config['taskName'] = self._taskName config['filePath'] = self._filePath # check the configuration if self._parameterlists is None: self.writeConfig(config['taskName'], config['parameters'], config['hardwareRequirements'], config['model'], config['filePath']) self.logger.log().info("check config: " + str(locals())) return valid
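# Usage sketch (hedged): a DefaultTask that runs the same script with the same
# parameters on up to four devices; the file path and parameters are made up,
# and numDevices=-1 appears to mean "no explicit limit".
task = DefaultTask(
    taskName="round_one_training",
    parameterlists={"epochs": 5, "learning_rate": 0.01},
    hardwareRequirements={},          # no mandatory constraints
    filePath="tasks/train.py",        # executed on every selected device
    numDevices=4,
)
print(task.numDevices)                # 4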
class DeviceSingle(AbstractDeviceBase): """! DeviceSingle is the interface to the real pyhsical device. @param name name given by the user or runtime @param ip_address ip address of physical device @param physical_name name by which the device connted hisself to the server @param hardwareConfig hardware properties of physical device @param openTaskDict dictionary of tasks, which are/will be running on the device format: {"task_name": {'param1': value , 'param2': value}} @param finishedTaskDict dictionary of tasks, which have already a result format: {'duration': value, 'result': {'result_0': value, 'result_1': value}} @param dartRuntime runtime for connection to physical device @param initialized boolean , if device already has received the init task """ def __init__( self , name , ipAdress , port , dartRuntime = None , physicalName = None , hardwareConfig = None , taskDict = {} , initTask = None ): self.name = name if physicalName == None: self.phyiscalName = self.name self.ipAdress = ipAdress self.port = port self._hardwareConfig = hardwareConfig self._openTaskDict = taskDict self._finishedTaskDict = {} self._dartRuntime = dartRuntime self._initTask = initTask if initTask is not None: self._initialized = False else: self._initialized = True self.logger = LogServer(__name__) self.logger.log().info("DeviceSingle " + name + " instantiated") def __str__(self): return self.name @property def hardwareConfig(self): """! property: hardwareConfig. Implements the getter """ return self._hardwareConfig @hardwareConfig.setter def hardwareConfig(self, newHardwareConfig): """! property: hardwareConfig. Implements the setter @param newHardwareConfig the new hardware config """ self._hardwareConfig = newHardwareConfig return @property def openTaskDict(self): """! property: openTaskDict. Implements the getter """ return self._openTaskDict @openTaskDict.setter def openTaskDict(self, newDict): """! property: openTaskDict. Implements the setter @param newDict the new open task dict """ self._openTaskDict = newDict @property def finishedTaskDict(self): """! property: finishedTaskDict. Implements the getter """ return self._finishedTaskDict @finishedTaskDict.setter def finishedTaskDict(self, newDict): """! property: finishedTaskDict. Implements the setter @param newDict the new finished task dict """ self._finishedTaskDict = newDict @property def dartRuntime(self): """! property: dartRuntime. Implements the getter """ return self._dartRuntime @dartRuntime.setter def dartRuntime(self, newRuntime): """! property: dartRuntime. Implements the setter @param newRuntime the new runtime """ self._dartRuntime = newRuntime @property def initTask(self): """! property: initTask. Implements the getter """ return self._initTask @initTask.setter def initTask(self, newInitTask): """! property: newInitTask. Implements the setter. Update all devices directly with new init task @param newInitTask instance of class task @todo: check if new init task result is returned """ self._initTask = newInitTask @property def initialized(self): """! property: initialized. Implements the getter """ if self._initialized == False: initTaskName = self.initTask.taskName if self.has_taskResult(initTaskName): init_result = self.get_taskResult(initTaskName) if init_result.resultList[0] is None: self._initialized = True return self._initialized @initialized.setter def initialized(self, boolInit): """! property: initialized. Implements the setter @param boolInt a boolean """ self._initialized = boolInit def isOpenTask(self, taskName): """! 
Check if device has an open task with such a name. @param taskName string with task name @return boolean """ if taskName in self._openTaskDict.keys(): return True else: return False def removeOpenTask(self, taskName): """! Check if device has an open task with such a name and remove it. In the other case through a KeyError @param taskName string with task name """ if taskName in self._openTaskDict.keys(): del self._openTaskDict[taskName] else: raise KeyError return def get_number_openTasks(self): """! Determine the number of open tasks of the device. @return int """ return len(self.openTaskDict) def is_online(self): """! Check if the device is currently reachable. @return boolean @todo implement the check """ return True def getOpenTaskParameter(self, taskName): """! Return the parameter of an open task. Raise an error when a task with such a name is not in openTaskDict @param taskName string with task name @return dict with format {'param1': value , 'param2': value} """ if taskName in self.openTaskDict.keys(): return self.openTaskDict[taskName] else: raise KeyError("Open task with name", taskName, "doesn't exist!") def _getFinishedTaskResult(self, taskName): """! Return the parameter of an already finished task. Raise an error when a task with such a name is not in finishedTaskDict @param taskName string with task name @return dict with format {'duration': value, 'result': {'result_0': value, 'result_1': value}} """ if taskName in self.finishedTaskDict.keys(): return self.finishedTaskDict[taskName] else: raise KeyError("Finished task with name", taskName, "doesn't exist!") def getLog(self, taskName): """! Get the log of the device results for this task. In the moment we get these results form the finishedTaskDict. In the future there can be a more advanced way like a database. @param task instance of class task """ if taskName in self._finishedTaskDict.keys(): return self._getFinishedTaskResult(taskName) else: return False def get_taskResult(self, taskName): """! Check if the taskName is known from current or old tasks. If true we check if the result is already logged in finishedTaskDict. If not get the result from runtime and check with has_taskResult is the result is only a place holder for an incoming result. If not remove the task from the dict of open task @param taskName name of the task @return instance of taskResult """ if self.hasTask(taskName): if taskName in self._finishedTaskDict.keys(): return self._getFinishedTaskResult(taskName) else: result, resultID = self.dartRuntime.get_TaskResult(taskName, self.name) taskResult = TaskResult( self.name , result["duration"] , result["result"] ) if self.has_taskResult(taskName): self.removeOpenTask(taskName) self._addFinishedTask(taskName, taskResult) self.dartRuntime.remove_result_from_server(taskName, resultID) return taskResult else: raise KeyError("No task with name " + taskName) def startTask(self, task): """! Before starting a task, the user must it add to the device. To start a task the runtime must have already a job with the taskName. Add this job if necessary. Afterwards broadcast a list with only one device entry to runtime. 
@param task instance of task """ taskName = task.taskName if not self.hasTask(taskName): raise ValueError("Add the task >>" + taskName + "<< to device >>" + self.name + "<< before starting the task!") #return 0 means unknown if self.dartRuntime.get_job_status(taskName) == 0: self.dartRuntime.add_job( taskName , task.filePath , task.executeFunction ) self.dartRuntime.broadcastTaskToDevices( taskName , [self.name] , [self.getOpenTaskParameter(taskName)] ) def has_taskResult(self, taskName): """! Check the taskResult. If the result has the key duration with value None than the device hasn't anything returned yet. @param taskName name of the task @return boolean True or False @todo atm hacky because of REST API. To check if result is there, we must get it and check the components of the result. """ if self.hasTask(taskName): if taskName in self._finishedTaskDict.keys(): return True else: result, resultID = self.dartRuntime.get_TaskResult(taskName, self.name) if result['duration'] == None: return False else: return True else: raise KeyError("No task with name " + taskName) def addTask(self, taskName, taskParameter): """! Add a new open task with name and parameters to openTaskDict @param taskName string of task name @param taskParamerer dict in format {'param1': value , 'param2': value} """ if taskName in self._openTaskDict.keys(): raise KeyError(taskName + " already in openTaskDict!") tasks = self._openTaskDict tasks[taskName] = taskParameter self.openTaskDict = tasks def _addFinishedTask(self, taskName, taskResult): """! Add a new finished task with name and results to finishedTaskDict @param taskName string of task name @param taskResult dict with format {'duration': value , 'result': {'result_0': value, 'result_1': value} } """ if taskName in self._finishedTaskDict.keys(): raise KeyError(taskName + " already in finishedTaskDict!") tasks = self._finishedTaskDict tasks[taskName] = taskResult self.finishedTaskDict = tasks def hasTask(self, taskName): """! Check if the device has a open task with such a name @param taskName name of task @return boolean True/False """ if taskName in self._openTaskDict.keys() or taskName in self._finishedTaskDict.keys(): return True else: return False
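# Usage sketch (hedged): local bookkeeping of open tasks on a single device.
# No runtime is attached, so only the methods that do not talk to the DART server
# are exercised; name, address and port are placeholders.
device = DeviceSingle(name="device_one", ipAdress="192.168.0.17", port=2883)

device.addTask("task_one", {"param1": 0, "param2": 1})
print(device.isOpenTask("task_one"))               # True
print(device.getOpenTaskParameter("task_one"))     # {'param1': 0, 'param2': 1}
print(device.get_number_openTasks())               # 1

device.removeOpenTask("task_one")
print(device.hasTask("task_one"))                  # False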
class SpecificParameterTask(TaskBase):
    """!
    SpecificParameterTask is a subclass of TaskBase. Its parameterlists dict is keyed
    by device name, so each listed device receives its own parameter set; the keys
    also define the devices the task addresses (see specificDevices).
    """

    def __init__(self, taskName, parameterlists={}, model=None,
                 hardwareRequirements={}, filePath=None, configFile=None):
        self._parameterlists = parameterlists
        self._model = model
        self._hardwareRequirements = hardwareRequirements
        self._configFile = configFile
        self._filePath = filePath
        self._taskName = taskName
        self.logger = LogServer(__name__)
        self.logger.log().info('SpecificParameterTask initiated')
        self.checkConfig()

    @property
    def parameterlists(self):
        return self._parameterlists

    @parameterlists.setter
    def parameterlists(self, new_parameterlist):
        self._parameterlists = new_parameterlist

    @property
    def model(self):
        return self._model

    @model.setter
    def model(self, new_model):
        self._model = new_model

    @property
    def specificDevices(self):
        return list(self._parameterlists.keys())

    @property
    def configFile(self):
        return self._configFile

    def writeConfig(self, taskName, parameterlists, hardwareRequirements, model, filePath):
        # signature aligned with the call in checkConfig (and with DefaultTask.writeConfig)
        raise NotImplementedError("not implemented yet")

    def checkConfig(self):
        config = {}
        valid = False
        if self._parameterlists is None:
            if self._configFile is None:
                raise ValueError("No configuration provided")
            else:
                config = self.loadConfigFile()
        else:
            config['parameters'] = self._parameterlists
            config['hardwareRequirements'] = self._hardwareRequirements
            config['model'] = self._model
            config['taskName'] = self._taskName
            config['filePath'] = self._filePath
        # check the configuration
        # todo: define configfile
        if self._parameterlists is None:
            self.writeConfig(config['taskName'], config['parameters'],
                             config['hardwareRequirements'], config['model'],
                             config['filePath'])
        return valid

    def loadConfigFile(self):
        raise NotImplementedError("not implemented yet")
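# Usage sketch (hedged): per-device parameters keyed by device name; the keys
# double as the list of target devices. Names, values and the file path are placeholders.
task = SpecificParameterTask(
    taskName="personalised_training",
    parameterlists={
        "device_one": {"epochs": 5, "learning_rate": 0.01},
        "device_two": {"epochs": 3, "learning_rate": 0.05},
    },
    filePath="tasks/train.py",
)
print(task.specificDevices)    # ['device_one', 'device_two']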
class Selector(): """! Selector has the knowledge about all connected devices. The Selector is responsible to shedule devices to the device holder based on (optional) hardware requirements. """ """! @param _maxNumDeviceHolder maximal number of deviceHolders per aggregator/childaggregator @param _maxNumChildAggregators maximal number of allowed childAggregators """ _maxNumDeviceHolder = 2 _maxNumChildAggregators = 2 def __init__(self, runtime=None, maxSizeDeviceHolder=-1, initTask=None): """! Instantiate a Selector (singleton). @param runtime The runtime for the connections to physical devices @param maxSizeDeviceHolder Maximal amount of devices in a device holder @param devices List of connected devices @param aggregators List of aggregators @param device_holders List of device_holders @param taskQueue List of tasks in queue @param initTask task which must be executed at each device firstly """ self._runtime = runtime self._maxSizeDeviceHolder = maxSizeDeviceHolder self._devices = [] self._aggregators = [] self._device_holders = [] self._taskQueue = [] self._initTask = initTask self.logger = LogServer(__name__) self.logger.log().info('Selector initiated') @property def runtime(self): """! property: runtime. Implements the getter """ return self._runtime @property def devices(self): """! property: devices. Implements the getter for the registeredDevices. Get the registeredDevices directly from runtime. For new devices send initTask directly out. """ self._devices = self._runtime.registeredDevices if self._initTask: self.send_initTask_to_newDevices(self._devices) return self._devices @property def deviceNames(self): """! property: name of devices. Implements the getter @todo: is this property necessary? """ self.logger.log().debug("selector.deviceNames: " + str([device.name for device in self.devices])) return [device.name for device in self.devices] @property def device_hardwareConfigs(self): """! property: device_hardwareConfigs. Implements the getter @todo: is this property necessary ? """ self.logger.log().debug( "selector.device_hardwareConfigs: " + str([device.hardwareConfig for device in self.devices])) return [device.hardwareConfig for device in self.devices] @property def device_holders(self): """! property: device_holders. Implements the getter """ return self._device_holders @device_holders.setter def device_holders(self, newDeviceHolders): """! property: device_holders. Implements the setter @param newDevice_holders the new list of device_holders """ self.logger.log().debug("selector. set new deviceholder") self._device_holders = newDeviceHolders @property def initTask(self): """! property: initTask. Implements the getter """ return self._initTask @initTask.setter def initTask(self, newInitTask): """! property: newInitTask. Implements the setter. Update all devices directly with new init task @param newInitTask instance of class task @todo: check if new init task result is returned """ self.logger.log().debug("selector. set new initTask") if not isinstance(newInitTask, InitTask): raise ValueError("object is no instance of InitTask") self._initTask = newInitTask self.send_initTask_to_newDevices(self.devices) @property def aggregators(self): """! property: aggregators. Implements the getter """ return self._aggregators @aggregators.setter def aggregators(self, newAggregators): """! property: aggregators. Implements the setter @param newAggregators the new list of aggregators """ self._aggregators = newAggregators @property def maximal_size_device_holder(self): """! 
property: maxSizeDeviceHolder. Implements the getter """ self.logger.log().debug("selector. _maxSizeDeviceHolder " + str(self._maxSizeDeviceHolder)) return self._maxSizeDeviceHolder @maximal_size_device_holder.setter def maximal_size_device_holder(self, newSize): """! property: maxSizeDeviceHolder. Implements the setter @param newSize new maximal number of allowd device holders. """ self.logger.log().debug("selector. maximal_size_device_holder " + str(newSize)) self._maxSizeDeviceHolder = newSize #-------------- functions for device related aspects---------------------- def send_initTask_to_newDevices(self, deviceList): """! In the case that a device has connected on their own we must send the init task to them before sending another tasks. """ self.logger.log().debug( "selector. send_initTask_to_newDevices. deviceList " + str(deviceList)) initializationDevices = [] for device in deviceList: if device.hasTask(self.initTask.taskName) == False: initializationDevices.append(device) number_not_inializedDevices = len(initializationDevices) if number_not_inializedDevices > 0: deviceHolder = DeviceHolder(maxSize=number_not_inializedDevices) for device in initializationDevices: deviceHolder.addDevice(device, self.initTask.taskName, self.initTask.parameterDict) deviceHolder.broadcastTask(self.initTask) def addSingleDevice(self, deviceName, ipAdress, port, hardwareConfig): """! Add a single device to runtime @param deviceName string with device name @param ipAdress string with IP adress @param port int with device port @param hardwareConfig dict with devices hardware config """ self.logger.log().debug('addSingleDevice:' + "deviceName " + deviceName + ",ipAdress " + str(ipAdress) + ",port " + str(port) + ",hardwareConfig " + str(hardwareConfig)) initTask = self.initTask self.runtime.generate_and_add_SingleDevice(deviceName, ipAdress, port, hardwareConfig, initTask) def removeDevice(self, deviceName): """! Remove a device from the runtime. A possible reason for that could be that this device is corrupted. @param deviceName string with Name of device """ if deviceName in self.deviceNames: self.runtime.removeDevice(deviceName) else: self.logger.log().error("There is no device with name " + deviceName) raise ValueError("There is no device with name " + deviceName) def requestTaskAcceptance(self, task): """! Decide if enough devices fullfil the hardware requirements of the incoming task. In a first step the selector determines the devices, which are currently available for computation. In a second step the possible devices are checked from the task, if the fullfill the task criteria. Based on this criteria accept or reject the task. @param task instance of task @return task_acceptance boolean """ initializedDevices = [] for device in self.devices: if device.initialized: initializedDevices.append(device) task_acceptance = task.checkConstraints(initializedDevices) return task_acceptance #------------functions for aggregator related aspects -------- def get_aggregator_of_task(self, taskName): """! Iterate over all known aggregators to get the aggregator of the specific task. Raise a ValueError if the aggregator doesn't exist. @param taskName string with task name @return aggregator instance of Aggregator """ self.logger.log().debug( "Selector. 
get_aggregator_of_task: search aggregator for task " + taskName) for aggregator in self.aggregators: if aggregator.task.taskName == taskName: self.logger.log().debug( "Selector.get_aggregator_of_task: aggregator for " + taskName + " identified") return aggregator raise ValueError("There is no aggregator that handles task " + taskName) def addAggregator(self, newAggregator): """! Add a new aggregator to the aggregtor list. @param newAggregator instance of aggregator """ aggregators = self.aggregators self.logger.log().debug("Selector.addAggregator: old " + str(len(self.aggregators))) aggregators = aggregators + [newAggregator] self.aggregators = aggregators self.logger.log().debug("Selector.addAggregator: new " + str(len(self.aggregators))) def deleteAggregatorAndTask(self, taskName): """! Get the aggregator of the task, stop the task on the DART-Server and remove the task from the open task dict on each device. Afterwards the aggregator is deleted and new tasks are uploaded to the DART-Server @param taskName string with task name """ try: aggregator = self.get_aggregator_of_task(taskName) except ValueError: self.logger.log().error( "There is no aggregator that handles task " + taskName) aggregator.stopTask() self.deleteAggregator(aggregator) self.addTasks2Runtime() def deleteAggregator(self, aggregator): """! Remove a aggregator from the aggregator list and delete the aggregator. @param aggregator instance of aggregator """ if aggregator in self.aggregators: self._aggregators.remove(aggregator) del aggregator else: raise ValueError("aggregator is not in selector") def instantiateAggregator(self, task): """! Instantiate DeviceAggregator. Check in create_needed_childAggregators if the aggregator has enough capacity for the amount of Device, if not create childAggregators recursively. Add task and devices to aggregator @param numDevices amount of devices """ choosen_devices = self.getDevicesForAggregator(task) aggregator = DeviceAggregator( devices=choosen_devices, task=task, maxSizeDeviceHolder=self._maxSizeDeviceHolder, maxNumDeviceHolder=self._maxNumDeviceHolder, maxNumChildAggregators=self._maxNumChildAggregators, logServer=None) self.logger.log().info("max # devices in aggregator: " + str(aggregator.get_max_number_devices())) self.addAggregator(aggregator) return aggregator def getDevicesForAggregator(self, task): """! Check which devices fullfill the requirements of the task and if they are available. @param task instance of task """ #TODO: only implemented for specificDeviceTask if not self.devices: raise ValueError("selector: no devices at all") suitable_devices = [] for device in self.devices: # add task to device if right if not nothing #TODO check if all specificDevices are in self.devices if device.name in task.specificDevices: suitable_devices.append(device) # return the list if len(suitable_devices) == 0: raise ValueError("selector: no devices") else: return suitable_devices #------------functions related to queue aspect--------------- def taskInQueue(self, taskName): """! Check if task is in the task queue from selector. @param taskName string with task name @return booleanQueue boolean """ booleanQueue = False for task in self._taskQueue: if task.taskName == taskName: booleanQueue = True break return booleanQueue def deleteTaskInQueue(self, taskName): """! Delete task from selectors task queue. Afterwards check if the DART-Server has capabilities for new tasks. @param taskName string with task name. 
""" if self.taskInQueue(taskName): for task in self._taskQueue: if task.taskName == taskName: self._taskQueue.remove(task) self.addTasks2Runtime() def addTask2Queue(self, task, priority=False): """! Add a new task to the queue. Task is already checked for feasibility @param task the task to be scheduled """ self.logger.log().info("selector. add task to queue") if task in self._taskQueue: self.logger.log().error("selector. task already scheduled") raise KeyError("Task already scheduled") # add task to queue if priority: self._taskQueue.insert(0, task) else: self._taskQueue.append(task) self.addTasks2Runtime() def addTasks2Runtime(self): """! Check if the DART-Server has capabilities to schedule new task. Iterate over all task in the queue and check which task can be executed at the moment. If yes instantiate aggregator for task and remove task from queue """ capacitynewTasks = self.runtime.get_Capacity_for_newTasks() for task in self._taskQueue: if capacitynewTasks <= 0: break if self.requestTaskAcceptance(task): aggregator = self.instantiateAggregator(task) aggregator.sendTask() capacitynewTasks -= 1 self.deleteTaskInQueue(task.taskName)
class DartRuntime: def __init__( self , server , client_key , testMode , errorProbability , maximal_number_devices = -1 , maximalNumberOpenJobs = 10 , **kwargs ): """! @param maximal_number_devices maximal number of devices for this runtime @param maximalNumberOpenJobs maximal number of allowed maximal jobs @param registeredDevices dict with key device name and value instance of class device @param messageTranslator translator between Python and DART format @param selector instance of selector @param server the server addr, e.g., "https://127.0.0.1:7777" @param client_key the key of the client for identification (unused atm) @param counterJobs int number of open jobs on server """ if testMode: self._restAPIClient = Client(server, client_key, probability_error = errorProbability, testmode = True) else: self._restAPIClient = Client(server, client_key) self._maximal_number_devices = maximal_number_devices self._maximalNumberOpenJobs = maximalNumberOpenJobs self._registeredDevices = {} self._messageTranslator = MessageTranslator() self._selector = None self._counterJobs = 0 #in our case Jobs = Task self.logger = LogServer(__name__) self.logger.log().info("DartRuntime initiated") @property def restAPIClient(self): """! property: runtime. Implements the getter """ return self._restAPIClient @property def registeredDevices(self): """! property: registeredDevices. Implements the getter """ self.updateRegisteredDevices() return list(self._registeredDevices.values()) @property def registeredDevicesbyName(self): """! property: registeredDevices. Implements the getter """ self.updateRegisteredDevices() return list(self._registeredDevices.keys()) @registeredDevices.setter def registeredDevices(self, newRegisteredDevices): """! property: registeredDevices. Implements the setter @param newRegisteredDevices the new list of registered devices """ self._registeredDevices = newRegisteredDevices @property def selector(self): """! property: registeredDevices. Implements the getter """ return self._selector @property def maximal_number_devices(self): """! property: maximal_number_devices. Implements the getter """ return self._maximal_number_devices @maximal_number_devices.setter def maximal_number_devices(self, new_maximal_number_devices): """! property: maximal_number_devices. Implements the setter @param new_maximal_number_devices the new maximal number devices """ self._maximal_number_devices = new_maximal_number_devices @property def maximalNumberOpenJobs(self): """! property: maximalNumberOpenJobs. Implements the getter """ return self._maximalNumberOpenJobs @property def counterJobs(self): """! property: counterJobs. Implements the getter """ return self._counterJobs def updateRegisteredDevices(self): """! Fetch from the DART-server the currently connected devices. If needed, create new virtual devices or delete them. 
""" oldRegisteredDevicesbyName = deepcopy(list(self._registeredDevices.keys())) newRegisteredDevices = self.restAPIClient.get_workers() newRegisteredDevices = newRegisteredDevices["workers"] newRegisteredDevicesbyName = [] for device in newRegisteredDevices: newRegisteredDevicesbyName.append(device["name"]) for newDevice in newRegisteredDevicesbyName: if newDevice not in oldRegisteredDevicesbyName: #add new Devices initTask = None if self.selector is not None: initTask = self.selector.initTask device = DeviceSingle( name = newDevice , ipAdress = None , port = None , dartRuntime = self , physicalName = None , hardwareConfig = {} , taskDict = {} , initTask = initTask ) self._registeredDevices[newDevice] = device for oldDevice in oldRegisteredDevicesbyName: if oldDevice not in newRegisteredDevicesbyName: del self._registeredDevices[oldDevice] def getServerInformation(self): """! Gets information about the servers. The return type has the following structure {'servers': [{'host': '<host_name>', 'port': '<port_name>'}]} """ return self.restAPIClient.get_server_information() def get_Capacity_for_newTasks(self): """! The server have a maximal amount of simultaneously open jobs. Determine the difference between the maximal and curent amount. """ return self.maximalNumberOpenJobs - self._counterJobs def get_TaskStatus(self, taskName): """! Get the status of the job from runtime. In our case is job the same as a task. @param taskName string with name of the task @return int 0 (unknown), 1 (running), 2 (stopped) """ return self.restAPIClient.get_job_status(taskName) def remove_result_from_server(self, taskName, resultID): """! Remove a result with his ID from a job. @param taskName string with name of the task @resultID unique identifier for the task result of a specifif device """ self.restAPIClient.delete_job_result(taskName, resultID) def get_TaskResult(self, taskName, deviceName): """! Send a regex pattern with the device name to the server and get maximal so many results as devives are available. Extract the result and ID from the server message like { 'results': [{ 'id': '4d045be3-fb57-44f4-902f-b93abab3d830', 'job': 'task_one' , 'worker': 'device_one-PSL188-1', 'start_time': '1611135657000' , 'duration': '6.07332611', 'success': 'gASVHwAAAAAAAAB9lCiMCHJlc3VsdF8wlEsBjAhyZXN1bHRfMZRLCnUu\n' } ] , 'job': {'id': 'task_one', 'status': '1'} } with the messageTranslator. @param taskName string with task name @param deviceName string with devie name @return resultDevice dict with format {'duration': '6.07465839', 'result': {'result_0': 1, 'result_1': 10}} @return resultID string with format '5ad55670-3ad4-4bb9-99cc-2b82b85bd8c2' """ maxNumberResults = len(self.registeredDevices) taskResult = self.restAPIClient.get_job_results(taskName, maxNumberResults, deviceName + ".*") resultDevice, resultID = self._messageTranslator.convertDart2Python(taskResult, deviceName) self.logger.log().debug("DartRuntime.get_TaskResult: " + str(locals())) return resultDevice, resultID def instantiateSelector(self, max_size_deviceHolder): """! Create the Selector after starting the runtime @param max_size_deviceHolder maximal amount of device per deviceholder @param self._selector instance of class selector """ self._selector = Selector(self, max_size_deviceHolder) return self._selector def add_SingleDevice( self, device): """! Add an already existing single device (one worker per device) to runtime. send directly the initTask to device. 
@param device device to be registered """ self.logger.log().debug("dartRuntime.add_SingleDevice " + str(locals())) if device.name in self._registeredDevices.keys(): self.logger.log().error("device name already in list: " + device.name) raise KeyError("device name already in list") self._registeredDevices[device.name] = device #add workers is blocking! self.restAPIClient.add_worker( [device.ipAdress], 1, device.name, [""],0,{}) if device.initTask is not None: device.startTask(device.initTask) #TODO Luca: where to specify port ?! self.logger.log().info("dartRuntime.add_SingleDevice " + device.name + " registered") def generate_and_add_SingleDevice( self , deviceName , deviceIp , port , hardwareConfig , initTask ): """! Add a single device (one worker per device) to runtime. Therefore also create an instane of DeviceSingle. Afterwards send directly the initTask to device. @param deviceName string with device name @param deviceIp ip address of real physical device @param port port of real physical device @param hardware_config hardware properties like processor type, memory connection bandwith and so on @todo: specify hardwareConfig @param initTask instance of class initTask """ self.logger.log().debug("dartRuntime.generate_and_add_SingleDevice " + str(locals())) if deviceName in self._registeredDevices.keys(): self.logger.log().error("device name already in list: " + deviceName) raise KeyError("device name already in list") device = DeviceSingle( name = deviceName , ipAdress = deviceIp , port = port , dartRuntime = self , physicalName = None , hardwareConfig = hardwareConfig , taskDict = {} , initTask = initTask ) self._registeredDevices[deviceName] = device #add workers is blocking! self.restAPIClient.add_worker( [deviceIp], 1, deviceName, [""],0,{}) if initTask is not None: device.addTask(initTask.taskName, initTask.parameterDict) device.startTask(initTask) #TODO Luca: where to specify port ?! self.logger.log().info("dartRuntime.generate_and_add_SingleDevice " + deviceName + " registered") def removeDevice(self, deviceName): """! Remove device from runtime and registeredDevice list. @param deviceName string with name of device @param device instance of device @todo good idea to destroy device ? """ if deviceName not in self.registeredDevicesbyName: self.logger.log().error("device name not in list: " + deviceName) raise KeyError("device name is not in list") device = self.getDevice(deviceName) self.restAPIClient.remove_workers(device.ipAdress) del device #TODO: good idea to destroy device del self._registeredDevices[deviceName] def getDevice(self, deviceName): """! Get the instance of device by name. @param deviceName string with deviceName @return instance of device """ if deviceName not in self.registeredDevicesbyName: self.logger.log().error("device name not in list: " + deviceName) raise KeyError("device name is not in list") return self._registeredDevices[deviceName] def get_job_status(self, jobName): """! The job status can have the values unknown, stopped or running. The status is translated into an int. @param jobName name of job. Equal to taskName @return int 0,1 or 2. @todo atm hacky. """ jobStatus = self.restAPIClient.get_job_status(jobName) #TODO: ask Luca why return is job_status.unknown if jobStatus == job_status.unknown: return 0 elif jobStatus == job_status.stopped: return 2 else: return 1 def add_job(self, name, module_path, method): """! 
In our implementation we start a separate job for every task to have a clear separation between different federated learning rounds. @param name string with job/task name @param module_path relative path to the file, based on the default path in worker.json @param method method which should be executed in the file """ self.restAPIClient.add_job(name, module_path, method) self._counterJobs += 1 self.logger.log().info("added job: " + module_path + " " + str(method) + " new #jobs: " + str(self._counterJobs)) return def add_tasks(self, jobName, location_and_parameters): """! Add tasks to a job. We have a new job for every task. We specify the parameters for each device individually. @param jobName string with job/task name @param location_and_parameters list of form [ { 'location' : '...', 'parameter' : ' ...'}, ...] @todo this function can be removed ?! """ self.restAPIClient.add_tasks(jobName, location_and_parameters) self.logger.log().debug("added task: " + jobName + " " + str(location_and_parameters)) return def broadcastTaskToDevices(self, taskName, deviceNamesList, parameterList): """! Send a task to the specified physical devices at the same time. @param taskName string of task name @param deviceNamesList list of device names like ['device_one', 'device_two'] @param parameterList specifies parameters for devices like [{'param1': 0, 'param2': 1}, {'param1': 10, 'param2': 5}] """ self.logger.log().debug("broadcastTaskToDevices") for deviceName in deviceNamesList: if deviceName not in self.registeredDevicesbyName: self.logger.log().error("broadcastTaskToDevices: " + deviceName + " is not known!") raise ValueError("Device with name " + deviceName + " is not known!") parameterDARTformat = self._messageTranslator.convertPython2Dart(deviceNamesList, parameterList) self.restAPIClient.add_tasks(taskName, parameterDARTformat) def get_ServerInformation(self): """! Return server information. @todo error messages at the moment """ serverInformation = self.restAPIClient.get_server_information() self.logger.log().debug("get_ServerInformation " + str(serverInformation)) return serverInformation def stopTask(self, taskName): """! Stop a task/job on the server. Therefore also decrease the counter of jobs on the server. @param taskName string with task name """ self._counterJobs -= 1 self.restAPIClient.stop_job(taskName) self.logger.log().debug("stopTask " + taskName + " new #jobs: " + str(self._counterJobs)) def stopRuntime(self): """! Stop the server. """ self.restAPIClient.stop_servers()
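# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the library): how a DartRuntime
# could be driven directly. The server address, client key, device name, IP,
# port, file path and function name below are placeholders, and the sketch
# assumes the (test-mode) REST client accepts these calls without a real
# DART server.
if __name__ == "__main__":
    runtime = DartRuntime("https://127.0.0.1:7777", "client_key",
                          testMode=True, errorProbability=0)
    # register a physical device; no init task in this sketch
    runtime.generate_and_add_SingleDevice("device_one", "192.168.0.10", 2883,
                                          hardwareConfig={}, initTask=None)
    # one job per task: point the job to the file/function to run on the device
    runtime.add_job("task_one", "examples/train.py", "train")
    # send device-specific parameters and poll for the result
    runtime.broadcastTaskToDevices("task_one", ["device_one"], [{"param1": 0}])
    if runtime.get_job_status("task_one") == 2:  # 0 unknown, 1 running, 2 stopped
        resultDevice, resultID = runtime.get_TaskResult("task_one", "device_one")
    runtime.stopTask("task_one")
    runtime.stopRuntime()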
class SpecificDeviceTask(TaskBase): """ As SpecificDeviceTask, we define to execute a given task with (possibly) different parameter settings (training, etc) on the specific devices that fulfill the (optional) hardware requirements. """ def __init__(self, taskName=None, parameterDict={}, model=None, hardwareRequirements={}, filePath=None, executeFunction=None, configFile=None): """! Instantiates a SpecificDeviceTask. Information about the configuration have to be either provided in the function call or via a configFile. @param taskName (optional) Name of the task to be stored in the LogServer @param parameterDict Dict of device names and associated parameters to be used in training @param model (optional) Model to be executed @param hardwareRequirements (optional) mandatory hardware requirements @param filePath the path to the file to be called for execution on the device @param executeFunction name of function, which shoule be executed in filePath @param configFile (optional) """ self._parameterDict = parameterDict self._model = model self._filePath = filePath self._hardwareRequirements = hardwareRequirements self._configFile = configFile self._executeFunction = executeFunction self._taskName = taskName self.logger = LogServer(__name__) self.logger.log().info("SpecificDeviceTask initiated") self.checkConfig() @property def filePath(self): return self._filePath @property def taskName(self): return self._taskName @property def numDevices(self): return len(self.specificDevices) @property def executeFunction(self): return self._executeFunction @property def parameterDict(self): """! property: parameterDict. Implements the getter. @return format {"device_name": deviceParameterDict} """ return self._parameterDict @parameterDict.setter def parameterDict(self, new_parameterDict): """! property: parameterDict. Implements the setter. @param new_parameterDict the new parameterDict """ self._parameterDict = new_parameterDict @property def model(self): """! property: model. Implements the getter. A model is an optional parameter for a task. In case a model is provided, the model is sent to the devices where it will be trained/used for inference. In case no model is provided, the task executes some given functions on the device (filePath has to be defined). """ return self._model @model.setter def model(self, new_model): """! property: model. Implements the setter. @param new_model the new model to be trained """ self._model = new_model @property def specificDevices(self): """! Returns a list of devices on which the task is to be executed. """ return list(self._parameterDict.keys()) @property def configFile(self): """! property: configFile. Implements the getter. The configFile is an optional file provided at instantiation to describe the task configuration, i.e. it contains the parameters for training, (optionally) the model to be executed, which hardwareRequirements are mandatory, the name of the task, as well as the path to the python script to be executed. @todo define the structure of the configFile """ return self._configFile def writeConfig(self, taskName, parameterlists, hardwareRequirements, model, filePath): """! @todo implement @param taskName Name of the task @param parameterlists List of parameters @param hardwareRequirements mandatory hardware requirements @param model model to be trained @param filePath path to the file to be executed on the device """ return def checkConfig(self): """! 
This method ensures that all necessary parameters for the given task are provided: either via the parameters given at instantiation of the subclass or via a given file path to a configuration file. """ config = {} valid = False if self._parameterDict is None: if self._configFile is None: self.logger.log().error( "specificDeviceTask.checkConfig: no config provided") raise ValueError("No configuration provided") else: config = self.loadConfigFile() else: config['parameters'] = self._parameterDict config['hardwareRequirements'] = self._hardwareRequirements config['model'] = self._model config['taskName'] = self._taskName config['filePath'] = self._filePath valid = True self.logger.log().debug("specificDeviceTask.checkConfig: " + str(locals())) # check the configuration # todo: define configfile # why do we need load and write of the config? - might be too large if self._parameterDict is None: self.writeConfig(config['taskName'], config['parameters'], config['hardwareRequirements'], config['model'], config['filePath']) return valid def getDeviceParameterDict(self, deviceName): """! Return the parameter dict of a specific device. @param deviceName string with device name @return dict with the parameters for this device """ if deviceName not in self.specificDevices: self.logger.log().error( "specificDeviceTask.getDeviceParameterDict: " + deviceName + " does not apply for this task") raise KeyError("Device with name " + deviceName + " not included in task") else: return self.parameterDict[deviceName] def checkConstraints(self, listDevices): """! Check if all devices which are specified by name in the task are in listDevices and if they fulfill the hardware requirements. @param listDevices currently available devices @todo implement check of hardware requirements if necessary """ devicesSuited = True listDeviceNames = [device.name for device in listDevices] for task_neededDevice in self.specificDevices: if task_neededDevice not in listDeviceNames: devicesSuited = False return devicesSuited def loadConfigFile(self): """! @todo implement this function """ return
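# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): building a SpecificDeviceTask whose
# parameterDict is keyed by device name. The task name, device names, file
# path and function name are placeholders.
if __name__ == "__main__":
    task = SpecificDeviceTask(taskName="task_one",
                              parameterDict={"device_one": {"epochs": 1},
                                             "device_two": {"epochs": 5}},
                              filePath="examples/train.py",
                              executeFunction="train")
    print(task.specificDevices)                       # ['device_one', 'device_two']
    print(task.getDeviceParameterDict("device_one"))  # {'epochs': 1}
    print(task.numDevices)                            # 2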
class DeviceAggregator(AggregatorBase): """! DeviceAggregator is responsible for the data aspect of a task """ def __init__(self, devices, task, maxSizeDeviceHolder=10, maxNumDeviceHolder=5, maxNumChildAggregators=5, logServer=None): """! @param task instance of a task @param deviceHolders list of deviceHolders @param childAggregators list of childAggregators @param maxSizeDeviceHolder maximum number of devices for deviceHolder @param maxNumDeviceHolder maximum number of deviceHolders per aggregator/childaggregator @param maxNumChildAggregators maximum number of allowed childAggregators @param aggregatedResult aggregated result of local task results @param logServer storage of the results and/or aggregated result """ self._task = task self._maxSizeDeviceHolder = maxSizeDeviceHolder self._maxNumDeviceHolder = maxNumDeviceHolder self._maxNumChildAggregators = maxNumChildAggregators self._deviceHolders = [] self._childAggregators = [] numDevices = task.numDevices if maxNumChildAggregators > 0: amount_childAggregators = self.compute_required_childAggregators_count( numDevices) self._childAggregators = [ DeviceAggregator( [ ] #init at first without devices; devices will be added at the end of constructor , task, maxSizeDeviceHolder=maxSizeDeviceHolder, maxNumDeviceHolder=maxNumDeviceHolder, maxNumChildAggregators=0, logServer=logServer) for _ in range(amount_childAggregators) ] self._aggregatedResult = None self._logServer = logServer self._instantiateDeviceHolders() for device in devices: self.addSingleDevice(device) #add here task to devices self.logger = LogServer(__name__) self.logger.log().info("Aggregator initiated") #-------------------------------------------- @property def maxNumDeviceHolder(self): """! property: maxNumDeviceHolder. Implements the getter """ return self._maxNumDeviceHolder #-------------------------------------------- @property def maxNumChildAggregators(self): """! property: maxNumChildAggregators. Implements the getter """ return self._maxNumChildAggregators #-------------------------------------------- @property def maxSizeDeviceHolder(self): """! property: maxSizeDeviceHolder. Implements the getter """ return self._maxSizeDeviceHolder #---------------------------------- @property def logServer(self): """! property: logServer. Implements the getter """ return self._logServer @logServer.setter def logServer(self, newLogServer): """! property: logServer. Implements the setter @param newLogServer the new logServer """ self._logServer = newLogServer #---------------------------------- @property def task(self): """! property: task. Implements the getter """ return self._task @task.setter def task(self, newTask): """! property: deviceHolders. Implements the setter @param newTask instance of task """ self._task = newTask if self.childAggregators: for child in self.childAggregators: child.task = newTask #---------------------------------- @property def deviceHolders(self): """! property: deviceHolders. Implements the getter """ return self._deviceHolders @deviceHolders.setter def deviceHolders(self, newDeviceHolders): """! property: deviceHolders. Implements the setter @param newDeviceHolders list of deviceHolders """ if self._childAggregators: raise ValueError("Child aggregators exist!") self._deviceHolders = newDeviceHolders #------------------------------------ @property def allDevices(self): """! property: currentDevices. 
Implements the getter """ allDevices = [] for dHolder in self.deviceHolders: allDevices.extend(dHolder.devices) if self.childAggregators: for child in self.childAggregators: for device_holder in child.deviceHolders: allDevices.extend(device_holder.devices) return allDevices #------------------------------------- @property def childAggregators(self): """! property: childAggregators. Implements the getter """ return self._childAggregators @childAggregators.setter def childAggregators(self, newChildAggregators): """! property: childAggregators. Implements the setter @param newChildAggregators list of childAggregators """ self._childAggregators = newChildAggregators #------------------------------------- @property def aggregatedResult(self): """! property: aggregatedResult. Implements the getter """ return self._aggregatedResult @aggregatedResult.setter def aggregatedResult(self, newAggregatedResult): """! property: aggregatedResult. Implements the setter @param newAggregatedResult """ self._aggregatedResult = newAggregatedResult def get_max_number_devices(self): """! Get total number of devices from deviceAggregator or in case of childAggregators iterate over all child aggregators. @return total int number of maximal amount of devices """ if not self.childAggregators: return self._maxNumDeviceHolder * self._maxSizeDeviceHolder else: total = 0 for child in self.childAggregators: total += child.get_max_number_devices() return total def get_OnlineDevices(self): """! Get a list of devices, which are online. In case of childAggregators iterate over childaggregatos and get from them the online devices @return deviceList """ deviceList = [] if self.childAggregators: for aggregator in self.childAggregators: deviceList.extend(aggregator.get_OnlineDevices()) return deviceList for dHolder in self.deviceHolders: deviceList.extend(dHolder.getOnlineDevices()) return deviceList #-------------functions for setting up device aggregator------------------ def compute_required_childAggregators_count(self, numberDevices): """! Check if child aggregators are required and compute the amount @param numberDevices int amount of required devices """ amount_needed_childAggregators = 0 maxDevices = self._maxNumDeviceHolder * self._maxSizeDeviceHolder if maxDevices < numberDevices: # get necessary devices per childAggregators devicesPerChild = math.ceil(numberDevices / self._maxNumChildAggregators) if devicesPerChild > self._maxSizeDeviceHolder: raise ValueError( "More devices are required than allowed per child aggregator!" ) amount_needed_childAggregators = math.ceil(numberDevices / maxDevices) return amount_needed_childAggregators def _instantiateDeviceHolders(self): """! Instantiate devicHolders and append to list of DeviceHolders """ if len(self.deviceHolders) > 0: raise ValueError("device holder already instantiated") elif len(self.childAggregators) > 0: pass else: for i in range(self._maxNumDeviceHolder): deviceHolder = DeviceHolder(self.maxSizeDeviceHolder) self.deviceHolders.append(deviceHolder) def addSingleDevice(self, device): """! Add single device to aggregator. If we have child aggregators iterate over child aggregators and add it to the first one, who have capacity. 
@param device instance of class deviceSingle """ if not isinstance(device, DeviceSingle): raise Exception("Device is not an instance of DeviceSingle !") if self.childAggregators: for aggregator in self.childAggregators: if aggregator.addSingleDevice(device) == True: return raise ValueError("Device holders are completely full!") else: for deviceHolder in self.deviceHolders: if device in deviceHolder.devices: raise Exception("Device is already in deviceHolder!") if deviceHolder.check_full() == False: deviceName = device.name deviceParameterDict = self.task.getDeviceParameterDict( deviceName) taskName = self.task.taskName deviceHolder.addDevice(device, taskName, deviceParameterDict) return True if self.maxNumChildAggregators > 0: raise ValueError("Device holders are completely full!") #-------------functions regarding task status----------------------- def sendTask(self): """! Send task to device. Each Aggregator iterate over the device holders, which broadcast the task to the runtime. """ if self.task is None: raise ValueError("There is no task assigned!") if self.childAggregators: for aggregator in self.childAggregators: aggregator.sendTask() else: for device_holder in self.deviceHolders: if not device_holder.check_empty(): device_holder.broadcastTask(self.task) def isTaskFinished(self): """! Return the status of the task. If there is no child aggregators iterate over all deviceHolders and check the task status. To look at each deviceHolder is necessary, because the devices in different deviceHolders can have different servers. Each deviceHolder iterates over his devices and get the device task status. Based on this number we decide over the task status. @return string "in progress" or "finished" """ if self.task is None: raise ValueError("There is no task assigned!") taskFinished = True taskName = self.task.taskName if not self.childAggregators: for device_holder in self.deviceHolders: dhFinished = device_holder.devicesFinished(taskName) if not dhFinished: taskFinished = False break for aggregator in self.childAggregators: childAggregatorTaskFinished = aggregator.isTaskFinished() if not childAggregatorTaskFinished: taskFinished = False break return taskFinished def stopTask(self): """! Remove the task from each device over the device holders from the aggregator. Afterwards delete the deviceHolders. Already finished devices has logged their results on their own """ if self.task is None: raise ValueError("There is no task assigned!") taskName = self.task.taskName if self.childAggregators: for childAggregator in self.childAggregators: childAggregator.stopTask() else: for deviceHolder in self.deviceHolders: deviceHolder.stopTask(taskName) del deviceHolder #-----------------functions for result aggregation----------------- def aggregate_devicesResults(self): """! Collect results from device, which are finished, and aggregate them optional. @return list with instances of taskResult """ # check all deviceholders if not self.deviceHolders: self.logger.log().info('no device holders available') # get all devices from the deviceholders taskName = self.task.taskName intermediateResults = [] for dHolder in self.deviceHolders: intermediateResults.extend(dHolder.get_finishedTasks(taskName)) return intermediateResults def requestAggregation(self): """! In case of an aggregator with childaggregators, this function triggers the aggregations by the child aggregators. Otherwise, it aggregates the results provided by all finished devices in its deviceHolders. 
@return list with instances of taskResult """ if not self.childAggregators: self.logger.log().info( 'deviceAggregator.requestAggregation: collect results from devices' ) aggregatedResult = self.aggregate_devicesResults() return aggregatedResult #TODO: at the moment sequential, parallelize it result = [] for aggregator in self.childAggregators: self.logger.log().info( 'deviceAggregator.requestAggregation: trigger aggregation by child aggregators' ) result.extend(aggregator.requestAggregation()) return result #--------------functions for writing log------------------------ def sendLog(self, task): """! If there are no childAggregators, iterate over the device holders and their devices and write the logs to the logServer. If there are childAggregators, iterate over them first. @param task instance of class task """ taskName = task.taskName # get devices from device holders if there are no child aggregators if not self.childAggregators: for deviceHolder in self.deviceHolders: for device in deviceHolder.devices: logs = device.getLog(task) # getLog returns False when the device does not have this specific task if logs and self.logServer: self.logServer.writeLog(logs) # if there are child aggregators, trigger them first to send the log else: for aggregator in self.childAggregators: aggregator.sendLog(task)
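# ---------------------------------------------------------------------------
# Capacity sketch (illustrative only): with maxNumDeviceHolder = 1 and
# maxSizeDeviceHolder = 2 a single aggregator holds at most 1 * 2 = 2 devices,
# so a task addressing 3 devices leads to ceil(3 / 2) = 2 child aggregators.
# The task below reuses the hypothetical device names and file path from the
# sketches above; no real devices are attached.
if __name__ == "__main__":
    task = SpecificDeviceTask(taskName="task_one",
                              parameterDict={"device_one": {}, "device_two": {},
                                             "device_three": {}},
                              filePath="examples/train.py",
                              executeFunction="train")
    aggregator = DeviceAggregator([], task,
                                  maxSizeDeviceHolder=2,
                                  maxNumDeviceHolder=1,
                                  maxNumChildAggregators=5)
    print(len(aggregator.childAggregators))     # 2
    print(aggregator.get_max_number_devices())  # 2 child aggregators * 2 = 4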
class WorkflowManager: TASK_STATUS_IN_PROGRESS = "in progress" TASK_STATUS_IN_QUEUE = "in queue" TASK_STATUS_FINISHED = "finished" taskID = 0 def __init__(self, testMode=False, errorProbability=0, logLevel=3, maxSizeDeviceHolder=10, maximalNumberOpenJobs=10): """! @param testMode if True, the runtime is started in test mode @param errorProbability error probability used in test mode @param logLevel console log level: 0 = DEBUG, 1 = INFO, 3 = ERROR, otherwise FATAL @param maxSizeDeviceHolder maximal size for deviceHolders @param maximalNumberOpenJobs maximal number of allowed open jobs @todo: can runtime and maxSizeDeviceHolder be moved to the selector? Or first create the selector and then add the runtime to it? Better if we have multiple servers? """ self._runtime = None self._selector = None self._maxSizeDeviceHolder = maxSizeDeviceHolder self._maximalNumberOpenJobs = maximalNumberOpenJobs self._initTask = None self._testMode = testMode self._currentDeviceNames = [] self._errorProbability = int(errorProbability) loglevel = LogServer.ERROR if int(logLevel) == 0: loglevel = LogServer.DEBUG elif int(logLevel) == 1: loglevel = LogServer.INFO elif int(logLevel) == 3: loglevel = LogServer.ERROR else: loglevel = LogServer.FATAL self.logger = LogServer(__name__, console_level=loglevel, file_level=LogServer.DEBUG) self.logger.log().info("Workflow manager initiated") @property def maxSizeDeviceHolder(self): self.logger.log().debug("_maxSizeDeviceHolder " + str(self._maxSizeDeviceHolder)) return self._maxSizeDeviceHolder @property def config(self): return self._args @property def runtime(self): return self._runtime @property def selector(self): return self._selector @property def maximalNumberOpenJobs(self): self.logger.log().debug("_maximalNumberOpenJobs " + str(self._maximalNumberOpenJobs)) return self._maximalNumberOpenJobs def startFedDART(self, runtimeFile=None, deviceFile=None, maximal_numberDevices=-1): """! @param runtimeFile path to the json file with the server address and client key @param deviceFile path to a json file with already known devices @param maximal_numberDevices maximal number of devices for the runtime @todo specify deviceFile """ self.logger.log().debug('start feddart server, config: ' + str(locals())) with open(runtimeFile) as runtimeFile: runtime = json.load(runtimeFile) self._runtime = DartRuntime(runtime["server"], runtime["client_key"], self._testMode, self._errorProbability, maximal_numberDevices, self.maximalNumberOpenJobs) self._selector = self.runtime.instantiateSelector( self._maxSizeDeviceHolder) if self._initTask: self.selector.initTask = self._initTask if deviceFile is not None: with open(deviceFile) as deviceFile: deviceFile = json.load(deviceFile) for deviceName in deviceFile: self.selector.addSingleDevice( deviceName, deviceFile[deviceName]["ipAdress"], deviceFile[deviceName]["port"], deviceFile[deviceName]["hardware_config"]) def removeDevice(self, deviceName): self.logger.log().debug("remove device. deviceName " + str(deviceName)) self.selector.removeDevice(deviceName) def _sendTaskRequest(self, task): """! Send the task to the selector. Based on the hardware requirements and the needed devices, the selector decides to accept or reject the task. @param task instance of task """ self.logger.log().info("requestTaskAcceptance") self.logger.log().debug(str(locals())) return self.selector.requestTaskAcceptance(task) def getTaskStatus(self, taskName): """! Ask the selector for the aggregator of the task. The aggregator knows the status of the task. @param taskName name of the task @return string "in queue", "in progress" or "finished" """ self.logger.log().debug("getTaskStatus. task " + str(taskName))
if self.selector.taskInQueue(taskName): return self.TASK_STATUS_IN_QUEUE else: try: aggregator = self.selector.get_aggregator_of_task(taskName) taskFinished = aggregator.isTaskFinished() if taskFinished: self.logger.log().debug("TaskStatus " + str(self.TASK_STATUS_FINISHED)) return self.TASK_STATUS_FINISHED else: self.logger.log().debug("TaskStatus " + str(self.TASK_STATUS_IN_PROGRESS)) return self.TASK_STATUS_IN_PROGRESS except ValueError: self.logger.log().error( "workflowManager.getTaskStatus. there is no aggregator that handles task " + taskName) self.logger.log().debug(str(locals())) def getServerInformation(self): serverinfo = self.selector.runtime.get_ServerInformation() self.logger.log().debug("WorkflowManager.get_ServerInformation: " + str(locals())) return serverinfo def getTaskResult(self, taskName): """! Get the aggregator of the task and trigger the collection of the results. @param taskName name of the task @return taskResult aggregated result or collected results from devices """ self.logger.log().info("WorkflowManager.getTaskResult. taskName " + str(taskName)) if self.selector.taskInQueue(taskName): self.logger.log().debug(taskName + " still in queue.") return [] else: try: aggregator = self.selector.get_aggregator_of_task(taskName) taskResult = aggregator.requestAggregation() if self.getTaskStatus(taskName) == self.TASK_STATUS_FINISHED: self.stopTask(taskName) except ValueError: self.logger.log().error( "workflowManager.getTaskResult. there is no aggregator that handles task " + taskName) self.logger.log().debug(str(locals())) return [] return taskResult def getAllDeviceNames(self): """! Return all known devices with name to the end user. @return: list of device names """ deviceNames = self.selector.deviceNames self.logger.log().debug("getAllDeviceNames. deviceNames " + str(deviceNames)) if self._currentDeviceNames == []: self._currentDeviceNames = deviceNames return deviceNames def getAvailableDeviceNames(self, listDeviceNames): """! Return the subset of listDeviceNames that is currently known to the selector. @param listDeviceNames list of device names @return: list of available device names """ currentDeviceNames = self.selector.deviceNames availableDeviceNames = [] for deviceName in currentDeviceNames: if deviceName in listDeviceNames: availableDeviceNames.append(deviceName) return availableDeviceNames def getNewDeviceNames(self): """! Return the names of devices that were not yet known when getAllDeviceNames was called. @return: list of new device names """ oldDeviceNames = self._currentDeviceNames currentDeviceNames = self.selector.deviceNames newDeviceNames = [] for deviceName in currentDeviceNames: if deviceName not in oldDeviceNames: newDeviceNames.append(deviceName) self.logger.log().debug("getNewDeviceNames. deviceNames " + str(newDeviceNames)) return newDeviceNames def createCollection(self, deviceNames): """! Cluster the devices to a group. @param deviceNames: list of device names @return: collection """ return Collection(self, deviceNames) def stopTask(self, taskName): """! The task with the associated aggregator and deviceHolders is destroyed. This should be done when the task isn't needed anymore. @todo: add it as an option to get_TaskResult ?! """ self.logger.log().info("stopTask. taskName " + str(taskName))
if self.selector.taskInQueue(taskName): self.selector.deleteTaskInQueue(taskName) else: self.selector.deleteAggregatorAndTask(taskName) def stopFedDART(self): self.logger.log().info("stopFedDART") self.runtime.stopRuntime() def createInitTask(self, parameterDict={}, model=None, hardwareRequirements={}, filePath=None, executeFunction=None, configFile=None): self.logger.log().debug("createInitTask. " + str(locals())) task = InitTask(parameterDict, model, hardwareRequirements, filePath, executeFunction, configFile) self._initTask = task def startTask(self, taskType=0, parameterDict={}, model=None, hardwareRequirements={}, filePath=None, executeFunction=None, configFile=None, priority=False, numDevices=-1, cluster=None): """! @param taskType 0 for a DefaultTask, 1 for a SpecificDeviceTask @param parameterDict dict of parameters; for a SpecificDeviceTask keyed by device name @param executeFunction name of function, which should be executed in filePath @param priority whether the task is added to the queue with priority @param numDevices number of devices for a DefaultTask @return taskName name of the accepted task or None if the task was rejected """ # defaultTask taskName = "task_" + str(WorkflowManager.taskID) + "_" + datetime.datetime.now().isoformat() WorkflowManager.taskID += 1 if taskType == 0: task = DefaultTask(taskName, parameterDict, model, hardwareRequirements, filePath, executeFunction, configFile, numDevices) if taskType == 1: task = SpecificDeviceTask(taskName, parameterDict, model, hardwareRequirements, filePath, executeFunction, configFile) # request possibility from selector if task is feasible request_status = self._sendTaskRequest(task) # task accepted if request_status: self.selector.addTask2Queue(task, priority) self.logger.log().info("task accepted") # task rejected else: taskName = None self.logger.log().info( "task was not accepted - change your constraints?") self.logger.log().debug("start task." + str(locals())) return taskName
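# ---------------------------------------------------------------------------
# End-to-end sketch (illustrative only): driving the framework through the
# WorkflowManager. The JSON file paths, device name, training script and
# function name are placeholders; "runtime.json" is assumed to contain the
# "server" and "client_key" entries read by startFedDART, and "devices.json"
# is assumed to map device names to "ipAdress", "port" and "hardware_config".
if __name__ == "__main__":
    import time

    manager = WorkflowManager(testMode=True, errorProbability=0)
    manager.startFedDART(runtimeFile="runtime.json", deviceFile="devices.json")
    # taskType 1 = SpecificDeviceTask with per-device parameters
    taskName = manager.startTask(taskType=1,
                                 parameterDict={"device_one": {"epochs": 1}},
                                 filePath="examples/train.py",
                                 executeFunction="train")
    if taskName is not None:
        # poll until the task is finished, then collect the device results
        while manager.getTaskStatus(taskName) != manager.TASK_STATUS_FINISHED:
            time.sleep(1)
        results = manager.getTaskResult(taskName)
    manager.stopFedDART()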