Example #1
    def update_job_info(self, schedule):
        for item in schedule:
            job_col = mg.get_col(self.db, item[0])
            job_filter = 'job_info.tasks.%s.container_name' % item[1]

            # add the node field
            target = 'job_info.tasks.%s.node' % item[1]
            mg.update_doc(job_col, job_filter, item[1], target, item[2])

            # add cpuset_cpus field
            target = 'job_info.tasks.%s.cpuset_cpus' % item[1]
            mg.update_doc(job_col, job_filter, item[1], target,
                          ','.join(item[3]))
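
All of these examples call mg.update_doc with the same argument pattern: (collection, filter key, filter value, target key, target value). The mg module itself is not shown on this page; a minimal sketch of what the helpers might look like on top of pymongo, assuming mg is a thin wrapper, would be:

    # Hypothetical sketch of the mg helpers used throughout these examples,
    # assuming a thin pymongo wrapper; the real module is not shown here.
    def get_col(db, col_name):
        # Return a collection handle from a pymongo database object.
        return db[col_name]

    def update_doc(col, filter_key, filter_value, target_key, target_value):
        # Set target_key to target_value on every document matching the filter.
        col.update_many({filter_key: filter_value},
                        {'$set': {target_key: target_value}})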
Example #2
    def reset_db(self):
        all_cols = mg.get_all_cols(self.__db)
        if SystemConstants.WorkersResourceInfo in all_cols:
            # Drop worker resource info collection
            mg.drop_col(self.__db_client, SystemConstants.MONGODB_NAME, SystemConstants.WorkersResourceInfo)

        if SystemConstants.WorkersInfo in all_cols:
            # Reset worker info collection
            workers_info_col = mg.get_col(self.__db, SystemConstants.WorkersInfo)
            workers_info_data = mg.find_col(workers_info_col)
            for index, worker in enumerate(workers_info_data[:]):
                for cpu in worker['CPUs']:
                    workers_info_data[index]['CPUs'][cpu] = False
                mg.update_doc(col=workers_info_col,
                              filter_key='hostname',
                              filter_value=worker['hostname'],
                              target_key='CPUs',
                              target_value=workers_info_data[index]['CPUs'])
        print('Reset MongoDB.')
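
reset_db implies the shape of a WorkersInfo document: a hostname field plus a CPUs mapping from core id to an in-use flag, and the later examples also read a MemFree string. An illustrative document, with field names taken from the code and values invented:

    # Hypothetical WorkersInfo document; the field names come from the
    # examples on this page, the values are placeholders.
    worker_doc = {
        'hostname': 'worker-1',
        'CPUs': {'0': False, '1': True, '2': False},  # core id -> in use
        'MemFree': '2048m',
    }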
Example #3
def request_new_job():
    data = request.get_json()
    # create the overlay network if it does not exist yet
    # (verify_network is assumed to return True when the network is missing)
    if docker.verify_network(dockerClient,
                             data['job_info']['network']['name']):
        create_overlay_network(network=data['job_info']['network']['name'],
                               driver=data['job_info']['network']['driver'],
                               subnet=data['job_info']['network']['subnet'])

    try:
        # make the NFS directory for this job
        nfs_master_path = '/var/nfs/RESTfulSwarm/%s' % data['job_name']
        os.mkdir(path=nfs_master_path)

        for _task in data['job_info']['tasks']:
            data['job_info']['tasks'][_task].update(
                {'network': data['job_info']['network']['name']})

        # deploy job
        for _task in list(data['job_info']['tasks'].values()):
            new_container(_task)

        # update job status
        mg.update_doc(db[data['job_name']], 'job_name', data['job_name'],
                      'status', 'Deployed')
        mg.update_doc(db[data['job_name']], 'job_name', data['job_name'],
                      'start_time', time.time())

        # update task status
        for task in data['job_info']['tasks'].keys():
            filter_key = 'job_info.tasks.%s.container_name' % task
            target_key = 'job_info.tasks.%s.status' % task
            mg.update_doc(db[data['job_name']], filter_key, task, target_key,
                          'Deployed')

        job_buffer.append(data['job_name'])
        return 'OK', 200
    except Exception as ex:
        traceback.print_exc(file=sys.stderr)
        return str(ex), 400
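
request_new_job is a Flask-style handler, so the keys it reads pin down the expected JSON body. A minimal request payload reconstructed from those lookups, with all names and values purely illustrative:

    # Hypothetical request body for request_new_job, assembled from the
    # keys the handler reads; the values are placeholders.
    payload = {
        'job_name': 'job1',
        'job_info': {
            'network': {
                'name': 'job1_net',
                'driver': 'overlay',
                'subnet': '10.0.1.0/24',
            },
            'tasks': {
                'job1_task1': {'container_name': 'job1_task1'},
            },
        },
    }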
Example #4
    def process_cores_scheduling_result(self, schedule, core_request,
                                        mem_request_arr, available_workers):
        job_index = 0
        result = []
        waiting_plan = []
        global_task_index = 0
        next_job = False
        task_index = 0
        temp_result = []

        # print(schedule)
        for index, item in enumerate(schedule):
            if next_job is False:
                global_task_index += len(core_request[job_index][1])
                next_job = True

            # item = (task_index, assigned worker index or -1 when no
            # worker can host the task)
            if item[1] != -1:
                # take the first n free cores; the chosen worker may have
                # more free cores than the task requested
                cores = []
                for j in range(
                        list(core_request[job_index][1].values())[task_index]):
                    cores.append(list(available_workers.values())[item[1]][0])
                    # remove used cores
                    key = list(available_workers.keys())[item[1]]
                    available_workers[key].pop(0)

                result_item = (
                    core_request[job_index][0],
                    list(core_request[job_index][1].keys())[task_index],
                    list(available_workers.keys())[item[1]], cores)

                temp_result.append(result_item)

                # update free memory
                worker_info = list(
                    self.workers_col.find(
                        {'hostname':
                         list(available_workers.keys())[item[1]]}))[0]

                new_free_mem = utl.memory_size_translator(
                    worker_info['MemFree'])
                request_mem = utl.memory_size_translator(
                    mem_request_arr[index])

                new_free_mem -= request_mem
                new_free_mem = str(new_free_mem) + 'm'
                mg.update_doc(self.workers_col, 'hostname',
                              list(available_workers.keys())[item[1]],
                              'MemFree', new_free_mem)
            else:
                # resources are insufficient; add the job to the waiting list
                waiting_plan.append(core_request[job_index][0])

            # update job index
            if index == global_task_index - 1:
                if len(temp_result) == len(core_request[job_index][1]):
                    result.extend(temp_result)
                else:
                    waiting_plan.append(core_request[job_index][0])
                job_index += 1
                next_job = False
                task_index = 0
                temp_result = []
            else:
                task_index += 1

        waiting_plan = list(set(waiting_plan))

        return result, waiting_plan
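
The indexing in process_cores_scheduling_result fixes the argument shapes: schedule is a flat list of (task index, worker index or -1) pairs across all jobs, core_request is a list of (job name, {task name: requested core count}) pairs, mem_request_arr holds one memory string per task, and available_workers maps each hostname to its list of free core ids. Matching sample inputs, hedged as an illustration:

    # Illustrative inputs inferred from the indexing above; the real
    # scheduler may use different names and core-id types.
    core_request = [('job1', {'job1_task1': 2})]       # job -> core counts per task
    schedule = [(0, 0)]                                # (task idx, worker idx or -1)
    mem_request_arr = ['256m']                         # one entry per task
    available_workers = {'worker-1': ['0', '1', '2']}  # host -> free core ids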
Example #5
 def update_cores(core):
     _target_key = 'CPUs.%s' % core
     mg.update_doc(self.__workers_info, 'hostname', worker_host,
                   _target_key, False)
     self.__logger.info('Released core %s on worker %s' %
                        (core, worker_host))
Example #6
        def update_db(msg):
            # msg format: '<worker hostname> <task name>'
            worker_host, task_name = msg.split()[:2]
            if task_name not in deployed_tasks:
                deployed_tasks.append(task_name)
                job_name = task_name.split('_')[0]
                job_col = mg.get_col(self.__db, job_name)

                # update job collection -- task status
                filter_key = 'job_info.tasks.%s.container_name' % task_name
                target_key = 'job_info.tasks.%s.status' % task_name
                mg.update_doc(job_col, filter_key, task_name, target_key,
                              'Down')

                job_details = mg.find_col(job_col)[0]

                # update job status once every task is down
                all_down = all(
                    task['status'] == 'Down'
                    for task in job_details['job_info']['tasks'].values())
                if all_down:
                    mg.update_doc(job_col, 'job_name', job_name, 'status',
                                  'Down')
                    mg.update_doc(job_col, 'job_name', job_name, 'end_time',
                                  time.time())

                self.__logger.info('Updating Job collection %s.' % job_name)

                # get the resource utilization of the 'Down' container
                cores = job_details['job_info']['tasks'][task_name][
                    'cpuset_cpus']
                cores = cores.split(',')
                memory = job_details['job_info']['tasks'][task_name][
                    'mem_limit']
                self.__logger.info(
                    'Collecting resources from down containers.')

                # update WorkersInfo collection
                # update cores info
                def update_cores(core):
                    _target_key = 'CPUs.%s' % core
                    mg.update_doc(self.__workers_info, 'hostname', worker_host,
                                  _target_key, False)
                    self.__logger.info('Released core %s on worker %s' %
                                       (core, worker_host))

                for core in cores:
                    update_cores(core)

                # update memory info
                worker_info = mg.filter_col(self.__workers_info, 'hostname',
                                            worker_host)
                free_memory = worker_info['MemFree']
                memory = float(memory.split('m')[0])
                free_memory = float(free_memory.split('m')[0])
                updated_memory = memory + free_memory
                updated_memory = str(updated_memory) + 'm'
                mg.update_doc(self.__workers_info, 'hostname', worker_host,
                              'MemFree', updated_memory)
                self.__logger.info(
                    'Updating memory resources in WorkersInfo collection.')

                # update worker resource collection
                mg.update_workers_resource_col(self.__workers_info,
                                               worker_host,
                                               self.__workers_resource_info)
                self.__logger.info(
                    'Updated WorkersResourceInfo collection because some '
                    'cores were released.')

                # update job collection -- cpuset_cpus
                target_key = 'job_info.tasks.%s.cpuset_cpus' % task_name
                mg.update_doc(job_col, filter_key, task_name, target_key, '')
                self.__logger.info(
                    'Updated Job collection. Released used cores.')

                # update job collection -- mem_limit
                target_key = 'job_info.tasks.%s.mem_limit' % task_name
                mg.update_doc(job_col, filter_key, task_name, target_key, '')
                self.__logger.info(
                    'Updated Job collection. Released used memory.')
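
update_db parses its argument in two steps: a whitespace split yields the worker hostname and the task name, and an underscore split recovers the job name from the task name. The expected message format, inferred from those splits, is therefore roughly:

    # Hypothetical notification message; '<job>_<suffix>' is the task
    # naming scheme implied by the splits in update_db.
    msg = 'worker-1 job1_task1'
    worker_host, task_name = msg.split()[:2]  # 'worker-1', 'job1_task1'
    job_name = task_name.split('_')[0]        # 'job1'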
Example #7
 def update_workers_info(self, schedule):
     for item in schedule:
         for core in item[3]:
             target = 'CPUs.%s' % core
             mg.update_doc(self.workers_col, 'hostname', item[2], target,
                           True)
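
Both update_workers_info and update_job_info (Example #1) consume the tuples built as result_item in Example #4: (job name, task name, worker hostname, assigned core list). A sample schedule, purely illustrative:

    # Illustrative schedule entries matching the result_item tuples from
    # Example #4; all names are placeholders.
    schedule = [
        ('job1', 'job1_task1', 'worker-1', ['0', '1']),
        ('job1', 'job1_task2', 'worker-2', ['3']),
    ]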