def __init__(self, db_info):
    # Keep a handle to the client itself: reset_db() below drops collections through it
    self.__db_client = mg.get_client(usr=db_info['user'], pwd=db_info['pwd'],
                                     db_name=db_info['db_name'],
                                     address=db_info['address'],
                                     port=SystemConstants.MONGODB_PORT)
    self.__db = mg.get_db(self.__db_client, SystemConstants.MONGODB_NAME)
    self.__workers_info = mg.get_col(self.__db, SystemConstants.WorkersInfo)
    self.__workers_resource_info = mg.get_col(self.__db,
                                              SystemConstants.WorkersResourceInfo)
    self.__messenger = Messenger(messenger_type='C/S',
                                 port=SystemConstants.DISCOVERY_PORT)
    self.__logger = utl.get_logger('DiscoveryLogger', 'DiscoveryLog.log')
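# Illustrative sketch of the db_info argument consumed above. The key names come from
# this constructor; the values are placeholders, not real credentials. Elsewhere in the
# manager code the same dict is loaded from DBInfo.json.
example_db_info = {
    'user': 'admin',          # hypothetical MongoDB user name
    'pwd': 'secret',          # hypothetical password
    'db_name': 'admin',       # authentication database name
    'address': '127.0.0.1'    # MongoDB host address (port comes from SystemConstants.MONGODB_PORT)
}
# Discovery(example_db_info)  # assuming the enclosing class is the Discovery service,
#                             # as suggested by the 'DiscoveryLogger' name above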
def prune_nw():
    while True:
        networks = []
        for job in job_buffer[:]:
            job_info = mg.filter_col(mg.get_col(db, job), 'job_name', job)
            if job_info is not None and job_info['status'] == 'Down':
                networks.append(job_info['job_info']['network']['name'])
                job_buffer.remove(job)
        docker.rm_networks(dockerClient, networks)
        print('Remove networks:', networks)
        time.sleep(60)
def request_new_job():
    global messenger
    # Write job data into MongoDB
    data = request.get_json()
    data.update({'submit_time': time.time()})
    col_name = data['job_name']
    m_col = mg.get_col(db, col_name)
    mg.insert_doc(m_col, data)
    # Notify job manager
    messenger.send(prompt='newJob', content=col_name)
    return 'OK', 200
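# Client-side sketch of hitting the handler above. The host, port, and route path are
# placeholders (the Flask route decorator is not shown here); only the payload shape is
# derived from the handler: it must be JSON with at least a 'job_name' field, which
# becomes the MongoDB collection name, while 'submit_time' is stamped server-side.
import requests

new_job = {
    'job_name': 'job0',            # hypothetical job name; also used as the messenger payload
    'job_info': {'tasks': {}}      # remaining fields depend on the job template in use
}
resp = requests.post('http://127.0.0.1:5000/request_new_job', json=new_job)
print(resp.status_code, resp.text)  # expect 200 / 'OK' on success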
def update_job_info(self, schedule):
    for item in schedule:
        job_col = mg.get_col(self.db, item[0])
        job_filter = 'job_info.tasks.%s.container_name' % item[1]
        # add node field
        target = 'job_info.tasks.%s.node' % item[1]
        mg.update_doc(job_col, job_filter, item[1], target, item[2])
        # add cpuset_cpus field
        target = 'job_info.tasks.%s.cpuset_cpus' % item[1]
        mg.update_doc(job_col, job_filter, item[1], target, ','.join(item[3]))
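# Illustrative only: each schedule entry consumed above is indexed positionally as
# item[0] = job (collection) name, item[1] = task/container name, item[2] = assigned
# worker node, item[3] = iterable of CPU core ids (joined into cpuset_cpus). The names
# below are hypothetical.
example_schedule = [
    ('job0', 'job0_task0', 'worker1', ['0', '1']),   # task0 -> worker1, cores 0 and 1
    ('job0', 'job0_task1', 'worker2', ['2']),        # task1 -> worker2, core 2
]
# scheduler.update_job_info(example_schedule)  # assuming an instance of the enclosing class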
def find_container(self, container_name):
    '''
    Find the worker node on which the container is located
    :param container_name: name of the container to look up
    :return: the node hostname, or None if the container is not found
    '''
    collections = self.db.collection_names(include_system_collections=False)
    temp = []
    for collection in collections:
        filter_key = 'job_info.tasks.%s.container_name' % container_name
        jobs_col = mg.get_col(self.db, collection)
        temp = list(jobs_col.find({filter_key: container_name}))
        if len(temp) != 0:
            break
    return temp[0]['job_info']['tasks'][container_name]['node'] if len(temp) != 0 else None
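# Usage sketch (names hypothetical): locate the worker hosting a given container.
# Returns the node hostname if any job collection contains a task whose container_name
# matches, otherwise None.
# node = job_manager.find_container('job0_task0')
# if node is None:
#     print('Container job0_task0 is not recorded in any job collection')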
def reset_db(self):
    all_cols = mg.get_all_cols(self.__db)
    if SystemConstants.WorkersResourceInfo in all_cols:
        # Drop worker resource info collection
        mg.drop_col(self.__db_client, SystemConstants.MONGODB_NAME,
                    SystemConstants.WorkersResourceInfo)
    if SystemConstants.WorkersInfo in all_cols:
        # Reset worker info collection
        workers_info_col = mg.get_col(self.__db, SystemConstants.WorkersInfo)
        workers_info_data = mg.find_col(workers_info_col)
        for index, worker in enumerate(workers_info_data[:]):
            for cpu in worker['CPUs']:
                workers_info_data[index]['CPUs'][cpu] = False
            mg.update_doc(col=workers_info_col,
                          filter_key='hostname',
                          filter_value=worker['hostname'],
                          target_key='CPUs',
                          target_value=workers_info_data[index]['CPUs'])
    print('Reset MongoDB.')
def update_db(msg):
    worker_host = msg.split()[0]
    msg = msg.split()[1]
    task_name = msg
    if task_name not in deployed_tasks:
        deployed_tasks.append(task_name)
        job_name = msg.split('_')[0]
        job_col = mg.get_col(self.__db, job_name)

        # update job collection -- task status
        filter_key = 'job_info.tasks.%s.container_name' % task_name
        target_key = 'job_info.tasks.%s.status' % task_name
        mg.update_doc(job_col, filter_key, task_name, target_key, 'Down')

        job_details = mg.find_col(job_col)[0]

        # update job status if all tasks are down
        flag = True
        for task in job_details['job_info']['tasks']:
            if job_details['job_info']['tasks'][task]['status'] != 'Down':
                flag = False
        if flag:
            mg.update_doc(job_col, 'job_name', job_name, 'status', 'Down')
            mg.update_doc(job_col, 'job_name', job_name, 'end_time', time.time())
        self.__logger.info('Updating Job collection %s.' % job_name)

        # get the resource utilization of the 'Down' container
        cores = job_details['job_info']['tasks'][task_name]['cpuset_cpus']
        cores = cores.split(',')
        memory = job_details['job_info']['tasks'][task_name]['mem_limit']
        self.__logger.info('Collecting resources from down containers.')

        # update WorkersInfo collection
        # update cores info
        def update_cores(core):
            _target_key = 'CPUs.%s' % core
            mg.update_doc(self.__workers_info, 'hostname', worker_host,
                          _target_key, False)
            self.__logger.info('Release core %s status in worker %s'
                               % (_target_key, worker_host))

        [update_cores(core) for core in cores]

        # update memory info
        worker_info = mg.filter_col(self.__workers_info, 'hostname', worker_host)
        free_memory = worker_info['MemFree']
        memory = float(memory.split('m')[0])
        free_memory = float(free_memory.split('m')[0])
        updated_memory = memory + free_memory
        updated_memory = str(updated_memory) + 'm'
        mg.update_doc(self.__workers_info, 'hostname', worker_host,
                      'MemFree', updated_memory)
        self.__logger.info('Updating memory resources in WorkersInfo collection.')

        # update worker resource collection
        mg.update_workers_resource_col(self.__workers_info, worker_host,
                                       self.__workers_resource_info)
        self.__logger.info('Updated WorkersResourceInfo collection, '
                           'because some cores are released.')

        # update job collection -- cpuset_cpus
        target_key = 'job_info.tasks.%s.cpuset_cpus' % task_name
        mg.update_doc(job_col, filter_key, task_name, target_key, '')
        self.__logger.info('Updated Job collection. Released used cores.')

        # update job collection -- mem_limit
        target_key = 'job_info.tasks.%s.mem_limit' % task_name
        mg.update_doc(job_col, filter_key, task_name, target_key, '')
        self.__logger.info('Updated Job collection. Released used memory.')
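# Illustrative only: update_db expects a single whitespace-separated string of the form
# '<worker_hostname> <task_name>', e.g. a worker reporting a container it has torn down.
# The task name is assumed to be prefixed with its job name, since the job name is
# recovered via task_name.split('_')[0]. The values below are hypothetical.
# update_db('worker1 job0_task0')   # releases job0_task0's cores and memory on worker1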
def main():
    # clear /etc/exports to avoid duplicated nfs clients
    with open('/etc/exports', 'w') as f:
        f.write('')

    os.chdir('/home/%s/RESTfulSwarm/GlobalManager' % utl.get_username())

    global db_address
    global db_client
    global db
    global worker_col
    global worker_resource_col
    global gm_address
    global dockerClient

    gm_address = utl.get_local_address()

    template = {
        "swagger": "2.0",
        "info": {
            "title": "RESTfulSwarm",
            "description": "A RESTful application for Docker Swarm.",
            "contact": {
                "responsibleDeveloper": "Zhuangwei Kang",
                "email": "*****@*****.**"
            },
            "version": "0.0.1"
        },
        "host": '%s:%s' % (gm_address, SystemConstants.GM_PORT),
        "basePath": "",
        "schemes": [
            "http",
        ]
    }
    swagger = Swagger(app, template=template)

    dockerClient = docker.set_client()

    # mongodb
    with open('../DBInfo.json') as f:
        db_info = json.load(f)
    db_client = mg.get_client(usr=db_info['user'], pwd=db_info['pwd'],
                              db_name=db_info['db_name'],
                              address=db_info['address'],
                              port=SystemConstants.MONGODB_PORT)
    db = mg.get_db(db_client, SystemConstants.MONGODB_NAME)
    worker_col = mg.get_col(db, SystemConstants.WorkersInfo)
    worker_resource_col = mg.get_col(db, SystemConstants.WorkersResourceInfo)

    # periodically prune unused networks
    def prune_nw():
        while True:
            networks = []
            for job in job_buffer[:]:
                job_info = mg.filter_col(mg.get_col(db, job), 'job_name', job)
                if job_info is not None and job_info['status'] == 'Down':
                    networks.append(job_info['job_info']['network']['name'])
                    job_buffer.remove(job)
            docker.rm_networks(dockerClient, networks)
            print('Remove networks:', networks)
            time.sleep(60)

    prune_nw_thr = threading.Thread(target=prune_nw, args=())
    prune_nw_thr.daemon = True
    prune_nw_thr.start()

    os.chdir('/home/%s/RESTfulSwarm/ManagementEngine' % utl.get_username())

    app.run(host=gm_address, port=SystemConstants.GM_PORT, debug=False)
def __init__(self, db):
    self.db = db
    self.workers_col = mg.get_col(self.db, SystemConstants.WorkersInfo)
    self.workers_resource_col = mg.get_col(self.db,
                                           SystemConstants.WorkersResourceInfo)