Ejemplo n.º 1
0
 def __init__(self, db_info):
     """Open the MongoDB handles, the messenger and the logger.

     :param db_info: mapping providing the 'user', 'pwd', 'db_name' and
         'address' entries used to create the MongoDB client.
     """
     # The client is only needed to obtain the database handle, so it stays
     # a local instead of becoming an attribute.
     client = mg.get_client(
         usr=db_info['user'],
         pwd=db_info['pwd'],
         db_name=db_info['db_name'],
         address=db_info['address'],
         port=SystemConstants.MONGODB_PORT,
     )
     database = mg.get_db(client, SystemConstants.MONGODB_NAME)
     self.__db = database

     # Collections holding worker state.
     self.__workers_info = mg.get_col(database, SystemConstants.WorkersInfo)
     self.__workers_resource_info = mg.get_col(
         database, SystemConstants.WorkersResourceInfo)

     # Messaging endpoint on the discovery port.
     self.__messenger = Messenger(
         messenger_type='C/S', port=SystemConstants.DISCOVERY_PORT)
     self.__logger = utl.get_logger('DiscoveryLogger', 'DiscoveryLog.log')
Ejemplo n.º 2
0
 def prune_nw():
     """Remove the networks of finished jobs, once a minute, forever.

     On every pass each job name in ``job_buffer`` is looked up in its own
     collection. Jobs whose stored status is 'Down' contribute their network
     name to the removal list and are dropped from the buffer. The collected
     names are then handed to ``docker.rm_networks``. This function never
     returns.
     """
     while True:
         finished_networks = []
         # Iterate over a snapshot so removing from job_buffer is safe.
         for job_name in list(job_buffer):
             record = mg.filter_col(
                 mg.get_col(db, job_name), 'job_name', job_name)
             if record is None or record['status'] != 'Down':
                 continue
             finished_networks.append(record['job_info']['network']['name'])
             job_buffer.remove(job_name)
         docker.rm_networks(dockerClient, finished_networks)
         print('Remove networks:', finished_networks)
         time.sleep(60)
Ejemplo n.º 3
0
    def request_new_job():
        """Store a submitted job in MongoDB and announce it to the job manager.

        The JSON body of the current request is stamped with a
        ``submit_time`` and inserted into the collection named after its
        ``job_name`` entry. A 'newJob' message carrying that collection name
        is then sent through the module-level messenger.

        :return: the tuple ``('OK', 200)``.
        """
        # Persist the job description, stamped with the submission time.
        job = request.get_json()
        job.update(submit_time=time.time())
        job_name = job['job_name']
        mg.insert_doc(mg.get_col(db, job_name), job)

        # Tell the job manager which collection holds the new job.
        messenger.send(prompt='newJob', content=job_name)
        return 'OK', 200
Ejemplo n.º 4
0
    def update_job_info(self, schedule):
        """Write the scheduling decision of each task into its job collection.

        :param schedule: iterable of indexable entries. Index 0 names the job
            collection, index 1 the task, index 2 is stored as the task's
            ``node`` and index 3 is joined with ',' and stored as the task's
            ``cpuset_cpus``.
        """
        for entry in schedule:
            collection = mg.get_col(self.db, entry[0])
            task = entry[1]
            task_prefix = 'job_info.tasks.%s' % task
            # The task document is matched by its own container name.
            match_key = task_prefix + '.container_name'

            # add node field
            mg.update_doc(collection, match_key, task,
                          task_prefix + '.node', entry[2])

            # add cpuset_cpus field
            cpuset = ','.join(entry[3])
            mg.update_doc(collection, match_key, task,
                          task_prefix + '.cpuset_cpus', cpuset)
Ejemplo n.º 5
0
 def find_container(self, container_name):
     """Find the node recorded for a container.

     Every non-system collection of ``self.db`` is searched, in the order
     returned by the database, for documents whose
     ``job_info.tasks.<container_name>.container_name`` field equals
     ``container_name``. The search stops at the first collection with a
     match.

     :param container_name: name of the container (task) to look up.
     :return: the ``node`` value stored for that task in the first matching
         document, or None when no collection contains a match.
     """
     names = self.db.collection_names(include_system_collections=False)
     for name in names:
         filter_key = 'job_info.tasks.%s.container_name' % container_name
         collection = mg.get_col(self.db, name)
         matches = list(collection.find({filter_key: container_name}))
         if matches:
             task = matches[0]['job_info']['tasks'][container_name]
             return task['node']
     return None
Ejemplo n.º 6
0
    def reset_db(self):
        """Reset the worker collections to a clean state.

        Drops the worker resource collection when it exists. When the worker
        info collection exists, every entry under each worker's ``CPUs``
        field is set to False. Prints 'Reset MongoDB.' when done.
        """
        all_cols = mg.get_all_cols(self.__db)

        if SystemConstants.WorkersResourceInfo in all_cols:
            # Drop worker resource info collection
            mg.drop_col(self.__db_client, SystemConstants.MONGODB_NAME,
                        SystemConstants.WorkersResourceInfo)

        if SystemConstants.WorkersInfo in all_cols:
            # Reset worker info collection
            workers_info_col = mg.get_col(self.__db,
                                          SystemConstants.WorkersInfo)
            # Snapshot the documents once so the updates below cannot
            # interfere with the iteration, whatever find_col returns.
            workers = list(mg.find_col(workers_info_col))
            for worker in workers:
                # Build the cleared CPU map from the worker document itself
                # instead of re-indexing and mutating the query result.
                released_cpus = {cpu: False for cpu in worker['CPUs']}
                mg.update_doc(col=workers_info_col,
                              filter_key='hostname',
                              filter_value=worker['hostname'],
                              target_key='CPUs',
                              target_value=released_cpus)
        print('Reset MongoDB.')
Ejemplo n.º 7
0
        def update_db(msg):
            """Record a finished task and give its resources back to the worker.

            Does nothing when the task is already listed in
            ``deployed_tasks``. Otherwise the task is marked 'Down' in its
            job collection, the job is marked 'Down' when all of its tasks
            are, and the task's cores and memory are released in the workers
            collections. Finally the task's ``cpuset_cpus`` and ``mem_limit``
            fields are cleared.

            :param msg: whitespace-separated string; the first token is the
                worker hostname and the second the task name. The job name is
                the part of the task name before the first '_'.
            """
            tokens = msg.split()
            worker_host = tokens[0]
            task_name = tokens[1]
            if task_name in deployed_tasks:
                # This task has already been handled.
                return
            deployed_tasks.append(task_name)

            job_name = task_name.split('_')[0]
            job_col = mg.get_col(self.__db, job_name)
            task_prefix = 'job_info.tasks.%s' % task_name
            filter_key = task_prefix + '.container_name'

            # update job collection -- task status
            mg.update_doc(job_col, filter_key, task_name,
                          task_prefix + '.status', 'Down')

            # Read the job document back after the status update.
            job_details = mg.find_col(job_col)[0]
            tasks = job_details['job_info']['tasks']

            # update job status if all tasks are down
            statuses = [tasks[name]['status'] for name in tasks]
            if all(status == 'Down' for status in statuses):
                mg.update_doc(job_col, 'job_name', job_name, 'status', 'Down')
                mg.update_doc(job_col, 'job_name', job_name, 'end_time',
                              time.time())

            self.__logger.info('Updating Job collection %s.' % job_name)

            # get the resource utilization of the 'Down' container
            finished_task = tasks[task_name]
            cores = finished_task['cpuset_cpus'].split(',')
            memory = finished_task['mem_limit']
            self.__logger.info('Collecting resources from down containers.')

            # update WorkersInfo collection
            # update cores info: mark each released core as not in use
            for core in cores:
                core_key = 'CPUs.%s' % core
                mg.update_doc(self.__workers_info, 'hostname', worker_host,
                              core_key, False)
                self.__logger.info('Release core %s status in worker %s' %
                                   (core_key, worker_host))

            # update memory info: both values are strings like '<number>m'
            worker_info = mg.filter_col(self.__workers_info, 'hostname',
                                        worker_host)
            free_memory = worker_info['MemFree']
            released = float(memory.split('m')[0])
            available = float(free_memory.split('m')[0])
            updated_memory = '%sm' % (released + available)
            mg.update_doc(self.__workers_info, 'hostname', worker_host,
                          'MemFree', updated_memory)
            self.__logger.info(
                'Updating memory resources in WorkersInfo collection.')

            # update worker resource collection
            mg.update_workers_resource_col(self.__workers_info, worker_host,
                                           self.__workers_resource_info)
            self.__logger.info(
                'Updated WorkersResourceInfo collection, because some cores are released.'
            )

            # update job collection -- cpuset_cpus
            mg.update_doc(job_col, filter_key, task_name,
                          task_prefix + '.cpuset_cpus', '')
            self.__logger.info('Updated Job collection. Released used cores.')

            # update job collection -- mem_limit
            mg.update_doc(job_col, filter_key, task_name,
                          task_prefix + '.mem_limit', '')
            self.__logger.info('Updated Job collection. Released used memory.')
Ejemplo n.º 8
0
def main():
    """Start the global manager service.

    Truncates /etc/exports, sets up the Swagger template and the Docker
    client, and connects to MongoDB using ../DBInfo.json (relative to the
    GlobalManager directory). It then starts a daemon thread that prunes the
    networks of finished jobs every minute and runs the web application on
    the local address.
    """
    global db_address, db_client, db, worker_col, worker_resource_col
    global gm_address, dockerClient

    # clear /etc/exports to avoid duplicated nfs client
    with open('/etc/exports', 'w') as exports_file:
        exports_file.write('')

    # The relative DBInfo.json path below depends on this working directory.
    os.chdir('/home/%s/RESTfulSwarm/GlobalManager' % utl.get_username())

    gm_address = utl.get_local_address()

    contact = {
        "responsibleDeveloper": "Zhuangwei Kang",
        "email": "*****@*****.**",
    }
    info = {
        "title": "RESTfulSwarm",
        "description": "An RESTful application for Docker Swarm.",
        "contact": contact,
        "version": "0.0.1",
    }
    template = {
        "swagger": "2.0",
        "info": info,
        "host": '%s:%s' % (gm_address, SystemConstants.GM_PORT),
        "basePath": "",
        "schemes": ["http"],
    }

    swagger = Swagger(app, template=template)

    dockerClient = docker.set_client()

    # mongodb
    with open('../DBInfo.json') as db_info_file:
        db_info = json.load(db_info_file)

    db_client = mg.get_client(
        usr=db_info['user'],
        pwd=db_info['pwd'],
        db_name=db_info['db_name'],
        address=db_info['address'],
        port=SystemConstants.MONGODB_PORT,
    )
    db = mg.get_db(db_client, SystemConstants.MONGODB_NAME)
    worker_col = mg.get_col(db, SystemConstants.WorkersInfo)
    worker_resource_col = mg.get_col(db, SystemConstants.WorkersResourceInfo)

    # periodically prune unused network
    def prune_nw():
        while True:
            finished_networks = []
            # Iterate over a snapshot so removing from job_buffer is safe.
            for job_name in list(job_buffer):
                record = mg.filter_col(
                    mg.get_col(db, job_name), 'job_name', job_name)
                if record is None or record['status'] != 'Down':
                    continue
                finished_networks.append(
                    record['job_info']['network']['name'])
                job_buffer.remove(job_name)
            docker.rm_networks(dockerClient, finished_networks)
            print('Remove networks:', finished_networks)
            time.sleep(60)

    # Daemon thread: it must not keep the process alive on shutdown.
    pruner = threading.Thread(target=prune_nw)
    pruner.daemon = True
    pruner.start()

    os.chdir('/home/%s/RESTfulSwarm/ManagementEngine' % utl.get_username())

    app.run(host=gm_address, port=SystemConstants.GM_PORT, debug=False)
Ejemplo n.º 9
0
 def __init__(self, db):
     """Keep the database handle and look up the two worker collections.

     :param db: database object handed to ``mg.get_col``.
     """
     self.db = db
     # Both worker collections live in the same database.
     workers = mg.get_col(db, SystemConstants.WorkersInfo)
     workers_resource = mg.get_col(db, SystemConstants.WorkersResourceInfo)
     self.workers_col = workers
     self.workers_resource_col = workers_resource