Exemple #1
0
    def __init__(self, app):
        """Set up logging, load the config and register driver and app.

        Registration is a blocking handshake over ``self.listener``:
        ``register_driver`` is sent and the constructor waits for
        ``register_driver_success``; then ``register_app`` is sent and the
        constructor waits for ``resource_update``. Finally a ``backendComm``
        is created for this driver and started.

        Args:
            app: application object. Its ``app_name`` is sent with the
                ``register_app`` request and its ``app_id`` is set from the
                ``resource_update`` reply.

        Raises:
            RuntimeError: if a ``register_app_fail`` message arrives while
                waiting for the application registration feedback.
        """
        self.logs = logging.getLogger('simSparkLog')
        self.logs.setLevel(logging.DEBUG)
        fh = logging.handlers.RotatingFileHandler('/tmp/simSpark_driver.log',
                                                  maxBytes=10000000,
                                                  backupCount=5)
        fh.setLevel(logging.DEBUG)
        formatter = logging.Formatter(
            u'%(asctime)s [%(levelname)s] %(message)s')
        fh.setFormatter(formatter)
        self.logs.addHandler(fh)

        self.config = self.load_config()

        self.app = app
        self.driver_id = None
        self.port = self.config['driver_port']
        self.bport = self.config['backend_port']
        self.parallel_stage = self.config['parallel_stage']
        self.rdds = []
        self.undone = []
        self.stages = []
        self.listener = SparkConn(self.config['driver_host'],
                                  self.config['driver_port'])

        # register driver
        value = {'host': self.config['driver_host'], 'port': self.port}
        self.listener.sendMessage(
            self.wrap_msg(self.config['master_host'],
                          self.config['master_port'], 'register_driver',
                          value))
        self.logs.info('Waiting for registeration feedback')
        while True:
            msg = self.listener.accept()
            if msg['type'] == 'register_driver_success':
                self.driver_id = msg['value']['id']
                break
        # register app
        value = {
            'host': self.config['driver_host'],
            'port': self.port,
            'did': self.driver_id,
            'name': self.app.app_name
        }
        self.listener.sendMessage(
            self.wrap_msg(self.config['master_host'],
                          self.config['master_port'], 'register_app', value))
        self.logs.info('Wait for registeration feedback')
        while True:
            msg = self.listener.accept()
            if msg['type'] == 'resource_update':
                self.app.app_id = msg['value']['id']
                break
            if msg['type'] == 'register_app_fail':
                # A rejected registration is answered with this message
                # instead of 'resource_update'; without this branch the loop
                # would wait forever for a reply that never comes.
                self.logs.critical('Application registration was rejected.')
                raise RuntimeError('Application registration was rejected.')
        self.comm = backendComm(self)
        self.comm.start()
Exemple #2
0
    def run(self):
        """Open the listener, arm the periodical signal and serve forever.

        Every message accepted from ``self.listener`` is handed to
        ``self.dispensor``. The loop has no exit condition.
        """
        # establish listener
        host = self.config['master_host']
        port = self.config['master_port']
        self.listener = SparkConn(host, port)

        # set up periodical signal
        tick(2.0, self.periodical_signal)

        # main loop
        while True:
            self.dispensor(self.listener.accept())
Exemple #3
0
    def run(self):
        """Register this worker, then process incoming messages forever.

        Phase one arms ``tick(5.0, self.register_worker)`` and blocks until a
        ``register_worker_success`` message is accepted, which is passed to
        ``self.reg_succ_worker``. Phase two cancels and clears the
        module-level ``timer``, arms the heartbeat and executor-status
        ticks, and hands every further message to ``self.process``.
        """
        global timer

        host = self.config['worker_host']
        port = self.config['worker_port']
        self.listener = SparkConn(host, port)

        # a timer to set initial register
        tick(5.0, self.register_worker)
        registered = False
        while not registered:
            incoming = self.listener.accept()
            if incoming['type'] == 'register_worker_success':
                self.reg_succ_worker(incoming['value'])
                self.logs.info('register successed.')
                registered = True

        self.logs.info('Start the main process')
        # the registration timer is no longer needed once registered
        timer.cancel()
        timer = None
        heartbeat_tick(10.0, self.send_heartbeat)
        tick(5.0, self.send_executor_status)
        while True:
            self.process(self.listener.accept())
Exemple #4
0
class Application:
    def __init__(self):
        # initialize logger
        self.logs = logging.getLogger('simSparkLog')
        self.logs.setLevel(logging.DEBUG)
        fh = logging.handlers.RotatingFileHandler('/tmp/simSpark_master.log',
                                                  maxBytes=10000000,
                                                  backupCount=5)
        fh.setLevel(logging.DEBUG)
        formatter = logging.Formatter(
            u'%(asctime)s [%(levelname)s] %(message)s')
        fh.setFormatter(formatter)
        self.logs.addHandler(fh)

        self.logs.info('simSpark master has been awaken.')
        self.config = self.load_config()
        if self.config['default_core'] < 1 and self.config[
                'default_core'] != -1:
            self.logs.critical('Default core(s) assigned must be positive.')
            sys.exit(1)
        self.apps = []
        self.workers = []
        self.drivers = []
        self.executors = []

    # load configuration
    def load_config(self):
        self.logs.info('<master_config.json> is about to be loaded.')
        config = {
            'master_host': '172.21.0.12',
            'master_port': 11111,
            'webui_port': 8080,
            'worker_timeout': 60,
            'spread_out': True,
            'default_core': -1,
            'reaper_iteration': 15,
            'executor_max_retries': 10
        }
        try:
            with open('master_config.json', 'r') as jsoninput:
                inp = json.load(jsoninput)
            for k in config.keys():
                if k in inp.keys():
                    config[k] = inp[k]
        except IOError:
            self.logs.warning(
                'Failed to read configuration. Use default instead.')
        return config

    # signal sent
    def periodical_signal(self):
        """Send a ``check_worker_TO`` message to the master address and re-arm.

        The message is addressed to the configured ``master_host`` and
        ``master_port``; afterwards ``tick`` is called again with this
        method as its callback.
        """
        host = self.config['master_host']
        port = self.config['master_port']
        envelope = self.wrap_msg(host, port, 'check_worker_TO', None)
        self.listener.sendMessage(envelope)
        tick(2.0, self.periodical_signal)

    def register_driver_success(self, driver):
        value = {'id': driver.driver_id}
        self.listener.sendMessage(
            self.wrap_msg(driver.host, driver.port, 'register_driver_success',
                          value))

    def feedback_application(self, app):
        el = []
        for e in app.executor_list:
            el.append(e.executor_id)
        value = {'id': app.app_id, 'idle_executor': el, 'busy_executor': []}
        self.logs.info(
            'Feed update to Application %d at address %s through port %d' %
            (app.app_id, app.host, app.port))
        self.listener.sendMessage(
            self.wrap_msg(app.host, app.port, 'resource_update', value))

    def feedback_worker(self, worker):
        value = {'id': worker.worker_id}
        self.listener.sendMessage(
            self.wrap_msg(worker.host, worker.port, 'register_worker_success',
                          value))

    def awake_ghost_worker(self, ghost_heartbeat):
        self.listener.sendMessage(
            self.wrap_msg(ghost_heartbeat['host'], ghost_heartbeat['port'],
                          'register_worker', None))

    def feedback_executor(self, executor, oid):
        value = {'original': oid, 'assigned': executor.executor_id}
        self.listener.sendMessage(
            self.wrap_msg(executor.host, executor.port,
                          'register_executor_success', value))

    def inform_application_ready(self, app):
        value = []
        self.logs.info('executor list: %s' % str(app.executor_list))
        for e in app.executor_list:
            value.append({
                'executor_id': e.executor_id,
                'host': e.host,
                'port': e.port
            })
        self.logs.info(str(value))
        self.listener.sendMessage(
            self.wrap_msg(app.host, app.port, 'resource_ready', value))

    def feedback_ghost_executor(self, host, port, eid):
        self.listener.sendMessage(
            self.wrap_msg(host, port, 'ghost_executor', {'eid': eid}))

    def feedback_executor_elimination(self, executor, e_idx):
        value = {'eid': executor['eid'], 'success': not not e_idx}
        self.listener.sendMessage(
            self.wrap_msg(executor['host'], executor['port'],
                          'elimination_feedback', value))

    def inform_no_resource(self, driver):
        self.listener.sendMessage(
            self.wrap_msg(driver.host, driver.port, 'no_resource', None))

    def request_resource(self, wid, num, aid):
        w_idx = self.search_worker_by_id(wid)
        value = {
            'number': num,
            'app_id': aid,
            'host': self.apps[self.search_application_by_id(aid)].host,
            'port': self.apps[self.search_application_by_id(aid)].port
        }
        self.listener.sendMessage(
            self.wrap_msg(self.workers[w_idx].host, self.workers[w_idx].port,
                          'request_resource', value))

    def inform_app_still_running(self, driver):
        self.listener.sendMessage(
            self.wrap_msg(driver.host, driver.port, 'app_still_running', None))

    def kill_app_feedback(self, app):
        self.listener.sendMessage(
            self.wrap_msg(app.host, app.port, 'app_killed', None))

    # wrap the message
    def wrap_msg(self, address, port, type, value):
        """Build the envelope dict for one outgoing message.

        Returns a dict with ``host`` and ``port`` taken from the arguments
        and ``value`` set to the JSON encoding of
        ``{'type': type, 'value': value}``.
        """
        body = json.dumps({'type': type, 'value': value})
        return {'host': address, 'port': port, 'value': body}

    # functional components
    def search_driver_by_id(self, did):
        for d in range(0, len(self.drivers)):
            if self.drivers[d].driver_id == did:
                return d
        return None

    def search_application_by_id(self, aid):
        for a in range(0, len(self.apps)):
            if self.apps[a].app_id == aid:
                return a
        return None

    def search_executor_by_id(self, e_id):
        for ex in range(0, len(self.executors)):
            if self.executors[ex].executor_id == e_id:
                return ex
        return None

    def search_worker_by_id(self, wid):
        for w in range(0, len(self.workers)):
            if self.workers[w].worker_id == wid:
                return w
        return None

    def search_worker_by_address(self, address):
        for w in range(0, len(self.workers)):
            if self.workers[w].host == address:
                return w
        return None

    def kill_executors(self, eliminate_list):
        self.logs.info('About to eliminate %d executors' % len(eliminate_list))
        for e in eliminate_list:
            self.logs.info('Eliminating executor (id:%d)...' % e)
            e_idx = self.search_executor_by_id(e)
            if e_idx != None:
                worker_idx = self.search_worker_by_id(
                    self.executors[e_idx].worker_id)
                app_idx = self.search_application_by_id(
                    self.executors[e_idx].app_id)
                if worker_idx != None:
                    if e in self.workers[worker_idx].executor_list:
                        if not self.workers[worker_idx].alive:
                            self.logs.warning(
                                'The worker [%d] supervising the executor [%d] is not alive.'
                                % (self.executors[e_idx].worker_id, e))
                        self.workers[worker_idx].executor_list.remove(e)
                    else:
                        self.logs.warning(
                            'The worker [%d] does not supervise executor [%d].'
                            % (self.executors[e_idx].worker_id, e))
                else:
                    self.logs.warning('The worker [%d] does not exist.' %
                                      (self.executors[e_idx].worker_id))
                if app_idx != None:
                    if e in self.apps[app_idx].executor_list:
                        if self.apps[app_idx].status != 'ELIMINATED':
                            self.logs.warning(
                                'The application [%d] querying the executor [%d] is still alive.'
                                % (self.executors[e_idx].app_id, e))
                        self.apps[app_idx].executor_list.remove(e)
                    else:
                        self.logs.warning(
                            'The application [%d] does not own executor [%d].'
                            % (self.executors[e_idx].app_id, e))
                else:
                    self.logs.warning('The application [%d] does not exist.' %
                                      (self.executors[e_idx].app_id))
                msg = {'eid': e, 'success': True}
                self.listener.sendMessage(
                    self.wrap_msg(self.executors[e_idx].host,
                                  self.executors[e_idx].port,
                                  'elimination_feedback', msg))
                todel = self.executors[e_idx]
                self.executors.remove(todel)
                del todel
            else:
                self.logs.warning('The executor [%d] does not exist.' % (e))

    def check_application_ready(self, aid):
        app_idx = self.search_application_by_id(aid)
        if app_idx != None:
            if self.apps[app_idx].state == 'WAIT':
                if len(self.apps[app_idx].executor_list
                       ) >= self.apps[app_idx].executors_req:
                    self.logs.info('The resource request of App [%d] is met' %
                                   aid)
                    self.inform_application_ready(self.apps[app_idx])
                    self.apps[app_idx].executor_list = []
        else:
            self.logs.error('Application %d does not exist.' % (aid))

    def register_executor(self, address, port, wid, eid, aid):
        self.logs.info(
            'Executor from worker [%d] for app [%d] requests registration.' %
            (wid, aid))
        worker_idx = self.search_worker_by_id(wid)
        app_idx = self.search_application_by_id(aid)
        if worker_idx != None:
            if app_idx != None:
                self.logs.info(
                    'New executor for application [%d] on worker [%d] is registered'
                    % (aid, wid))
                new_executor = ExecutorUnit(address, port, wid, aid)
                self.executors.append(new_executor)
                self.workers[worker_idx].executor_list.append(new_executor)
                self.apps[app_idx].executor_list.append(new_executor)
                self.feedback_executor(new_executor, eid)
                self.check_application_ready(aid)
            else:
                self.logs.error('Application %d does not exist.' % (aid))
        else:
            self.logs.error('Worker %d does not exists.' % (wid))

    # reaction to message
    def check_workers_heartbeat(self):
        self.logs.info('Checking a worker list at a length of %d' %
                       len(self.workers))
        for worker in self.workers:
            if worker.alive:
                if worker.heartbeat_expired(self.config['worker_timeout']):
                    self.logs.warning('Worker [%d] is out of contact.' %
                                      (worker.worker_id))
                    worker.alive = False
            else:
                if worker.dead(self.config['worker_timeout'],
                               self.config['reaper_iteration']):
                    self.logs.warning(
                        'Worker [%d] will be buried for out of contact after several iterations.'
                        % (worker.worker_id))
                    el = []
                    for e in worker.executor_list:
                        el.append(e)
                    self.kill_executors(el)
                    self.workers.remove(worker)

    def register_application(self, app):
        self.logs.info(
            'Request for registration of application [%s] received.' %
            (app['name']))
        driver_idx = self.search_driver_by_id(app['did'])
        if driver_idx != None:
            if self.drivers[driver_idx].app_id:
                self.logs.critical(
                    'An application is already binded to driver [%d].' %
                    (app['did']))
                self.listener.sendMessage(
                    self.wrap_msg(app['host'], app['port'],
                                  'register_app_fail', None))
                return
            new_app = ApplicationUnit(app['host'], app['port'], app['name'],
                                      app['did'])
            self.apps.append(new_app)
            self.drivers[driver_idx].set_app_id(new_app.app_id)
            self.logs.info(
                'Application [%s] is binded to driver [%d] using id %d.' %
                (app['name'], app['did'], new_app.app_id))
            self.feedback_application(new_app)
        else:
            self.logs.critical('Driver [%d] does not exist.' % (app['did']))
            self.listener.sendMessage(
                self.wrap_msg(app['host'], app['port'], 'register_app_fail',
                              None))

    def kill_application(self, app):
        app_idx = self.search_application_by_id(app['id'])
        if app_idx != None:
            self.apps[app_idx].status = 'ELIMINATED'
            if len(self.apps[app_idx].executor_list) > 0:
                self.logs.warning(
                    'There are executors obtained by application %d.' %
                    (app['id']))
                el = []
                for e in self.apps[app_idx].executor_list:
                    el.append(e)
                self.kill_executors(el)
            driver_idx = self.search_driver_by_id(app['driver_id'])
            if driver_idx != None:
                if self.drivers[driver_idx].app_id != app['id']:
                    self.logs.warning('Driver information not matched.')
                else:
                    self.drivers[driver_idx].set_app_id()
            else:
                self.logs.warning(
                    'None of the drivers is binded with application [%d].' %
                    (app['id']))
            self.kill_app_feedback(self.apps[app_idx])
            todel = self.apps[app_idx]
            self.apps.remove(todel)
            del todel
            self.logs.info('There are %d apps still running' %
                           (len(self.apps)))
        else:
            self.logs.warning('Application %d does not exist.' % (app['id']))

    def worker_heartbeat_ack(self, heartbeat):
        heartbeat['time'] = datetime.strptime(heartbeat['time'],
                                              '%Y-%m-%d %H:%M:%S %f')
        worker_idx = self.search_worker_by_id(heartbeat['id'])
        if worker_idx != None:
            if self.workers[worker_idx].host == heartbeat['host']:
                if not self.workers[worker_idx].alive:
                    self.logs.info('Worker [%d] is awaken.' %
                                   (heartbeat['id']))
                    self.workers[worker_idx].awake()
                self.logs.info('Worker [%d] last heartbeat : %s, latest : %s' %
                               (heartbeat['id'],
                                str(self.workers[worker_idx].last_heartbeat),
                                str(heartbeat['time'])))
                self.workers[worker_idx].update_heartbeat(heartbeat['time'])
            else:
                self.logs.error(
                    'Worker [%d] information does not match with the latest heartbeat.'
                    % (heartbeat['id']))
        else:
            self.logs.warning('Ghost worker {%s} revives.' %
                              (heartbeat['host']))
            self.awake_ghost_worker(heartbeat)

    def register_worker(self, worker):
        worker_idx = self.search_worker_by_address(worker['host'])
        if worker_idx != None:
            self.logs.critical('Worker {%s} already exists.' % worker['host'])
            self.listener.sendMessage(
                self.wrap_msg(worker['host'], worker['port'],
                              'register_worker_success', {'success': False}))
            return
        else:
            new_worker = WorkerUnit(worker['host'], worker['port'])
            self.logs.info('Worker {%s} registers as worker %d.' %
                           (worker['host'], new_worker.worker_id))
            self.workers.append(new_worker)
            self.feedback_worker(new_worker)

    def update_executors_of_worker(self, worker):
        worker_idx = self.search_worker_by_id(worker['id'])
        if worker_idx != None:
            for executor in worker['list']:
                if executor['id'] < 0:
                    self.register_executor(worker['host'], worker['port'],
                                           worker['id'], executor['id'],
                                           executor['app_id'])
                else:
                    e_idx = self.search_executor_by_id(executor['id'])
                    if e_idx != None:
                        self.executors[e_idx].status = executor['status']
                    else:
                        self.logs.error('Executor [%d] does not exist.' %
                                        (executor['id']))
                        self.feedback_ghost_executor(worker['host'],
                                                     worker['port'],
                                                     executor['id'])
        else:
            self.logs.error('Worker [%d] does not exists.' % (worker['id']))

    def eliminate_executor(self, value):
        executors = value['eid']
        self.kill_executors(executors)

    def register_driver(self, driver):
        """Create a ``DriverUnit`` for ``driver`` and acknowledge it.

        ``driver`` is a dict with ``host`` and ``port``. The new unit is
        appended to ``self.drivers``, acknowledged through
        ``register_driver_success`` and then logged.
        """
        unit = DriverUnit(driver['host'], driver['port'])
        self.drivers.append(unit)
        self.register_driver_success(unit)
        summary = ('Driver registered. ID: %d/host: %s/port: %d' %
                   (unit.driver_id, unit.host, unit.port))
        self.logs.info(summary)

    def allocate_resource(self, req):
        if not (req['number'] > 0):
            self.logs.warning('Empty request from driver [%d].' %
                              (req['driver_id']))
            return
        d_idx = self.search_driver_by_id(req['driver_id'])
        if d_idx == None:
            self.logs.error('Unknown driver requests resource.')
            return
        if self.drivers[d_idx].app_id == None:
            self.logs.error(
                'Driver [%d] which no applicaiton is binded to requests resource.'
                % req['driver_id'])
            return
        a_idx = self.search_application_by_id(self.drivers[d_idx].app_id)
        if a_idx == None:
            self.logs.error('Application [%d] does not exist.' % a_idx)
            return
        self.apps[a_idx].executors_req = req['number']
        asstable = {}

        class WorkerHeap():
            def __init__(self):
                self.heap = [{}]

            def pop(self, i):
                if i == 1:
                    return
                if self.heap[i]['weight'] < self.heap[i // 2]['weight']:
                    tmp = self.heap[i // 2]
                    self.heap[i // 2] = self.heap[i]
                    self.heap[i] = tmp
                    self.pop(i // 2)

            def sink(self, i):
                if i * 2 >= len(self.heap):
                    return
                if self.heap[i]['weight'] > self.heap[i * 2]['weight']:
                    sink_left = True
                    if i * 2 + 1 < len(self.heap):
                        if self.heap[i * 2]['weight'] > self.heap[i * 2 +
                                                                  1]['weight']:
                            sink_left = False
                            tmp = self.heap[i]
                            self.heap[i] = self.heap[i * 2 + 1]
                            self.heap[i * 2 + 1] = tmp
                            self.sink(i * 2 + 1)
                    if sink_left:
                        tmp = self.heap[i]
                        self.heap[i] = self.heap[i * 2]
                        self.heap[i * 2] = tmp
                        self.sink(i * 2)
                else:
                    if i * 2 + 1 < len(self.heap):
                        if self.heap[i * 2 +
                                     1]['weight'] < self.heap[i]['weight']:
                            tmp = self.heap[i]
                            self.heap[i] = self.heap[i * 2 + 1]
                            self.heap[i * 2 + 1] = tmp
                            self.sink(i * 2 + 1)

            def insert(self, id, payload):
                node = {'id': str(id), 'weight': payload}
                self.heap.append(node)
                self.pop(len(self.heap) - 1)

            def add_payload(self):
                if len(self.heap) < 2:
                    return
                self.heap[1]['weight'] + 1
                self.sink(1)

        payload_heap = WorkerHeap()

        for w in self.workers:
            asstable[str(w.worker_id)] = 0
            payload_heap.insert(w.worker_id, len(w.executor_list))
        for i in range(0, req['number']):
            asstable[payload_heap.heap[1]['id']] += 1
            payload_heap.add_payload()
        for k in asstable.keys():
            self.request_resource(int(k), asstable[k],
                                  self.drivers[d_idx].app_id)

    def kill_driver(self, did):
        d_idx = self.search_driver_by_id(did)
        self.logs.info(str(d_idx))
        if d_idx != None:
            if self.drivers[d_idx].app_id:
                self.logs.error(
                    'Application [%d] of driver [%d] is still running.' %
                    (self.drivers[d_idx].app_id, did))
                self.inform_app_still_running(self.drivers[d_idx])
                return
            self.logs.error('Driver [%d] is killed.' % (did))
            todel = self.drivers[d_idx]
            self.drivers.remove(todel)
            del todel
        else:
            self.logs.error('Driver [%d] does not exist.' % (did))

    # # message dispensor
    def dispensor(self, msg):
        if msg['type'] == 'check_worker_TO':
            self.check_workers_heartbeat()
        # msg from application
        elif msg['type'] == 'register_app':
            self.register_application(msg['value'])
        elif msg['type'] == 'kill_app':
            self.kill_application(msg['value'])
        # msg from worker
        elif msg['type'] == 'worker_heartbeat':
            self.worker_heartbeat_ack(msg['value'])
        elif msg['type'] == 'register_worker':
            self.register_worker(msg['value'])
        elif msg['type'] == 'update_executors':
            self.update_executors_of_worker(msg['value'])
        elif msg['type'] == 'kill_executor':
            self.eliminate_executor(msg['value'])
    #     # msg from driver
        elif msg['type'] == 'register_driver':
            self.register_driver(msg['value'])
        elif msg['type'] == 'request_resource':
            self.allocate_resource(msg['value'])
        elif msg['type'] == 'kill_driver':
            self.kill_driver(msg['value'])

    # main body

    def run(self):
        """Open the listener, arm the periodical signal and serve forever.

        Every message accepted from ``self.listener`` is handed to
        ``self.dispensor``. The loop has no exit condition.
        """
        # establish listener
        host = self.config['master_host']
        port = self.config['master_port']
        self.listener = SparkConn(host, port)

        # set up periodical signal
        tick(2.0, self.periodical_signal)

        # main loop
        while True:
            self.dispensor(self.listener.accept())

    def __del__(self):
        """Cancel the module-level ``timer`` if one is set."""
        global timer
        if not timer:
            return
        timer.cancel()
Exemple #5
0
class workerBody:
    """Worker node of simSpark.

    The worker registers with the master, creates executors on request,
    periodically reports executor status and heartbeats, starts tasks sent
    by a driver and serves partition data.

    Two connections are used: ``listener`` (bound to ``worker_port`` in
    ``run``) for control messages, and ``driver_listener`` (bound to
    ``fetch_port`` once registration succeeded) for fetch/result traffic.
    Every use of ``driver_listener`` is serialized by ``fetchLock``.
    """

    def __init__(self):
        """Set up logging, load the configuration and reset the worker state."""
        # initialize logger
        self.logs = logging.getLogger('simSparkLog')
        self.logs.setLevel(logging.DEBUG)
        fh = logging.handlers.RotatingFileHandler('/tmp/simSpark_worker.log',
                                                  maxBytes=10000000,
                                                  backupCount=5)
        fh.setLevel(logging.DEBUG)
        formatter = logging.Formatter(
            u'%(asctime)s [%(levelname)s] %(message)s')
        fh.setFormatter(formatter)
        self.logs.addHandler(fh)

        self.logs.info('simSpark worker has been awaken.')
        self.config = self.load_config()
        self.executors = []
        # last status reported to the master, index-aligned with self.executors
        self.executors_status = []
        # provisional executor id; decremented per created executor and
        # replaced by the assigned id in reg_succ_executor
        self.exeid = -1
        self.workerid = -1
        self.maxExectuorNum = 10
        # fetchLock and driver_listener are created in reg_succ_worker,
        # listener in run
        self.fetchLock = None
        self.listener = None
        self.driver_listener = None
        self.exeLock = threading.Lock()

        self.appList = []

    def __del__(self):
        """Cancel the module-level registration and heartbeat timers, if set."""
        global timer
        if timer:
            timer.cancel()
        global heartbeat_timer
        if heartbeat_timer:
            heartbeat_timer.cancel()

    def fetch_info(self, rddid, host, port):
        """Ask ``host:port`` for the description of RDD ``rddid``.

        Sends a ``fetch_info`` request over ``driver_listener`` and blocks
        until a ``fetch_info_ack`` arrives; other messages received meanwhile
        are dropped.  Returns the ``value`` of the acknowledgement.
        """
        # ``with`` releases the lock even if sending or receiving raises
        with self.fetchLock:
            msg = {
                'rid': rddid,
                'host': self.config['worker_host'],
                'port': self.config['fetch_port'],
            }
            wrapMsg = self.wrap_msg(host, port, 'fetch_info', msg)
            self.driver_listener.sendMessage(wrapMsg)
            while True:
                reply = self.driver_listener.accept()
                if reply['type'] == 'fetch_info_ack':
                    return reply['value']

    def fetch_data(self, rddid, pid, host, port):
        """Ask ``host:port`` for partition ``pid`` of RDD ``rddid``.

        Sends a ``fetch_data`` request over ``driver_listener`` and blocks
        until a ``fetch_data_ack`` arrives; other messages received meanwhile
        are dropped.  Returns the ``value`` of the acknowledgement.
        """
        # ``with`` releases the lock even if sending or receiving raises
        with self.fetchLock:
            msg = {
                'pidx': pid,
                'rid': rddid,
                'host': self.config['worker_host'],
                'port': self.config['fetch_port'],
            }
            wrapMsg = self.wrap_msg(host, port, 'fetch_data', msg)
            self.driver_listener.sendMessage(wrapMsg)
            while True:
                reply = self.driver_listener.accept()
                if reply['type'] == 'fetch_data_ack':
                    return reply['value']

    # todo still need to confirm the interface
    def send_result(self, rddid, pid, host, port):
        """Notify ``host:port`` with ``task_finished`` for the given partition.

        The message advertises this worker's ``worker_host``/``worker_port``
        and a ``method`` of 0.  No reply is awaited.
        """
        with self.fetchLock:
            msg = {
                'host': self.config['worker_host'],
                'port': self.config['worker_port'],
                'pidx': pid,
                'rid': rddid,
                'method': 0
            }
            wrapMsg = self.wrap_msg(host, port, 'task_finished', msg)
            self.driver_listener.sendMessage(wrapMsg)

    def send_data_to_driver(self, value):
        """Answer a ``fetch_data`` request with a ``fetch_data_ack``.

        ``value`` carries ``appid``, ``rid``, ``pidx`` and the requester's
        ``host``/``port``.  The data comes from the application context's
        ``get_partition_data``.  A request for an application this worker
        does not know is logged and dropped.
        """
        appid = value['appid']
        rid = value['rid']
        pidx = value['pidx']
        dhost = value['host']
        dport = value['port']
        e = self.search_app_by_id(appid)
        if e is None:
            # indexing appList with None would raise and kill the main loop
            self.logs.error(
                'Unknown application %s, cannot serve rdd %s partition %s.',
                appid, rid, pidx)
            return
        ctx = self.appList[e].context
        result = ctx.get_partition_data(rid, pidx)
        wrapmsg = self.wrap_msg(dhost, dport, 'fetch_data_ack', result)
        self.listener.sendMessage(wrapmsg)

    # without changed,need change after use
    def load_config(self):
        """Return the worker configuration.

        Starts from the built-in defaults and overrides each known key that
        is present in ``worker_config.json``.  If the file cannot be read or
        does not contain valid JSON, a warning is logged and the defaults
        are returned.
        """
        config = {
            'master_host': '172.21.0.12',
            'master_port': 11111,
            'worker_host': '172.21.0.14',
            'worker_port': 11111,
            'webui_port': 8080,
            'fetch_port': 11112,
            'worker_timeout': 60,
            'spread_out': True,
            'default_core': -1,
            'reaper_iteration': 15,
            'executor_max_retries': 10
        }
        try:
            with open('worker_config.json', 'r') as jsoninput:
                inp = json.load(jsoninput)
            for k in config:
                if k in inp:
                    config[k] = inp[k]
        except (IOError, ValueError):
            # ValueError covers malformed JSON reported by json.load
            self.logs.warning(
                'Failed to read configuration. Use default instead.')
        return config

    # todo:need the worker to send a host and port
    def reg_succ_worker(self, value):
        """Store the assigned worker id and open the fetch connection."""
        self.workerid = value['id']
        # initialize the fetch_port lock and the driver_listener
        self.fetchLock = threading.Lock()
        self.driver_listener = SparkConn(self.config['worker_host'],
                                         self.config['fetch_port'])

    def reg_succ_executor(self, value):
        """Replace an executor's provisional id with the assigned one.

        ``value['original']`` is the id the executor was created with and
        ``value['assigned']`` the new id.  The executor is marked ``ALIVE``.
        """
        oid = value['original']
        e = self.search_executor_by_id(oid)
        if e is not None:
            self.executors[e].eid = value['assigned']
            self.executors[e].status = 'ALIVE'
            self.logs.info('An executor got the new id %d' %
                           self.executors[e].eid)
        else:
            self.logs.warning('Failed to read the right executor')

    def send_executor_status(self):
        """Report executors whose status changed and re-arm the status timer.

        Changed executors are sent to the master as ``update_executors``.
        Those that reached ``COMPLETED`` are additionally listed in a
        ``kill_executor`` message.  Ends by calling ``tick`` for itself again.
        """
        renew_list = []
        pairs = zip(self.executors, self.executors_status)
        for idx, (exe, reported) in enumerate(pairs):
            if exe.status != reported:
                renew_list.append({
                    'id': exe.eid,
                    'status': exe.status,
                    'app_id': exe.appid
                })
                self.executors_status[idx] = exe.status
        if renew_list:
            msg = {
                'id': self.workerid,
                'host': self.config['worker_host'],
                'port': self.config['worker_port'],
                'list': renew_list
            }
            wrappedmsg = self.wrap_msg(self.config['master_host'],
                                       self.config['master_port'],
                                       'update_executors', msg)
            self.listener.sendMessage(wrappedmsg)
        # check if there is an executor is completed
        self.logs.info('Update executors %s' % str(renew_list))
        eid_list = [
            nex['id'] for nex in renew_list if nex['status'] == 'COMPLETED'
        ]
        if eid_list:
            delmsg = {
                'host': self.config['worker_host'],
                'port': self.config['worker_port'],
                'eid': eid_list
            }
            wrapmsg = self.wrap_msg(self.config['master_host'],
                                    self.config['master_port'],
                                    'kill_executor', delmsg)
            self.listener.sendMessage(wrapmsg)
            self.logs.info('These executors id will be killed %s' %
                           str(eid_list))
        tick(5.0, self.send_executor_status)

    # todo
    def del_executor(self, value):
        """Drop the executor named by ``value['eid']`` after a successful kill.

        Nothing happens unless ``value['success']`` is truthy.  An id that
        matches no executor is logged and ignored.
        """
        if not value['success']:
            return
        eid = value['eid']
        pos = self.search_executor_by_id(eid)
        if pos is None:
            # deleting with a None index would raise TypeError
            self.logs.warning('No executor with eid:%s to kill', eid)
            return
        self.logs.info('Kill the executor with eid:%d', eid)
        del self.executors[pos]
        del self.executors_status[pos]

    # todo
    def req_executor(self, value):
        """Create ``value['number']`` executors for ``value['app_id']``.

        Each executor gets the current provisional id (``self.exeid``, which
        is then decremented).  The new executors are announced to the master
        in a single ``update_executors`` message.
        """
        num = value['number']

        # host = value['host']
        # port = value['port']
        # self.appId = value['app_id']
        elist = []
        for _ in range(0, num):
            ex = executor.executor(self.exeid, value['app_id'], self.exeLock)
            self.executors.append(ex)
            self.executors_status.append(ex.status)
            elist.append({
                'id': self.exeid,
                'status': ex.status,
                'app_id': value['app_id']
            })
            self.exeid -= 1
        msg = {
            'id': self.workerid,
            'host': self.config['worker_host'],
            'port': self.config['worker_port'],
            'list': elist
        }
        wrapmsg = self.wrap_msg(self.config['master_host'],
                                self.config['master_port'], 'update_executors',
                                msg)
        self.listener.sendMessage(wrapmsg)

    def send_heartbeat(self):
        """Send a timestamped ``worker_heartbeat`` and re-arm the heartbeat timer."""
        msg = {
            'id': self.workerid,
            'host': self.config['worker_host'],
            'port': self.config['worker_port'],
            'time': datetime.now().strftime('%Y-%m-%d %H:%M:%S %f')
        }
        wrapmsg = self.wrap_msg(self.config['master_host'],
                                self.config['master_port'], 'worker_heartbeat',
                                msg)
        self.listener.sendMessage(wrapmsg)
        heartbeat_tick(10.0, self.send_heartbeat)

    '''
    def cleanCatalog(self):
        pass
    '''

    def register_worker(self):
        """Send ``register_worker`` to the master and schedule a retry via ``tick``."""
        worker = {
            'host': self.config['worker_host'],
            'port': self.config['worker_port']
        }
        wrapped_msg = self.wrap_msg(self.config['master_host'],
                                    self.config['master_port'],
                                    'register_worker', worker)
        # print wrapped_msg
        self.listener.sendMessage(wrapped_msg)
        tick(5.0, self.register_worker)

    def ghost_executor(self, value):
        """Placeholder for ``ghost_executor`` messages; currently does nothing."""
        pass

    def reregister(self):
        """Register with the master again."""
        self.register_worker()

    # todo open the thread pool to run the executors in parallel, still need to add port and host
    def pending_task(self, value):
        """Start the task described by ``value`` on the requested executor.

        ``value`` carries ``eid``, ``rid``, ``pidx``, ``app_id`` and the
        ``host``/``port`` recorded for an application seen for the first time.
        A missing executor is logged as critical.
        """
        eid = value['eid']
        rid = value['rid']
        pid = value['pidx']
        appid = value['app_id']
        host = value['host']
        port = value['port']
        app_pos = self.search_app_by_id(appid)
        if app_pos is None:
            # first task of this application on this worker
            app = self.add_app(appid, host, port)
        else:
            app = self.appList[app_pos]
        index = self.search_executor_by_id(eid)
        if index is not None:
            self.logs.info("appis:%s, ctxis:%s" % (str(app), str(app.context)))
            self.executors[index].setId(rid, pid, app.context)
            self.logs.info("Executor %d begin" % eid)
            self.executors[index].start()
        else:
            self.logs.critical('Missing executor id.')

    # wrap the message
    def wrap_msg(self, address, port, type, value):
        """Build the envelope passed to ``sendMessage``.

        Returns a dict with the destination ``host`` and ``port`` and, under
        ``value``, the JSON encoding of ``{'type': type, 'value': value}``.
        """
        raw = {'type': type, 'value': value}
        wrapped = {'host': address, 'port': port, 'value': json.dumps(raw)}
        return wrapped

    def search_executor_by_id(self, eid):
        """Return the index of the executor with id ``eid``, or None."""
        for pos, exe in enumerate(self.executors):
            if exe.eid == eid:
                return pos
        return None

    def search_app_by_id(self, appid):
        """Return the index of the application with id ``appid``, or None."""
        for pos, app in enumerate(self.appList):
            if app.appid == appid:
                return pos
        return None

    def add_app(self, appid, host, port):
        """Create an ``appInfo`` for the application, append and return it."""
        napp = appInfo(appid, host, port, self)
        self.appList.append(napp)
        return napp

    def delete_app(self, value):
        """Forget the application named by ``value['appid']``, if it is known."""
        appid = value['appid']
        index = self.search_app_by_id(appid)
        # compare with None: index 0 is a valid position
        if index is not None:
            del self.appList[index]

    def process(self, msg):
        """Dispatch a control message to its handler by ``msg['type']``.

        Messages of an unknown type are ignored.
        """
        if msg['type'] == 'request_resource':
            self.req_executor(msg['value'])
        elif msg['type'] == 'register_worker':
            self.reregister()
        elif msg['type'] == 'register_executor_success':
            self.reg_succ_executor(msg['value'])
        elif msg['type'] == 'elimination_feedback':
            self.del_executor(msg['value'])
        elif msg['type'] == 'ghost_executor':
            self.ghost_executor(msg['value'])
        elif msg['type'] == 'pending_task':
            self.pending_task(msg['value'])
        elif msg['type'] == 'delete_app':
            self.delete_app(msg['value'])
        elif msg['type'] == 'fetch_data':
            self.send_data_to_driver(msg['value'])

    def run(self):
        """Register with the master, then serve control messages forever.

        Phase one repeats the registration (through ``tick``) until
        ``register_worker_success`` arrives.  Phase two starts the heartbeat
        and executor-status timers and feeds every accepted message to
        ``process``.  Never returns.
        """
        self.listener = SparkConn(self.config['worker_host'],
                                  self.config['worker_port'])

        # a timer to set initial register
        tick(5.0, self.register_worker)
        while True:
            msg = self.listener.accept()
            if msg['type'] == 'register_worker_success':
                self.reg_succ_worker(msg['value'])
                self.logs.info('register successed.')
                break
        self.logs.info('Start the main process')
        # stop the registration retries
        global timer
        if timer:
            timer.cancel()
        timer = None
        heartbeat_tick(10.0, self.send_heartbeat)
        tick(5.0, self.send_executor_status)
        while True:
            msg = self.listener.accept()
            self.process(msg)
Exemple #6
0
 def reg_succ_worker(self, value):
     """Record the worker id from ``value['id']`` and open the fetch channel.

     Creates ``fetchLock`` and binds ``driver_listener`` to the configured
     ``worker_host`` and ``fetch_port``.
     """
     self.workerid = value['id']
     # initialize the fetch_port lock and the driver_listener
     self.fetchLock = threading.Lock()
     settings = self.config
     self.driver_listener = SparkConn(settings['worker_host'],
                                      settings['fetch_port'])
Exemple #7
0
class backendComm(threading.Thread):
    """Thread answering backend requests on behalf of a driver context.

    ``run`` listens on the context's ``driver_host`` and ``bport`` and
    handles ``fetch_info``, ``fetch_data`` and ``task_finished`` messages
    until ``collapse`` clears the running flag.
    """

    def __init__(self, ctx):
        """Keep the context and set the running flag; the thread is not started."""
        threading.Thread.__init__(self)
        self.context = ctx
        self.running = threading.Event()
        self.running.set()

    def _reply(self, req, msg_type, value):
        """Send ``value`` as a ``msg_type`` message to ``req['host']:req['port']``."""
        self.lis.sendMessage(
            self.context.wrap_msg(req['host'], req['port'], msg_type, value))

    def _find_rdd(self, req):
        """Return the RDD named by ``req['rid']``.

        If the context does not know it, reply ``Non-exist_rdd`` and return
        None.
        """
        rdd = self.context.search_rdd_by_id(req['rid'])
        if not rdd:
            self._reply(req, 'Non-exist_rdd', req['rid'])
            return None
        return rdd

    def _find_partition(self, req):
        """Return ``(rdd, partition)`` for ``req['rid']`` / ``req['pidx']``.

        If either does not exist, the matching error reply is sent and
        ``(None, None)`` is returned.
        """
        rdd = self._find_rdd(req)
        if rdd is None:
            return None, None
        pidx = req['pidx']
        # a negative index would silently address a partition from the end
        if not 0 <= pidx < len(rdd.partitions):
            self._reply(req, 'Non-exist_partition', {
                'rid': req['rid'],
                'pidx': pidx
            })
            return None, None
        return rdd, rdd.partitions[pidx]

    def query_rdd(self, q):
        """Answer ``fetch_info``: describe RDD ``q['rid']`` in a ``fetch_info_ack``.

        The description holds the RDD type, its partition count, the ids of
        its dependencies, ``funtype`` and the name of ``fun`` (None when the
        RDD has no function).
        """
        rdd = self._find_rdd(q)
        if rdd is None:
            return
        value = {
            'rdd_type': rdd.type,
            'part_len': len(rdd.partitions),
            'dependencies': [d.rdd_id for d in rdd.dependencies],
            'funtype': rdd.funtype,
            'fun': None
        }
        if rdd.fun is not None:
            value['fun'] = rdd.fun.__name__
        self._reply(q, 'fetch_info_ack', value)

    def query_partition(self, q):
        """Answer ``fetch_data``: send the partition's records to the requester.

        Replies ``not_stored_partition`` when the partition is not fetchable,
        otherwise ``fetch_data_ack`` with its ``records``.
        """
        rdd, part = self._find_partition(q)
        if rdd is None:
            return
        if not part.fetchable:
            self._reply(q, 'not_stored_partition', {
                'rid': q['rid'],
                'pidx': q['pidx']
            })
        else:
            self._reply(q, 'fetch_data_ack', part.records)

    def update_task(self, u):
        """Handle ``task_finished``: mark the partition's task as done.

        Records the sender as the partition's source, stores ``u['method']``,
        flags the task in the RDD's stage and finishes the stage once it
        reports ``done``.  The sender gets a ``task_finished_ack``.  A
        missing stage is logged as critical and no acknowledgement is sent.
        """
        rdd, part = self._find_partition(u)
        if rdd is None:
            return
        part.update_source((u['host'], u['port']))
        part.method = u['method']
        stage = self.context.search_stage_by_rdd(rdd.rdd_id)
        if not stage:
            self.context.logs.critical('Missing stage.')
            return
        stage.task_done[u['pidx']] = True
        if stage.done:
            stage.finish()
        self._reply(u, 'task_finished_ack', {
            'rid': u['rid'],
            'pidx': u['pidx']
        })

    def dispense(self, msg):
        """Route a message to its handler by ``msg['type']``; ignore unknown types."""
        if msg['type'] == 'fetch_info':
            self.query_rdd(msg['value'])
        elif msg['type'] == 'fetch_data':
            self.query_partition(msg['value'])
        elif msg['type'] == 'task_finished':
            self.update_task(msg['value'])

    def run(self):
        """Open the backend listener and serve messages while the flag is set."""
        self.lis = SparkConn(self.context.config['driver_host'],
                             self.context.bport)
        while self.running.is_set():
            msg = self.lis.accept()
            self.dispense(msg)

    def collapse(self):
        """Clear the running flag; the loop in ``run`` checks it between messages."""
        self.running.clear()
Exemple #8
0
class simContext:
    """Driver-side context of a simSpark application.

    Constructing a context registers the driver and then the application
    with the master, and starts a ``backendComm`` thread.  The context keeps
    the lists of RDDs and stages and offers helpers to request resources,
    fetch partitions and send tasks to executors.
    """

    def __init__(self, app):
        """Register driver and application with the master and start the backend.

        Blocks until the master answers ``register_driver_success`` and,
        for the application, ``resource_update``.  ``app`` must provide
        ``app_name``; its ``app_id`` is set from the master's answer.
        """
        self.logs = logging.getLogger('simSparkLog')
        self.logs.setLevel(logging.DEBUG)
        fh = logging.handlers.RotatingFileHandler('/tmp/simSpark_driver.log',
                                                  maxBytes=10000000,
                                                  backupCount=5)
        fh.setLevel(logging.DEBUG)
        formatter = logging.Formatter(
            u'%(asctime)s [%(levelname)s] %(message)s')
        fh.setFormatter(formatter)
        self.logs.addHandler(fh)

        self.config = self.load_config()

        self.app = app
        self.driver_id = None
        self.port = self.config['driver_port']
        self.bport = self.config['backend_port']
        self.parallel_stage = self.config['parallel_stage']
        self.rdds = []
        self.undone = []
        self.stages = []
        self.listener = SparkConn(self.config['driver_host'],
                                  self.config['driver_port'])

        # register driver
        value = {'host': self.config['driver_host'], 'port': self.port}
        self.listener.sendMessage(
            self.wrap_msg(self.config['master_host'],
                          self.config['master_port'], 'register_driver',
                          value))
        self.logs.info('Waiting for registeration feedback')
        while True:
            msg = self.listener.accept()
            if msg['type'] == 'register_driver_success':
                self.driver_id = msg['value']['id']
                break
        # register app
        value = {
            'host': self.config['driver_host'],
            'port': self.port,
            'did': self.driver_id,
            'name': self.app.app_name
        }
        self.listener.sendMessage(
            self.wrap_msg(self.config['master_host'],
                          self.config['master_port'], 'register_app', value))
        self.logs.info('Wait for registeration feedback')
        while True:
            msg = self.listener.accept()
            if msg['type'] == 'resource_update':
                self.app.app_id = msg['value']['id']
                break
        # assigned last: its presence marks a fully registered context
        self.comm = backendComm(self)
        self.comm.start()

    def __del__(self):
        """Unregister application and driver from the master, stop the backend.

        Sends ``kill_app``, waits for ``app_killed``, sends ``kill_driver``
        and asks the backend thread to stop.  Nothing is done for an object
        whose ``__init__`` did not finish.
        """
        # __del__ also runs when __init__ raised; an object without a backend
        # never completed registration, and the blocking handshake below
        # would have nothing to wait for.
        if getattr(self, 'comm', None) is None:
            return
        value = {'id': self.app.app_id, 'driver_id': self.driver_id}
        self.listener.sendMessage(
            self.wrap_msg(self.config['master_host'],
                          self.config['master_port'], 'kill_app', value))
        while True:
            msg = self.listener.accept()
            if msg['type'] == 'app_killed':
                break
        value = self.driver_id
        self.listener.sendMessage(
            self.wrap_msg(self.config['master_host'],
                          self.config['master_port'], 'kill_driver', value))
        self.comm.collapse()

    def wrap_msg(self, address, port, type, value):
        """Build the envelope passed to ``sendMessage``.

        Returns a dict with the destination ``host`` and ``port`` and, under
        ``value``, the JSON encoding of ``{'type': type, 'value': value}``.
        """
        raw = {'type': type, 'value': value}
        wrapped = {'host': address, 'port': port, 'value': json.dumps(raw)}
        return wrapped

    def load_config(self):
        """Return the driver configuration.

        Starts from the built-in defaults and overrides each known key that
        is present in ``driver_config.json``.  If the file cannot be read or
        does not contain valid JSON, a warning is logged and the defaults
        are returned.
        """
        self.logs.info('<driver_config.json> is about to be loaded.')
        config = {
            'master_host': '172.21.0.12',
            'master_port': 11111,
            'driver_host': '172.21.0.3',
            'driver_port': 10001,
            'backend_port': 10002,
            'parallel_stage': 1,
            'timeout': 60
        }
        try:
            with open('driver_config.json', 'r') as jsoninput:
                inp = json.load(jsoninput)
            for k in config:
                if k in inp:
                    config[k] = inp[k]
        except (IOError, ValueError):
            # ValueError covers malformed JSON reported by json.load
            self.logs.warning(
                'Failed to read configuration. Use default instead.')
        return config

    def parallelize(self, arr, fineness=-1):
        """Split ``arr`` into partitions and register them as a new RDD.

        ``fineness`` is the number of partitions; -1 (the default) makes one
        partition per element.  Every partition takes
        ``len(arr) // fineness`` consecutive elements and the last one also
        takes the remainder.  Returns the new RDD, or None (after logging an
        error) when ``fineness`` is 0 or below -1.
        """
        part = []
        if fineness == 0 or fineness < -1:
            self.logs.error(
                'Invalid arguments of parallelism. Failed to create RDD')
            return None
        if fineness == -1:
            fineness = len(arr)
        # with an empty ``arr`` and the default, fineness is 0 and the loop
        # below does not run, so the division is guarded
        chunk = len(arr) // fineness if fineness else 0
        for i in range(0, fineness):
            l = chunk * i
            r = len(arr) if i == fineness - 1 else chunk * (i + 1)
            part.append(simPartition(self, i, arr[l:r]))
        new_rdd = simRDD(self, [], part)
        new_rdd._register()
        return new_rdd

    def search_stage_by_rdd(self, rid):
        """Return the stage whose RDD has id ``rid``, or None."""
        for s in self.stages:
            if s.rdd.rdd_id == rid:
                return s
        return None

    def search_rdd_by_id(self, rid):
        """Return the registered RDD with id ``rid``, or None."""
        for r in self.rdds:
            if r.rdd_id == rid:
                return r
        return None

    def resource_request(self, n=1):
        """Send a ``request_resource`` for ``n`` to the master (default 1)."""
        value = {'driver_id': self.driver_id, 'number': n}
        self.listener.sendMessage(
            self.wrap_msg(self.config['master_host'],
                          self.config['master_port'], 'request_resource',
                          value))

    @property
    def ready_stages(self):
        """Undone stages none of whose parent stages is still undone.

        Stages keep the order of ``self.undone``.  When ``parallel_stage`` is
        positive, at most that many stages are returned.
        """
        ret = []
        for stage in self.undone:
            if any(pstage in self.undone for pstage in stage.parent_stage):
                continue
            ret.append(stage)
            if self.parallel_stage > 0 and len(ret) == self.parallel_stage:
                break
        return ret

    def fetch_partition(self, source, rid, pidx, frommem):
        """Fetch partition ``pidx`` of RDD ``rid`` from ``source``.

        ``source`` is a ``(host, port)`` pair.  With ``frommem`` truthy a
        ``fetch_data`` request is sent and the call blocks until a
        ``fetch_data_ack`` arrives, whose ``value`` is returned.  Returns
        None when ``rid`` is None or ``frommem`` is falsy.
        """
        if rid is None:
            return None
        if frommem:
            value = {
                'appid': self.app.app_id,
                'host': self.config['driver_host'],
                'port': self.config['driver_port'],
                'rid': rid,
                'pidx': pidx
            }
            self.listener.sendMessage(
                self.wrap_msg(source[0], source[1], 'fetch_data', value))
            while True:
                msg = self.listener.accept()
                if msg['type'] == 'fetch_data_ack':
                    return msg['value']
        return None

    def list_clear(self, stages):
        """Return True when every stage in ``stages`` reports ``done``."""
        for stage in stages:
            if not stage.done:
                return False
        return True

    def pend_task(self, executor, rid, pidx):
        """Send a ``pending_task`` for partition ``pidx`` of RDD ``rid``.

        ``executor`` is a dict with ``executor_id``, ``host`` and ``port``.
        The message carries the driver's ``driver_host`` and
        ``backend_port``.
        """
        value = {
            'app_id': self.app.app_id,
            'eid': executor['executor_id'],
            'rid': rid,
            'pidx': pidx,
            'host': self.config['driver_host'],
            'port': self.config['backend_port']
        }
        self.listener.sendMessage(
            self.wrap_msg(executor['host'], executor['port'], 'pending_task',
                          value))
Exemple #9
0
 def run(self):
     """Bind ``self.lis`` to the driver host and backend port, then serve.

     Messages are accepted and dispensed one at a time for as long as the
     ``running`` event is set.
     """
     ctx = self.context
     self.lis = SparkConn(ctx.config['driver_host'], ctx.bport)
     while self.running.is_set():
         incoming = self.lis.accept()
         self.dispense(incoming)