def test_kill_task(mocker):
    """killTask is a no-op before the driver is subscribed (no stream id),
    and sends exactly one KILL call for the task id afterwards."""
    framework_id = str(uuid.uuid4())
    scheduler = mocker.Mock()
    framework = {'id': {'value': framework_id}}
    master = mocker.Mock()
    driver = MesosSchedulerDriver(scheduler, framework, master)
    driver._send = mocker.Mock()

    # Not subscribed yet: nothing may go on the wire.
    driver.killTask({"value": "my-task"})
    driver._send.assert_not_called()

    # Simulate an established subscription stream, then kill again.
    driver._stream_id = str(uuid.uuid4())
    driver.killTask({"value": "my-task"})
    expected_call = {
        'type': 'KILL',
        'framework_id': {'value': framework_id},
        'kill': {
            'task_id': {'value': 'my-task'},
        }
    }
    driver._send.assert_called_once_with(expected_call)
class ProcScheduler(Scheduler):
    """Mesos scheduler that launches pickled "procs" via a custom executor.

    Bookkeeping (all access guarded by ``self._lock``):
      procs_pending  -- proc.id -> proc, submitted but not yet launched
      procs_launched -- proc.id -> proc, launched and not yet finished
      slave_to_proc  -- slave id -> set of proc ids running on that slave
    """

    def __init__(self):
        self.framework_id = None
        self.framework = self._init_framework()
        self.executor = None
        self.master = str(CONFIG.get("master", os.environ["MESOS_MASTER"]))
        self.driver = MesosSchedulerDriver(self, self.framework, self.master)
        self.procs_pending = {}
        self.procs_launched = {}
        self.slave_to_proc = {}
        self._lock = RLock()

    def _init_framework(self):
        """Build the FrameworkInfo used to register with the master."""
        framework = mesos_pb2.FrameworkInfo()
        framework.user = getpass.getuser()
        framework.name = repr(self)
        framework.hostname = socket.gethostname()
        return framework

    def _init_executor(self):
        """Build the ExecutorInfo running ``<package>.executor``.

        Must only be called after registration: it reads self.framework_id.
        """
        executor = mesos_pb2.ExecutorInfo()
        executor.executor_id.value = "default"
        executor.command.value = "%s -m %s.executor" % (sys.executable,
                                                        __package__)

        mem = executor.resources.add()
        mem.name = "mem"
        mem.type = mesos_pb2.Value.SCALAR
        mem.scalar.value = MIN_MEMORY
        cpus = executor.resources.add()
        cpus.name = "cpus"
        cpus.type = mesos_pb2.Value.SCALAR
        cpus.scalar.value = MIN_CPUS

        # Propagate PYTHONPATH so the executor can import the same code.
        if "PYTHONPATH" in os.environ:
            var = executor.command.environment.variables.add()
            var.name = "PYTHONPATH"
            var.value = os.environ["PYTHONPATH"]

        executor.framework_id.value = str(self.framework_id.value)
        return executor

    def _init_task(self, proc, offer):
        """Build a TaskInfo that runs `proc` against the given offer."""
        task = mesos_pb2.TaskInfo()
        task.task_id.value = str(proc.id)
        task.slave_id.value = offer.slave_id.value
        task.name = repr(proc)
        task.executor.MergeFrom(self.executor)
        # Proc parameters travel to the executor as pickled task data.
        task.data = pickle.dumps(proc.params)

        cpus = task.resources.add()
        cpus.name = "cpus"
        cpus.type = mesos_pb2.Value.SCALAR
        cpus.scalar.value = proc.cpus
        mem = task.resources.add()
        mem.name = "mem"
        mem.type = mesos_pb2.Value.SCALAR
        mem.scalar.value = proc.mem
        return task

    def _filters(self, seconds):
        """Return a Filters message refusing re-offers for `seconds`."""
        f = mesos_pb2.Filters()
        f.refuse_seconds = seconds
        return f

    def __repr__(self):
        return "%s[%s]: %s" % (self.__class__.__name__, os.getpid(),
                               " ".join(sys.argv))

    def registered(self, driver, framework_id, master_info):
        with self._lock:
            logger.info("Framework registered with id=%s, master=%s" % (
                framework_id, master_info))
            self.framework_id = framework_id
            self.executor = self._init_executor()

    def resourceOffers(self, driver, offers):
        def get_resources(offer):
            cpus, mem = 0.0, 0.0
            for r in offer.resources:
                if r.name == "cpus":
                    cpus = float(r.scalar.value)
                elif r.name == "mem":
                    mem = float(r.scalar.value)
            return cpus, mem

        with self._lock:
            random.shuffle(offers)
            for offer in offers:
                if not self.procs_pending:
                    logger.debug("Reject offers forever for no pending procs, "
                                 "offers=%s" % (offers,))
                    # Empty launch + FOREVER filter == decline indefinitely.
                    driver.launchTasks(offer.id, [], self._filters(FOREVER))
                    continue

                cpus, mem = get_resources(offer)
                tasks = []
                # Iterate a snapshot: entries are deleted inside the loop
                # (mutating the dict during iteration breaks on Python 3).
                for proc in list(self.procs_pending.values()):
                    if cpus >= proc.cpus and mem >= proc.mem:
                        tasks.append(self._init_task(proc, offer))
                        del self.procs_pending[proc.id]
                        self.procs_launched[proc.id] = proc
                        cpus -= proc.cpus
                        mem -= proc.mem

                # Randomized back-off so retries do not synchronize.
                seconds = 5 + random.random() * 5
                driver.launchTasks(offer.id, tasks, self._filters(seconds))
                if tasks:
                    logger.info(
                        "Accept offer for procs, offer=%s, "
                        "procs=%s, filter_time=%s" %
                        (offer, [int(t.task_id.value) for t in tasks], seconds)
                    )
                else:
                    logger.info("Retry offer for procs later, offer=%s, "
                                "filter_time=%s" % (offer, seconds))

    def _call_finished(self, proc_id, success, message, data, slave_id=None):
        """Drop a launched proc from all bookkeeping and fire its callback."""
        with self._lock:
            proc = self.procs_launched.pop(proc_id)
            if slave_id is not None:
                if slave_id in self.slave_to_proc:
                    # discard(): a terminal update may arrive without a
                    # preceding TASK_RUNNING, so the id may be absent.
                    self.slave_to_proc[slave_id].discard(proc_id)
            else:
                for procs in self.slave_to_proc.values():
                    procs.discard(proc_id)
            proc._finished(success, message, data)

    def statusUpdate(self, driver, update):
        with self._lock:
            proc_id = int(update.task_id.value)
            logger.info("Status update for proc, id=%s, state=%s" %
                        (proc_id, update.state))
            if update.state == mesos_pb2.TASK_RUNNING:
                if update.slave_id.value in self.slave_to_proc:
                    self.slave_to_proc[update.slave_id.value].add(proc_id)
                else:
                    self.slave_to_proc[update.slave_id.value] = set([proc_id])
                proc = self.procs_launched[proc_id]
                proc._started()
            elif update.state >= mesos_pb2.TASK_FINISHED:
                # Any state numerically at/after TASK_FINISHED is terminal.
                slave_id = update.slave_id.value
                success = update.state == mesos_pb2.TASK_FINISHED
                message = update.message
                data = update.data and pickle.loads(update.data)
                self._call_finished(proc_id, success, message, data, slave_id)
                driver.reviveOffers()

    def offerRescinded(self, driver, offer_id):
        with self._lock:
            if self.procs_pending:
                logger.info("Revive offers for pending procs")
                driver.reviveOffers()

    def slaveLost(self, driver, slave_id):
        with self._lock:
            for proc_id in self.slave_to_proc.pop(slave_id, []):
                self._call_finished(proc_id, False, "Slave lost", None,
                                    slave_id)

    def error(self, driver, message):
        with self._lock:
            # Pending procs were never launched, so they are NOT in
            # procs_launched; routing them through _call_finished (which
            # pops procs_launched) raised KeyError.  Finish them directly.
            for proc in list(self.procs_pending.values()):
                del self.procs_pending[proc.id]
                proc._finished(False, "Stopped", None)
            for proc in list(self.procs_launched.values()):
                self._call_finished(proc.id, False, "Stopped", None)
            self.stop()

    def start(self):
        self.driver.start()

    def stop(self):
        assert not self.driver.aborted
        self.driver.stop()

    def submit(self, proc):
        """Queue a proc for launch; raises ValueError on duplicate ids."""
        if self.driver.aborted:
            raise RuntimeError("driver already aborted")
        with self._lock:
            if proc.id not in self.procs_pending:
                # Pass the id itself, not a tuple, as the lazy log argument.
                logger.info("Try submit proc, id=%s", proc.id)
                self.procs_pending[proc.id] = proc
                if len(self.procs_pending) == 1:
                    logger.info("Revive offers for pending procs")
                    self.driver.reviveOffers()
            else:
                raise ValueError("Proc with same id already submitted")

    def cancel(self, proc):
        if self.driver.aborted:
            raise RuntimeError("driver already aborted")
        with self._lock:
            if proc.id in self.procs_pending:
                del self.procs_pending[proc.id]
            elif proc.id in self.procs_launched:
                del self.procs_launched[proc.id]
                self.driver.killTask(mesos_pb2.TaskID(value=str(proc.id)))

            # Snapshot: entries may be deleted while iterating.  discard()
            # replaces the former procs.pop(proc.id) -- set.pop() takes no
            # argument, so that call raised TypeError.
            for slave_id, procs in list(self.slave_to_proc.items()):
                procs.discard(proc.id)
                if not procs:
                    del self.slave_to_proc[slave_id]

    def send_data(self, pid, type, data):
        """Forward (pid, type, data) to the executor hosting `pid`."""
        if self.driver.aborted:
            raise RuntimeError("driver already aborted")
        msg = pickle.dumps((pid, type, data))
        # items() instead of the Python-2-only iteritems().
        for slave_id, procs in self.slave_to_proc.items():
            if pid in procs:
                self.driver.sendFrameworkMessage(
                    self.executor.executor_id,
                    mesos_pb2.SlaveID(value=slave_id), msg)
                return
        raise RuntimeError("Cannot find slave for pid %s" % (pid,))
class ProcScheduler(Scheduler):
    """Mesos scheduler that launches pickled "procs" via a custom executor.

    Bookkeeping (all access guarded by ``self._lock``):
      procs_pending  -- proc.id -> proc, submitted but not yet launched
      procs_launched -- proc.id -> proc, launched and not yet finished
      slave_to_proc  -- slave id -> set of proc ids running on that slave

    NOTE(review): this class uses Python-2-only ``dict.iteritems`` in two
    places; confirm the target interpreter before reuse.
    """

    def __init__(self):
        self.framework_id = None
        self.framework = self._init_framework()
        self.executor = None
        self.master = str(CONFIG.get('master', os.environ['MESOS_MASTER']))
        self.driver = MesosSchedulerDriver(self, self.framework, self.master)
        self.procs_pending = {}
        self.procs_launched = {}
        self.slave_to_proc = {}
        self._lock = RLock()

    def _init_framework(self):
        # FrameworkInfo used to register with the Mesos master.
        framework = mesos_pb2.FrameworkInfo()
        framework.user = getpass.getuser()
        framework.name = repr(self)
        framework.hostname = socket.gethostname()
        return framework

    def _init_executor(self):
        # ExecutorInfo running "<package>.executor"; reads self.framework_id,
        # so it must only be called after registration (see registered()).
        executor = mesos_pb2.ExecutorInfo()
        executor.executor_id.value = 'default'
        executor.command.value = '%s -m %s.executor' % (
            sys.executable, __package__)
        mem = executor.resources.add()
        mem.name = 'mem'
        mem.type = mesos_pb2.Value.SCALAR
        mem.scalar.value = MIN_MEMORY
        cpus = executor.resources.add()
        cpus.name = 'cpus'
        cpus.type = mesos_pb2.Value.SCALAR
        cpus.scalar.value = MIN_CPUS
        # Propagate PYTHONPATH so the executor can import the same code.
        if 'PYTHONPATH' in os.environ:
            var = executor.command.environment.variables.add()
            var.name = 'PYTHONPATH'
            var.value = os.environ['PYTHONPATH']
        executor.framework_id.value = str(self.framework_id.value)
        return executor

    def _init_task(self, proc, offer):
        # TaskInfo for `proc` bound to the offer's slave; proc parameters
        # travel to the executor as pickled task data.
        task = mesos_pb2.TaskInfo()
        task.task_id.value = str(proc.id)
        task.slave_id.value = offer.slave_id.value
        task.name = repr(proc)
        task.executor.MergeFrom(self.executor)
        task.data = pickle.dumps(proc.params)
        cpus = task.resources.add()
        cpus.name = 'cpus'
        cpus.type = mesos_pb2.Value.SCALAR
        cpus.scalar.value = proc.cpus
        mem = task.resources.add()
        mem.name = 'mem'
        mem.type = mesos_pb2.Value.SCALAR
        mem.scalar.value = proc.mem
        return task

    def _filters(self, seconds):
        # Filters message refusing re-offers for `seconds`.
        f = mesos_pb2.Filters()
        f.refuse_seconds = seconds
        return f

    def __repr__(self):
        return "%s[%s]: %s" % (
            self.__class__.__name__, os.getpid(), ' '.join(sys.argv))

    def registered(self, driver, framework_id, master_info):
        with self._lock:
            logger.info('Framework registered with id=%s, master=%s' % (
                framework_id, master_info))
            self.framework_id = framework_id
            self.executor = self._init_executor()

    def resourceOffers(self, driver, offers):
        def get_resources(offer):
            # Extract scalar cpus/mem totals from one offer.
            cpus, mem = 0.0, 0.0
            for r in offer.resources:
                if r.name == 'cpus':
                    cpus = float(r.scalar.value)
                elif r.name == 'mem':
                    mem = float(r.scalar.value)
            return cpus, mem

        with self._lock:
            random.shuffle(offers)
            for offer in offers:
                if not self.procs_pending:
                    logger.debug('Reject offers forever for no pending procs, '
                                 'offers=%s' % (offers, ))
                    # Empty launch + FOREVER filter == decline indefinitely.
                    driver.launchTasks(offer.id, [], self._filters(FOREVER))
                    continue
                cpus, mem = get_resources(offer)
                tasks = []
                # NOTE(review): entries are deleted from procs_pending while
                # iterating .values(); safe on Python 2 (list copy) only.
                for proc in self.procs_pending.values():
                    if cpus >= proc.cpus and mem >= proc.mem:
                        tasks.append(self._init_task(proc, offer))
                        del self.procs_pending[proc.id]
                        self.procs_launched[proc.id] = proc
                        cpus -= proc.cpus
                        mem -= proc.mem

                # Randomized back-off so retries do not synchronize.
                seconds = 5 + random.random() * 5
                driver.launchTasks(offer.id, tasks, self._filters(seconds))
                if tasks:
                    logger.info('Accept offer for procs, offer=%s, '
                                'procs=%s, filter_time=%s' % (
                                    offer,
                                    [int(t.task_id.value) for t in tasks],
                                    seconds))
                else:
                    logger.info('Retry offer for procs later, offer=%s, '
                                'filter_time=%s' % (
                                    offer, seconds))

    def _call_finished(self, proc_id, success, message, data, slave_id=None):
        # Drop a launched proc from all bookkeeping and fire its callback.
        with self._lock:
            proc = self.procs_launched.pop(proc_id)
            if slave_id is not None:
                if slave_id in self.slave_to_proc:
                    # NOTE(review): remove() raises KeyError if a terminal
                    # update arrives without a preceding TASK_RUNNING.
                    self.slave_to_proc[slave_id].remove(proc_id)
            else:
                # NOTE(review): iteritems is Python 2 only.
                for slave_id, procs in self.slave_to_proc.iteritems():
                    if proc_id in procs:
                        procs.remove(proc_id)
            proc._finished(success, message, data)

    def statusUpdate(self, driver, update):
        with self._lock:
            proc_id = int(update.task_id.value)
            logger.info('Status update for proc, id=%s, state=%s' % (
                proc_id, update.state))
            if update.state == mesos_pb2.TASK_RUNNING:
                if update.slave_id.value in self.slave_to_proc:
                    self.slave_to_proc[update.slave_id.value].add(proc_id)
                else:
                    self.slave_to_proc[update.slave_id.value] = set([proc_id])
                proc = self.procs_launched[proc_id]
                proc._started()
            # Any state numerically at/after TASK_FINISHED is terminal.
            elif update.state >= mesos_pb2.TASK_FINISHED:
                slave_id = update.slave_id.value
                success = (update.state == mesos_pb2.TASK_FINISHED)
                message = update.message
                data = update.data and pickle.loads(update.data)
                self._call_finished(proc_id, success, message, data, slave_id)
                driver.reviveOffers()

    def offerRescinded(self, driver, offer_id):
        with self._lock:
            if self.procs_pending:
                logger.info('Revive offers for pending procs')
                driver.reviveOffers()

    def slaveLost(self, driver, slave_id):
        # Fail every proc that was running on the lost slave.
        with self._lock:
            for proc_id in self.slave_to_proc.pop(slave_id, []):
                self._call_finished(
                    proc_id, False, 'Slave lost', None, slave_id)

    def error(self, driver, message):
        with self._lock:
            # NOTE(review): pending procs are not in procs_launched, so
            # _call_finished's procs_launched.pop() raises KeyError here.
            for proc in self.procs_pending.values():
                self._call_finished(proc.id, False, 'Stopped', None)
            for proc in self.procs_launched.values():
                self._call_finished(proc.id, False, 'Stopped', None)
            self.stop()

    def start(self):
        self.driver.start()

    def stop(self):
        assert not self.driver.aborted
        self.driver.stop()

    def submit(self, proc):
        # Queue a proc for launch; duplicate ids raise ValueError.
        if self.driver.aborted:
            raise RuntimeError('driver already aborted')
        with self._lock:
            if proc.id not in self.procs_pending:
                logger.info('Try submit proc, id=%s', (proc.id,))
                self.procs_pending[proc.id] = proc
                # First pending proc: ask the master for offers again.
                if len(self.procs_pending) == 1:
                    logger.info('Revive offers for pending procs')
                    self.driver.reviveOffers()
            else:
                raise ValueError('Proc with same id already submitted')

    def cancel(self, proc):
        if self.driver.aborted:
            raise RuntimeError('driver already aborted')
        with self._lock:
            if proc.id in self.procs_pending:
                del self.procs_pending[proc.id]
            elif proc.id in self.procs_launched:
                del self.procs_launched[proc.id]
                self.driver.killTask(mesos_pb2.TaskID(value=str(proc.id)))
            # NOTE(review): set.pop() takes no argument — this line raises
            # TypeError; discard(proc.id) was probably intended.  The dict
            # is also mutated while iterating items().
            for slave_id, procs in self.slave_to_proc.items():
                procs.pop(proc.id)
                if not procs:
                    del self.slave_to_proc[slave_id]

    def send_data(self, pid, type, data):
        # Forward (pid, type, data) to the executor hosting `pid`.
        if self.driver.aborted:
            raise RuntimeError('driver already aborted')
        msg = pickle.dumps((pid, type, data))
        # NOTE(review): iteritems is Python 2 only.
        for slave_id, procs in self.slave_to_proc.iteritems():
            if pid in procs:
                self.driver.sendFrameworkMessage(
                    self.executor.executor_id,
                    mesos_pb2.SlaveID(value=slave_id), msg)
                return
        raise RuntimeError('Cannot find slave for pid %s' % (pid,))
class ProcScheduler(Scheduler):
    """Dict-based (pymesos HTTP API) scheduler launching pickled "procs".

    Bookkeeping (all access guarded by ``self._lock``):
      procs_pending  -- proc.id -> proc, submitted but not yet launched
      procs_launched -- proc.id -> proc, launched and not yet finished
      agent_to_proc  -- agent id -> set of proc ids running on that agent
    """

    def __init__(self):
        self.framework_id = None
        self.framework = self._init_framework()
        self.executor = None
        self.master = str(CONFIG.get('master', os.environ['MESOS_MASTER']))
        self.driver = MesosSchedulerDriver(self, self.framework, self.master)
        self.procs_pending = {}
        self.procs_launched = {}
        self.agent_to_proc = {}
        self._lock = RLock()

    def _init_framework(self):
        """FrameworkInfo dict used to register with the master."""
        framework = dict(
            user=getpass.getuser(),
            name=repr(self),
            hostname=socket.gethostname(),
        )
        return framework

    def _init_executor(self):
        """ExecutorInfo dict running ``<package>.executor``.

        Reads self.framework_id, so it must only be called after
        registration (see registered()).
        """
        executor = dict(
            executor_id=dict(value='default'),
            framework_id=self.framework_id,
            command=dict(value='%s -m %s.executor' % (sys.executable,
                                                      __package__)),
            resources=[
                dict(
                    name='mem',
                    type='SCALAR',
                    scalar=dict(value=MIN_MEMORY),
                ),
                dict(name='cpus', type='SCALAR', scalar=dict(value=MIN_CPUS)),
            ],
        )
        if 'PYTHONPATH' in os.environ:
            # Fix: was executor['command.environment'] = ... which created a
            # literal top-level key named "command.environment"; environment
            # must be nested inside the command dict to reach the executor.
            executor['command']['environment'] = dict(variables=[
                dict(
                    name='PYTHONPATH',
                    value=os.environ['PYTHONPATH'],
                ),
            ])
        return executor

    def _init_task(self, proc, offer):
        """TaskInfo dict for `proc` bound to the offer's agent."""
        resources = [
            dict(
                name='cpus',
                type='SCALAR',
                scalar=dict(value=proc.cpus),
            ),
            dict(
                name='mem',
                type='SCALAR',
                scalar=dict(value=proc.mem),
            )
        ]
        if proc.gpus > 0:
            resources.append(
                dict(
                    name='gpus',
                    type='SCALAR',
                    scalar=dict(value=proc.gpus),
                ))
        task = dict(
            task_id=dict(value=str(proc.id)),
            name=repr(proc),
            executor=self.executor,
            agent_id=offer['agent_id'],
            # JSON cannot carry raw bytes: base64-encode the pickled params.
            data=b2a_base64(pickle.dumps(proc.params)).strip(),
            resources=resources,
        )
        return task

    def _filters(self, seconds):
        """Filters dict refusing re-offers for `seconds`."""
        return dict(refuse_seconds=seconds)

    def __repr__(self):
        return "%s[%s]: %s" % (self.__class__.__name__, os.getpid(),
                               ' '.join(sys.argv))

    def registered(self, driver, framework_id, master_info):
        with self._lock:
            logger.info('Framework registered with id=%s, master=%s' %
                        (framework_id, master_info))
            self.framework_id = framework_id
            self.executor = self._init_executor()

    def resourceOffers(self, driver, offers):
        def get_resources(offer):
            cpus, mem, gpus = 0.0, 0.0, 0
            for r in offer['resources']:
                if r['name'] == 'cpus':
                    cpus = float(r['scalar']['value'])
                elif r['name'] == 'mem':
                    mem = float(r['scalar']['value'])
                elif r['name'] == 'gpus':
                    gpus = int(r['scalar']['value'])
            return cpus, mem, gpus

        with self._lock:
            random.shuffle(offers)
            for offer in offers:
                if not self.procs_pending:
                    logger.debug('Reject offers forever for no pending procs, '
                                 'offers=%s' % (offers, ))
                    driver.declineOffer(offer['id'], self._filters(FOREVER))
                    continue

                cpus, mem, gpus = get_resources(offer)
                tasks = []
                # Snapshot: entries are deleted inside the loop.
                for proc in list(self.procs_pending.values()):
                    # Reserve headroom for the executor itself (MIN_CPUS /
                    # MIN_MEMORY) on top of the proc's own demand.
                    if (cpus >= proc.cpus + MIN_CPUS and
                            mem >= proc.mem + MIN_MEMORY and
                            gpus >= proc.gpus):
                        tasks.append(self._init_task(proc, offer))
                        del self.procs_pending[proc.id]
                        self.procs_launched[proc.id] = proc
                        cpus -= proc.cpus
                        mem -= proc.mem
                        gpus -= proc.gpus

                # Randomized back-off so retries do not synchronize.
                seconds = 5 + random.random() * 5
                if tasks:
                    logger.info(
                        'Accept offer for procs, offer=%s, '
                        'procs=%s, filter_time=%s' %
                        (offer, [int(t['task_id']['value']) for t in tasks],
                         seconds))
                    driver.launchTasks(offer['id'], tasks,
                                       self._filters(seconds))
                else:
                    logger.info('Retry offer for procs later, offer=%s, '
                                'filter_time=%s' % (offer, seconds))
                    driver.declineOffer(offer['id'], self._filters(seconds))

    def _call_finished(self, proc_id, success, message, data, agent_id=None):
        """Drop a launched proc from all bookkeeping and fire its callback."""
        with self._lock:
            proc = self.procs_launched.pop(proc_id)
            if agent_id is not None:
                if agent_id in self.agent_to_proc:
                    # discard(): a terminal update may arrive without a
                    # preceding TASK_RUNNING, so the id may be absent.
                    self.agent_to_proc[agent_id].discard(proc_id)
            else:
                for procs in list(self.agent_to_proc.values()):
                    procs.discard(proc_id)
            proc._finished(success, message, data)

    def statusUpdate(self, driver, update):
        with self._lock:
            proc_id = int(update['task_id']['value'])
            logger.info('Status update for proc, id=%s, state=%s' %
                        (proc_id, update['state']))
            agent_id = update['agent_id']['value']
            if update['state'] == 'TASK_RUNNING':
                if agent_id in self.agent_to_proc:
                    self.agent_to_proc[agent_id].add(proc_id)
                else:
                    self.agent_to_proc[agent_id] = set([proc_id])
                proc = self.procs_launched[proc_id]
                proc._started()
            # Every state other than the three non-terminal ones is terminal.
            elif update['state'] not in {
                'TASK_STAGING', 'TASK_STARTING', 'TASK_RUNNING'
            }:
                success = (update['state'] == 'TASK_FINISHED')
                message = update.get('message')
                data = update.get('data')
                if data:
                    # Task data is base64-encoded pickled payload.
                    data = pickle.loads(a2b_base64(data))
                self._call_finished(proc_id, success, message, data, agent_id)
                driver.reviveOffers()

    def offerRescinded(self, driver, offer_id):
        with self._lock:
            if self.procs_pending:
                logger.info('Revive offers for pending procs')
                driver.reviveOffers()

    def executorLost(self, driver, executor_id, agent_id, status):
        agent_id = agent_id['value']
        with self._lock:
            for proc_id in self.agent_to_proc.pop(agent_id, []):
                self._call_finished(proc_id, False, 'Executor lost', None,
                                    agent_id)

    def slaveLost(self, driver, agent_id):
        agent_id = agent_id['value']
        with self._lock:
            for proc_id in self.agent_to_proc.pop(agent_id, []):
                self._call_finished(proc_id, False, 'Agent lost', None,
                                    agent_id)

    def error(self, driver, message):
        with self._lock:
            # Pending procs were never launched, so they are NOT in
            # procs_launched; routing them through _call_finished (which
            # pops procs_launched) raised KeyError.  Finish them directly.
            for proc in list(self.procs_pending.values()):
                del self.procs_pending[proc.id]
                proc._finished(False, message, None)
            for proc in list(self.procs_launched.values()):
                self._call_finished(proc.id, False, message, None)
            self.stop()

    def start(self):
        self.driver.start()

    def stop(self):
        assert not self.driver.aborted
        self.driver.stop()

    def submit(self, proc):
        """Queue a proc for launch; raises ValueError on duplicate ids."""
        if self.driver.aborted:
            raise RuntimeError('driver already aborted')
        with self._lock:
            if proc.id not in self.procs_pending:
                # Pass the id itself, not a tuple, as the lazy log argument.
                logger.info('Try submit proc, id=%s', proc.id)
                self.procs_pending[proc.id] = proc
                if len(self.procs_pending) == 1:
                    logger.info('Revive offers for pending procs')
                    self.driver.reviveOffers()
            else:
                raise ValueError('Proc with same id already submitted')

    def cancel(self, proc):
        if self.driver.aborted:
            raise RuntimeError('driver already aborted')
        with self._lock:
            if proc.id in self.procs_pending:
                del self.procs_pending[proc.id]
            elif proc.id in self.procs_launched:
                del self.procs_launched[proc.id]
                self.driver.killTask(dict(value=str(proc.id)))

            # discard() replaces the former procs.pop(proc.id) -- set.pop()
            # takes no argument, so that call raised TypeError.
            for agent_id, procs in list(self.agent_to_proc.items()):
                procs.discard(proc.id)
                if not procs:
                    del self.agent_to_proc[agent_id]

    def send_data(self, pid, type, data):
        """Forward (pid, type, data) to the executor hosting `pid`."""
        if self.driver.aborted:
            raise RuntimeError('driver already aborted')
        msg = b2a_base64(pickle.dumps((pid, type, data)))
        for agent_id, procs in list(self.agent_to_proc.items()):
            if pid in procs:
                self.driver.sendFrameworkMessage(self.executor['executor_id'],
                                                 dict(value=agent_id), msg)
                return
        raise RuntimeError('Cannot find agent for pid %s' % (pid, ))
class MesosScheduler(DAGScheduler):
    """DAG scheduler that runs dpark task stages on a Mesos cluster.

    Bookkeeping:
      activeJobs       -- job id -> SimpleJob still running
      activeJobsQueue  -- FIFO list of active jobs for offer matching
      taskIdToJobId    -- mesos task id ("job:task:tried") -> job id
      taskIdToAgentId  -- mesos task id -> agent id it was dispatched to
      jobTasks         -- job id -> set of its outstanding mesos task ids
      agentTasks       -- agent id -> count of tasks dispatched there

    NOTE(review): methods decorated with @safe appear to rely on a shared
    lock (see the release/acquire dance in submitTasks) — confirm the
    decorator's semantics before changing call order.
    """

    def __init__(self, master, options):
        DAGScheduler.__init__(self)
        self.master = master
        self.use_self_as_exec = options.self
        self.cpus = options.cpus
        self.mem = options.mem
        self.task_per_node = options.parallel or multiprocessing.cpu_count()
        self.group = options.group
        self.logLevel = options.logLevel
        self.options = options
        self.started = False
        self.last_finish_time = 0
        self.isRegistered = False
        self.executor = None
        self.driver = None
        self.out_logger = None
        self.err_logger = None
        self.lock = threading.RLock()
        self.init_job()

    def init_job(self):
        # Reset all per-job bookkeeping (see class docstring).
        self.activeJobs = {}
        self.activeJobsQueue = []
        self.taskIdToJobId = {}
        self.taskIdToAgentId = {}
        self.jobTasks = {}
        self.agentTasks = {}

    def clear(self):
        DAGScheduler.clear(self)
        self.init_job()

    def start(self):
        # Lazily start the log collectors; the driver itself starts on the
        # first submitTasks (see start_driver).
        if not self.out_logger:
            self.out_logger = self.start_logger(sys.stdout)
        if not self.err_logger:
            self.err_logger = self.start_logger(sys.stderr)

    def start_driver(self):
        # Framework name shows up in the Mesos UI; cap it at 256 chars.
        name = '[dpark] ' + \
               os.path.abspath(sys.argv[0]) + ' ' + ' '.join(sys.argv[1:])
        if len(name) > 256:
            name = name[:256] + '...'

        framework = Dict()
        framework.user = getuser()
        if framework.user == 'root':
            raise Exception('dpark is not allowed to run as \'root\'')
        framework.name = name
        framework.hostname = socket.gethostname()
        framework.webui_url = self.options.webui_url

        self.driver = MesosSchedulerDriver(self, framework, self.master,
                                           use_addict=True)
        self.driver.start()
        logger.debug('Mesos Scheudler driver started')

        self.started = True
        self.last_finish_time = time.time()

        def check():
            # Background watchdog: shut the scheduler down after the
            # framework has been idle for MAX_IDLE_TIME seconds.
            while self.started:
                now = time.time()
                if (not self.activeJobs and
                        now - self.last_finish_time > MAX_IDLE_TIME):
                    logger.info('stop mesos scheduler after %d seconds idle',
                                now - self.last_finish_time)
                    self.stop()
                    break
                time.sleep(1)

        spawn(check)

    def start_logger(self, output):
        # Bind a ZMQ PULL socket; executors push their stdout/stderr lines
        # to the returned tcp address and we relay them to `output`.
        sock = env.ctx.socket(zmq.PULL)
        port = sock.bind_to_random_port('tcp://0.0.0.0')

        def collect_log():
            while not self._shutdown:
                if sock.poll(1000, zmq.POLLIN):
                    line = sock.recv()
                    output.write(line)

        spawn(collect_log)
        host = socket.gethostname()
        addr = 'tcp://%s:%d' % (host, port)
        logger.debug('log collecter start at %s', addr)
        return addr

    @safe
    def registered(self, driver, frameworkId, masterInfo):
        self.isRegistered = True
        logger.debug('connect to master %s:%s, registered as %s',
                     masterInfo.hostname, masterInfo.port, frameworkId.value)
        # Executor info depends on the framework id assigned by the master.
        self.executor = self.getExecutorInfo(str(frameworkId.value))

    @safe
    def reregistered(self, driver, masterInfo):
        logger.warning('re-connect to mesos master %s:%s',
                       masterInfo.hostname, masterInfo.port)

    @safe
    def disconnected(self, driver):
        logger.debug('framework is disconnected')

    def _get_container_image(self):
        # Docker image to run executors in; falsy means no container.
        return self.options.image

    @safe
    def getExecutorInfo(self, framework_id):
        """Build the ExecutorInfo (command, env, container, resources, data)
        shared by every task of this framework."""
        info = Dict()
        info.framework_id.value = framework_id

        if self.use_self_as_exec:
            # The running script doubles as the executor binary.
            info.command.value = os.path.abspath(sys.argv[0])
            info.executor_id.value = sys.argv[0]
        else:
            info.command.value = '%s %s' % (
                sys.executable,
                os.path.abspath(
                    os.path.join(os.path.dirname(__file__), 'executor.py')))
            info.executor_id.value = 'default'

        # Run the executor with the submitting user's uid/gid.
        info.command.environment.variables = variables = []
        v = Dict()
        variables.append(v)
        v.name = 'UID'
        v.value = str(os.getuid())
        v = Dict()
        variables.append(v)
        v.name = 'GID'
        v.value = str(os.getgid())

        container_image = self._get_container_image()
        if container_image:
            info.container.type = 'DOCKER'
            info.container.docker.image = container_image
            # Unlimited swap inside the container.
            info.container.docker.parameters = parameters = []
            p = Dict()
            p.key = 'memory-swap'
            p.value = '-1'
            parameters.append(p)

            info.container.volumes = volumes = []
            # User/group databases read-only, work dirs read-write.
            for path in ['/etc/passwd', '/etc/group']:
                v = Dict()
                volumes.append(v)
                v.host_path = v.container_path = path
                v.mode = 'RO'

            for path in conf.MOOSEFS_MOUNT_POINTS:
                v = Dict()
                volumes.append(v)
                v.host_path = v.container_path = path
                v.mode = 'RW'

            for path in conf.DPARK_WORK_DIR.split(','):
                v = Dict()
                volumes.append(v)
                v.host_path = v.container_path = path
                v.mode = 'RW'

            def _mount_volume(volumes, host_path, container_path, mode):
                # Append one volume entry; host_path may be empty for
                # container-only paths.
                v = Dict()
                volumes.append(v)
                v.container_path = container_path
                v.mode = mode
                if host_path:
                    v.host_path = host_path

            # User-supplied volumes: host:container:mode | host:container
            # | container (comma-separated).
            if self.options.volumes:
                for volume in self.options.volumes.split(','):
                    fields = volume.split(':')
                    if len(fields) == 3:
                        host_path, container_path, mode = fields
                        mode = mode.upper()
                        assert mode in ('RO', 'RW')
                    elif len(fields) == 2:
                        host_path, container_path = fields
                        mode = 'RW'
                    elif len(fields) == 1:
                        container_path, = fields
                        host_path = ''
                        mode = 'RW'
                    else:
                        # NOTE(review): Exception('...%s', volume) passes the
                        # value as a second arg instead of formatting it.
                        raise Exception('cannot parse volume %s', volume)
                    _mount_volume(volumes, host_path, container_path, mode)

        info.resources = resources = []
        mem = Dict()
        resources.append(mem)
        mem.name = 'mem'
        mem.type = 'SCALAR'
        mem.scalar.value = EXECUTOR_MEMORY
        cpus = Dict()
        resources.append(cpus)
        cpus.name = 'cpus'
        cpus.type = 'SCALAR'
        cpus.scalar.value = EXECUTOR_CPUS

        Script = os.path.realpath(sys.argv[0])
        info.name = Script

        # Bootstrap payload the executor unpacks on startup.
        info.data = encode_data(
            marshal.dumps((Script, os.getcwd(), sys.path, dict(os.environ),
                           self.task_per_node, self.out_logger,
                           self.err_logger, self.logLevel, env.environ)))
        return info

    @safe
    def submitTasks(self, tasks):
        """Wrap `tasks` into a SimpleJob and make it eligible for offers."""
        if not tasks:
            return

        job = SimpleJob(self, tasks, self.cpus, tasks[0].rdd.mem or self.mem)
        self.activeJobs[job.id] = job
        self.activeJobsQueue.append(job)
        self.jobTasks[job.id] = set()
        stage_scope = ''
        # NOTE(review): bare except silently swallows any failure (including
        # ImportError) while fetching the stage scope for logging only.
        try:
            from dpark.web.ui.views.rddopgraph import StageInfo
            stage_scope = StageInfo.idToRDDNode[
                tasks[0].rdd.id].scope.call_site
        except:
            pass

        stage = self.idToStage[tasks[0].stageId]
        stage.try_times += 1
        logger.info(
            'Got job %d with %d tasks for stage: %d(try %d times) '
            'at scope[%s] and rdd:%s',
            job.id, len(tasks), tasks[0].stageId, stage.try_times,
            stage_scope, tasks[0].rdd)

        need_revive = self.started
        if not self.started:
            self.start_driver()
        # Busy-wait for registration, releasing the @safe lock so the
        # registered() callback can run.
        while not self.isRegistered:
            self.lock.release()
            time.sleep(0.01)
            self.lock.acquire()

        if need_revive:
            self.requestMoreResources()

    def requestMoreResources(self):
        logger.debug('reviveOffers')
        self.driver.reviveOffers()

    @safe
    def resourceOffers(self, driver, offers):
        """Match pending jobs against offers and launch/decline accordingly."""
        rf = Dict()
        if not self.activeJobs:
            # Nothing to run: suppress and decline everything for 5 minutes.
            driver.suppressOffers()
            rf.refuse_seconds = 60 * 5
            for o in offers:
                driver.declineOffer(o.id, rf)
            return

        start = time.time()
        random.shuffle(offers)
        cpus = [self.getResource(o.resources, 'cpus') for o in offers]
        # Reserve EXECUTOR_MEMORY on agents that have no executor yet.
        mems = [
            self.getResource(o.resources, 'mem') -
            (o.agent_id.value not in self.agentTasks and EXECUTOR_MEMORY or 0)
            for o in offers
        ]
        logger.debug('get %d offers (%s cpus, %s mem), %d jobs',
                     len(offers), sum(cpus), sum(mems), len(self.activeJobs))

        tasks = {}
        for job in self.activeJobsQueue:
            # Keep sweeping the offers until no more tasks fit.
            while True:
                launchedTask = False
                for i, o in enumerate(offers):
                    sid = o.agent_id.value
                    # Honor agent group constraints; groups starting with
                    # '_' are opt-in only.
                    group = (self.getAttribute(o.attributes, 'group') or
                             'None')
                    if (self.group or group.startswith(
                            '_')) and group not in self.group:
                        continue
                    if self.agentTasks.get(sid, 0) >= self.task_per_node:
                        continue
                    if (mems[i] < self.mem + EXECUTOR_MEMORY or
                            cpus[i] < self.cpus + EXECUTOR_CPUS):
                        continue
                    t = job.slaveOffer(str(o.hostname), cpus[i], mems[i])
                    if not t:
                        continue
                    task = self.createTask(o, job, t)
                    tasks.setdefault(o.id.value, []).append(task)
                    logger.debug('dispatch %s into %s', t, o.hostname)

                    tid = task.task_id.value
                    self.jobTasks[job.id].add(tid)
                    self.taskIdToJobId[tid] = job.id
                    self.taskIdToAgentId[tid] = sid
                    self.agentTasks[sid] = self.agentTasks.get(sid, 0) + 1
                    cpus[i] -= min(cpus[i], t.cpus)
                    mems[i] -= t.mem
                    launchedTask = True

                if not launchedTask:
                    break

        used = time.time() - start
        if used > 10:
            logger.error('use too much time in resourceOffers: %.2fs', used)

        for o in offers:
            if o.id.value in tasks:
                driver.launchTasks(o.id, tasks[o.id.value])
            else:
                driver.declineOffer(o.id)

        logger.debug('reply with %d tasks, %s cpus %s mem left',
                     sum(len(ts) for ts in tasks.values()),
                     sum(cpus), sum(mems))

    @safe
    def offerRescinded(self, driver, offer_id):
        logger.debug('rescinded offer: %s', offer_id)
        if self.activeJobs:
            self.requestMoreResources()

    def getResource(self, res, name):
        # Scalar resource lookup; 0.0 when the resource is absent.
        for r in res:
            if r.name == name:
                return r.scalar.value
        return 0.0

    def getAttribute(self, attrs, name):
        # Text attribute lookup; implicitly returns None when absent.
        for r in attrs:
            if r.name == name:
                return r.text.value

    def createTask(self, o, job, t):
        """Build the TaskInfo for task `t` of `job` on offer `o`.

        Task ids are "jobid:taskid:tried" (parsed back in statusUpdate).
        """
        task = Dict()
        tid = '%s:%s:%s' % (job.id, t.id, t.tried)
        task.name = 'task %s' % tid
        task.task_id.value = tid
        task.agent_id.value = o.agent_id.value
        # Payload: compressed pickle of the task object and its try count.
        task.data = encode_data(
            compress(six.moves.cPickle.dumps((t, t.tried), -1)))
        task.executor = self.executor
        if len(task.data) > 1000 * 1024:
            logger.warning('task too large: %s %d', t, len(task.data))

        resources = task.resources = []
        cpu = Dict()
        resources.append(cpu)
        cpu.name = 'cpus'
        cpu.type = 'SCALAR'
        cpu.scalar.value = t.cpus
        mem = Dict()
        resources.append(mem)
        mem.name = 'mem'
        mem.type = 'SCALAR'
        mem.scalar.value = t.mem
        return task

    @safe
    def statusUpdate(self, driver, status):
        """Handle a task status update: bookkeeping, result decoding, and
        forwarding to the owning job."""
        tid = status.task_id.value
        state = status.state
        logger.debug('status update: %s %s', tid, state)

        jid = self.taskIdToJobId.get(tid)
        _, task_id, tried = list(map(int, tid.split(':')))
        if state == 'TASK_RUNNING':
            if jid in self.activeJobs:
                job = self.activeJobs[jid]
                job.statusUpdate(task_id, tried, state)
            else:
                logger.debug('kill task %s as its job has gone', tid)
                self.driver.killTask(Dict(value=tid))
            return

        # Terminal state: retire the task from all maps before dispatch.
        self.taskIdToJobId.pop(tid, None)
        if jid in self.jobTasks:
            self.jobTasks[jid].remove(tid)
        if tid in self.taskIdToAgentId:
            agent_id = self.taskIdToAgentId[tid]
            if agent_id in self.agentTasks:
                self.agentTasks[agent_id] -= 1
            del self.taskIdToAgentId[tid]

        if jid not in self.activeJobs:
            logger.debug('ignore task %s as its job has gone', tid)
            return

        job = self.activeJobs[jid]
        reason = status.get('message')
        data = status.get('data')
        if state in ('TASK_FINISHED', 'TASK_FAILED') and data:
            try:
                reason, result, accUpdate = six.moves.cPickle.loads(
                    decode_data(data))
                if result:
                    flag, data = result
                    # flag >= 2: result payload was spilled to a URL and
                    # must be fetched before decoding.
                    if flag >= 2:
                        try:
                            data = urllib.request.urlopen(data).read()
                        except IOError:
                            # try again
                            data = urllib.request.urlopen(data).read()
                        flag -= 2
                    data = decompress(data)
                    # flag 0: marshal-encoded; otherwise pickle-encoded.
                    if flag == 0:
                        result = marshal.loads(data)
                    else:
                        result = six.moves.cPickle.loads(data)
            except Exception as e:
                logger.warning('error when cPickle.loads(): %s, data:%s',
                               e, len(data))
                # NOTE(review): this assignment is dead — the very next
                # statement returns with an explicit 'TASK_FAILED'.
                state = 'TASK_FAILED'
                return job.statusUpdate(task_id, tried, 'TASK_FAILED',
                                        'load failed: %s' % e)
            else:
                return job.statusUpdate(task_id, tried, state,
                                        reason, result, accUpdate)

        # killed, lost, load failed
        job.statusUpdate(task_id, tried, state, reason or data)

    def jobFinished(self, job):
        """Retire a finished job and kill any of its still-known tasks."""
        logger.debug('job %s finished', job.id)
        if job.id in self.activeJobs:
            self.last_finish_time = time.time()
            del self.activeJobs[job.id]
            self.activeJobsQueue.remove(job)
            for tid in self.jobTasks[job.id]:
                self.driver.killTask(Dict(value=tid))
            del self.jobTasks[job.id]

        if not self.activeJobs:
            self.agentTasks.clear()

        # Kill tasks whose job is no longer active (orphans).
        for tid, jid in six.iteritems(self.taskIdToJobId):
            if jid not in self.activeJobs:
                logger.debug('kill task %s, because it is orphan', tid)
                self.driver.killTask(Dict(value=tid))

    @safe
    def check(self):
        # Periodic health check: nudge the master when tasks time out.
        for job in self.activeJobs.values():
            if job.check_task_timeout():
                self.requestMoreResources()

    @safe
    def error(self, driver, message):
        logger.error('Mesos error message: %s', message)
        raise RuntimeError(message)

    # @safe
    def stop(self):
        if not self.started:
            return
        logger.debug('stop scheduler')
        self.started = False
        self.isRegistered = False
        # stop(False) == do not failover; the framework is torn down.
        self.driver.stop(False)
        self.driver = None

    def defaultParallelism(self):
        return 16

    def frameworkMessage(self, driver, executor_id, agent_id, data):
        logger.warning('[agent %s] %s', agent_id.value, data)

    def executorLost(self, driver, executor_id, agent_id, status):
        logger.warning('executor at %s %s lost: %s',
                       agent_id.value, executor_id.value, status)
        self.agentTasks.pop(agent_id.value, None)

    def slaveLost(self, driver, agent_id):
        logger.warning('agent %s lost', agent_id.value)
        self.agentTasks.pop(agent_id.value, None)

    def killTask(self, job_id, task_id, tried):
        # Compose the "jobid:taskid:tried" mesos task id and kill it.
        tid = Dict()
        tid.value = '%s:%s:%s' % (job_id, task_id, tried)
        self.driver.killTask(tid)
class MesosScheduler(DAGScheduler):
    """DAG scheduler backend that launches tasks on an Apache Mesos cluster.

    Bookkeeping maps:
      activeJobs       - job id -> SimpleJob
      activeJobsQueue  - FIFO of jobs used for offer matching
      taskIdToJobId    - 'job:task:tried' id -> job id
      taskIdToAgentId  - task id -> agent id it was dispatched to
      jobTasks         - job id -> set of outstanding task ids
      agentTasks       - agent id -> number of tasks running there
    """

    def __init__(self, master, options):
        DAGScheduler.__init__(self)
        self.master = master
        self.use_self_as_exec = options.self
        self.cpus = options.cpus
        self.mem = options.mem
        self.task_per_node = options.parallel or multiprocessing.cpu_count()
        self.group = options.group
        self.logLevel = options.logLevel
        self.options = options
        self.started = False
        self.last_finish_time = 0
        self.isRegistered = False
        self.executor = None
        self.driver = None
        self.out_logger = None
        self.err_logger = None
        self.lock = threading.RLock()
        self.init_job()

    def init_job(self):
        """Reset all per-job bookkeeping to empty."""
        self.activeJobs = {}
        self.activeJobsQueue = []
        self.taskIdToJobId = {}
        self.taskIdToAgentId = {}
        self.jobTasks = {}
        self.agentTasks = {}

    def clear(self):
        DAGScheduler.clear(self)
        self.init_job()

    def start(self):
        """Start stdout/stderr log collectors (lazily, once each)."""
        if not self.out_logger:
            self.out_logger = self.start_logger(sys.stdout)
        if not self.err_logger:
            self.err_logger = self.start_logger(sys.stderr)

    def start_driver(self):
        """Create and start the Mesos scheduler driver, plus an idle watchdog."""
        name = '[dpark] ' + \
            os.path.abspath(sys.argv[0]) + ' ' + ' '.join(sys.argv[1:])
        if len(name) > 256:
            name = name[:256] + '...'

        framework = Dict()
        framework.user = getuser()
        if framework.user == 'root':
            raise Exception('dpark is not allowed to run as \'root\'')
        framework.name = name
        framework.hostname = socket.gethostname()
        framework.webui_url = self.options.webui_url

        self.driver = MesosSchedulerDriver(
            self, framework, self.master, use_addict=True
        )
        self.driver.start()
        logger.debug('Mesos Scheudler driver started')

        self.started = True
        self.last_finish_time = time.time()

        def check():
            # Watchdog: stop the framework after MAX_IDLE_TIME seconds
            # without active jobs.
            while self.started:
                now = time.time()
                if (not self.activeJobs and
                        now - self.last_finish_time > MAX_IDLE_TIME):
                    logger.info('stop mesos scheduler after %d seconds idle',
                                now - self.last_finish_time)
                    self.stop()
                    break
                time.sleep(1)

        spawn(check)

    def start_logger(self, output):
        """Bind a ZMQ PULL socket and stream received lines to *output*.

        Returns the tcp address executors should push log lines to.
        """
        sock = env.ctx.socket(zmq.PULL)
        port = sock.bind_to_random_port('tcp://0.0.0.0')

        def collect_log():
            while not self._shutdown:
                if sock.poll(1000, zmq.POLLIN):
                    line = sock.recv()
                    output.write(line)

        spawn(collect_log)

        host = socket.gethostname()
        addr = 'tcp://%s:%d' % (host, port)
        logger.debug('log collecter start at %s', addr)
        return addr

    @safe
    def registered(self, driver, frameworkId, masterInfo):
        self.isRegistered = True
        logger.debug('connect to master %s:%s, registered as %s',
                     masterInfo.hostname, masterInfo.port, frameworkId.value)
        # ExecutorInfo depends on the framework id, so build it now.
        self.executor = self.getExecutorInfo(str(frameworkId.value))

    @safe
    def reregistered(self, driver, masterInfo):
        logger.warning('re-connect to mesos master %s:%s',
                       masterInfo.hostname, masterInfo.port)

    @safe
    def disconnected(self, driver):
        logger.debug('framework is disconnected')

    @safe
    def getExecutorInfo(self, framework_id):
        """Build the ExecutorInfo dict (command, env, container, resources)."""
        info = Dict()
        info.framework_id.value = framework_id
        if self.use_self_as_exec:
            # Run this very script as the executor binary.
            info.command.value = os.path.abspath(sys.argv[0])
            info.executor_id.value = sys.argv[0]
        else:
            info.command.value = '%s %s' % (
                sys.executable,
                os.path.abspath(
                    os.path.join(
                        os.path.dirname(__file__), 'executor.py'))
            )
            info.executor_id.value = 'default'

        # Propagate uid/gid so the executor can drop to the submitting user.
        info.command.environment.variables = variables = []
        v = Dict()
        variables.append(v)
        v.name = 'UID'
        v.value = str(os.getuid())
        v = Dict()
        variables.append(v)
        v.name = 'GID'
        v.value = str(os.getgid())

        if self.options.image:
            # Docker containerizer: mount identity files read-only and
            # data/work directories read-write.
            info.container.type = 'DOCKER'
            info.container.docker.image = self.options.image
            info.container.volumes = volumes = []
            for path in ['/etc/passwd', '/etc/group']:
                v = Dict()
                volumes.append(v)
                v.host_path = v.container_path = path
                v.mode = 'RO'

            for path in conf.MOOSEFS_MOUNT_POINTS:
                v = Dict()
                volumes.append(v)
                v.host_path = v.container_path = path
                v.mode = 'RW'

            for path in conf.DPARK_WORK_DIR.split(','):
                v = Dict()
                volumes.append(v)
                v.host_path = v.container_path = path
                v.mode = 'RW'

            if self.options.volumes:
                # User volumes: 'host:container:mode', 'host:container'
                # (mode RW) or bare 'container' (no host path, mode RW).
                for volume in self.options.volumes.split(','):
                    fields = volume.split(':')
                    if len(fields) == 3:
                        host_path, container_path, mode = fields
                        mode = mode.upper()
                        assert mode in ('RO', 'RW')
                    elif len(fields) == 2:
                        host_path, container_path = fields
                        mode = 'RW'
                    elif len(fields) == 1:
                        container_path, = fields
                        host_path = ''
                        mode = 'RW'
                    else:
                        raise Exception('cannot parse volume %s', volume)

                    mkdir_p(host_path)

                    v = Dict()
                    volumes.append(v)
                    v.container_path = container_path
                    v.mode = mode
                    if host_path:
                        v.host_path = host_path

        info.resources = resources = []

        mem = Dict()
        resources.append(mem)
        mem.name = 'mem'
        mem.type = 'SCALAR'
        mem.scalar.value = EXECUTOR_MEMORY

        cpus = Dict()
        resources.append(cpus)
        cpus.name = 'cpus'
        cpus.type = 'SCALAR'
        cpus.scalar.value = EXECUTOR_CPUS

        Script = os.path.realpath(sys.argv[0])
        info.name = Script

        # Everything the remote executor needs to reconstruct our context.
        info.data = encode_data(marshal.dumps(
            (
                Script,
                os.getcwd(),
                sys.path,
                dict(os.environ),
                self.task_per_node,
                self.out_logger,
                self.err_logger,
                self.logLevel,
                env.environ
            )
        ))
        return info

    @safe
    def submitTasks(self, tasks):
        """Wrap *tasks* in a SimpleJob, register it, and (re)start the driver."""
        if not tasks:
            return

        job = SimpleJob(self, tasks, self.cpus, tasks[0].rdd.mem or self.mem)
        self.activeJobs[job.id] = job
        self.activeJobsQueue.append(job)
        self.jobTasks[job.id] = set()
        logger.info(
            'Got job %d with %d tasks: %s',
            job.id, len(tasks), tasks[0].rdd)

        need_revive = self.started
        if not self.started:
            self.start_driver()
        # Busy-wait for registration, releasing the @safe lock so the
        # registered() callback can run.
        while not self.isRegistered:
            self.lock.release()
            time.sleep(0.01)
            self.lock.acquire()

        if need_revive:
            # Driver was already running: ask Mesos to resend offers.
            self.requestMoreResources()

    def requestMoreResources(self):
        logger.debug('reviveOffers')
        self.driver.reviveOffers()

    @safe
    def resourceOffers(self, driver, offers):
        """Match resource offers against queued jobs and launch tasks."""
        rf = Dict()
        if not self.activeJobs:
            # Nothing to run: refuse these offers for five minutes.
            rf.refuse_seconds = 60 * 5
            for o in offers:
                driver.declineOffer(o.id, rf)
            return

        start = time.time()
        random.shuffle(offers)
        cpus = [self.getResource(o.resources, 'cpus') for o in offers]
        # Reserve EXECUTOR_MEMORY on agents that do not yet run our executor.
        mems = [self.getResource(o.resources, 'mem')
                - (o.agent_id.value not in self.agentTasks
                   and EXECUTOR_MEMORY or 0)
                for o in offers]
        logger.debug('get %d offers (%s cpus, %s mem), %d jobs',
                     len(offers), sum(cpus), sum(mems), len(self.activeJobs))

        tasks = {}
        for job in self.activeJobsQueue:
            # Round-robin over the offers until no more tasks fit anywhere.
            while True:
                launchedTask = False
                for i, o in enumerate(offers):
                    sid = o.agent_id.value
                    group = (
                        self.getAttribute(
                            o.attributes,
                            'group') or 'None')
                    # Honour agent-group restrictions ('_'-prefixed groups
                    # are only used when explicitly requested).
                    if (self.group or group.startswith(
                            '_')) and group not in self.group:
                        continue
                    if self.agentTasks.get(sid, 0) >= self.task_per_node:
                        continue
                    if (mems[i] < self.mem + EXECUTOR_MEMORY
                            or cpus[i] < self.cpus + EXECUTOR_CPUS):
                        continue
                    t = job.slaveOffer(str(o.hostname), cpus[i], mems[i])
                    if not t:
                        continue
                    task = self.createTask(o, job, t)
                    tasks.setdefault(o.id.value, []).append(task)

                    logger.debug('dispatch %s into %s', t, o.hostname)
                    tid = task.task_id.value
                    self.jobTasks[job.id].add(tid)
                    self.taskIdToJobId[tid] = job.id
                    self.taskIdToAgentId[tid] = sid
                    self.agentTasks[sid] = self.agentTasks.get(sid, 0) + 1
                    cpus[i] -= min(cpus[i], t.cpus)
                    mems[i] -= t.mem
                    launchedTask = True

                if not launchedTask:
                    break

        used = time.time() - start
        if used > 10:
            logger.error('use too much time in resourceOffers: %.2fs', used)

        for o in offers:
            if o.id.value in tasks:
                driver.launchTasks(o.id, tasks[o.id.value])
            else:
                driver.declineOffer(o.id)

        logger.debug('reply with %d tasks, %s cpus %s mem left',
                     sum(len(ts) for ts in tasks.values()),
                     sum(cpus), sum(mems))

    @safe
    def offerRescinded(self, driver, offer_id):
        logger.debug('rescinded offer: %s', offer_id)
        if self.activeJobs:
            self.requestMoreResources()

    def getResource(self, res, name):
        """Return the scalar value of resource *name*, or 0.0 if absent."""
        for r in res:
            if r.name == name:
                return r.scalar.value
        return 0.0

    def getAttribute(self, attrs, name):
        """Return the text value of attribute *name*, or None if absent."""
        for r in attrs:
            if r.name == name:
                return r.text.value

    def createTask(self, o, job, t):
        """Build a TaskInfo dict for task *t* of *job* against offer *o*."""
        task = Dict()
        tid = '%s:%s:%s' % (job.id, t.id, t.tried)
        task.name = 'task %s' % tid
        task.task_id.value = tid
        task.agent_id.value = o.agent_id.value
        # NOTE(review): cPickle here appears to be bound at module level
        # (presumably via six.moves) — confirm against the file's imports.
        task.data = encode_data(
            compress(cPickle.dumps((t, t.tried), -1))
        )
        task.executor = self.executor
        if len(task.data) > 1000 * 1024:
            logger.warning('task too large: %s %d',
                           t, len(task.data))

        resources = task.resources = []
        cpu = Dict()
        resources.append(cpu)
        cpu.name = 'cpus'
        cpu.type = 'SCALAR'
        cpu.scalar.value = t.cpus
        mem = Dict()
        resources.append(mem)
        mem.name = 'mem'
        mem.type = 'SCALAR'
        mem.scalar.value = t.mem
        return task

    @safe
    def statusUpdate(self, driver, status):
        """Handle a Mesos task status update and route it to its job."""
        tid = status.task_id.value
        state = status.state
        logger.debug('status update: %s %s', tid, state)

        jid = self.taskIdToJobId.get(tid)
        _, task_id, tried = map(int, tid.split(':'))
        if state == 'TASK_RUNNING':
            if jid in self.activeJobs:
                job = self.activeJobs[jid]
                job.statusUpdate(task_id, tried, state)
            else:
                logger.debug('kill task %s as its job has gone', tid)
                self.driver.killTask(Dict(value=tid))
            return

        # Terminal state: drop all bookkeeping for this task id.
        self.taskIdToJobId.pop(tid, None)
        if jid in self.jobTasks:
            self.jobTasks[jid].remove(tid)
        if tid in self.taskIdToAgentId:
            agent_id = self.taskIdToAgentId[tid]
            if agent_id in self.agentTasks:
                self.agentTasks[agent_id] -= 1
            del self.taskIdToAgentId[tid]

        if jid not in self.activeJobs:
            logger.debug('ignore task %s as its job has gone', tid)
            return
        job = self.activeJobs[jid]

        data = status.get('data')
        if state in ('TASK_FINISHED', 'TASK_FAILED') and data:
            try:
                # Executor ships back (reason, result, accumulator updates)
                # as a compressed pickle blob; flag >= 2 means the payload
                # lives behind a URL and must be fetched first, then
                # flag 0 => marshal, otherwise pickle.
                reason, result, accUpdate = cPickle.loads(
                    decode_data(data))
                if result:
                    flag, data = result
                    if flag >= 2:
                        try:
                            # FIX: urllib.urlopen is Python-2-only; use
                            # urllib.request.urlopen as the sibling
                            # six-based code in this file already does.
                            data = urllib.request.urlopen(data).read()
                        except IOError:
                            # try again
                            data = urllib.request.urlopen(data).read()
                        flag -= 2
                    data = decompress(data)
                    if flag == 0:
                        result = marshal.loads(data)
                    else:
                        result = cPickle.loads(data)
            except Exception as e:
                logger.warning(
                    'error when cPickle.loads(): %s, data:%s', e, len(data))
                state = 'TASK_FAILED'
                return job.statusUpdate(
                    task_id, tried, 'TASK_FAILED', 'load failed: %s' % e)
            else:
                return job.statusUpdate(task_id, tried, state,
                                        reason, result, accUpdate)

        # killed, lost, load failed
        job.statusUpdate(task_id, tried, state, data)

    def jobFinished(self, job):
        """Remove a completed job and kill any tasks left behind for it."""
        logger.debug('job %s finished', job.id)
        if job.id in self.activeJobs:
            del self.activeJobs[job.id]
            self.activeJobsQueue.remove(job)
            for tid in self.jobTasks[job.id]:
                self.driver.killTask(Dict(value=tid))
            del self.jobTasks[job.id]
            self.last_finish_time = time.time()

        if not self.activeJobs:
            self.agentTasks.clear()

        # FIX: dict.iteritems() does not exist on Python 3; items() is
        # equivalent here (no mutation happens during the sweep).
        for tid, jid in self.taskIdToJobId.items():
            if jid not in self.activeJobs:
                logger.debug('kill task %s, because it is orphan', tid)
                self.driver.killTask(Dict(value=tid))

    @safe
    def check(self):
        """Periodic health check: revive offers if any job has timed-out tasks."""
        for job in self.activeJobs.values():
            if job.check_task_timeout():
                self.requestMoreResources()

    @safe
    def error(self, driver, message):
        logger.warning('Mesos error message: %s', message)

    # @safe
    def stop(self):
        """Stop the scheduler driver; idempotent when not started."""
        if not self.started:
            return
        logger.debug('stop scheduler')
        self.started = False
        self.isRegistered = False
        # stop(False) => do not failover; the framework is torn down.
        self.driver.stop(False)
        self.driver = None

    def defaultParallelism(self):
        # Fixed default used when a caller does not specify parallelism.
        return 16

    def frameworkMessage(self, driver, executor_id, agent_id, data):
        logger.warning('[agent %s] %s', agent_id.value, data)

    def executorLost(self, driver, executor_id, agent_id, status):
        logger.warning(
            'executor at %s %s lost: %s',
            agent_id.value, executor_id.value, status)
        # Forget the agent's task count; it will be rebuilt on next launch.
        self.agentTasks.pop(agent_id.value, None)

    def slaveLost(self, driver, agent_id):
        logger.warning('agent %s lost', agent_id.value)
        self.agentTasks.pop(agent_id.value, None)

    def killTask(self, job_id, task_id, tried):
        """Kill one task attempt, addressed by the composite 'job:task:tried' id."""
        tid = Dict()
        tid.value = '%s:%s:%s' % (job_id, task_id, tried)
        self.driver.killTask(tid)