def start_framework(master_uri, exe_path, n_workers, uow_name, prefix, cpu_alloc, mem_alloc):
    """Build a Mesos scheduler driver for the Exelixi framework.

    The driver is constructed but neither started nor run; that is left to
    the caller.

    Environment variables consulted:
      MESOS_CHECKPOINT    -- when set, framework checkpointing is enabled
      MESOS_AUTHENTICATE  -- when set, a credential is attached to the driver
      DEFAULT_PRINCIPAL   -- required when MESOS_AUTHENTICATE is set
      DEFAULT_SECRET      -- required when MESOS_AUTHENTICATE is set

    Args:
        master_uri: address of the Mesos master handed to the driver.
        exe_path: executor command; also forwarded to the scheduler.
        n_workers, uow_name, prefix, cpu_alloc, mem_alloc: forwarded
            unchanged to MesosScheduler.

    Returns:
        A mesos.MesosSchedulerDriver instance.

    Exits the process with status 1 when authentication is requested but
    the principal or the secret is missing from the environment.
    """
    # describe the executor; every call gets a fresh uuid1-based executor id
    exec_info = mesos_pb2.ExecutorInfo()
    exec_info.executor_id.value = uuid1().hex
    exec_info.command.value = exe_path
    exec_info.name = "Exelixi Executor"
    exec_info.source = "per-job build"

    ## NB: TODO download tarball/container from HDFS
    #uri = exec_info.command.uris.add()
    #uri.executable = false
    #uri.value = "hdfs://namenode/exelixi/exelixi.tgz"

    # describe the framework
    fw_info = mesos_pb2.FrameworkInfo()
    fw_info.user = ""  # have Mesos fill in the current user
    fw_info.name = "Exelixi Framework"

    if os.getenv("MESOS_CHECKPOINT"):
        logging.debug("Mesos Scheduler: enabling checkpoint for the framework")
        fw_info.checkpoint = True

    # the scheduler captures the command line options
    sched = MesosScheduler(exec_info, exe_path, n_workers, uow_name, prefix,
                           cpu_alloc, mem_alloc)

    # unauthenticated case: nothing more to prepare
    if not os.getenv("MESOS_AUTHENTICATE"):
        return mesos.MesosSchedulerDriver(sched, fw_info, master_uri)

    logging.debug("Mesos Scheduler: enabling authentication for the framework")

    principal = os.getenv("DEFAULT_PRINCIPAL")
    if not principal:
        logging.critical("Mesos Scheduler: expecting authentication principal in the environment")
        sys.exit(1)

    secret = os.getenv("DEFAULT_SECRET")
    if not secret:
        logging.critical("Mesos Scheduler: expecting authentication secret in the environment")
        sys.exit(1)

    credential = mesos_pb2.Credential()
    credential.principal = principal
    credential.secret = secret

    return mesos.MesosSchedulerDriver(sched, fw_info, master_uri, credential)
def run(self):
    """Register the JobTree framework with Mesos and block until the driver returns.

    Reads self.queue, self.common_dir and self.mesos_url.  The executor
    command runs jobTreeExec.py (located next to this module) with the
    current interpreter, the current PYTHONPATH and self.common_dir as its
    single argument.

    Environment variables consulted:
      PYTHONPATH          -- forwarded to the executor (empty when unset)
      MESOS_CHECKPOINT    -- when set, framework checkpointing is enabled
      MESOS_AUTHENTICATE  -- when set, DEFAULT_PRINCIPAL / DEFAULT_SECRET are
                             sent as the framework credential

    Returns None.  When authentication is requested but the principal or
    the secret is missing, an error is logged and the method returns
    without creating a driver.
    """
    self.queue.branchUpdate("/")

    # Resolve only the script path.  Passing "script + ' ' + common_dir"
    # through abspath() would also normalise the argument (collapsing
    # "..", "//" and trailing slashes) and could even swallow the script
    # name when common_dir climbs far enough with "..".
    exec_script = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "jobTreeExec.py"))

    executor = mesos_pb2.ExecutorInfo()
    executor.executor_id.value = "default"
    # PYTHONPATH may legitimately be unset; forward an empty value then
    # instead of failing with KeyError.
    executor.command.value = "PYTHONPATH=%s %s %s %s" % (
        os.environ.get("PYTHONPATH", ""), sys.executable, exec_script,
        self.common_dir)
    executor.name = "JobTreeExec"
    executor.source = "JobTree"

    framework = mesos_pb2.FrameworkInfo()
    framework.user = ""  # Have Mesos fill in the current user.
    framework.name = "JobTree"

    # TODO(vinod): Make checkpointing the default when it is default
    # on the slave.
    if os.getenv("MESOS_CHECKPOINT"):
        logging.info("Enabling checkpoint for the framework")
        framework.checkpoint = True

    # Extra positional arguments for the driver (the credential, if any).
    extra_args = []
    if os.getenv("MESOS_AUTHENTICATE"):
        logging.info("Enabling authentication for the framework")

        principal = os.getenv("DEFAULT_PRINCIPAL")
        if not principal:
            logging.error(
                "Expecting authentication principal in the environment")
            return

        secret = os.getenv("DEFAULT_SECRET")
        if not secret:
            logging.error(
                "Expecting authentication secret in the environment")
            return

        credential = mesos_pb2.Credential()
        credential.principal = principal
        credential.secret = secret
        extra_args.append(credential)
    else:
        logging.info("Contacting Mesos: %s" % self.mesos_url)

    driver = mesos.MesosSchedulerDriver(
        JobTreeScheduler(executor, self.queue), framework, self.mesos_url,
        *extra_args)

    # driver.run() blocks; the status is only logged, not returned.
    status = 0 if driver.run() == mesos_pb2.DRIVER_STOPPED else 1
    logging.info("Status: %s" % status)

    # Ensure that the driver process terminates.
    driver.stop()
def launchTask(self, driver, task):
    """Start a nested scheduler driver for the work described by *task*.

    task.arg is expected to unpickle to (master, (todo, duration)).  The
    task id is remembered in self.tid and the started driver is kept in
    self.nested_driver.  The *driver* argument is not used here.
    """
    self.tid = task.taskId

    # NOTE(review): pickle.loads trusts whoever produced task.arg --
    # confirm the payload can never come from an untrusted source.
    payload = pickle.loads(task.arg)
    master, (todo, duration) = payload

    nested = NestedScheduler(todo, duration, self)
    print("Running here:" + master)

    self.nested_driver = mesos.MesosSchedulerDriver(nested, master)
    self.nested_driver.start()
def start(self):
    """Attach the stdout/stderr loggers and start the Mesos scheduler driver.

    The framework name has the form
    "[dpark@<hostname>] <absolute script path> <command line arguments>".
    The driver is stored in self.driver and started without blocking.
    """
    self.out_logger = self.start_logger(sys.stdout)
    self.err_logger = self.start_logger(sys.stderr)

    host_tag = '[dpark@%s] ' % socket.gethostname()
    script_path = os.path.abspath(sys.argv[0])
    cli_args = ' '.join(sys.argv[1:])
    name = ''.join([host_tag, script_path, ' ', cli_args])

    self.driver = mesos.MesosSchedulerDriver(
        self, name, self.getExecutorInfo(), self.master)
    self.driver.start()
    logger.debug("Mesos Scheudler driver started")
def __init__(self, scheduler, workrepo, config):
    """Set up the Mesos scheduler, framework description and driver.

    All three arguments are forwarded to the parent initialiser and to
    NebularMesos.  When self.config.mesos is None an error is logged and
    initialisation stops early; self.sched, self.framework and
    self.driver are not set in that case.
    """
    super(MesosDRMS, self).__init__(scheduler, workrepo, config)

    mesos_master = self.config.mesos
    if mesos_master is None:
        logging.error("Mesos not configured")
        return

    self.sched = NebularMesos(scheduler, workrepo, config)

    framework = mesos_pb2.FrameworkInfo()
    framework.user = ""  # Have Mesos fill in the current user.
    framework.name = "Nebula"
    self.framework = framework

    ## additional authentication stuff would go here
    self.driver = mesos.MesosSchedulerDriver(
        self.sched, self.framework, mesos_master)
def start(self):
    """Attach the stdout/stderr loggers and start the Mesos scheduler driver.

    The framework is registered under the current user (getpass.getuser())
    with the name
    "[dpark@<hostname>] <absolute script path> <command line arguments>".
    The driver is stored in self.driver and started without blocking.
    """
    self.out_logger = self.start_logger(sys.stdout)
    self.err_logger = self.start_logger(sys.stderr)

    host_tag = '[dpark@%s] ' % socket.gethostname()
    script_path = os.path.abspath(sys.argv[0])
    cli_args = ' '.join(sys.argv[1:])

    framework = mesos_pb2.FrameworkInfo()
    framework.user = getpass.getuser()
    framework.name = ''.join([host_tag, script_path, ' ', cli_args])

    # ignore INFO and DEBUG log
    # (both variables are set before mesos is imported below)
    os.environ.update({'GLOG_logtostderr': '1', 'GLOG_minloglevel': '1'})

    import mesos
    self.driver = mesos.MesosSchedulerDriver(self, framework, self.master)
    self.driver.start()
    logger.debug("Mesos Scheudler driver started")
# Excerpt of a launcher whose log lines are tagged "[drun]".  It selects the
# log level from options.quiet / options.verbose (ERROR, DEBUG, else WARNING),
# builds an MPIScheduler or a SubmitScheduler (for MPI runs a positive
# options.retry is reported as an error and reset to 0), starts a
# MesosSchedulerDriver against options.master without blocking, and installs
# one handler for SIGTERM, SIGHUP, SIGABRT and SIGQUIT that logs the signal and
# calls sched.stop(3).  The excerpt ends inside the try block that spawns an
# rfoo rconsole server; `options` and `command` are defined outside this view.
logging.basicConfig(format='[drun] %(asctime)-15s %(message)s', level=options.quiet and logging.ERROR or options.verbose and logging.DEBUG or logging.WARNING) if options.mpi: if options.retry > 0: logging.error("MPI application can not retry") options.retry = 0 sched = MPIScheduler(options, command) else: sched = SubmitScheduler(options, command) logging.debug("Connecting to mesos master %s", options.master) driver = mesos.MesosSchedulerDriver(sched, sched.framework, options.master) driver.start() def handler(signm, frame): logging.warning("got signal %d, exit now", signm) sched.stop(3) signal.signal(signal.SIGTERM, handler) signal.signal(signal.SIGHUP, handler) signal.signal(signal.SIGABRT, handler) signal.signal(signal.SIGQUIT, handler) try: from rfoo.utils import rconsole rconsole.spawn_server(locals(), 0)
# Excerpt.  First part: tail of a message callback -- once
# self.messagesReceived equals TOTAL_TASKS it exits with status 1 if the sent
# and received counts differ, otherwise it stops the driver.
# Second part: script entry point -- requires exactly one argument (the
# master), describes an executor whose command is the absolute path of
# ./test-executor, registers "Test Framework (Python)" and exits with 0 only
# when driver.run() returns DRIVER_STOPPED (1 otherwise).
print "Received message:", repr(str(message)) if self.messagesReceived == TOTAL_TASKS: if self.messagesReceived != self.messagesSent: print "Sent", self.messagesSent, print "but received", self.messagesReceived sys.exit(1) print "All tasks done, and all messages received, exiting" driver.stop() if __name__ == "__main__": if len(sys.argv) != 2: print "Usage: %s master" % sys.argv[0] sys.exit(1) executor = mesos_pb2.ExecutorInfo() executor.executor_id.value = "default" executor.command.value = os.path.abspath("./test-executor") executor.name = "Test Executor (Python)" executor.source = "python_test" framework = mesos_pb2.FrameworkInfo() framework.user = "" # Have Mesos fill in the current user. framework.name = "Test Framework (Python)" driver = mesos.MesosSchedulerDriver(TestScheduler(executor), framework, sys.argv[1]) sys.exit(0 if driver.run() == mesos_pb2.DRIVER_STOPPED else 1)
# Excerpt.  Apart from commented-out pbs_server / qmgr / killall steps, the
# live statements start pbs_sched through the shell, log the scheduler's ip
# and fqdn, create MyScheduler(fqdn), start monitor(sched) in a thread and run
# a MesosSchedulerDriver against args[0] until run() returns.
# NOTE(review): `ip` is logged here but both assignments visible in this
# excerpt are commented out -- confirm it is assigned earlier in the file.
# time.sleep(1) # monitorlog.info("RE-starting pbs_server for resources_available setting to take effect") #Popen("pbs_server", shell=True) # monitorlog.debug("qmgr list queue settings: ") # output = Popen("qmgr -c 'l q batch'", shell=True, stdout=PIPE).stdout # for line in output: # monitorlog.debug(line) # monitorlog.info("running killall pbs_sched") # Popen("killall pbs_sched", shell=True) # #time.sleep(2) monitorlog.info("starting pbs_scheduler") #Popen("/etc/init.d/pbs_sched start", shell=True) Popen("pbs_sched", shell=True) #ip = Popen("hostname -i", shell=True, stdout=PIPE).stdout.readline().rstrip() #linux #ip = Popen("ifconfig en1 | awk '/inet / { print $2 }'", shell=True, stdout=PIPE).stdout.readline().rstrip() # os x monitorlog.info("Remembering IP address of scheduler (" + ip + "), and fqdn: " + fqdn) monitorlog.info("Connecting to mesos master %s" % args[0]) sched = MyScheduler(fqdn) threading.Thread(target=monitor, args=[sched]).start() mesos.MesosSchedulerDriver(sched, args[0]).run() monitorlog.info("Finished!")
def run_scheduler(fid, framework_name, master, command):
    """Run a SecondaryScheduler against *master* until its driver returns.

    Args:
        fid: passed to the driver as its third argument.
        framework_name: forwarded to SecondaryScheduler.
        master: Mesos master address handed to the driver.
        command: forwarded to SecondaryScheduler.

    Returns None; the value returned by the driver's run() is discarded.
    """
    print("Starting secondary scheduler")
    secondary = SecondaryScheduler(framework_name, command)
    mesos.MesosSchedulerDriver(secondary, master, fid).run()
# Excerpt.  First part: tail of an offer handler -- adds a scalar "mem"
# resource of TASK_MEM to the task, appends it to `tasks` and launches the
# tasks on offer.id.
# statusUpdate(): prints every update, counts TASK_FINISHED updates and calls
# driver.stop(False) once TOTAL_TASKS tasks have finished.
# Entry point: uses the "test_executor" file located next to the script as
# the executor and exits with whatever the driver's run() returns.
mem = task.resources.add() mem.name = "mem" mem.type = mesos_pb2.Resource.SCALAR mem.scalar.value = TASK_MEM tasks.append(task) driver.launchTasks(offer.id, tasks) def statusUpdate(self, driver, update): print "Task %s is in state %d" % (update.task_id.value, update.state) if update.state == mesos_pb2.TASK_FINISHED: self.tasksFinished += 1 if self.tasksFinished == TOTAL_TASKS: print "All tasks done, exiting" driver.stop(False) if __name__ == "__main__": print "Connecting to %s" % sys.argv[1] frameworkDir = os.path.abspath(os.path.dirname(sys.argv[0])) execPath = os.path.join(frameworkDir, "test_executor") execInfo = mesos_pb2.ExecutorInfo() execInfo.executor_id.value = "default" execInfo.uri = execPath sys.exit( mesos.MesosSchedulerDriver(MyScheduler(), "Python test framework", execInfo, sys.argv[1]).run())
# Excerpt.  First part: tail of an offer handler -- while self.tid differs
# from len(config) it launches config[self.tid] as a task whose argument is
# the pickled (self.master, (todo, duration)), records it in self.running and
# advances self.tid.
# statusUpdate(): a TASK_LOST status is treated as the task having finished;
# the driver is stopped once self.tid equals len(config) and nothing is
# running.
# Entry point: rejects the masters "local" and "localquiet".
# NOTE(review): the entry point reads sys.argv[1] without checking
# len(sys.argv) first.
if len(config) != self.tid: (todo, duration) = config[self.tid] arg = pickle.dumps((self.master, (todo, duration))) pars = {"cpus": "%d" % CPUS, "mem": "%d" % MEM} task = mesos.TaskInfo(self.tid, offer.slaveId, "task %d" % self.tid, pars, arg) tasks.append(task) self.running[self.tid] = (todo, duration) self.tid += 1 print "Launching (%d, %d) on agent %s" % (todo, duration, offer.slaveId) driver.launchTasks(oid, tasks) def statusUpdate(self, driver, status): # For now, we are expecting our tasks to be lost ... if status.state == mesos.TASK_LOST: todo, duration = self.running[status.taskId] print "Finished %d todo at %d secs" % (todo, duration) del self.running[status.taskId] if self.tid == len(config) and len(self.running) == 0: driver.stop() if __name__ == "__main__": if sys.argv[1] == "local" or sys.argv[1] == "localquiet": print "Cannot do scaling experiments with 'local' or 'localquiet'!" sys.exit(1) mesos.MesosSchedulerDriver(ScalingScheduler(sys.argv[1]), sys.argv[1]).run()
# Excerpt.  First part: tail of monitor() -- scales down server `minid` when
# more than MIN_SERVERS lines were read and minload is 0, closes the
# connection, and on any exception prints a fixed message and continues the
# enclosing loop.
# Entry point: requires at least one positional argument (exit status 2
# otherwise), creates an ApacheWebFWScheduler, starts monitor(sched) in a
# thread and runs the driver until it returns.
# NOTE(review): the master is printed from args[0] but the driver is given
# sys.argv[1]; the two differ when an option precedes the master -- confirm
# which one is intended.
if len(lines) > MIN_SERVERS and minload == 0: sched.scaleDown(minid) conn.close() except Exception, e: print "exception in monitor()" continue print "done in MONITOR()" if __name__ == "__main__": parser = OptionParser(usage = "Usage: %prog mesos_master") (options,args) = parser.parse_args() if len(args) < 1: print >> sys.stderr, "At least one parameter required." print >> sys.stderr, "Use --help to show usage." exit(2) print "sched = ApacheWebFWScheduler()" sched = ApacheWebFWScheduler() print "Connecting to mesos master %s" % args[0] driver = mesos.MesosSchedulerDriver(sched, sys.argv[1]) threading.Thread(target = monitor, args=[sched]).start() driver.run() print "Scheduler finished!"
# Excerpt that begins inside a try block.  It starts mpd as a daemon, captures
# the output of mpdtrace, and exits with status 2 on OSError.  The (ip, port)
# pair is parsed from that output and MPD_PID is its first space-separated
# token.  It then builds an MPIScheduler and a framework named options.name
# (or "MPI: <MPI_PROGRAM[0]>" when no name was given) and exits with 0 only
# when driver.run() returns DRIVER_STOPPED (1 otherwise).
call([mpd_cmd, "--daemon"]) mpdtraceproc = Popen(mpdtrace_cmd, shell=True, stdout=PIPE) mpdtraceout = mpdtraceproc.communicate()[0] except OSError,e: print >> sys.stderr, "Error starting mpd or mpdtrace" print >> sys.stderr, e exit(2) (ip,port) = parseIpPort(mpdtraceout) MPD_PID = mpdtraceout.split(" ")[0] print "MPD_PID is %s" % MPD_PID scheduler = MPIScheduler(options, ip, port) framework = mesos_pb2.FrameworkInfo() framework.user = "" if options.name is not None: framework.name = options.name else: framework.name = "MPI: %s" % MPI_PROGRAM[0] driver = mesos.MesosSchedulerDriver( scheduler, framework, args[0]) sys.exit(0 if driver.run() == mesos_pb2.DRIVER_STOPPED else 1)
def cli():
    """Command line entry point for the deimos test schedulers.

    With --executor the process runs a Mesos executor driver instead of a
    test scheduler.  Otherwise the scheduler class selected by --test is
    instantiated with every truthy "--test.<name>" option as the keyword
    argument <name>, and run against --master.

    The function never returns; it always ends the process via os._exit:
      2 => driver problem
      1 => tests failed
      0 => tests passed
    """
    schedulers = {
        "sleep": SleepScheduler,
        "pg": PGScheduler,
        "executor": ExecutorScheduler,
    }

    parser = argparse.ArgumentParser(prog="deimos-test.py")
    parser.add_argument("--master", default="localhost:5050",
                        help="Mesos master URL")
    parser.add_argument("--test", choices=schedulers.keys(), default="sleep",
                        help="Test scheduler to use")
    parser.add_argument("--executor", action="store_true", default=False,
                        help="Runs the executor instead of a test scheduler")
    parser.add_argument("--test.container",
                        help="Image URL to use (for any test)")
    parser.add_argument("--test.uris", action="append",
                        help="Pass any number of times to add URIs (for any test)")
    parser.add_argument("--test.trials", type=int,
                        help="Number of tasks to run (for any test)")
    parser.add_argument("--test.sleep", type=int,
                        help="Seconds to sleep (for sleep test)")
    parser.add_argument("--test.command",
                        help="Command to use (for executor test)")
    parsed = parser.parse_args()

    if parsed.executor:
        log.info("Mesos executor mode was chosen")
        exec_driver = mesos.MesosExecutorDriver(ExecutorSchedulerExecutor())
        exec_code = exec_driver.run()
        log.info(mesos_pb2.Status.Name(exec_code))
        exec_driver.stop()
        if exec_code != mesos_pb2.DRIVER_STOPPED:
            log.error("Driver died in an anomalous state")
            os._exit(2)
        os._exit(0)

    # Collect the "test.<name>" options that were given a truthy value; the
    # part after "test." becomes the constructor keyword.
    constructor_args = {}
    for dest, value in vars(parsed).items():
        tail = dest.split("test.")[1:]
        if len(tail) == 1 and value:
            constructor_args[tail[0]] = value

    scheduler_class = schedulers[parsed.test]
    scheduler = scheduler_class(**constructor_args)
    shown_args = ", ".join(
        "%s=%r" % item for item in constructor_args.items())
    label = (scheduler_class.__name__, shown_args)
    log.info("Testing: %s(%s)" % label)

    framework = mesos_pb2.FrameworkInfo()
    framework.name = "deimos-test"
    framework.user = ""

    driver = mesos.MesosSchedulerDriver(scheduler, framework, parsed.master)
    code = driver.run()
    log.info(mesos_pb2.Status.Name(code))
    driver.stop()

    ################  2 => driver problem  1 => tests failed  0 => tests passed
    if code != mesos_pb2.DRIVER_STOPPED:
        log.error("Driver died in an anomalous state")
        log.info("Aborted: %s(%s)" % label)
        os._exit(2)

    for task_status in scheduler.statuses.values():
        if task_status in Scheduler.failed:
            log.error("Test run failed -- not all tasks made it")
            log.info("Failure: %s(%s)" % label)
            os._exit(1)

    log.info("Success: %s(%s)" % label)
    os._exit(0)
# Excerpt that begins and ends in the middle of a larger scope.  It finishes
# the render executor description (adds rendlerArtifact as a command URI),
# builds the "rendering-crawler" framework (checkpointing is enabled when
# MESOS_CHECKPOINT is set), creates the RenderingCrawler from sys.argv[1] and
# a driver for the master in sys.argv[2], runs the driver in a separate thread
# because driver.run() blocks, and then reads stdin line by line; an empty
# read (end of input) triggers the shutdown message.
renderExecutor.command.uris.add().value = rendlerArtifact renderExecutor.name = "Renderer" renderExecutor.source = "rendering-crawler" framework = mesos_pb2.FrameworkInfo() framework.user = "" # Have Mesos fill in the current user. framework.name = "rendering-crawler" if os.getenv("MESOS_CHECKPOINT"): print "Enabling checkpoint for the framework" framework.checkpoint = True crawler = RenderingCrawler(sys.argv[1], crawlExecutor, renderExecutor) driver = mesos.MesosSchedulerDriver( crawler, framework, sys.argv[2]) # driver.run() blocks; we run it in a separate thread def run_driver_async(): status = 0 if driver.run() == mesos_pb2.DRIVER_STOPPED else 1 driver.stop() sys.exit(status) Thread(target = run_driver_async, args = ()).start() # Listen for CTRL+D while True: line = sys.stdin.readline() if not line: print "Rendler is shutting down"
# Excerpt.  First part: the final statement of a method that returns
# mesos.ExecutorInfo(execPath, "").
# registered(): prints a confirmation.
# resourceOffer(): for each offer, while fewer than TOTAL_TASKS tasks have
# been launched, creates one task description requesting TASK_CPUS cpus and
# TASK_MEM mem on the offering slave; replies to the offer once per call.
# statusUpdate(): counts TASK_FINISHED updates and stops the driver once
# TOTAL_TASKS tasks have finished.
# Entry point: runs MyScheduler against the master given in sys.argv[1].
return mesos.ExecutorInfo(execPath, "") def registered(self, driver, fid): print "Registered!" def resourceOffer(self, driver, oid, offers): tasks = [] print "Got a resource offer!" for offer in offers: if self.tasksLaunched < TOTAL_TASKS: tid = self.tasksLaunched self.tasksLaunched += 1 print "Accepting offer on %s to start task %d" % (offer.host, tid) params = {"cpus": "%d" % TASK_CPUS, "mem": "%d" % TASK_MEM} td = mesos.TaskDescription(tid, offer.slaveId, "task %d" % tid, params, "") tasks.append(td) driver.replyToOffer(oid, tasks, {}) def statusUpdate(self, driver, update): print "Task %d is in state %d" % (update.taskId, update.state) if update.state == mesos.TASK_FINISHED: self.tasksFinished += 1 if self.tasksFinished == TOTAL_TASKS: print "All tasks done, exiting" driver.stop() if __name__ == "__main__": print "Connecting to %s" % sys.argv[1] mesos.MesosSchedulerDriver(MyScheduler(), sys.argv[1]).run()
if __name__ == "__main__":
    # Usage: <script> [options] <master_url> <command ...>
    # Everything after the master URL is joined with single spaces to form
    # the command handed to SubmitScheduler.
    parser = OptionParser(
        usage="Usage: %prog [options] <master_url> <command>")
    parser.add_option(
        "-c", "--cpus", dest="cpus", type="int", default=DEFAULT_CPUS,
        help="number of CPUs to request (default: 1)")
    parser.add_option(
        "-m", "--mem", dest="mem", type="int", default=DEFAULT_MEM,
        help="MB of memory to request (default: 512)")
    parser.add_option(
        "-n", "--name", dest="name", type="string",
        help="Framework name")
    (options, args) = parser.parse_args()

    if len(args) < 2:
        # optparse's error() prints the usage and exits with status 2, so
        # the explicit exit below only acts as a safety net.
        parser.error("At least two parameters are required.")
        exit(2)

    master, command_words = args[0], args[1:]
    command = " ".join(command_words)

    print("Connecting to mesos master %s" % master)
    sched = SubmitScheduler(options, master, command)
    mesos.MesosSchedulerDriver(sched, master).run()
# Launcher statements; `options` and `command` are defined before this point.
# Log level: WARNING when options.quiet, else DEBUG when options.verbose,
# else INFO.
logging.basicConfig(
    format='[drun] %(asctime)-15s %(message)s',
    level=(logging.WARNING if options.quiet
           else logging.DEBUG if options.verbose
           else logging.INFO))

if not options.mpi:
    sched = SubmitScheduler(options, command)
else:
    # MPI runs are never retried: report a positive retry count and reset it.
    if options.retry > 0:
        logging.error("MPI application can not retry")
        options.retry = 0
    sched = MPIScheduler(options, command)

logging.debug("Connecting to mesos master %s", options.master)
driver = mesos.MesosSchedulerDriver(
    sched, sched.framework_name, sched.getExecutorInfo(), options.master)
driver.start()


def handler(signm, frame):
    """Log the received signal, stop the scheduler and exit with status 1."""
    logging.warning("got signal %d, exit now", signm)
    sched.stop(driver)
    sys.exit(1)


# The same handler serves every termination-style signal.
signal.signal(signal.SIGTERM, handler)
signal.signal(signal.SIGINT, handler)
signal.signal(signal.SIGHUP, handler)
signal.signal(signal.SIGABRT, handler)
signal.signal(signal.SIGQUIT, handler)