Esempio n. 1
0
    def start_framework(master_uri, exe_path, n_workers, uow_name, prefix,
                        cpu_alloc, mem_alloc):
        """Build and return a Mesos scheduler driver for the Exelixi framework.

        The driver is only constructed here; it is neither started nor run.

        Arguments:
          master_uri -- handed to MesosSchedulerDriver as the master to contact
          exe_path -- used as the executor command and forwarded to the
                      MesosScheduler constructor
          n_workers, uow_name, prefix, cpu_alloc, mem_alloc -- forwarded
                      unchanged to the MesosScheduler constructor

        Environment:
          MESOS_CHECKPOINT -- when set and non-empty, framework checkpointing
                      is enabled
          MESOS_AUTHENTICATE -- when set and non-empty, a credential built from
                      DEFAULT_PRINCIPAL and DEFAULT_SECRET is passed to the
                      driver; if either of those is missing the process exits
                      with status 1
        """
        # describe the executor; a fresh uuid1 hex string is its id
        exec_info = mesos_pb2.ExecutorInfo()
        exec_info.executor_id.value = uuid1().hex
        exec_info.command.value = exe_path
        exec_info.name = "Exelixi Executor"
        exec_info.source = "per-job build"

        ## NB: TODO download tarball/container from HDFS
        #uri = executor.command.uris.add()
        #uri.executable = false
        #uri.value = "hdfs://namenode/exelixi/exelixi.tgz"

        # describe the framework
        framework = mesos_pb2.FrameworkInfo()
        framework.user = ""  # have Mesos fill in the current user
        framework.name = "Exelixi Framework"

        if os.getenv("MESOS_CHECKPOINT"):
            logging.debug(
                "Mesos Scheduler: enabling checkpoint for the framework")
            framework.checkpoint = True

        # the scheduler captures the command line options
        sched = MesosScheduler(exec_info, exe_path, n_workers, uow_name,
                               prefix, cpu_alloc, mem_alloc)

        # without authentication the driver needs no credential
        if not os.getenv("MESOS_AUTHENTICATE"):
            return mesos.MesosSchedulerDriver(sched, framework, master_uri)

        logging.debug(
            "Mesos Scheduler: enabling authentication for the framework")

        principal = os.getenv("DEFAULT_PRINCIPAL")
        if not principal:
            logging.critical(
                "Mesos Scheduler: expecting authentication principal in the environment"
            )
            sys.exit(1)

        secret = os.getenv("DEFAULT_SECRET")
        if not secret:
            logging.critical(
                "Mesos Scheduler: expecting authentication secret in the environment"
            )
            sys.exit(1)

        credential = mesos_pb2.Credential()
        credential.principal = principal
        credential.secret = secret

        return mesos.MesosSchedulerDriver(sched, framework, master_uri,
                                          credential)
Esempio n. 2
0
    def run(self):
        """Run the JobTree framework against self.mesos_url.

        Builds the executor and framework descriptions, creates a scheduler
        driver (with a credential when MESOS_AUTHENTICATE is set), runs it
        until it returns, logs a 0/1 status and stops the driver.

        Returns None on every path. If authentication is requested but
        DEFAULT_PRINCIPAL or DEFAULT_SECRET is missing from the environment,
        an error is logged and the method returns without creating a driver.
        Raises KeyError if PYTHONPATH is not present in os.environ.
        """
        self.queue.branchUpdate("/")

        exec_info = mesos_pb2.ExecutorInfo()
        exec_info.executor_id.value = "default"
        # The executor command is the current interpreter, run with the
        # current PYTHONPATH, on jobTreeExec.py located next to this file.
        interpreter = "PYTHONPATH=%s %s " % (os.environ['PYTHONPATH'],
                                             sys.executable)
        script_and_arg = os.path.abspath(
            os.path.join(os.path.dirname(__file__),
                         "./jobTreeExec.py %s" % (self.common_dir)))
        exec_info.command.value = interpreter + script_and_arg
        exec_info.name = "JobTreeExec"
        exec_info.source = "JobTree"

        framework = mesos_pb2.FrameworkInfo()
        framework.user = ""  # Have Mesos fill in the current user.
        framework.name = "JobTree"

        # TODO(vinod): Make checkpointing the default when it is default
        # on the slave.
        if os.getenv("MESOS_CHECKPOINT"):
            logging.info("Enabling checkpoint for the framework")
            framework.checkpoint = True

        if os.getenv("MESOS_AUTHENTICATE"):
            logging.info("Enabling authentication for the framework")

            principal = os.getenv("DEFAULT_PRINCIPAL")
            if not principal:
                logging.error(
                    "Expecting authentication principal in the environment")
                return

            secret = os.getenv("DEFAULT_SECRET")
            if not secret:
                logging.error(
                    "Expecting authentication secret in the environment")
                return

            credential = mesos_pb2.Credential()
            credential.principal = principal
            credential.secret = secret
            extra_args = (credential,)
        else:
            # Only the unauthenticated path logs the master address.
            logging.info("Contacting Mesos: %s" % self.mesos_url)
            extra_args = ()

        driver = mesos.MesosSchedulerDriver(
            JobTreeScheduler(exec_info, self.queue), framework,
            self.mesos_url, *extra_args)

        if driver.run() == mesos_pb2.DRIVER_STOPPED:
            status = 0
        else:
            status = 1
        logging.info("Status: %s" % status)
        # Ensure that the driver process terminates.
        driver.stop()
Esempio n. 3
0
 def launchTask(self, driver, task):
   self.tid = task.taskId
   master, (todo, duration) = pickle.loads(task.arg)
   scheduler = NestedScheduler(todo, duration, self)
   print "Running here:" + master
   self.nested_driver = mesos.MesosSchedulerDriver(scheduler, master)
   self.nested_driver.start()
Esempio n. 4
0
    def start(self):
        """Create the stdout/stderr loggers and start the scheduler driver.

        The framework name handed to the driver is "[dpark@<hostname>] "
        followed by the absolute path of sys.argv[0], a space, and the
        remaining command-line arguments joined by spaces. The driver is
        stored in self.driver and started with start().
        """
        self.out_logger = self.start_logger(sys.stdout)
        self.err_logger = self.start_logger(sys.stderr)

        host = socket.gethostname()
        script = os.path.abspath(sys.argv[0])
        script_args = ' '.join(sys.argv[1:])
        framework_name = '[dpark@%s] %s %s' % (host, script, script_args)

        driver = mesos.MesosSchedulerDriver(self, framework_name,
                                            self.getExecutorInfo(),
                                            self.master)
        self.driver = driver
        driver.start()
        logger.debug("Mesos Scheudler driver started")
Esempio n. 5
0
    def __init__(self, scheduler, workrepo, config):
        """Set up the Mesos scheduler, framework description and driver.

        All three arguments are passed to the parent constructor and to
        NebularMesos. When self.config.mesos is None an error is logged and
        the constructor returns early, in which case self.sched,
        self.framework and self.driver are never assigned.
        """
        super(MesosDRMS, self).__init__(scheduler, workrepo, config)

        if self.config.mesos is None:
            logging.error("Mesos not configured")
            return

        self.sched = NebularMesos(scheduler, workrepo, config)

        framework = mesos_pb2.FrameworkInfo()
        framework.user = ""  # Have Mesos fill in the current user.
        framework.name = "Nebula"
        self.framework = framework

        ## additional authentication stuff would go here
        self.driver = mesos.MesosSchedulerDriver(self.sched, framework,
                                                 self.config.mesos)
Esempio n. 6
0
    def start(self):
        """Create the stdout/stderr loggers and start the scheduler driver.

        The framework is registered under the current user (getpass.getuser)
        with the name "[dpark@<hostname>] " followed by the absolute path of
        sys.argv[0], a space, and the remaining command-line arguments joined
        by spaces. The driver is stored in self.driver and started.
        """
        self.out_logger = self.start_logger(sys.stdout)
        self.err_logger = self.start_logger(sys.stderr)

        host = socket.gethostname()
        script = os.path.abspath(sys.argv[0])
        script_args = ' '.join(sys.argv[1:])

        framework = mesos_pb2.FrameworkInfo()
        framework.user = getpass.getuser()
        framework.name = '[dpark@%s] %s %s' % (host, script, script_args)

        # ignore INFO and DEBUG log
        # The GLOG_* variables are set before the import below; presumably
        # the native library reads them at load time -- confirm.
        os.environ.update({'GLOG_logtostderr': '1', 'GLOG_minloglevel': '1'})
        import mesos
        driver = mesos.MesosSchedulerDriver(self, framework, self.master)
        self.driver = driver
        driver.start()
        logger.debug("Mesos Scheudler driver started")
Esempio n. 7
0
    logging.basicConfig(format='[drun] %(asctime)-15s %(message)s',
                        level=options.quiet and logging.ERROR
                        or options.verbose and logging.DEBUG
                        or logging.WARNING)

    if options.mpi:
        if options.retry > 0:
            logging.error("MPI application can not retry")
            options.retry = 0
        sched = MPIScheduler(options, command)
    else:
        sched = SubmitScheduler(options, command)

    logging.debug("Connecting to mesos master %s", options.master)
    driver = mesos.MesosSchedulerDriver(sched, sched.framework, options.master)

    driver.start()

    def handler(signm, frame):
        """Signal handler: log the signal number and stop the scheduler.

        signm is the received signal number; frame is unused. The value 3 is
        passed to sched.stop() as-is (presumably an exit status -- confirm
        against the scheduler's stop() implementation).
        """
        logging.warning("got signal %d, exit now", signm)
        sched.stop(3)

    signal.signal(signal.SIGTERM, handler)
    signal.signal(signal.SIGHUP, handler)
    signal.signal(signal.SIGABRT, handler)
    signal.signal(signal.SIGQUIT, handler)

    try:
        from rfoo.utils import rconsole
        rconsole.spawn_server(locals(), 0)
Esempio n. 8
0
        print "Received message:", repr(str(message))

        if self.messagesReceived == TOTAL_TASKS:
            if self.messagesReceived != self.messagesSent:
                print "Sent", self.messagesSent,
                print "but received", self.messagesReceived
                sys.exit(1)
            print "All tasks done, and all messages received, exiting"
            driver.stop()


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print "Usage: %s master" % sys.argv[0]
        sys.exit(1)

    executor = mesos_pb2.ExecutorInfo()
    executor.executor_id.value = "default"
    executor.command.value = os.path.abspath("./test-executor")
    executor.name = "Test Executor (Python)"
    executor.source = "python_test"

    framework = mesos_pb2.FrameworkInfo()
    framework.user = ""  # Have Mesos fill in the current user.
    framework.name = "Test Framework (Python)"

    driver = mesos.MesosSchedulerDriver(TestScheduler(executor), framework,
                                        sys.argv[1])

    sys.exit(0 if driver.run() == mesos_pb2.DRIVER_STOPPED else 1)
Esempio n. 9
0
    # time.sleep(1)

    # monitorlog.info("RE-starting pbs_server for resources_available setting to take effect")
    #Popen("pbs_server", shell=True)
    # monitorlog.debug("qmgr list queue settings: ")
    # output = Popen("qmgr -c 'l q batch'", shell=True, stdout=PIPE).stdout
    # for line in output:
    #   monitorlog.debug(line)

    # monitorlog.info("running killall pbs_sched")
    # Popen("killall pbs_sched", shell=True)
    # #time.sleep(2)

    monitorlog.info("starting pbs_scheduler")
    #Popen("/etc/init.d/pbs_sched start", shell=True)
    Popen("pbs_sched", shell=True)

    #ip = Popen("hostname -i", shell=True, stdout=PIPE).stdout.readline().rstrip() #linux
    #ip = Popen("ifconfig en1 | awk '/inet / { print $2 }'", shell=True, stdout=PIPE).stdout.readline().rstrip() # os x
    monitorlog.info("Remembering IP address of scheduler (" + ip +
                    "), and fqdn: " + fqdn)

    monitorlog.info("Connecting to mesos master %s" % args[0])

    sched = MyScheduler(fqdn)
    threading.Thread(target=monitor, args=[sched]).start()

    mesos.MesosSchedulerDriver(sched, args[0]).run()

    monitorlog.info("Finished!")
Esempio n. 10
0
def run_scheduler(fid, framework_name, master, command):
    print "Starting secondary scheduler"
    sched = SecondaryScheduler(framework_name, command)
    sched_driver = mesos.MesosSchedulerDriver(sched, master, fid)
    sched_driver.run()
Esempio n. 11
0
                mem = task.resources.add()
                mem.name = "mem"
                mem.type = mesos_pb2.Resource.SCALAR
                mem.scalar.value = TASK_MEM

                tasks.append(task)
                driver.launchTasks(offer.id, tasks)

    def statusUpdate(self, driver, update):
        print "Task %s is in state %d" % (update.task_id.value, update.state)
        if update.state == mesos_pb2.TASK_FINISHED:
            self.tasksFinished += 1
            if self.tasksFinished == TOTAL_TASKS:
                print "All tasks done, exiting"
                driver.stop(False)


if __name__ == "__main__":
    print "Connecting to %s" % sys.argv[1]

    frameworkDir = os.path.abspath(os.path.dirname(sys.argv[0]))
    execPath = os.path.join(frameworkDir, "test_executor")
    execInfo = mesos_pb2.ExecutorInfo()
    execInfo.executor_id.value = "default"
    execInfo.uri = execPath

    sys.exit(
        mesos.MesosSchedulerDriver(MyScheduler(), "Python test framework",
                                   execInfo, sys.argv[1]).run())
Esempio n. 12
0
            if len(config) != self.tid:
                (todo, duration) = config[self.tid]
                arg = pickle.dumps((self.master, (todo, duration)))
                pars = {"cpus": "%d" % CPUS, "mem": "%d" % MEM}
                task = mesos.TaskInfo(self.tid, offer.slaveId,
                                      "task %d" % self.tid, pars, arg)
                tasks.append(task)
                self.running[self.tid] = (todo, duration)
                self.tid += 1
                print "Launching (%d, %d) on agent %s" % (todo, duration,
                                                          offer.slaveId)
        driver.launchTasks(oid, tasks)

    def statusUpdate(self, driver, status):
        # For now, we are expecting our tasks to be lost ...
        if status.state == mesos.TASK_LOST:
            todo, duration = self.running[status.taskId]
            print "Finished %d todo at %d secs" % (todo, duration)
            del self.running[status.taskId]
            if self.tid == len(config) and len(self.running) == 0:
                driver.stop()


if __name__ == "__main__":
    if sys.argv[1] == "local" or sys.argv[1] == "localquiet":
        print "Cannot do scaling experiments with 'local' or 'localquiet'!"
        sys.exit(1)

    mesos.MesosSchedulerDriver(ScalingScheduler(sys.argv[1]),
                               sys.argv[1]).run()
Esempio n. 13
0
          if len(lines) > MIN_SERVERS and minload == 0:
            sched.scaleDown(minid)

        conn.close()
    except Exception, e:
      print "exception in monitor()"
      continue
  print "done in MONITOR()"

if __name__ == "__main__":
  parser = OptionParser(usage = "Usage: %prog mesos_master")

  (options,args) = parser.parse_args()
  if len(args) < 1:
    print >> sys.stderr, "At least one parameter required."
    print >> sys.stderr, "Use --help to show usage."
    exit(2)

  print "sched = ApacheWebFWScheduler()"
  sched = ApacheWebFWScheduler()

  print "Connecting to mesos master %s" % args[0]
  driver = mesos.MesosSchedulerDriver(sched, sys.argv[1])

  threading.Thread(target = monitor, args=[sched]).start()

  driver.run()

  print "Scheduler finished!"
Esempio n. 14
0
      call([mpd_cmd, "--daemon"])

    mpdtraceproc = Popen(mpdtrace_cmd, shell=True, stdout=PIPE)
    mpdtraceout = mpdtraceproc.communicate()[0]

  except OSError,e:
    print >> sys.stderr, "Error starting mpd or mpdtrace"
    print >> sys.stderr, e
    exit(2)

  (ip,port) = parseIpPort(mpdtraceout)

  MPD_PID = mpdtraceout.split(" ")[0]
  print "MPD_PID is %s" % MPD_PID

  scheduler = MPIScheduler(options, ip, port)

  framework = mesos_pb2.FrameworkInfo()
  framework.user = ""

  if options.name is not None:
    framework.name = options.name
  else:
    framework.name = "MPI: %s" % MPI_PROGRAM[0]

  driver = mesos.MesosSchedulerDriver(
    scheduler,
    framework,
    args[0])
  sys.exit(0 if driver.run() == mesos_pb2.DRIVER_STOPPED else 1)
Esempio n. 15
0
def cli():
    """Command-line entry point for the deimos test schedulers.

    With --executor a Mesos executor driver is run instead of a scheduler.
    Otherwise the scheduler selected by --test is constructed from the
    truthy "--test.<name>" options (passed as keyword arguments named
    <name>) and run against --master.

    This function never returns; it always ends in os._exit with:
      2 => driver problem
      1 => tests failed
      0 => tests passed (or, in executor mode, the driver stopped normally)
    """
    schedulers = {
        "sleep": SleepScheduler,
        "pg": PGScheduler,
        "executor": ExecutorScheduler
    }

    parser = argparse.ArgumentParser(prog="deimos-test.py")
    parser.add_argument("--master",
                        default="localhost:5050",
                        help="Mesos master URL")
    parser.add_argument("--test",
                        choices=schedulers.keys(),
                        default="sleep",
                        help="Test scheduler to use")
    parser.add_argument("--executor",
                        action="store_true",
                        default=False,
                        help="Runs the executor instead of a test scheduler")
    parser.add_argument("--test.container",
                        help="Image URL to use (for any test)")
    parser.add_argument("--test.uris",
                        action="append",
                        help="Pass any number of times to add URIs (for any test)")
    parser.add_argument("--test.trials",
                        type=int,
                        help="Number of tasks to run (for any test)")
    parser.add_argument("--test.sleep",
                        type=int,
                        help="Seconds to sleep (for sleep test)")
    parser.add_argument("--test.command",
                        help="Command to use (for executor test)")
    parsed = parser.parse_args()

    if parsed.executor:
        log.info("Mesos executor mode was chosen")
        driver = mesos.MesosExecutorDriver(ExecutorSchedulerExecutor())
        code = driver.run()
        log.info(mesos_pb2.Status.Name(code))
        driver.stop()
        if code == mesos_pb2.DRIVER_STOPPED:
            os._exit(0)
        log.error("Driver died in an anomalous state")
        os._exit(2)

    # Keep only options named "test.<something>" that carry a truthy value;
    # <something> becomes the keyword argument name.
    constructor_args = {}
    for option_name, option_value in vars(parsed).items():
        suffix = option_name.split("test.")[1:]
        if len(suffix) == 1 and option_value:
            constructor_args[suffix[0]] = option_value

    scheduler_class = schedulers[parsed.test]
    scheduler = scheduler_class(**constructor_args)
    shown_args = ", ".join(
        ["%s=%r" % (k, v) for k, v in constructor_args.items()])
    run_label = (scheduler_class.__name__, shown_args)
    log.info("Testing: %s(%s)" % run_label)

    framework = mesos_pb2.FrameworkInfo()
    framework.name = "deimos-test"
    framework.user = ""
    driver = mesos.MesosSchedulerDriver(scheduler, framework, parsed.master)
    code = driver.run()
    log.info(mesos_pb2.Status.Name(code))
    driver.stop()

    if code != mesos_pb2.DRIVER_STOPPED:
        log.error("Driver died in an anomalous state")
        log.info("Aborted: %s(%s)" % run_label)
        os._exit(2)

    for task_status in scheduler.statuses.values():
        if task_status in Scheduler.failed:
            log.error("Test run failed -- not all tasks made it")
            log.info("Failure: %s(%s)" % run_label)
            os._exit(1)

    log.info("Success: %s(%s)" % run_label)
    os._exit(0)
Esempio n. 16
0
    renderExecutor.command.uris.add().value = rendlerArtifact
    renderExecutor.name = "Renderer"
    renderExecutor.source = "rendering-crawler"

    framework = mesos_pb2.FrameworkInfo()
    framework.user = "" # Have Mesos fill in the current user.
    framework.name = "rendering-crawler"

    if os.getenv("MESOS_CHECKPOINT"):
        print "Enabling checkpoint for the framework"
        framework.checkpoint = True

    crawler = RenderingCrawler(sys.argv[1], crawlExecutor, renderExecutor)

    driver = mesos.MesosSchedulerDriver(
        crawler,
        framework,
        sys.argv[2])

    # driver.run() blocks; we run it in a separate thread
    def run_driver_async():
        """Run the driver until it returns, stop it, then call sys.exit.

        The status passed to sys.exit is 0 when run() returned
        DRIVER_STOPPED and 1 otherwise.

        NOTE(review): this function is used as a Thread target. sys.exit()
        only raises SystemExit, which in a non-main thread ends that thread
        rather than the process, so the status computed here does not become
        the process exit code -- confirm this is intended.
        """
        status = 0 if driver.run() == mesos_pb2.DRIVER_STOPPED else 1
        driver.stop()
        sys.exit(status)

    Thread(target = run_driver_async, args = ()).start()

    # Listen for CTRL+D
    while True:
        line = sys.stdin.readline()
        if not line:
            print "Rendler is shutting down"
Esempio n. 17
0
    return mesos.ExecutorInfo(execPath, "")

  def registered(self, driver, fid):
    """Scheduler callback: print a confirmation; driver and fid are unused."""
    print "Registered!"

  def resourceOffer(self, driver, oid, offers):
    tasks = []
    print "Got a resource offer!"
    for offer in offers:
      if self.tasksLaunched < TOTAL_TASKS:
        tid = self.tasksLaunched
        self.tasksLaunched += 1
        print "Accepting offer on %s to start task %d" % (offer.host, tid)
        params = {"cpus": "%d" % TASK_CPUS, "mem": "%d" % TASK_MEM}
        td = mesos.TaskDescription(tid, offer.slaveId, "task %d" % tid,
            params, "")
        tasks.append(td)
    driver.replyToOffer(oid, tasks, {})

  def statusUpdate(self, driver, update):
    print "Task %d is in state %d" % (update.taskId, update.state)
    if update.state == mesos.TASK_FINISHED:
      self.tasksFinished += 1
      if self.tasksFinished == TOTAL_TASKS:
        print "All tasks done, exiting"
        driver.stop()

if __name__ == "__main__":
  print "Connecting to %s" % sys.argv[1]
  mesos.MesosSchedulerDriver(MyScheduler(), sys.argv[1]).run()
Esempio n. 18
0
if __name__ == "__main__":
    parser = OptionParser(
        usage="Usage: %prog [options] <master_url> <command>")
    parser.add_option("-c",
                      "--cpus",
                      help="number of CPUs to request (default: 1)",
                      dest="cpus",
                      type="int",
                      default=DEFAULT_CPUS)
    parser.add_option("-m",
                      "--mem",
                      help="MB of memory to request (default: 512)",
                      dest="mem",
                      type="int",
                      default=DEFAULT_MEM)
    parser.add_option("-n",
                      "--name",
                      help="Framework name",
                      dest="name",
                      type="string")
    (options, args) = parser.parse_args()
    if len(args) < 2:
        parser.error("At least two parameters are required.")
        exit(2)
    master = args[0]
    command = " ".join(args[1:])
    print "Connecting to mesos master %s" % master
    sched = SubmitScheduler(options, master, command)
    mesos.MesosSchedulerDriver(sched, master).run()
Esempio n. 19
0
    logging.basicConfig(format='[drun] %(asctime)-15s %(message)s',
                        level=options.quiet and logging.WARNING
                        or options.verbose and logging.DEBUG or logging.INFO)

    if options.mpi:
        if options.retry > 0:
            logging.error("MPI application can not retry")
            options.retry = 0
        sched = MPIScheduler(options, command)
    else:
        sched = SubmitScheduler(options, command)

    logging.debug("Connecting to mesos master %s", options.master)
    driver = mesos.MesosSchedulerDriver(sched, sched.framework_name,
                                        sched.getExecutorInfo(),
                                        options.master)

    driver.start()

    def handler(signm, frame):
        """Signal handler: log the signal, stop the scheduler, exit with 1.

        signm is the received signal number; frame is unused. The enclosing
        scope's driver is handed to sched.stop() before sys.exit(1).
        """
        logging.warning("got signal %d, exit now", signm)
        sched.stop(driver)
        sys.exit(1)

    signal.signal(signal.SIGTERM, handler)
    signal.signal(signal.SIGINT, handler)
    signal.signal(signal.SIGHUP, handler)
    signal.signal(signal.SIGABRT, handler)
    signal.signal(signal.SIGQUIT, handler)