Esempio n. 1
0
def main(_):
    """Connect to ZooKeeper, start the ZKNS HTTP server on a daemon thread, and wait.

    The positional argument is ignored; all configuration is read from FLAGS.
    A KeyboardInterrupt raised while waiting is logged instead of propagating.
    """
    zk_client = kazoo_client.TwitterKazooClient(FLAGS.zk)
    zk_client.start()

    # Fall back to derived values when the SOA flags are left unset.
    nameserver = FLAGS.soa_nameserver or socket.getfqdn()
    contact = FLAGS.soa_email or 'root.%s' % FLAGS.domain

    soa_data = SOAData(
        ttl=FLAGS.soa_ttl,
        ns1=nameserver,
        email=contact,
        refresh=FLAGS.soa_refresh,
        retry=FLAGS.soa_retry,
        expire=FLAGS.soa_expire,
        nxdomain_ttl=FLAGS.soa_nxdomain_ttl,
    )
    server = ZknsServer(
        zk_handle=zk_client,
        domain=FLAGS.domain,
        ttl=FLAGS.ttl,
        soa_data=soa_data,
    )

    def serve():
        server.run(FLAGS.listen, FLAGS.port, server='cherrypy')

    worker = ExceptionalThread(target=serve)
    worker.daemon = True
    worker.start()

    try:
        wait_forever()
    except KeyboardInterrupt:
        log.fatal('KeyboardInterrupt! Shutting down.')
Esempio n. 2
0
 def __init__(self,
              task_id,
              task_monitor,
              disk_collector=DiskCollector,
              process_collection_interval=PROCESS_COLLECTION_INTERVAL,
              disk_collection_interval=DISK_COLLECTION_INTERVAL,
              history_time=HISTORY_TIME,
              history_provider=HistoryProvider()):
     """Prepare resource-collection state for a single task.

     task_id: identifier of the task; also embedded in the thread name
     task_monitor: TaskMonitor object specifying the task whose resources should be monitored
     disk_collector: stored as the disk collector class; no collector is created here
     process_collection_interval: converted to seconds with as_(Time.SECONDS)
     disk_collection_interval: converted to seconds with as_(Time.SECONDS)
     history_time: handed to history_provider.provides() with the smaller interval
     history_provider: object whose provides() result becomes the history store
     """
     self._task_id = task_id
     self._task_monitor = task_monitor  # exposes PIDs, sandbox
     log.debug('Initialising resource collection for task %s' % task_id)

     # ProcessStatus => ProcessTreeCollector
     self._process_collectors = {}
     self._disk_collector_class = disk_collector
     self._disk_collector = None

     process_secs = process_collection_interval.as_(Time.SECONDS)
     disk_secs = disk_collection_interval.as_(Time.SECONDS)
     self._process_collection_interval = process_secs
     self._disk_collection_interval = disk_secs

     # The history is sized against the shorter of the two intervals.
     shortest_interval = min(process_secs, disk_secs)
     self._history = history_provider.provides(history_time, shortest_interval)
     self._kill_signal = threading.Event()

     thread_name = '%s[%s]' % (self.__class__.__name__, task_id)
     ExceptionalThread.__init__(self, name=thread_name)
     self.daemon = True
Esempio n. 3
0
  def __init__(
      self,
      task_id,
      task_monitor,
      disk_collector_provider=DiskCollectorProvider(),
      process_collection_interval=PROCESS_COLLECTION_INTERVAL,
      disk_collection_interval=DiskCollectorSettings.DISK_COLLECTION_INTERVAL,
      history_time=HISTORY_TIME,
      history_provider=HistoryProvider()):
    """Prepare resource-collection state for a single task.

    task_id: identifier of the task; also embedded in the thread name
    task_monitor: TaskMonitor object specifying the task whose resources should be monitored
    disk_collector_provider: stored for later use; no disk collector is created here
    process_collection_interval: converted to seconds with as_(Time.SECONDS)
    disk_collection_interval: converted to seconds with as_(Time.SECONDS)
    history_time: handed to history_provider.provides() with the smaller interval
    history_provider: object whose provides() result becomes the history store
    """
    self._task_id = task_id
    self._task_monitor = task_monitor  # exposes PIDs, sandbox
    log.debug('Initialising resource collection for task %s', task_id)

    # ProcessStatus => ProcessTreeCollector
    self._process_collectors = {}
    self._disk_collector_provider = disk_collector_provider
    self._disk_collector = None

    process_secs = process_collection_interval.as_(Time.SECONDS)
    disk_secs = disk_collection_interval.as_(Time.SECONDS)
    self._process_collection_interval = process_secs
    self._disk_collection_interval = disk_secs

    # The history is sized against the shorter of the two intervals.
    shortest_interval = min(process_secs, disk_secs)
    self._history = history_provider.provides(history_time, shortest_interval)
    self._kill_signal = threading.Event()

    thread_name = '%s[%s]' % (self.__class__.__name__, task_id)
    ExceptionalThread.__init__(self, name=thread_name)
    self.daemon = True
Esempio n. 4
0
 def __init__(self,
              task_id,
              task_monitor,
              process_collector=ProcessTreeCollector,
              disk_collector=DiskCollector,
              process_collection_interval=Amount(20, Time.SECONDS),
              disk_collection_interval=Amount(1, Time.MINUTES),
              history_time=Amount(1, Time.HOURS)):
   """Prepare resource-collection state and a bounded history for one task.

   task_id: identifier of the task being monitored
   task_monitor: TaskMonitor object specifying the task whose resources should be monitored
   process_collector: stored as the factory for per-process collectors
   disk_collector: stored as the disk collector class; no collector is created here
   process_collection_interval: converted to seconds with as_(Time.SECONDS)
   disk_collection_interval: converted to seconds with as_(Time.SECONDS)
   history_time: span of history to keep, divided by the smaller interval

   Raises ValueError when the computed history length exceeds self.MAX_HISTORY.
   """
   self._task_id = task_id
   self._task_monitor = task_monitor  # exposes PIDs, sandbox
   log.debug('Initialising resource collection for task %s' % task_id)

   # ProcessStatus => ProcessTreeCollector
   self._process_collectors = {}
   self._process_collector_factory = process_collector
   self._disk_collector_class = disk_collector
   self._disk_collector = None

   process_secs = process_collection_interval.as_(Time.SECONDS)
   disk_secs = disk_collection_interval.as_(Time.SECONDS)
   self._process_collection_interval = process_secs
   self._disk_collection_interval = disk_secs

   # Number of shortest-interval periods that fit into the requested history span.
   shortest_interval = min(process_secs, disk_secs)
   history_length = int(history_time.as_(Time.SECONDS) / shortest_interval)
   if history_length > self.MAX_HISTORY:
     raise ValueError("Requested history length too large")

   log.debug("Initialising ResourceHistory of length %s" % history_length)
   self._history = ResourceHistory(history_length)
   self._kill_signal = threading.Event()
   ExceptionalThread.__init__(self)
   self.daemon = True
 def __init__(self,
              checkpoint_root,
              verbose=True,
              task_killer=TaskKiller,
              executor_detector=ExecutorDetector,
              task_garbage_collector=TaskGarbageCollector,
              clock=time):
   """Initialise both base classes and the garbage-collection bookkeeping.

   checkpoint_root: stored, and passed as root= to task_garbage_collector
   verbose: accepted but not referenced in this constructor
   task_killer: stored for later use
   executor_detector: called with no arguments to build the detector
   task_garbage_collector: called with root=checkpoint_root to build the collector
   clock: stored as the time source (defaults to the time module)
   """
   ExecutorBase.__init__(self)
   ExceptionalThread.__init__(self)
   self.daemon = True

   # Injected collaborators and configuration.
   self._checkpoint_root = checkpoint_root
   self._clock = clock
   self._task_killer = task_killer
   self._detector = executor_detector()
   self._collector = task_garbage_collector(root=checkpoint_root)

   self._stop_event = threading.Event()
   # mapping of task_id => (TaskInfo, AdjustRetainedTasks), in the order in
   # which they were received via a launchTask.
   self._gc_task_queue = OrderedDict()

   # cache the ExecutorDriver provided by the slave, so we can use it out
   # of band from slave-initiated callbacks.  This should be supplied by
   # ExecutorBase.registered() when the executor first registers with the
   # slave.
   self._driver = None
   self._slave_id = None  # cache the slave ID provided by the slave
   self._task_id = None  # the task_id currently being executed by the ThermosGCExecutor, if any
   self._start_time = None  # the start time of a task currently being executed, if any

   dropped_gauge = AtomicGauge('dropped_tasks')
   self._dropped_tasks = dropped_gauge
   self.metrics.register(dropped_gauge)
Esempio n. 6
0
 def __init__(self,
              task_id,
              task_monitor,
              process_collector=ProcessTreeCollector,
              disk_collector=DiskCollector,
              process_collection_interval=Amount(20, Time.SECONDS),
              disk_collection_interval=Amount(1, Time.MINUTES),
              history_time=Amount(1, Time.HOURS)):
   """Prepare resource-collection state and a bounded history for one task.

   task_id: identifier of the task; also embedded in the thread name
   task_monitor: TaskMonitor object specifying the task whose resources should be monitored
   process_collector: stored as the factory for per-process collectors
   disk_collector: stored as the disk collector class; no collector is created here
   process_collection_interval: converted to seconds with as_(Time.SECONDS)
   disk_collection_interval: converted to seconds with as_(Time.SECONDS)
   history_time: span of history to keep, divided by the smaller interval

   Raises ValueError when the computed history length exceeds self.MAX_HISTORY.
   """
   self._task_id = task_id
   self._task_monitor = task_monitor  # exposes PIDs, sandbox
   log.debug('Initialising resource collection for task %s' % task_id)

   # ProcessStatus => ProcessTreeCollector
   self._process_collectors = {}
   self._process_collector_factory = process_collector
   self._disk_collector_class = disk_collector
   self._disk_collector = None

   process_secs = process_collection_interval.as_(Time.SECONDS)
   disk_secs = disk_collection_interval.as_(Time.SECONDS)
   self._process_collection_interval = process_secs
   self._disk_collection_interval = disk_secs

   # Number of shortest-interval periods that fit into the requested history span.
   shortest_interval = min(process_secs, disk_secs)
   history_length = int(history_time.as_(Time.SECONDS) / shortest_interval)
   if history_length > self.MAX_HISTORY:
     raise ValueError("Requested history length too large")

   log.debug("Initialising ResourceHistory of length %s" % history_length)
   self._history = ResourceHistory(history_length)
   self._kill_signal = threading.Event()

   thread_name = '%s[%s]' % (self.__class__.__name__, task_id)
   ExceptionalThread.__init__(self, name=thread_name)
   self.daemon = True
Esempio n. 7
0
 def __init__(self,
              checkpoint_root,
              verbose=True,
              task_killer=TaskKiller,
              executor_detector=ExecutorDetector,
              task_garbage_collector=TaskGarbageCollector,
              clock=time):
     """Initialise both base classes and the garbage-collection bookkeeping.

     checkpoint_root: stored, and passed as root= to task_garbage_collector
     verbose: accepted but not referenced in this constructor
     task_killer: stored for later use
     executor_detector: called with no arguments to build the detector
     task_garbage_collector: called with root=checkpoint_root to build the collector
     clock: stored as the time source (defaults to the time module)
     """
     ExecutorBase.__init__(self)
     ExceptionalThread.__init__(self)
     self.daemon = True

     # Injected collaborators and configuration.
     self._checkpoint_root = checkpoint_root
     self._clock = clock
     self._task_killer = task_killer
     self._detector = executor_detector()
     self._collector = task_garbage_collector(root=checkpoint_root)

     self._stop_event = threading.Event()
     # mapping of task_id => (TaskInfo, AdjustRetainedTasks), in the order in
     # which they were received via a launchTask.
     self._gc_task_queue = OrderedDict()

     # cache the ExecutorDriver provided by the slave, so we can use it out
     # of band from slave-initiated callbacks.  This should be supplied by
     # ExecutorBase.registered() when the executor first registers with the
     # slave.
     self._driver = None
     self._slave_id = None  # cache the slave ID provided by the slave
     self._task_id = None  # the task_id currently being executed by the ThermosGCExecutor, if any
     self._start_time = None  # the start time of a task currently being executed, if any

     dropped_gauge = AtomicGauge('dropped_tasks')
     self._dropped_tasks = dropped_gauge
     self.metrics.register(dropped_gauge)
Esempio n. 8
0
 def __init__(self, clock=time):
   """Register the orphan and mutator gauges, then initialise the daemon thread.

   clock: stored as the time source (defaults to the time module)
   """
   self._clock = clock
   self._self = psutil.Process(os.getpid())

   # The gauge reads self._orphan lazily, each time it is sampled.
   self._orphan = False
   self.metrics.register(LambdaGauge('orphan', lambda: int(self._orphan)))

   # One MutatorGauge per name in MUTATOR_METRICS, each starting at 0.
   self._metrics = {name: MutatorGauge(name, 0) for name in self.MUTATOR_METRICS}
   for gauge in self._metrics.values():
     self.metrics.register(gauge)

   ExceptionalThread.__init__(self)
   self.daemon = True
Esempio n. 9
0
def main(_, options):
  """Start the observer, serve its HTTP endpoints on a daemon thread, then block.

  options: parsed options; passed to initialize() and read for `ip` and `port`.
  """
  observer = initialize(options)
  observer.start()
  http_server = configure_server(observer)

  def serve():
    http_server.run(options.ip, options.port, 'cherrypy')

  server_thread = ExceptionalThread(target=serve)
  server_thread.daemon = True
  server_thread.start()

  sleep_forever()
Esempio n. 10
0
def main(_, options):
  """Start the observer, serve its HTTP endpoints on a daemon thread, then block.

  options: parsed options; passed to initialize() and read for `port`.
  The server always binds to 0.0.0.0.
  """
  observer = initialize(options)
  observer.start()
  http_server = configure_server(observer)

  def serve():
    http_server.run('0.0.0.0', options.port, 'cherrypy')

  server_thread = ExceptionalThread(target=serve)
  server_thread.daemon = True
  server_thread.start()

  sleep_forever()
Esempio n. 11
0
  def main(_, opts):
    """Observe tasks under opts.root and serve the observer over HTTP, then block.

    opts: parsed options providing `root` and `port`. The server binds to 0.0.0.0.
    """
    observer = TaskObserver(FixedPathDetector(opts.root))
    observer.start()
    http_server = configure_server(observer)

    def serve():
      http_server.run('0.0.0.0', opts.port, 'cherrypy')

    server_thread = ExceptionalThread(target=serve)
    server_thread.daemon = True
    server_thread.start()

    sleep_forever()
Esempio n. 12
0
 def __init__(self, root, resource_monitor_class=TaskResourceMonitor):
   """Set up path and task detection plus empty task tables for the observer.

   root: passed to TaskPath(root=...) and TaskDetector(...)
   resource_monitor_class: must be a subclass of ResourceMonitorBase; stored for later use

   Raises ValueError when resource_monitor_class is not a ResourceMonitorBase subclass.
   """
   self._pathspec = TaskPath(root=root)
   self._detector = TaskDetector(root)

   monitor_is_valid = issubclass(resource_monitor_class, ResourceMonitorBase)
   if not monitor_is_valid:
     raise ValueError("resource monitor class must implement ResourceMonitorBase!")
   self._resource_monitor = resource_monitor_class

   self._active_tasks = dict()    # task_id => ActiveObservedTask
   self._finished_tasks = dict()  # task_id => FinishedObservedTask
   self._stop_event = threading.Event()

   ExceptionalThread.__init__(self)
   Lockable.__init__(self)
   self.daemon = True
Esempio n. 13
0
    def main(_, opts):
        """Observe tasks under opts.root and serve the observer over HTTP, then block.

        opts: parsed options providing `root` and `port`. The server binds to 0.0.0.0.
        """
        observer = TaskObserver(FixedPathDetector(opts.root))
        observer.start()
        http_server = configure_server(observer)

        def serve():
            http_server.run('0.0.0.0', opts.port, 'cherrypy')

        server_thread = ExceptionalThread(target=serve)
        server_thread.daemon = True
        server_thread.start()

        sleep_forever()
Esempio n. 14
0
 def __init__(self, root, resource_monitor_class=TaskResourceMonitor):
   """Set up path and task detection plus empty task tables for the observer.

   root: passed to TaskPath(root=...) and TaskDetector(...)
   resource_monitor_class: must be a subclass of ResourceMonitorBase; stored for later use

   Raises ValueError when resource_monitor_class is not a ResourceMonitorBase subclass.
   """
   self._pathspec = TaskPath(root=root)
   self._detector = TaskDetector(root)

   monitor_is_valid = issubclass(resource_monitor_class, ResourceMonitorBase)
   if not monitor_is_valid:
     raise ValueError("resource monitor class must implement ResourceMonitorBase!")
   self._resource_monitor = resource_monitor_class

   self._active_tasks = dict()    # task_id => ActiveObservedTask
   self._finished_tasks = dict()  # task_id => FinishedObservedTask
   self._stop_event = threading.Event()

   ExceptionalThread.__init__(self)
   Lockable.__init__(self)
   self.daemon = True
Esempio n. 15
0
    def main(args, opts):
        """Build a RedirServer from opts, run it on a daemon thread, then wait.

        args: not used by this function
        opts: parsed options providing zk_basepath, subdomain, base_domain, listen and port
        """
        server = RedirServer(opts.zk_basepath, opts.subdomain, opts.base_domain)

        def serve():
            server.run(opts.listen, opts.port, server='cherrypy')

        worker = ExceptionalThread(target=serve)
        worker.daemon = True
        worker.start()

        wait_forever()
Esempio n. 16
0
def main(_, options):
  """Observe tasks found by a chained path detector and serve them over HTTP, then block.

  options: parsed options providing `root`, `mesos_root` and `port`.
  The server binds to 0.0.0.0.
  """
  fixed_detector = FixedPathDetector(options.root)
  mesos_detector = MesosPathDetector(options.mesos_root)
  observer = TaskObserver(ChainedPathDetector(fixed_detector, mesos_detector))
  observer.start()
  http_server = configure_server(observer)

  def serve():
    http_server.run('0.0.0.0', options.port, 'cherrypy')

  server_thread = ExceptionalThread(target=serve)
  server_thread.daemon = True
  server_thread.start()

  sleep_forever()
Esempio n. 17
0
def main(_, options):
    """Observe tasks found by a chained path detector and serve them over HTTP, then block.

    options: parsed options providing `root`, `mesos_root` and `port`.
    The server binds to 0.0.0.0.
    """
    fixed_detector = FixedPathDetector(options.root)
    mesos_detector = MesosPathDetector(options.mesos_root)
    observer = TaskObserver(ChainedPathDetector(fixed_detector, mesos_detector))
    observer.start()
    http_server = configure_server(observer)

    def serve():
        http_server.run('0.0.0.0', options.port, 'cherrypy')

    server_thread = ExceptionalThread(target=serve)
    server_thread.daemon = True
    server_thread.start()

    sleep_forever()
Esempio n. 18
0
def test_quitquitquit():
    """quitquitquit() from another thread ends a wait_forever() main with exit code 0."""

    def main():
        app.wait_forever()

    def quit_after_delay():
        # Give app.main() time to enter wait_forever() before asking it to quit.
        time.sleep(0.5)
        app.quitquitquit()

    quitter = ExceptionalThread(target=quit_after_delay)
    quitter.start()

    # `app` is bound after the thread starts; both closures look it up when called.
    app = TestApplication(main)
    app.main()

    assert app.exited_rc == 0
Esempio n. 19
0
 def __init__(self, clock=time):
     """Record process and version information, register gauges, and initialise the daemon thread.

     clock: stored as the time source (defaults to the time module)

     The version is looked up from the binary at <cwd>/<cmdline[1]> of the
     current process when the psutil Process object offers `getcwd`. It
     falls back to "UNKNOWN" when `getcwd` is missing, when the command line
     has fewer than two entries, or when psutil reports an error.
     """
     self._clock = clock
     self._self = psutil.Process(os.getpid())

     self._version = "UNKNOWN"
     if hasattr(self._self, "getcwd"):
         try:
             binary_path = os.path.join(self._self.getcwd(), self._self.cmdline[1])
             self._version = self.get_release_from_binary(binary_path)
         except (IndexError, psutil.Error):
             # A short command line or a psutil failure must not abort
             # construction; keep the "UNKNOWN" placeholder instead.
             self._version = "UNKNOWN"
     self.metrics.register(NamedGauge("version", self._version))

     # The gauge reads self._orphan lazily, each time it is sampled.
     self._orphan = False
     self.metrics.register(LambdaGauge("orphan", lambda: int(self._orphan)))

     # One MutatorGauge per name in MUTATOR_METRICS, each starting at 0.
     self._metrics = dict((metric, MutatorGauge(metric, 0)) for metric in self.MUTATOR_METRICS)
     for metric in self._metrics.values():
         self.metrics.register(metric)

     ExceptionalThread.__init__(self)
     self.daemon = True
Esempio n. 20
0
def test_quitquitquit():
  """quitquitquit() from another thread ends a wait_forever() main with exit code 0."""

  def main():
    app.wait_forever()

  def quit_after_delay():
    # Give app.main() time to enter wait_forever() before asking it to quit.
    time.sleep(0.5)
    app.quitquitquit()

  quitter = ExceptionalThread(target=quit_after_delay)
  quitter.start()

  # `app` is bound after the thread starts; both closures look it up when called.
  app = TestApplication(main)
  app.main()

  assert app.exited_rc == 0
Esempio n. 21
0
 def __init__(self, clock=time):
   """Record process and version information, register gauges, and initialise the daemon thread.

   clock: stored as the time source (defaults to the time module)

   The version is looked up from the binary at <cwd>/<cmdline()[1]> of the
   current process; an IndexError or psutil.Error yields 'UNKNOWN' instead.
   """
   self._clock = clock
   process = psutil.Process(os.getpid())
   self._self = process

   try:
     binary_path = os.path.join(process.cwd(), process.cmdline()[1])
     self._version = self.get_release_from_binary(binary_path)
   except (IndexError, psutil.Error):
     self._version = 'UNKNOWN'
   self.metrics.register(NamedGauge('version', self._version))

   # The gauge reads self._orphan lazily, each time it is sampled.
   self._orphan = False
   self.metrics.register(LambdaGauge('orphan', lambda: int(self._orphan)))

   # One MutatorGauge per name in MUTATOR_METRICS, each starting at 0.
   self._metrics = {name: MutatorGauge(name, 0) for name in self.MUTATOR_METRICS}
   for gauge in self._metrics.values():
     self.metrics.register(gauge)

   ExceptionalThread.__init__(self)
   self.daemon = True
Esempio n. 22
0
 def __init__(self, clock=time):
   """Record process and version information, register gauges, and initialise the daemon thread.

   clock: stored as the time source (defaults to the time module)

   The version is looked up from the binary at <cwd>/<cmdline()[1]> of the
   current process; an IndexError or psutil.Error yields 'UNKNOWN' instead.
   """
   self._clock = clock
   process = psutil.Process(os.getpid())
   self._self = process

   try:
     binary_path = os.path.join(process.cwd(), process.cmdline()[1])
     self._version = self.get_release_from_binary(binary_path)
   except (IndexError, psutil.Error):
     self._version = 'UNKNOWN'
   self.metrics.register(NamedGauge('version', self._version))

   # The gauge reads self._orphan lazily, each time it is sampled.
   self._orphan = False
   self.metrics.register(LambdaGauge('orphan', lambda: int(self._orphan)))

   # One MutatorGauge per name in MUTATOR_METRICS, each starting at 0.
   self._metrics = {name: MutatorGauge(name, 0) for name in self.MUTATOR_METRICS}
   for gauge in self._metrics.values():
     self.metrics.register(gauge)

   ExceptionalThread.__init__(self)
   self.daemon = True
Esempio n. 23
0
 def __init__(self,
              path_detector,
              interval=POLLING_INTERVAL,
              task_process_collection_interval=TaskResourceMonitor.PROCESS_COLLECTION_INTERVAL,
              task_disk_collection_interval=TaskResourceMonitor.DISK_COLLECTION_INTERVAL):
   """Wire the task detector to this observer's callbacks and reset the task tables.

   path_detector: passed to ObserverTaskDetector as its first argument
   interval: stored as the observer's interval
   task_process_collection_interval: stored for later use
   task_disk_collection_interval: stored for later use
   """
   # The detector receives the active / finished / removed callbacks, in that order.
   callbacks = (self.__on_active, self.__on_finished, self.__on_removed)
   self._detector = ObserverTaskDetector(path_detector, *callbacks)

   self._interval = interval
   self._task_process_collection_interval = task_process_collection_interval
   self._task_disk_collection_interval = task_disk_collection_interval

   self._active_tasks = dict()    # task_id => ActiveObservedTask
   self._finished_tasks = dict()  # task_id => FinishedObservedTask
   self._stop_event = threading.Event()

   ExceptionalThread.__init__(self)
   Lockable.__init__(self)
   self.daemon = True
Esempio n. 24
0
 def __init__(self,
              path_detector,
              resource_monitor_class=TaskResourceMonitor,
              interval=POLLING_INTERVAL):
   """Wire the task detector to this observer's callbacks and reset the task tables.

   path_detector: passed to ObserverTaskDetector as its first argument
   resource_monitor_class: must be a subclass of ResourceMonitorBase; stored for later use
   interval: stored as the observer's interval

   Raises ValueError when resource_monitor_class is not a ResourceMonitorBase subclass.
   """
   # The detector receives the active / finished / removed callbacks, in that order.
   callbacks = (self.__on_active, self.__on_finished, self.__on_removed)
   self._detector = ObserverTaskDetector(path_detector, *callbacks)

   monitor_is_valid = issubclass(resource_monitor_class, ResourceMonitorBase)
   if not monitor_is_valid:
     raise ValueError("resource monitor class must implement ResourceMonitorBase!")
   self._resource_monitor_class = resource_monitor_class
   self._interval = interval

   self._active_tasks = dict()    # task_id => ActiveObservedTask
   self._finished_tasks = dict()  # task_id => FinishedObservedTask
   self._stop_event = threading.Event()

   ExceptionalThread.__init__(self)
   Lockable.__init__(self)
   self.daemon = True
Esempio n. 25
0
    def main(_, opts):
        """Serve an S3 bucket over HTTP on a daemon thread and wait.

        opts: parsed options providing bucket, prefix, access_key_id, secret_key,
        listen and port. A missing bucket is logged as an error and app.help()
        is called before continuing.
        """
        if not opts.bucket:
            log.error('--bucket is required.')
            app.help()

        server = S3Web(
            bucket=opts.bucket,
            prefix=opts.prefix,
            access_key_id=opts.access_key_id,
            secret_key=opts.secret_key,
        )

        def serve():
            server.run(opts.listen, opts.port, server='cherrypy')

        worker = ExceptionalThread(target=serve)
        worker.daemon = True
        worker.start()

        log.info('Ready.')
        app.wait_forever()
Esempio n. 26
0
    def __init__(self,
                 path_detector,
                 interval=POLLING_INTERVAL,
                 task_process_collection_interval=TaskResourceMonitor.PROCESS_COLLECTION_INTERVAL,
                 enable_mesos_disk_collector=False,
                 disk_collector_settings=DiskCollectorSettings()):
        """Wire the task detector to this observer's callbacks and reset the task tables.

        path_detector: passed to ObserverTaskDetector as its first argument
        interval: stored as the observer's interval
        task_process_collection_interval: stored for later use
        enable_mesos_disk_collector: stored flag, False by default
        disk_collector_settings: stored for later use
        """
        # The detector receives the active / finished / removed callbacks, in that order.
        callbacks = (self.__on_active, self.__on_finished, self.__on_removed)
        self._detector = ObserverTaskDetector(path_detector, *callbacks)

        self._interval = interval
        self._task_process_collection_interval = task_process_collection_interval
        self._enable_mesos_disk_collector = enable_mesos_disk_collector
        self._disk_collector_settings = disk_collector_settings

        self._active_tasks = dict()  # task_id => ActiveObservedTask
        self._finished_tasks = dict()  # task_id => FinishedObservedTask
        self._stop_event = threading.Event()

        ExceptionalThread.__init__(self)
        Lockable.__init__(self)
        self.daemon = True
Esempio n. 27
0
    def main(args, opts):
        """Connect to ZooKeeper, run a RedirServer on a daemon thread, and join it.

        args: not used by this function
        opts: parsed options providing zk, zk_basepath, scheduler_url, subdomain,
        base_domain, listen and port
        """
        zk_client = kazoo_client.TwitterKazooClient(opts.zk)
        zk_client.start()

        server = RedirServer(
            zk_client,
            opts.zk_basepath,
            opts.scheduler_url,
            opts.subdomain,
            opts.base_domain,
        )

        def serve():
            server.run(opts.listen, opts.port, server='cherrypy')

        worker = ExceptionalThread(target=serve)
        worker.daemon = True
        worker.start()

        # Wait forever, basically.
        worker.join()
Esempio n. 28
0
    def main(args, opts):
        """Serve diagnostics and task-observer routes over HTTP until the server thread ends.

        args: leftover positional arguments; any value here is reported as an
        error, help is printed and the process exits with status 1
        opts: parsed options providing `root` and `port`
        """
        if args:
            unexpected = " ".join(args)
            print("ERROR: unrecognized arguments: %s\n" % unexpected, file=sys.stderr)
            app.help()
            sys.exit(1)

        http_server = HttpServer()
        http_server.mount_routes(DiagnosticsEndpoints())

        observer = TaskObserver(opts.root)
        observer.start()
        http_server.mount_routes(BottleObserver(observer))

        def serve():
            http_server.run('0.0.0.0', opts.port, 'cherrypy')

        server_thread = ExceptionalThread(target=serve)
        server_thread.daemon = True
        server_thread.start()
        server_thread.join()
Esempio n. 29
0
def main(args, opts):
  """Serve diagnostics and task-observer routes over HTTP until the server thread ends.

  args: leftover positional arguments; any value here is reported as an
  error, help is printed and the process exits with status 1
  opts: parsed options providing `root` and `port`
  """
  if args:
    unexpected = " ".join(args)
    print("ERROR: unrecognized arguments: %s\n" % unexpected, file=sys.stderr)
    app.help()
    sys.exit(1)

  http_server = HttpServer()
  http_server.mount_routes(DiagnosticsEndpoints())

  observer = TaskObserver(opts.root)
  observer.start()
  http_server.mount_routes(BottleObserver(observer))

  def serve():
    http_server.run('0.0.0.0', opts.port, 'cherrypy')

  server_thread = ExceptionalThread(target=serve)
  server_thread.daemon = True
  server_thread.start()
  server_thread.join()
Esempio n. 30
0
    def main(args, options):
        """Validate options, restore or create scheduler state, and run the Mysos scheduler.

        Steps, in order: check the required options; parse the timeouts and
        the ZooKeeper URL; unpack web assets under <work_dir>/web; optionally
        load framework credentials from a YAML file; start a Kazoo client;
        choose a ZooKeeper or local-disk state provider; load the scheduler
        state or create and persist a fresh one; start the Mesos scheduler
        driver; serve the HTTP API on a daemon thread bound to 0.0.0.0; wait
        for `scheduler.stopped` or a KeyboardInterrupt; call app.shutdown(1).

        args: not used by this function
        options: parsed command-line options read throughout
        """
        log.info("Options in use: %s", options)

        # Required options. presumably app.error() terminates the process, since
        # later code uses these values unconditionally -- TODO confirm.
        # NOTE(review): the message says --port but the option checked is
        # `api_port` -- confirm the flag name.
        if not options.api_port:
            app.error('Must specify --port')

        if not options.mesos_master:
            app.error('Must specify --mesos_master')

        if not options.framework_user:
            app.error('Must specify --framework_user')

        if not options.executor_uri:
            app.error('Must specify --executor_uri')

        if not options.executor_cmd:
            app.error('Must specify --executor_cmd')

        if not options.zk_url:
            app.error('Must specify --zk_url')

        if not options.admin_keypath:
            app.error('Must specify --admin_keypath')

        # NOTE(review): `e.message` (used here and below) is not available on
        # Python 3 exceptions -- confirm the target interpreter.
        try:
            election_timeout = parse_time(options.election_timeout)
            framework_failover_timeout = parse_time(
                options.framework_failover_timeout)
        except InvalidTime as e:
            app.error(e.message)

        try:
            _, zk_servers, zk_root = zookeeper.parse(options.zk_url)
        except Exception as e:
            app.error("Invalid --zk_url: %s" % e.message)

        web_assets_dir = os.path.join(options.work_dir, "web")
        pkgutil.unpack_assets(web_assets_dir, MYSOS_MODULE, ASSET_RELPATH)
        # NOTE(review): logs work_dir although assets are unpacked to
        # web_assets_dir -- confirm which path is intended.
        log.info("Extracted web assets into %s" % options.work_dir)

        # Optional framework credentials; both stay None when no file is given.
        fw_principal = None
        fw_secret = None
        if options.framework_authentication_file:
            try:
                with open(options.framework_authentication_file, "r") as f:
                    # NOTE(review): yaml.load without an explicit safe loader can
                    # construct arbitrary Python objects on older PyYAML releases;
                    # consider yaml.safe_load if this file may be untrusted.
                    cred = yaml.load(f)
                fw_principal = cred["principal"]
                fw_secret = cred["secret"]
                log.info(
                    "Loaded credential (principal=%s) for framework authentication"
                    % fw_principal)
            except IOError as e:
                app.error(
                    "Unable to read the framework authentication key file: %s"
                    % e)
            except (KeyError, yaml.YAMLError) as e:
                app.error(
                    "Invalid framework authentication key file format %s" % e)

        log.info("Starting Mysos scheduler")

        # The Kazoo client is started regardless of the chosen state storage;
        # it is also handed to MysosScheduler below.
        kazoo = KazooClient(zk_servers)
        kazoo.start()

        if options.state_storage == 'zk':
            log.info("Using ZooKeeper (path: %s) for state storage" % zk_root)
            state_provider = ZooKeeperStateProvider(kazoo, zk_root)
        else:
            log.info("Using local disk for state storage")
            state_provider = LocalStateProvider(options.work_dir)

        try:
            state = state_provider.load_scheduler_state()
        except StateProvider.Error as e:
            app.error(e.message)

        if state:
            # Reuse the FrameworkInfo stored with the restored state.
            log.info("Successfully restored scheduler state")
            framework_info = state.framework_info
            if framework_info.HasField('id'):
                log.info("Recovered scheduler's FrameworkID is %s" %
                         framework_info.id.value)
        else:
            # No saved state: build a new FrameworkInfo and persist it at once.
            log.info("No scheduler state to restore")
            framework_info = FrameworkInfo(
                user=options.framework_user,
                name=FRAMEWORK_NAME,
                checkpoint=True,
                failover_timeout=framework_failover_timeout.as_(Time.SECONDS),
                role=options.framework_role)
            if fw_principal:
                framework_info.principal = fw_principal
            state = Scheduler(framework_info)
            state_provider.dump_scheduler_state(state)

        scheduler = MysosScheduler(state,
                                   state_provider,
                                   options.framework_user,
                                   options.executor_uri,
                                   options.executor_cmd,
                                   kazoo,
                                   options.zk_url,
                                   election_timeout,
                                   options.admin_keypath,
                                   installer_args=options.installer_args,
                                   backup_store_args=options.backup_store_args,
                                   executor_environ=options.executor_environ,
                                   framework_role=options.framework_role)

        # The credential is passed to the driver only when both principal and
        # secret were loaded.
        if fw_principal and fw_secret:
            cred = Credential(principal=fw_principal, secret=fw_secret)
            scheduler_driver = mesos.native.MesosSchedulerDriver(
                scheduler, framework_info, options.mesos_master, cred)
        else:
            scheduler_driver = mesos.native.MesosSchedulerDriver(
                scheduler, framework_info, options.mesos_master)

        scheduler_driver.start()

        # Serve the HTTP API on a daemon thread, bound to all interfaces.
        server = HttpServer()
        server.mount_routes(MysosServer(scheduler, web_assets_dir))

        et = ExceptionalThread(target=server.run,
                               args=('0.0.0.0', options.api_port, 'cherrypy'))
        et.daemon = True
        et.start()

        try:
            # Wait for the scheduler to stop.
            # The use of 'stopped' event instead of scheduler_driver.join() is necessary to stop the
            # process with SIGINT.
            while not scheduler.stopped.wait(timeout=0.5):
                pass
        except KeyboardInterrupt:
            log.info('Interrupted, exiting.')
        else:
            log.info('Scheduler exited.')

        app.shutdown(
            1
        )  # Mysos scheduler is supposed to be long-running thus the use of exit status 1.
Esempio n. 31
0
 def __init__(self, period, clock):
     """Store the period and clock, then initialise the underlying thread as a daemon.

     period: stored unchanged on the instance
     clock: stored unchanged on the instance
     """
     # NOTE(review): if ExceptionalThread derives from threading.Thread, the
     # attribute name `_stop` may collide with a Thread internal of the same
     # name on Python 3 -- confirm against the target interpreter.
     self._stop = threading.Event()
     self._clock = clock
     self._period = period

     ExceptionalThread.__init__(self)
     self.daemon = True
Esempio n. 32
0
 def start(self):
   """Start this thread by delegating directly to ExceptionalThread.start()."""
   ExceptionalThread.start(self)
Esempio n. 33
0
 def __init__(self, period, clock):
   """Store the period and clock, then initialise the underlying thread as a daemon.

   period: stored unchanged on the instance
   clock: stored unchanged on the instance
   """
   # NOTE(review): if ExceptionalThread derives from threading.Thread, the
   # attribute name `_stop` may collide with a Thread internal of the same
   # name on Python 3 -- confirm against the target interpreter.
   self._stop = threading.Event()
   self._clock = clock
   self._period = period

   ExceptionalThread.__init__(self)
   self.daemon = True
Esempio n. 34
0
  def main(args, options):
    """Validate options, restore or create scheduler state, and run the Mysos scheduler.

    Steps, in order: check the required options; parse the timeouts and
    the ZooKeeper URL; unpack web assets under <work_dir>/web; optionally
    load framework credentials from a YAML file; start a Kazoo client;
    choose a ZooKeeper or local-disk state provider; load the scheduler
    state or create and persist a fresh one; start the Mesos scheduler
    driver; serve the HTTP API on a daemon thread bound to 0.0.0.0; wait
    for `scheduler.stopped` or a KeyboardInterrupt; call app.shutdown(1).

    args: not used by this function
    options: parsed command-line options read throughout
    """
    log.info("Options in use: %s", options)

    # Required options. presumably app.error() terminates the process, since
    # later code uses these values unconditionally -- TODO confirm.
    # NOTE(review): the message says --port but the option checked is
    # `api_port` -- confirm the flag name.
    if not options.api_port:
      app.error('Must specify --port')

    if not options.mesos_master:
      app.error('Must specify --mesos_master')

    if not options.framework_user:
      app.error('Must specify --framework_user')

    if not options.executor_uri:
      app.error('Must specify --executor_uri')

    if not options.executor_cmd:
      app.error('Must specify --executor_cmd')

    if not options.zk_url:
      app.error('Must specify --zk_url')

    if not options.admin_keypath:
      app.error('Must specify --admin_keypath')

    # NOTE(review): `e.message` (used here and below) is not available on
    # Python 3 exceptions -- confirm the target interpreter.
    try:
      election_timeout = parse_time(options.election_timeout)
      framework_failover_timeout = parse_time(options.framework_failover_timeout)
    except InvalidTime as e:
      app.error(e.message)

    try:
      _, zk_servers, zk_root = zookeeper.parse(options.zk_url)
    except Exception as e:
      app.error("Invalid --zk_url: %s" % e.message)

    web_assets_dir = os.path.join(options.work_dir, "web")
    pkgutil.unpack_assets(web_assets_dir, MYSOS_MODULE, ASSET_RELPATH)
    # NOTE(review): logs work_dir although assets are unpacked to
    # web_assets_dir -- confirm which path is intended.
    log.info("Extracted web assets into %s" % options.work_dir)

    # Optional framework credentials; both stay None when no file is given.
    fw_principal = None
    fw_secret = None
    if options.framework_authentication_file:
      try:
        with open(options.framework_authentication_file, "r") as f:
          # NOTE(review): yaml.load without an explicit safe loader can
          # construct arbitrary Python objects on older PyYAML releases;
          # consider yaml.safe_load if this file may be untrusted.
          cred = yaml.load(f)
        fw_principal = cred["principal"]
        fw_secret = cred["secret"]
        log.info("Loaded credential (principal=%s) for framework authentication" % fw_principal)
      except IOError as e:
        app.error("Unable to read the framework authentication key file: %s" % e)
      except (KeyError, yaml.YAMLError) as e:
        app.error("Invalid framework authentication key file format %s" % e)

    log.info("Starting Mysos scheduler")

    # The Kazoo client is started regardless of the chosen state storage;
    # it is also handed to MysosScheduler below.
    kazoo = KazooClient(zk_servers)
    kazoo.start()

    if options.state_storage == 'zk':
      log.info("Using ZooKeeper (path: %s) for state storage" % zk_root)
      state_provider = ZooKeeperStateProvider(kazoo, zk_root)
    else:
      log.info("Using local disk for state storage")
      state_provider = LocalStateProvider(options.work_dir)

    try:
      state = state_provider.load_scheduler_state()
    except StateProvider.Error as e:
      app.error(e.message)

    if state:
      # Reuse the FrameworkInfo stored with the restored state.
      log.info("Successfully restored scheduler state")
      framework_info = state.framework_info
      if framework_info.HasField('id'):
        log.info("Recovered scheduler's FrameworkID is %s" % framework_info.id.value)
    else:
      # No saved state: build a new FrameworkInfo and persist it at once.
      log.info("No scheduler state to restore")
      framework_info = FrameworkInfo(
          user=options.framework_user,
          name=FRAMEWORK_NAME,
          checkpoint=True,
          failover_timeout=framework_failover_timeout.as_(Time.SECONDS),
          role=options.framework_role)
      if fw_principal:
        framework_info.principal = fw_principal
      state = Scheduler(framework_info)
      state_provider.dump_scheduler_state(state)

    scheduler = MysosScheduler(
        state,
        state_provider,
        options.framework_user,
        options.executor_uri,
        options.executor_cmd,
        kazoo,
        options.zk_url,
        election_timeout,
        options.admin_keypath,
        installer_args=options.installer_args,
        backup_store_args=options.backup_store_args,
        executor_environ=options.executor_environ,
        framework_role=options.framework_role)

    # The credential is passed to the driver only when both principal and
    # secret were loaded.
    if fw_principal and fw_secret:
      cred = Credential(principal=fw_principal, secret=fw_secret)
      scheduler_driver = mesos.native.MesosSchedulerDriver(
          scheduler,
          framework_info,
          options.mesos_master,
          cred)
    else:
      scheduler_driver = mesos.native.MesosSchedulerDriver(
          scheduler,
          framework_info,
          options.mesos_master)

    scheduler_driver.start()

    # Serve the HTTP API on a daemon thread, bound to all interfaces.
    server = HttpServer()
    server.mount_routes(MysosServer(scheduler, web_assets_dir))

    et = ExceptionalThread(
        target=server.run, args=('0.0.0.0', options.api_port, 'cherrypy'))
    et.daemon = True
    et.start()

    try:
      # Wait for the scheduler to stop.
      # The use of 'stopped' event instead of scheduler_driver.join() is necessary to stop the
      # process with SIGINT.
      while not scheduler.stopped.wait(timeout=0.5):
        pass
    except KeyboardInterrupt:
      log.info('Interrupted, exiting.')
    else:
      log.info('Scheduler exited.')

    app.shutdown(1)  # Mysos scheduler is supposed to be long-running thus the use of exit status 1.
Esempio n. 35
0
 def start(self):
     """Start this thread by delegating directly to ExceptionalThread.start()."""
     ExceptionalThread.start(self)
Esempio n. 36
0
 def start(self):
     """Call StatusChecker.start() and then ExceptionalThread.start() on this instance."""
     StatusChecker.start(self)
     ExceptionalThread.start(self)
 def start(self):
   """Call StatusChecker.start() and then ExceptionalThread.start() on this instance."""
   StatusChecker.start(self)
   ExceptionalThread.start(self)