class Command(BaseCommand):
    """Command that launches the Scale scheduler
    """

    help = 'Launches the Scale scheduler'

    def add_arguments(self, parser):
        """See :meth:`django.core.management.base.BaseCommand.add_arguments`.

        Registers the optional ``-m``/``--master`` argument, which defaults to ``settings.MESOS_MASTER``.

        :param parser: The argument parser that the command arguments are registered on
        """

        parser.add_argument('-m', '--master', action='store', default=settings.MESOS_MASTER,
                            help='The master to connect to')

    def handle(self, *args, **options):
        """See :meth:`django.core.management.base.BaseCommand.handle`.

        This method starts the scheduler.
        """

        # Register a listener to handle clean shutdowns
        signal.signal(signal.SIGTERM, self._onsigterm)

        # TODO: clean this up
        mesos_master = options.get('master')

        logger.info('Scale Scheduler %s', settings.VERSION)

        # SCHEDULER_ZK is an optional setting; when it is absent leader election is skipped
        scheduler_zk = getattr(settings, 'SCHEDULER_ZK', None)

        if scheduler_zk is not None:
            import socket
            from scheduler import cluster_utils
            my_id = socket.gethostname()
            # NOTE(review): presumably wait_for_leader invokes self.run_scheduler(mesos_master) once this host
            # becomes the leader - confirm against scheduler.cluster_utils
            cluster_utils.wait_for_leader(scheduler_zk, my_id, self.run_scheduler, mesos_master)
        else:
            # leader election is disabled
            self.run_scheduler(mesos_master)

    def run_scheduler(self, mesos_master):
        """Creates the Scale scheduler and the Mesos driver, runs the driver until it finishes, performs a shutdown
        and then exits the process. This method never returns normally; it always ends with :func:`sys.exit`.

        The exit status is 0 only when the driver stopped normally and the shutdown was clean.

        :param mesos_master: The Mesos master to connect to
        """

        logger.info("I am the leader")
        self.scheduler = ScaleScheduler()

        framework = mesos_pb2.FrameworkInfo()
        framework.user = ''  # Have Mesos fill in the current user.
        framework.name = os.getenv('DCOS_PACKAGE_FRAMEWORK_NAME', 'Scale')
        webserver_address = os.getenv('SCALE_WEBSERVER_ADDRESS')

        if webserver_address:
            framework.webui_url = webserver_address

        logger.info('Connecting to Mesos master at %s', mesos_master)

        # TODO(vinod): Make checkpointing the default when it is default on the slave.
        if MESOS_CHECKPOINT:
            logger.info('Enabling checkpoint for the framework')
            framework.checkpoint = True

        if MESOS_AUTHENTICATE:
            logger.info('Enabling authentication for the framework')

            if not DEFAULT_PRINCIPLE:
                logger.error('Expecting authentication principal in the environment')
                sys.exit(1)

            if not DEFAULT_SECRET:
                logger.error('Expecting authentication secret in the environment')
                sys.exit(1)

            credential = mesos_pb2.Credential()
            credential.principal = DEFAULT_PRINCIPLE
            credential.secret = DEFAULT_SECRET

            self.driver = MesosSchedulerDriver(self.scheduler, framework, mesos_master, credential)
        else:
            self.driver = MesosSchedulerDriver(self.scheduler, framework, mesos_master)

        try:
            status = 0 if self.driver.run() == mesos_pb2.DRIVER_STOPPED else 1
        except:
            # Kept broad on purpose so that the shutdown below still runs whatever interrupted the driver
            status = 1
            logger.exception('Mesos Scheduler Driver returned an exception')

        # Perform a shut down and return any non-zero status. The method must be called (not merely referenced)
        # so that clean up actually runs and an integer status reaches sys.exit.
        shutdown_status = self._shutdown()
        status = status or shutdown_status

        logger.info('Exiting...')
        sys.exit(status)

    def _onsigterm(self, signum, _frame):
        """See signal callback registration: :py:func:`signal.signal`.

        This callback performs a clean shutdown when a TERM signal is received.
        """

        logger.info('Scheduler command terminated due to signal: %i', signum)
        self._shutdown()
        sys.exit(1)

    def _shutdown(self):
        """Performs any clean up required by this command.

        The scheduler and driver are only created in :meth:`run_scheduler`, so either may still be missing when a
        TERM signal arrives early; a missing component is skipped rather than reported as a failure.

        :returns: The exit status code based on whether the shutdown operation was clean with no exceptions.
        :rtype: int
        """

        status = 0

        try:
            scheduler = getattr(self, 'scheduler', None)
            if scheduler:
                scheduler.shutdown()
        except:
            logger.exception('Failed to properly shutdown Scale scheduler.')
            status = 1

        # The driver is stopped even when the scheduler shutdown failed
        try:
            driver = getattr(self, 'driver', None)
            if driver:
                driver.stop()
        except:
            logger.exception('Failed to properly stop Mesos driver.')
            status = 1

        return status
class Command(BaseCommand):
    """Command that launches the Scale scheduler
    """

    # Adds the optional -m/--master option, which defaults to settings.MESOS_MASTER
    option_list = BaseCommand.option_list + (
        make_option('-m', '--master', action='store', type='str', default=settings.MESOS_MASTER,
                    help=('The master to connect to')),
    )

    help = 'Launches the Scale scheduler'

    def handle(self, **options):
        """See :meth:`django.core.management.base.BaseCommand.handle`.

        This method starts the scheduler.
        """

        # Register a listener to handle clean shutdowns
        signal.signal(signal.SIGTERM, self._onsigterm)

        # TODO: clean this up
        mesos_master = options.get('master')

        logger.info('Scale Scheduler %s', settings.VERSION)

        # SCHEDULER_ZK is an optional setting; when it is absent leader election is skipped
        scheduler_zk = getattr(settings, 'SCHEDULER_ZK', None)

        if scheduler_zk is not None:
            import socket
            from scheduler import cluster_utils
            my_id = socket.gethostname()
            # NOTE(review): presumably wait_for_leader invokes self.run_scheduler(mesos_master) once this host
            # becomes the leader - confirm against scheduler.cluster_utils
            cluster_utils.wait_for_leader(scheduler_zk, my_id, self.run_scheduler, mesos_master)
        else:
            # leader election is disabled
            self.run_scheduler(mesos_master)

    def run_scheduler(self, mesos_master):
        """Creates the Scale scheduler and the Mesos driver, runs the driver until it finishes, performs a shutdown
        and then exits the process. This method never returns normally; it always ends with :func:`sys.exit`.

        The exit status is 0 only when the driver stopped normally and the shutdown was clean.

        :param mesos_master: The Mesos master to connect to
        """

        logger.info("I am the leader")
        self.scheduler = ScaleScheduler()

        framework = mesos_pb2.FrameworkInfo()
        framework.user = ''  # Have Mesos fill in the current user.
        framework.name = 'Scale'

        logger.info('Connecting to Mesos master at %s', mesos_master)

        # TODO(vinod): Make checkpointing the default when it is default on the slave.
        if MESOS_CHECKPOINT:
            logger.info('Enabling checkpoint for the framework')
            framework.checkpoint = True

        if MESOS_AUTHENTICATE:
            logger.info('Enabling authentication for the framework')

            if not DEFAULT_PRINCIPLE:
                logger.error('Expecting authentication principal in the environment')
                sys.exit(1)

            if not DEFAULT_SECRET:
                logger.error('Expecting authentication secret in the environment')
                sys.exit(1)

            credential = mesos_pb2.Credential()
            credential.principal = DEFAULT_PRINCIPLE
            credential.secret = DEFAULT_SECRET

            self.driver = MesosSchedulerDriver(self.scheduler, framework, mesos_master, credential)
        else:
            self.driver = MesosSchedulerDriver(self.scheduler, framework, mesos_master)

        try:
            status = 0 if self.driver.run() == mesos_pb2.DRIVER_STOPPED else 1
        except:
            # Kept broad on purpose so that the shutdown below still runs whatever interrupted the driver
            status = 1
            logger.exception('Mesos Scheduler Driver returned an exception')

        # Perform a shut down and return any non-zero status. The method must be called (not merely referenced)
        # so that clean up actually runs and an integer status reaches sys.exit.
        shutdown_status = self._shutdown()
        status = status or shutdown_status

        logger.info('Exiting...')
        sys.exit(status)

    def _onsigterm(self, signum, _frame):
        """See signal callback registration: :py:func:`signal.signal`.

        This callback performs a clean shutdown when a TERM signal is received.
        """

        logger.info('Scheduler command terminated due to signal: %i', signum)
        self._shutdown()
        sys.exit(1)

    def _shutdown(self):
        """Performs any clean up required by this command.

        The scheduler and driver are only created in :meth:`run_scheduler`, so either may still be missing when a
        TERM signal arrives early; a missing component is skipped rather than reported as a failure.

        :returns: The exit status code based on whether the shutdown operation was clean with no exceptions.
        :rtype: int
        """

        status = 0

        try:
            scheduler = getattr(self, 'scheduler', None)
            if scheduler:
                scheduler.shutdown()
        except:
            logger.exception('Failed to properly shutdown Scale scheduler.')
            status = 1

        # The driver is stopped even when the scheduler shutdown failed
        try:
            driver = getattr(self, 'driver', None)
            if driver:
                driver.stop()
        except:
            logger.exception('Failed to properly stop Mesos driver.')
            status = 1

        return status
class Command(BaseCommand):
    """Command that launches the Scale scheduler
    """

    help = 'Launches the Scale scheduler'

    def handle(self, *args, **options):
        """See :meth:`django.core.management.base.BaseCommand.handle`.

        This method starts the scheduler.
        """

        # Register a listener to handle clean shutdowns
        signal.signal(signal.SIGTERM, self._onsigterm)

        # Set up global shutdown
        global GLOBAL_SHUTDOWN
        GLOBAL_SHUTDOWN = self._shutdown

        logger.info('Scale Scheduler %s', settings.VERSION)

        self.run_scheduler(settings.MESOS_MASTER)

    def run_scheduler(self, mesos_master):
        """Creates and initializes the Scale scheduler, configures the Mesos client, runs the scheduler until it
        finishes, performs a shutdown and then exits the process. This method never returns normally; it always
        ends with :func:`sys.exit`.

        The exit status is 0 only when the scheduler run raised no exception and the shutdown was clean.

        :param mesos_master: The Mesos master URL to connect to
        """

        logger.info("Scale rising...")
        self.scheduler = ScaleScheduler()
        self.scheduler.initialize()
        scheduler_mgr.hostname = socket.getfqdn()

        logger.info('Connecting to Mesos master at %s:', mesos_master)

        # By default use ZK for master detection
        self.client = MesosClient(
            # Connect to the master that was passed in (and logged above) rather than re-reading the setting
            mesos_urls=[mesos_master],
            # We have to run tasks as root, so docker commands may be executed
            frameworkUser='******',
            frameworkName=settings.FRAMEWORK_NAME,
            frameworkHostname=scheduler_mgr.hostname,
            frameworkWebUI=settings.WEBSERVER_ADDRESS)

        if settings.SERVICE_SECRET:
            # We are in Enterprise mode and using service account
            self.client.set_service_account(json.loads(settings.SERVICE_SECRET))
        elif settings.PRINCIPAL and settings.SECRET:
            self.client.set_credentials(settings.PRINCIPAL, settings.SECRET)

        mesos_role = settings.MESOS_ROLE
        logger.info('Launching scheduler with role: %s', mesos_role)
        self.client.set_role(mesos_role)

        logger.info('Accepting offers from role: %s', settings.ACCEPTED_RESOURCE_ROLE)

        self.client.add_capability('GPU_RESOURCES')

        try:
            self.scheduler.run(self.client)
            status = 0
        except:
            # Kept broad on purpose so that the shutdown below still runs whatever interrupted the scheduler
            status = 1
            logger.exception('Mesos Scheduler Driver returned an exception')

        # Perform a shut down and return any non-zero status
        shutdown_status = self._shutdown()
        status = status or shutdown_status

        logger.info('Exiting...')
        sys.exit(status)

    def _onsigterm(self, signum, _frame):
        """See signal callback registration: :py:func:`signal.signal`.

        This callback performs a clean shutdown when a TERM signal is received.
        """

        logger.info('Scheduler command terminated due to signal: %i', signum)
        self._shutdown()
        sys.exit(1)

    def _shutdown(self):
        """Performs any clean up required by this command.

        The scheduler is only created in :meth:`run_scheduler`, so it may still be missing when a TERM signal
        arrives early; in that case there is nothing to shut down and the result is a clean status.

        :returns: The exit status code based on whether the shutdown operation was clean with no exceptions.
        :rtype: int
        """

        status = 0

        try:
            scheduler = getattr(self, 'scheduler', None)
            if scheduler:
                scheduler.shutdown()
        except:
            logger.exception('Failed to properly shutdown Scale scheduler.')
            status = 1

        return status
class Command(BaseCommand):
    '''Command that launches the Scale scheduler
    '''

    # Adds the optional -m/--master option, which defaults to settings.MESOS_MASTER
    option_list = BaseCommand.option_list + (
        make_option('-m', '--master', action='store', type='str', default=settings.MESOS_MASTER,
                    help=('The master to connect to')),
    )

    help = 'Launches the Scale scheduler'

    def handle(self, **options):
        '''See :meth:`django.core.management.base.BaseCommand.handle`.

        This method starts the scheduler. It builds the executor and framework descriptions, runs the Mesos driver
        until it finishes, performs a shutdown and then exits the process with :func:`sys.exit`. The exit status is
        0 only when the driver stopped normally and the shutdown was clean.
        '''

        # Register a listener to handle clean shutdowns
        signal.signal(signal.SIGTERM, self._onsigterm)

        # TODO: clean this up
        mesos_master = options.get('master')

        logger.info(u'Command starting: scale_scheduler')
        logger.info(u' - Master: %s', mesos_master)

        executor = mesos_pb2.ExecutorInfo()
        executor.executor_id.value = 'scale'
        executor.command.value = '%s %s scale_executor' % (settings.PYTHON_EXECUTABLE, settings.MANAGE_FILE)
        executor.name = 'Scale Executor (Python)'

        self.scheduler = ScaleScheduler(executor)

        framework = mesos_pb2.FrameworkInfo()
        framework.user = ''  # Have Mesos fill in the current user.
        framework.name = 'Scale Framework (Python)'

        # TODO(vinod): Make checkpointing the default when it is default on the slave.
        if MESOS_CHECKPOINT:
            logger.info('Enabling checkpoint for the framework')
            framework.checkpoint = True

        if MESOS_AUTHENTICATE:
            logger.info('Enabling authentication for the framework')

            if not DEFAULT_PRINCIPLE:
                logger.error('Expecting authentication principal in the environment')
                sys.exit(1)

            if not DEFAULT_SECRET:
                logger.error('Expecting authentication secret in the environment')
                sys.exit(1)

            credential = mesos_pb2.Credential()
            credential.principal = DEFAULT_PRINCIPLE
            credential.secret = DEFAULT_SECRET

            self.driver = MesosSchedulerDriver(self.scheduler, framework, mesos_master, credential)
        else:
            self.driver = MesosSchedulerDriver(self.scheduler, framework, mesos_master)

        status = 0 if self.driver.run() == mesos_pb2.DRIVER_STOPPED else 1

        # Perform any required clean up operations like stopping background threads. The shutdown is called
        # unconditionally: folding it into "status or self._shutdown()" would skip the clean up whenever the
        # driver had already reported a failure.
        shutdown_status = self._shutdown()
        status = status or shutdown_status

        logger.info(u'Command completed: scale_scheduler')
        sys.exit(status)

    def _onsigterm(self, signum, _frame):
        '''See signal callback registration: :py:func:`signal.signal`.

        This callback performs a clean shutdown when a TERM signal is received.
        '''

        logger.info(u'Scheduler command terminated due to signal: %i', signum)
        self._shutdown()
        sys.exit(1)

    def _shutdown(self):
        '''Performs any clean up required by this command.

        The scheduler and driver are only created part way through :meth:`handle`, so either may still be missing
        when a TERM signal arrives early; a missing component is skipped rather than reported as a failure.

        :returns: The exit status code based on whether the shutdown operation was clean with no exceptions.
        :rtype: int
        '''

        status = 0

        try:
            scheduler = getattr(self, 'scheduler', None)
            if scheduler:
                scheduler.shutdown()
        except:
            logger.exception('Failed to properly shutdown scale scheduler.')
            status = 1

        # The driver is stopped even when the scheduler shutdown failed
        try:
            driver = getattr(self, 'driver', None)
            if driver:
                driver.stop()
        except:
            logger.exception('Failed to properly stop Mesos driver.')
            status = 1

        return status
class Command(BaseCommand):
    '''Command that launches the Scale scheduler
    '''

    # Adds the optional -m/--master option, which defaults to settings.MESOS_MASTER
    option_list = BaseCommand.option_list + (
        make_option('-m', '--master', action='store', type='str', default=settings.MESOS_MASTER,
                    help=('The master to connect to')),
    )

    help = 'Launches the Scale scheduler'

    def handle(self, **options):
        '''See :meth:`django.core.management.base.BaseCommand.handle`.

        This method starts the scheduler.
        '''

        # Register a listener to handle clean shutdowns
        signal.signal(signal.SIGTERM, self._onsigterm)

        # TODO: clean this up
        mesos_master = options.get('master')

        logger.info(u'Command starting: scale_scheduler')
        logger.info(u' - Master: %s', mesos_master)

        executor = mesos_pb2.ExecutorInfo()
        executor.executor_id.value = 'scale'
        executor.command.value = '%s %s scale_executor' % (settings.PYTHON_EXECUTABLE, settings.MANAGE_FILE)
        executor.name = 'Scale Executor (Python)'

        # SCHEDULER_ZK is an optional setting; when it is absent leader election is skipped
        scheduler_zk = getattr(settings, 'SCHEDULER_ZK', None)

        if scheduler_zk is not None:
            import socket
            from scheduler import cluster_utils
            my_id = socket.gethostname()
            # NOTE(review): presumably wait_for_leader invokes self.run_scheduler(mesos_master, executor) once
            # this host becomes the leader - confirm against scheduler.cluster_utils
            cluster_utils.wait_for_leader(scheduler_zk, my_id, self.run_scheduler, mesos_master, executor)
        else:
            # leader election is disabled
            self.run_scheduler(mesos_master, executor)

    def run_scheduler(self, mesos_master, executor):
        '''Creates the Scale scheduler and the Mesos driver, runs the driver until it finishes, performs a shutdown
        and then exits the process. This method never returns normally; it always ends with :func:`sys.exit`.

        The exit status is 0 only when the driver stopped normally and the shutdown was clean.

        :param mesos_master: The Mesos master to connect to
        :param executor: The executor description that is handed to the Scale scheduler
        '''

        logger.info("I am the leader")
        self.scheduler = ScaleScheduler(executor)

        framework = mesos_pb2.FrameworkInfo()
        framework.user = ''  # Have Mesos fill in the current user.
        framework.name = 'Scale Framework (Python)'

        # TODO(vinod): Make checkpointing the default when it is default on the slave.
        if MESOS_CHECKPOINT:
            logger.info('Enabling checkpoint for the framework')
            framework.checkpoint = True

        if MESOS_AUTHENTICATE:
            logger.info('Enabling authentication for the framework')

            if not DEFAULT_PRINCIPLE:
                logger.error('Expecting authentication principal in the environment')
                sys.exit(1)

            if not DEFAULT_SECRET:
                logger.error('Expecting authentication secret in the environment')
                sys.exit(1)

            credential = mesos_pb2.Credential()
            credential.principal = DEFAULT_PRINCIPLE
            credential.secret = DEFAULT_SECRET

            self.driver = MesosSchedulerDriver(self.scheduler, framework, mesos_master, credential)
        else:
            self.driver = MesosSchedulerDriver(self.scheduler, framework, mesos_master)

        status = 0 if self.driver.run() == mesos_pb2.DRIVER_STOPPED else 1

        # Perform any required clean up operations like stopping background threads. The shutdown is called
        # unconditionally: folding it into "status or self._shutdown()" would skip the clean up whenever the
        # driver had already reported a failure.
        shutdown_status = self._shutdown()
        status = status or shutdown_status

        logger.info(u'Command completed: scale_scheduler')
        sys.exit(status)

    def _onsigterm(self, signum, _frame):
        '''See signal callback registration: :py:func:`signal.signal`.

        This callback performs a clean shutdown when a TERM signal is received.
        '''

        logger.info(u'Scheduler command terminated due to signal: %i', signum)
        self._shutdown()
        sys.exit(1)

    def _shutdown(self):
        '''Performs any clean up required by this command.

        The scheduler and driver are only created in :meth:`run_scheduler`, so either may still be missing when a
        TERM signal arrives early; a missing component is skipped rather than reported as a failure.

        :returns: The exit status code based on whether the shutdown operation was clean with no exceptions.
        :rtype: int
        '''

        status = 0

        try:
            scheduler = getattr(self, 'scheduler', None)
            if scheduler:
                scheduler.shutdown()
        except:
            logger.exception('Failed to properly shutdown scale scheduler.')
            status = 1

        # The driver is stopped even when the scheduler shutdown failed
        try:
            driver = getattr(self, 'driver', None)
            if driver:
                driver.stop()
        except:
            logger.exception('Failed to properly stop Mesos driver.')
            status = 1

        return status