Ejemplo n.º 1
0
class Command(BaseCommand):
    """Command that launches the Scale scheduler
    """

    help = 'Launches the Scale scheduler'

    # Class-level defaults so that _shutdown() can run safely even when a TERM signal arrives before
    # run_scheduler() has created the scheduler and client.
    scheduler = None
    client = None
    # Exit status of the first completed _shutdown() call, or None if no shutdown has run yet.
    _shutdown_status = None

    def handle(self, *args, **options):
        """See :meth:`django.core.management.base.BaseCommand.handle`.

        This method starts the scheduler.
        """

        # Register a listener to handle clean shutdowns
        signal.signal(signal.SIGTERM, self._onsigterm)

        # Set up global shutdown
        global GLOBAL_SHUTDOWN
        GLOBAL_SHUTDOWN = self._shutdown

        logger.info('Scale Scheduler %s', settings.VERSION)

        self.run_scheduler(settings.MESOS_MASTER)

    def run_scheduler(self, mesos_master):
        """Initializes the scheduler, configures the Mesos client and runs the scheduler until it stops.

        This method does not return normally: it always ends by calling :func:`sys.exit` with a status of 0 when
        both the scheduler run and the shutdown were clean, and a non-zero status otherwise.

        :param mesos_master: The URL of the Mesos master to connect to
        :type mesos_master: string
        """
        logger.info("Scale rising...")
        self.scheduler = ScaleScheduler()
        self.scheduler.initialize()
        scheduler_mgr.hostname = socket.getfqdn()

        logger.info('Connecting to Mesos master at %s:', mesos_master)

        # Connect to the master that was passed in (and logged above) rather than re-reading the setting
        self.client = MesosClient(
            mesos_urls=[mesos_master],
            # We have to run tasks as root, so docker commands may be executed
            # NOTE(review): the value below looks masked; the comment above suggests 'root' - confirm
            frameworkUser='******',
            frameworkName=settings.FRAMEWORK_NAME,
            frameworkHostname=scheduler_mgr.hostname,
            frameworkWebUI=settings.WEBSERVER_ADDRESS)
        if settings.SERVICE_SECRET:
            # We are in Enterprise mode and using service account
            self.client.set_service_account(json.loads(
                settings.SERVICE_SECRET))
        elif settings.PRINCIPAL and settings.SECRET:
            self.client.set_credentials(settings.PRINCIPAL, settings.SECRET)

        mesos_role = settings.MESOS_ROLE
        logger.info('Launching scheduler with role: %s', mesos_role)
        self.client.set_role(mesos_role)

        logger.info('Accepting offers from role: %s',
                    settings.ACCEPTED_RESOURCE_ROLE)

        self.client.add_capability('GPU_RESOURCES')

        try:
            self.scheduler.run(self.client)
            status = 0
        except SystemExit:
            # An exit was requested while the scheduler was running (e.g. by _onsigterm). This is not a driver
            # failure, so do not log it as one; still exit with a non-zero status as before.
            status = 1
        except (Exception, KeyboardInterrupt):
            # KeyboardInterrupt is included so that an interactive interrupt still leads to a shutdown
            status = 1
            logger.exception('Mesos Scheduler Driver returned an exception')

        # Perform a shut down and return any non-zero status
        shutdown_status = self._shutdown()
        status = status or shutdown_status

        logger.info('Exiting...')
        sys.exit(status)

    def _onsigterm(self, signum, _frame):
        """See signal callback registration: :py:func:`signal.signal`.

        This callback performs a clean shutdown when a TERM signal is received.
        """
        logger.info('Scheduler command terminated due to signal: %i', signum)
        self._shutdown()
        sys.exit(1)

    def _shutdown(self):
        """Performs any clean up required by this command.

        The scheduler is only shut down once. Later calls (for example from run_scheduler() after the signal
        handler has already shut down) return the status of the first completed call.

        :returns: The exit status code based on whether the shutdown operation was clean with no exceptions.
        :rtype: int
        """
        if self._shutdown_status is not None:
            return self._shutdown_status

        status = 0

        try:
            if self.scheduler:
                self.scheduler.shutdown()
        except Exception:
            logger.exception('Failed to properly shutdown Scale scheduler.')
            status = 1

        self._shutdown_status = status
        return status
Ejemplo n.º 2
0
class Test(object):
    """Sample Mesos framework built on ``MesosClient``.

    Constructing an instance creates the client, registers the event callbacks, starts the client
    registration on a background thread and then waits in the calling thread until that thread ends
    or the user interrupts with Ctrl-C.

    For every batch of offers the first offer is used to launch a ``sleep 30`` task and the
    remaining offers are declined. A task is killed as soon as it reports ``TASK_RUNNING``.
    """

    class MesosFramework(threading.Thread):
        """Thread that runs ``client.register()`` so the constructing thread stays interruptible."""

        def __init__(self, client):
            threading.Thread.__init__(self)
            self.client = client
            self.stop = False

        def run(self):
            """Registers the client, reporting a user interrupt instead of raising it."""
            try:
                self.client.register()
            except KeyboardInterrupt:
                print('Stop requested by user, stopping framework....')

    def __init__(self):
        """Sets up logging and the Mesos client, then runs the framework until it stops.

        Environment variables read:

        - ``MESOS_URLS``: comma delimited master URLs (default ``http://127.0.0.1:5050``)
        - ``SERVICE_SECRET``: optional JSON document passed to ``set_service_account``
        """
        logging.basicConfig()
        self.logger = logging.getLogger(__name__)
        logging.getLogger('mesoshttp').setLevel(logging.DEBUG)

        # Set by the SUBSCRIBED callback; stays None until then
        self.driver = None
        # Note: leader.mesos address requires Mesos DNS
        #self.client = MesosClient(mesos_urls=['zk://leader.mesos:2181/mesos'])
        # If you are purely using Mesos, you should use explicit address of Master
        # Example: Zookeeper master discovery
        #self.client = MesosClient(mesos_urls=['zk://127.0.0.1:2181/mesos'])
        # Example: Directly address Mesos
        #self.client = MesosClient(mesos_urls=['http://127.0.0.1:5050'])

        # By default, use direct master addressing
        # Allow for comma delimited URLs to be passed in via MESOS_URLS
        # environment variable
        master_urls = os.getenv('MESOS_URLS', 'http://127.0.0.1:5050')
        self.client = MesosClient(mesos_urls=master_urls.split(','))

        secret = os.getenv('SERVICE_SECRET')
        if secret:
            self.client.set_service_account(json.loads(secret))
        self.client.on(MesosClient.SUBSCRIBED, self.subscribed)
        self.client.on(MesosClient.OFFERS, self.offer_received)
        self.client.on(MesosClient.UPDATE, self.status_update)
        self.th = Test.MesosFramework(self.client)
        self.th.start()
        # Join with a timeout so the loop regains control regularly and Ctrl-C is handled here.
        # Thread.isAlive() was removed in Python 3.9; is_alive() is the supported spelling.
        while self.th.is_alive():
            try:
                self.th.join(1)
            except KeyboardInterrupt:
                self.shutdown()
                break

    def shutdown(self):
        """Flags the client to stop and tears down the driver if one has been received."""
        print('Stop requested by user, stopping framework....')
        self.logger.warning('Stop requested by user, stopping framework....')
        self.client.stop = True
        # The driver only exists after the SUBSCRIBED callback has fired; an interrupt before
        # that point must not fail on None.
        if self.driver is not None:
            self.driver.tearDown()
        self.stop = True

    def subscribed(self, driver):
        """SUBSCRIBED callback: keeps the driver for later kill/tearDown calls."""
        self.logger.warning('SUBSCRIBED')
        self.driver = driver

    def status_update(self, update):
        """UPDATE callback: kills a task as soon as it is reported as running."""
        status = update['status']
        if status['state'] == 'TASK_RUNNING':
            self.driver.kill(status['agent_id']['value'],
                             status['task_id']['value'])

    def offer_received(self, offers):
        """OFFERS callback: launches a job on the first offer and declines all the others."""
        self.logger.warning('OFFER: %s', offers)
        for index, offer in enumerate(offers):
            if index == 0:
                self.run_job(offer)
            else:
                offer.decline()

    def run_job(self, mesos_offer):
        """Accepts the given offer with a single ``sleep 30`` task in a ``debian`` Docker image.

        The task requests 1 cpu and 1000 mem, gets a random hex UUID as its task ID and is
        placed on the agent that made the offer.
        """
        offer = mesos_offer.get_offer()
        print(str(offer))
        task = {
            'name': 'sample test',
            'task_id': {'value': uuid.uuid4().hex},
            'agent_id': {'value': offer['agent_id']['value']},
            'resources': [
                {'name': 'cpus', 'type': 'SCALAR', 'scalar': {'value': 1}},
                {'name': 'mem', 'type': 'SCALAR', 'scalar': {'value': 1000}},
            ],
            'command': {'value': 'sleep 30'},
            'container': {
                'type': 'MESOS',
                'mesos': {
                    'image': {
                        'type': 'DOCKER',
                        'docker': {'name': 'debian'},
                    },
                },
            },
        }

        mesos_offer.accept([task])