예제 #1
0
  def test_scheduler_master_failover(self):
    disconnected_event, disconnected_call, disconnected_side_effect = self.mock_method()
    registered_event, registered_call, registered_side_effect = self.mock_method()
    reregistered_event, reregistered_call, reregistered_side_effect = self.mock_method()
    scheduler = mock.create_autospec(Scheduler, spec_set=True)
    scheduler.disconnected = mock.Mock(side_effect=disconnected_side_effect)
    scheduler.registered = mock.Mock(side_effect=registered_side_effect)
    scheduler.reregistered = mock.Mock(side_effect=reregistered_side_effect)

    standby_master = MockMaster()
    self.context.spawn(standby_master)

    driver = PesosSchedulerDriver(
        scheduler, self.framework_info, str(self.master.pid), context=self.context)
    assert driver.start() == mesos_pb2.DRIVER_RUNNING

    driver.scheduler_process.connected.wait(timeout=MAX_TIMEOUT)
    assert driver.scheduler_process.connected.is_set()

    registered_event.wait(timeout=MAX_TIMEOUT)
    assert registered_event.is_set()

    # now fail the current master
    driver.detector.appoint(None)
    disconnected_event.wait(timeout=MAX_TIMEOUT)
    assert disconnected_event.is_set()

    # now appoint the new master
    driver.detector.appoint(standby_master.pid)
    standby_master.reregister_event.wait(timeout=MAX_TIMEOUT)
    assert standby_master.reregister_event.is_set()
    assert standby_master.frameworks == self.master.frameworks
    reregistered_event.wait(timeout=MAX_TIMEOUT)
    assert reregistered_event.is_set()
    assert reregistered_call.mock_calls == [
        mock.call(driver, pid_to_master_info(standby_master.master_id, standby_master.pid))]
    assert driver.stop() == mesos_pb2.DRIVER_STOPPED
예제 #2
0
  def test_scheduler_scheduler_failover(self):
    reregistered_event, reregistered_call, reregistered_side_effect = self.mock_method()
    scheduler = mock.create_autospec(Scheduler, spec_set=True)
    scheduler.reregistered = mock.Mock(side_effect=reregistered_side_effect)

    self.framework_info.id.MergeFrom(self.framework_id)

    driver = PesosSchedulerDriver(
        scheduler, self.framework_info, str(self.master.pid), context=self.context)
    assert driver.start() == mesos_pb2.DRIVER_RUNNING

    driver.scheduler_process.connected.wait(timeout=MAX_TIMEOUT)
    assert driver.scheduler_process.connected.is_set()

    reregistered_event.wait(timeout=MAX_TIMEOUT)
    assert reregistered_event.is_set()

    assert len(self.master.frameworks) == 1

    framework_id = next(iter(self.master.frameworks))
    assert reregistered_call.mock_calls == [
        mock.call(driver, pid_to_master_info(self.master.master_id, self.master.pid))]

    assert driver.stop() == mesos_pb2.DRIVER_STOPPED
예제 #3
0
class Command(BaseCommand):
    """Command that launches the Scale scheduler
    """

    option_list = BaseCommand.option_list + (
        make_option('-m', '--master', action='store', type='str', default=settings.MESOS_MASTER,
                    help=('The master to connect to')),
    )

    help = 'Launches the Scale scheduler'

    def handle(self, **options):
        """See :meth:`django.core.management.base.BaseCommand.handle`.

        This method starts the scheduler.
        """

        # Register a listener to handle clean shutdowns
        signal.signal(signal.SIGTERM, self._onsigterm)

        # TODO: clean this up
        mesos_master = options.get('master')

        logger.info('Scale Scheduler %s', settings.VERSION)

        try:
            scheduler_zk = settings.SCHEDULER_ZK
        except:
            scheduler_zk = None

        if scheduler_zk is not None:
            import socket
            from scheduler import cluster_utils
            my_id = socket.gethostname()
            cluster_utils.wait_for_leader(scheduler_zk, my_id, self.run_scheduler, mesos_master)
        else:
            # leader election is disabled
            self.run_scheduler(mesos_master)

    def run_scheduler(self, mesos_master):
        logger.info("I am the leader")
        self.scheduler = ScaleScheduler()

        framework = mesos_pb2.FrameworkInfo()
        framework.user = ''  # Have Mesos fill in the current user.
        framework.name = 'Scale'

        logger.info('Connecting to Mesos master at %s', mesos_master)

        # TODO(vinod): Make checkpointing the default when it is default on the slave.
        if MESOS_CHECKPOINT:
            logger.info('Enabling checkpoint for the framework')
            framework.checkpoint = True

        if MESOS_AUTHENTICATE:
            logger.info('Enabling authentication for the framework')

            if not DEFAULT_PRINCIPLE:
                logger.error('Expecting authentication principal in the environment')
                sys.exit(1)

            if not DEFAULT_SECRET:
                logger.error('Expecting authentication secret in the environment')
                sys.exit(1)

            credential = mesos_pb2.Credential()
            credential.principal = DEFAULT_PRINCIPLE
            credential.secret = DEFAULT_SECRET

            self.driver = MesosSchedulerDriver(self.scheduler, framework, mesos_master, credential)
        else:
            self.driver = MesosSchedulerDriver(self.scheduler, framework, mesos_master)

        status = 0 if self.driver.run() == mesos_pb2.DRIVER_STOPPED else 1

        # Perform any required clean up operations like stopping background threads
        status = status or self._shutdown()

        logger.info('Exiting...')
        sys.exit(status)

    def _onsigterm(self, signum, _frame):
        """See signal callback registration: :py:func:`signal.signal`.

        This callback performs a clean shutdown when a TERM signal is received.
        """
        logger.info('Scheduler command terminated due to signal: %i', signum)
        self._shutdown()
        sys.exit(1)

    def _shutdown(self):
        """Performs any clean up required by this command.

        :returns: The exit status code based on whether the shutdown operation was clean with no exceptions.
        :rtype: int
        """
        status = 0

        try:
            if self.scheduler:
                self.scheduler.shutdown()
        except:
            logger.exception('Failed to properly shutdown Scale scheduler.')
            status = 1

        try:
            if self.driver:
                self.driver.stop()
        except:
            logger.exception('Failed to properly stop Mesos driver.')
            status = 1
        return status
예제 #4
0
class Command(BaseCommand):
    """Command that launches the Scale scheduler
    """

    option_list = BaseCommand.option_list + (make_option(
        '-m',
        '--master',
        action='store',
        type='str',
        default=settings.MESOS_MASTER,
        help=('The master to connect to')), )

    help = 'Launches the Scale scheduler'

    def handle(self, **options):
        """See :meth:`django.core.management.base.BaseCommand.handle`.

        This method starts the scheduler.
        """

        # Register a listener to handle clean shutdowns
        signal.signal(signal.SIGTERM, self._onsigterm)

        # TODO: clean this up
        mesos_master = options.get('master')

        logger.info('Scale Scheduler %s', settings.VERSION)

        try:
            scheduler_zk = settings.SCHEDULER_ZK
        except:
            scheduler_zk = None

        if scheduler_zk is not None:
            import socket
            from scheduler import cluster_utils
            my_id = socket.gethostname()
            cluster_utils.wait_for_leader(scheduler_zk, my_id,
                                          self.run_scheduler, mesos_master)
        else:
            # leader election is disabled
            self.run_scheduler(mesos_master)

    def run_scheduler(self, mesos_master):
        logger.info("I am the leader")
        self.scheduler = ScaleScheduler()

        framework = mesos_pb2.FrameworkInfo()
        framework.user = ''  # Have Mesos fill in the current user.
        framework.name = 'Scale'

        logger.info('Connecting to Mesos master at %s', mesos_master)

        # TODO(vinod): Make checkpointing the default when it is default on the slave.
        if MESOS_CHECKPOINT:
            logger.info('Enabling checkpoint for the framework')
            framework.checkpoint = True

        if MESOS_AUTHENTICATE:
            logger.info('Enabling authentication for the framework')

            if not DEFAULT_PRINCIPLE:
                logger.error(
                    'Expecting authentication principal in the environment')
                sys.exit(1)

            if not DEFAULT_SECRET:
                logger.error(
                    'Expecting authentication secret in the environment')
                sys.exit(1)

            credential = mesos_pb2.Credential()
            credential.principal = DEFAULT_PRINCIPLE
            credential.secret = DEFAULT_SECRET

            self.driver = MesosSchedulerDriver(self.scheduler, framework,
                                               mesos_master, credential)
        else:
            self.driver = MesosSchedulerDriver(self.scheduler, framework,
                                               mesos_master)

        status = 0 if self.driver.run() == mesos_pb2.DRIVER_STOPPED else 1

        # Perform any required clean up operations like stopping background threads
        status = status or self._shutdown()

        logger.info('Exiting...')
        sys.exit(status)

    def _onsigterm(self, signum, _frame):
        """See signal callback registration: :py:func:`signal.signal`.

        This callback performs a clean shutdown when a TERM signal is received.
        """
        logger.info('Scheduler command terminated due to signal: %i', signum)
        self._shutdown()
        sys.exit(1)

    def _shutdown(self):
        """Performs any clean up required by this command.

        :returns: The exit status code based on whether the shutdown operation was clean with no exceptions.
        :rtype: int
        """
        status = 0

        try:
            if self.scheduler:
                self.scheduler.shutdown()
        except:
            logger.exception('Failed to properly shutdown Scale scheduler.')
            status = 1

        try:
            if self.driver:
                self.driver.stop()
        except:
            logger.exception('Failed to properly stop Mesos driver.')
            status = 1
        return status
예제 #5
0
        if not os.getenv("DEFAULT_SECRET"):
            print "Expecting authentication secret in the environment"
            sys.exit(1);

        credential = mesos_pb2.Credential()
        credential.principal = os.getenv("DEFAULT_PRINCIPAL")
        credential.secret = os.getenv("DEFAULT_SECRET")

        #framework.principal = os.getenv("DEFAULT_PRINCIPAL")

        driver = mesos.native.MesosSchedulerDriver(
            TestScheduler(executor),
            framework,
            sys.argv[1],
            credential)
    else:
        #No principal in this framework
        #framework.principal = "test-framework-python"

        driver = MesosSchedulerDriver(
            TestScheduler(executor),
            framework,
            sys.argv[1])

    status = 0 if driver.run() == mesos_pb2.DRIVER_STOPPED else 1

    # Ensure that the driver process terminates.
    driver.stop();

    sys.exit(status)