def testProgramAbortOnInputReaderThreadCrash(
    self, osExitMock, _slotAgentClassMock, modelSwapperInterfaceClassMock):
    # Scenario: the mocked ModelSwapperInterface raises from
    # initSchedulerNotification, which crashes the notification reader thread;
    # the test expects the (mocked) os._exit() to be called with
    # SwapController's dedicated exit code.

    # Make the mocked ModelSwapperInterface instance raise
    swapperInstance = modelSwapperInterfaceClassMock.return_value
    swapperInstance.initSchedulerNotification.side_effect = Exception(
      "testProgramAbortOnInputReaderThreadCrash")

    # Record the argument passed to the mocked os._exit instead of exiting
    exitCodeQ = Queue.Queue()
    osExitMock.side_effect = exitCodeQ.put

    controller = SwapController(concurrency=1)
    self.assertEqual(len(controller._slotAgents), 1)

    # Ask for stop up front, so that the main loop will exit ASAP
    controller.requestStopTS()

    def runAndReport(target, outcomeQ):
      # Thread body: publish either run()'s return value or the exception that
      # run() raised
      try:
        g_logger.info("Swap Controller run-thread is running")
        try:
          outcome = target.run()
        except Exception as exc:
          outcome = exc
        outcomeQ.put(outcome)
      finally:
        g_logger.info("Swap Controller run-thread is exiting")


    runOutcomeQ = Queue.Queue()
    runner = threading.Thread(
      target=runAndReport,
      name="runSwapControllerThread",
      args=(controller, runOutcomeQ))
    runner.setDaemon(True)
    runner.start()

    # Block until the mocked os._exit is called, then check its argument
    observedExitCode = exitCodeQ.get(timeout=5)
    self.assertEqual(
      observedExitCode,
      SwapController._EXIT_CODE_ON_FAILURE_OF_NOTIFICATION_READER_THREAD)

    # The run-thread itself must wind down too
    g_logger.info("Waiting for SwapController run-thread to stop")
    runner.join(timeout=5)
    self.assertFalse(runner.isAlive())
    g_logger.info("SwapController run-thread stopped")

    # run() is expected to have returned None rather than raised
    self.assertIsNone(runOutcomeQ.get_nowait())
Exemple #2
0
  def testProgramAbortOnInputReaderThreadCrash(
    self, osExitMock, _slotAgentClassMock, modelSwapperInterfaceClassMock):
    # Scenario: the mocked ModelSwapperInterface raises from
    # initSchedulerNotification, which crashes the notification reader thread;
    # the test expects the (mocked) os._exit() to be called with
    # SwapController's dedicated exit code.

    # Make the mocked ModelSwapperInterface instance raise
    swapperInstance = modelSwapperInterfaceClassMock.return_value
    swapperInstance.initSchedulerNotification.side_effect = Exception(
      "testProgramAbortOnInputReaderThreadCrash")

    # Record the argument passed to the mocked os._exit instead of exiting
    exitCodeQ = Queue.Queue()
    osExitMock.side_effect = exitCodeQ.put

    controller = SwapController(concurrency=1)
    self.assertEqual(len(controller._slotAgents), 1)

    # Ask for stop up front, so that the main loop will exit ASAP
    controller.requestStopTS()

    def runAndReport(target, outcomeQ):
      # Thread body: publish either run()'s return value or the exception that
      # run() raised
      try:
        g_logger.info("Swap Controller run-thread is running")
        try:
          outcome = target.run()
        except Exception as exc:
          outcome = exc
        outcomeQ.put(outcome)
      finally:
        g_logger.info("Swap Controller run-thread is exiting")


    runOutcomeQ = Queue.Queue()
    runner = threading.Thread(
      target=runAndReport,
      name="runSwapControllerThread",
      args=(controller, runOutcomeQ))
    runner.setDaemon(True)
    runner.start()

    # Block until the mocked os._exit is called, then check its argument
    observedExitCode = exitCodeQ.get(timeout=5)
    self.assertEqual(
      observedExitCode,
      SwapController._EXIT_CODE_ON_FAILURE_OF_NOTIFICATION_READER_THREAD)

    # The run-thread itself must wind down too
    g_logger.info("Waiting for SwapController run-thread to stop")
    runner.join(timeout=5)
    self.assertFalse(runner.isAlive())
    g_logger.info("SwapController run-thread stopped")

    # run() is expected to have returned None rather than raised
    self.assertIsNone(runOutcomeQ.get_nowait())
    def testCreateSwapControllerAndDeleteIt(self, **_kwargs):
        # Smoke test: build a SwapController, check that it created one slot
        # agent per unit of concurrency, then drop the reference to it.
        expectedSlotCount = 3
        controller = SwapController(concurrency=expectedSlotCount)

        self.assertEqual(len(controller._slotAgents), expectedSlotCount)

        del controller
Exemple #4
0
    def __init__(self, concurrency):
        """ Set up the signal pipe, install the signal handlers and create the
        SwapController.

        concurrency: forwarded to SwapController as its `concurrency` argument
        """
        self._logger = _getLogger()
        self._concurrency = concurrency

        # The signal handler writes signal numbers into this pipe
        readFD, writeFD = os.pipe()
        self._signalPipeReadFD = readFD
        self._signalPipeWriteFD = writeFD

        # Switch the write end to non-blocking mode so that the signal
        # dispatcher cannot accidentally deadlock on a full pipe
        writeEndFlags = fcntl.fcntl(self._signalPipeWriteFD, fcntl.F_GETFL)
        fcntl.fcntl(self._signalPipeWriteFD, fcntl.F_SETFL,
                    writeEndFlags | os.O_NONBLOCK)

        # Route the signals of interest to our handler
        self._signalsOfInterest = [
            signal.SIGHUP,
            signal.SIGTERM,
            signal.SIGINT,
        ]
        for signalNumber in self._signalsOfInterest:
            signal.signal(signalNumber, self._handleSignal)

        # Create the slot agents and swap controller.
        self._swapController = SwapController(concurrency=concurrency)
  def testRunSwapControllerAndStopIt(
    self, _slotAgentClassMock, modelSwapperInterfaceClassMock):
    # Instantiate SwapController instance, run it in a separate thread,
    # then stop SwapController and verify that run() returned None.

    # Configure ModelSwapperInterface instance mock so that the notification
    # reader consumes from our dummy consumer
    swapperMock = modelSwapperInterfaceClassMock.return_value
    notificationConsumer = DummyConsumer()
    swapperMock.consumeModelSchedulerNotifications.return_value = (
      notificationConsumer)

    sc = SwapController(concurrency=3)
    self.assertEqual(len(sc._slotAgents), 3)

    # Run it in a thread
    def runSwapControllerThread(sc, runResultQ):
      # Publish run()'s return value, or the exception it raised. Without the
      # except-clause a failure in run() would leave the queue empty and the
      # test would fail with an unrelated Queue.Empty from get_nowait() below.
      try:
        r = sc.run()
      except Exception as e:
        runResultQ.put(e)
        # Re-raise so that the traceback is still reported for this thread
        raise
      else:
        runResultQ.put(r)

    runResultQ = Queue.Queue()
    scThread = threading.Thread(
      target=runSwapControllerThread,
      name="runSwapControllerThread",
      args=(sc, runResultQ))
    scThread.setDaemon(True)
    scThread.start()

    # Now stop it
    sc.requestStopTS()
    # Prod notification reader's consumer loop to detect the stop request and
    # exit gracefully by adding a dummy message
    notificationConsumer.q.put(None)

    # There isn't much going on, it should stop immediately
    scThread.join(timeout=5)
    self.assertFalse(scThread.isAlive())

    # run() must have returned None; if it raised, runResult is the exception
    # and this assertion reports it
    runResult = runResultQ.get_nowait()
    self.assertIsNone(runResult)
Exemple #6
0
  def testRunSwapControllerAndStopIt(
    self, _slotAgentClassMock, modelSwapperInterfaceClassMock):
    # Instantiate SwapController instance, run it in a separate thread,
    # then stop SwapController and verify that run() returned None.

    # Configure ModelSwapperInterface instance mock so that the notification
    # reader consumes from our dummy consumer
    swapperMock = modelSwapperInterfaceClassMock.return_value
    notificationConsumer = DummyConsumer()
    swapperMock.consumeModelSchedulerNotifications.return_value = (
      notificationConsumer)

    sc = SwapController(concurrency=3)
    self.assertEqual(len(sc._slotAgents), 3)

    # Run it in a thread
    def runSwapControllerThread(sc, runResultQ):
      # Publish run()'s return value, or the exception it raised. Without the
      # except-clause a failure in run() would leave the queue empty and the
      # test would fail with an unrelated Queue.Empty from get_nowait() below.
      try:
        r = sc.run()
      except Exception as e:
        runResultQ.put(e)
        # Re-raise so that the traceback is still reported for this thread
        raise
      else:
        runResultQ.put(r)

    runResultQ = Queue.Queue()
    scThread = threading.Thread(
      target=runSwapControllerThread,
      name="runSwapControllerThread",
      args=(sc, runResultQ))
    scThread.setDaemon(True)
    scThread.start()

    # Now stop it
    sc.requestStopTS()
    # Prod notification reader's consumer loop to detect the stop request and
    # exit gracefully by adding a dummy message
    notificationConsumer.q.put(None)

    # There isn't much going on, it should stop immediately
    scThread.join(timeout=5)
    self.assertFalse(scThread.isAlive())

    # run() must have returned None; if it raised, runResult is the exception
    # and this assertion reports it
    runResult = runResultQ.get_nowait()
    self.assertIsNone(runResult)
Exemple #7
0
  def testDetectNotificationReaderThreadTargetCallFailed(
    self, _slotAgentClassMock, _modelSwapperInterfaceClassMock):
    # Reproduce failure to invoke the SwapController's input-reader thread
    # target and verify that SwapController's event loop raises the expected
    # exception

    sc = SwapController(concurrency=1)
    self.assertEqual(len(sc._slotAgents), 1)

    # Patch SwapController's input-thread object with one that will exhibit a
    # failure while trying to call the thread target: the target requires two
    # positional arguments, but the thread is created without any
    def expectTwoArgs(_a, _b):
      pass
    t = threading.Thread(target=expectTwoArgs)
    t.setDaemon(True)
    readerThreadPatcher = patch.multiple(sc, _notificationReaderThread=t)
    readerThreadPatcher.start()
    # A patcher activated with start() stays active until stop() is called;
    # register the stop so the patch is undone even if an assertion fails
    self.addCleanup(readerThreadPatcher.stop)

    # Attempt to run it in a thread
    def runSwapControllerThread(sc, runResultQ):
      # Publish run()'s return value, or the exception it raised
      try:
        g_logger.info("Swap Controller run-thread is running")
        try:
          r = sc.run()
        except Exception as e:
          runResultQ.put(e)
        else:
          runResultQ.put(r)
      finally:
        g_logger.info("Swap Controller run-thread is exiting")


    runResultQ = Queue.Queue()
    scThread = threading.Thread(
      target=runSwapControllerThread,
      name="runSwapControllerThread",
      args=(sc, runResultQ))
    scThread.setDaemon(True)
    scThread.start()

    # Wait for the run-thread to stop
    g_logger.info("Waiting for SwapController run-thread to stop")
    scThread.join(timeout=5)
    self.assertFalse(scThread.isAlive())
    g_logger.info("SwapController run-thread stopped")

    # Confirm the expected exception
    runResult = runResultQ.get_nowait()
    self.assertIsInstance(runResult, AssertionError)
    self.assertIn("Notification-reader thread failed to start in time",
                  runResult.args[0])
  def __init__(self, concurrency):
    """ Set up the signal pipe, install the signal handlers and create the
    SwapController.

    concurrency: forwarded to SwapController as its `concurrency` argument
    """
    self._logger = _getLogger()
    self._concurrency = concurrency

    # The signal handler writes signal numbers into this pipe
    readFD, writeFD = os.pipe()
    self._signalPipeReadFD = readFD
    self._signalPipeWriteFD = writeFD

    # Switch the write end to non-blocking mode so that the signal dispatcher
    # cannot accidentally deadlock on a full pipe
    writeEndFlags = fcntl.fcntl(self._signalPipeWriteFD, fcntl.F_GETFL)
    fcntl.fcntl(
      self._signalPipeWriteFD,
      fcntl.F_SETFL,
      writeEndFlags | os.O_NONBLOCK)

    # Route the signals of interest to our handler
    self._signalsOfInterest = [
      signal.SIGHUP,
      signal.SIGTERM,
      signal.SIGINT,
    ]
    for signalNumber in self._signalsOfInterest:
      signal.signal(signalNumber, self._handleSignal)

    # Create the slot agents and swap controller.
    self._swapController = SwapController(concurrency=concurrency)
  def testModelPreemptionAndStop(
    self, slotAgentClassMock, modelSwapperInterfaceClassMock):
    # Test preemption of slots in SwapController: feed input for more models
    # than there are slots, wait for all of it to be consumed, then stop the
    # controller and check the per-slot-agent call counts.

    # Configure ModelSwapperInterface instance mock
    swapperMock = modelSwapperInterfaceClassMock.return_value
    notificationConsumer = DummyConsumer()
    swapperMock.consumeModelSchedulerNotifications.return_value = (
      notificationConsumer)
    # A model has pending input for as long as its request-batch queue is
    # non-empty (modelInputDescriptors is bound below, before the first call)
    swapperMock.modelInputPending.side_effect = (lambda modelID:
      not modelInputDescriptors[modelID].requestBatchesQ.empty())

    # Configure SlotAgent class mock to create dummy slot agent instances and
    # add them to our list so that we can introspect them later
    concurrency = 3
    multiplier = 3
    numModels = concurrency * multiplier

    # Generate model IDs
    modelIDs = [hex(i) for i in xrange(numModels)]

    requestBatches = ("firstBatch", "secondBatch",)

    modelInputDescriptors = dict((
      modelID,
      _ModelInputDescriptor(
        requestBatches=requestBatches,
        consumeSizes=[1, 1]))
      for modelID in modelIDs)

    slotAgents = []
    # list.append() returns None, so the `or` yields the agent just appended
    slotAgentClassMock.side_effect = (lambda slotID:
      slotAgents.append(
        _DummySlotAgent(slotID, modelInputDescriptors.__getitem__))
      or slotAgents[-1])

    # Run SwapController in a thread
    sc = SwapController(concurrency=concurrency)
    self.assertEqual(len(sc._slotAgents), concurrency)
    self.assertEqual(len(slotAgents), concurrency)

    def runSwapControllerThread(sc, runResultQ):
      # Publish run()'s return value, or the exception it raised. Without the
      # except-clause a failure in run() would leave the queue empty and the
      # test would fail with an unrelated Queue.Empty from get_nowait() below.
      try:
        g_logger.info("Swap Controller run-thread is running")
        r = sc.run()
        runResultQ.put(r)
      except Exception as e:
        runResultQ.put(e)
        # Re-raise so that the traceback is still reported for this thread
        raise
      finally:
        g_logger.info("Swap Controller run-thread is exiting")


    runResultQ = Queue.Queue()
    scThread = threading.Thread(
      target=runSwapControllerThread,
      name="runSwapControllerThread",
      args=(sc, runResultQ))
    scThread.setDaemon(True)
    scThread.start()

    # Prod SwapController to process all models
    for modelID in modelIDs:
      notificationConsumer.q.put(_createModelInputNotification(modelID))

    # Wait for model input queues to drain
    for modelID, desc in modelInputDescriptors.iteritems():
      g_logger.info("Waiting for model=%s inputQ to be empty", modelID)
      desc.requestBatchesQ.waitUntilEmpty(timeout=5)
      self.assertTrue(desc.requestBatchesQ.empty())
      g_logger.info("model=%s inputQ is empty", modelID)

    # Verify that all SlotAgents are occupied
    for sa in slotAgents:
      self.assertIsNotNone(sa.modelID)

    # Now stop SwapController
    g_logger.info("Requesting SwapController to stop")
    sc.requestStopTS()

    # So that the notification reader thread detects stop request and exits.
    # Name the model explicitly instead of relying on whichever value the
    # loop variable above happened to end on.
    notificationConsumer.q.put(_createModelInputNotification(modelIDs[-1]))

    g_logger.info("Waiting for SwapController run-thread to stop")
    scThread.join(timeout=5)
    self.assertFalse(scThread.isAlive())
    g_logger.info("SwapController run-thread stopped")

    # Verify that SwapController.run() returned without error; if it raised,
    # the queued item is the exception and this assertion reports it
    self.assertIsNone(runResultQ.get_nowait())

    # Verify that all slot agents were closed
    for sa in slotAgents:
      self.assertEqual(sa.numCloseCalls, 1)

    # Verify that input data of all models was drained
    for modelID, desc in modelInputDescriptors.iteritems():
      g_logger.info("Verify empty input for model=%s", modelID)
      self.assertEqual(desc.requestBatchesQ.qsize(), 0)
      self.assertEqual(desc.requestBatchesProcessedQ.qsize(),
                       len(requestBatches))

    # Verify that all slot agents did work and were closed
    for sa in slotAgents:
      g_logger.info(
        "sa=%s: closeCalls=%s; startCalls=%s; stopCalls=%s; releaseCalls=%s",
        sa.slotID,
        sa.numCloseCalls,
        sa.numStartModelCalls,
        sa.numStopModelCalls,
        sa.numReleaseSlotCalls)

      self.assertEqual(sa.numCloseCalls, 1)

      self.assertEqual(sa.numStartModelCalls, multiplier * len(requestBatches))
      self.assertEqual(sa.numStopModelCalls, multiplier * len(requestBatches))
      self.assertEqual(sa.numReleaseSlotCalls, multiplier * len(requestBatches))
  def testSimpleSingleSuccessfulModelInputAndStop(
    self, slotAgentClassMock, modelSwapperInterfaceClassMock):
    # Scenario: start SwapController on its own thread, deliver input for a
    # single model, wait for it to be consumed, then stop the controller.

    # The mocked ModelSwapperInterface instance hands out our dummy consumer
    # and always reports that no model input is pending
    swapperInstance = modelSwapperInterfaceClassMock.return_value
    inputNotifications = DummyConsumer()
    swapperInstance.consumeModelSchedulerNotifications.return_value = (
      inputNotifications)
    swapperInstance.modelInputPending.return_value = False

    # One model with two request batches, consumed in a single step
    modelID = "abcd"
    requestBatches = ("firstBatch", "secondBatch",)
    modelInputDesc = _ModelInputDescriptor(
      requestBatches=requestBatches,
      consumeSizes=[2,])
    modelInputDescriptors = {modelID: modelInputDesc}

    # The SlotAgent class mock builds dummy slot agents and records each one
    # so that the test can inspect them afterwards
    slotAgents = []

    def makeSlotAgent(slotID):
      agent = _DummySlotAgent(slotID, modelInputDescriptors.__getitem__)
      slotAgents.append(agent)
      return agent

    slotAgentClassMock.side_effect = makeSlotAgent

    # Run SwapController in a thread
    concurrency = 3
    controller = SwapController(concurrency=concurrency)
    self.assertEqual(len(controller._slotAgents), concurrency)
    self.assertEqual(len(slotAgents), concurrency)

    def runAndReport(target, outcomeQ):
      # Thread body: publish run()'s return value; on any error publish the
      # exception object and let it propagate
      try:
        g_logger.info("Swap Controller run-thread is running")
        outcomeQ.put(target.run())
      except:
        outcomeQ.put(sys.exc_info()[1])
        raise
      finally:
        g_logger.info("Swap Controller run-thread is exiting")


    runOutcomeQ = Queue.Queue()
    runner = threading.Thread(
      target=runAndReport,
      name="runSwapControllerThread",
      args=(controller, runOutcomeQ))
    runner.setDaemon(True)
    runner.start()

    # Prod SwapController to process model input
    inputNotifications.q.put(_createModelInputNotification(modelID))

    # Wait for the model input queue to drain
    g_logger.info("Waiting for model inputQ to be empty")
    pendingBatchesQ = modelInputDesc.requestBatchesQ
    pendingBatchesQ.waitUntilEmpty(timeout=5)
    self.assertTrue(modelInputDesc.requestBatchesQ.empty())
    g_logger.info("model inputQ is empty")

    # Now stop SwapController
    g_logger.info("Requesting SwapController to stop")
    controller.requestStopTS()

    # So that the notification reader thread detects stop request and exits:
    inputNotifications.q.put(_createModelInputNotification(modelID))

    g_logger.info("Waiting for SwapController run-thread to stop")
    runner.join(timeout=5)
    self.assertFalse(runner.isAlive())
    g_logger.info("SwapController run-thread stopped")

    # Verify that SwapController.run() returned without error
    self.assertIsNone(runOutcomeQ.get_nowait())

    # Verify that all slot agents were closed
    for agent in slotAgents:
      self.assertEqual(agent.numCloseCalls, 1)

    # Verify that a single slot agent handled all the input data
    targetSA = None
    for agent in slotAgents:
      if not agent.numStartModelCalls > 0:
        # Idle agent: must never have started or stopped a model
        self.assertEqual(agent.numStartModelCalls, 0)
        self.assertEqual(agent.numStopModelCalls, 0)
        continue

      # Busy agent: there may be only one of these
      self.assertIsNone(targetSA)
      targetSA = agent
      self.assertEqual(agent.numStartModelCalls, 1)
      self.assertEqual(agent.numStopModelCalls, 1)
      self.assertEqual(modelInputDesc.requestBatchesProcessedQ.qsize(),
                       len(requestBatches))

    self.assertIsNotNone(targetSA)
class ModelSchedulerService(object):
  """ Runs a SwapController in a background thread until one of the signals of
  interest (SIGHUP, SIGTERM, SIGINT) is received.

  The signal handler forwards each signal number through a pipe to run(),
  which then asks the SwapController to stop. Usable as a context manager so
  that the signal handlers and the pipe are released on exit.
  """

  # Error code that will be passed to os._exit when the thread that runs
  # SwapController experiences an unhandled exception
  _ABORT_PROGRAM_ON_THREAD_EXCEPTION_EXIT_CODE = 1

  def __init__(self, concurrency):
    """
    concurrency: forwarded to SwapController as its `concurrency` argument
    """
    self._logger = _getLogger()
    self._concurrency = concurrency

    self._signalPipeReadFD, self._signalPipeWriteFD = os.pipe()
    # Make the write end non-blocking to prevent accidental deadlocking of the
    # signal dispatcher
    fcntl.fcntl(
      self._signalPipeWriteFD,
      fcntl.F_SETFL,
      fcntl.fcntl(self._signalPipeWriteFD, fcntl.F_GETFL) | os.O_NONBLOCK)

    # Register for signals of interest
    self._signalsOfInterest = [signal.SIGHUP, signal.SIGTERM, signal.SIGINT]
    for sig in self._signalsOfInterest:
      signal.signal(sig, self._handleSignal)

    # Create the slot agents and swap controller.
    self._swapController = SwapController(concurrency=concurrency)


  def __enter__(self):
    """ Context Manager protocol method. Allows a ModelSchedulerService instance
    to be used in a "with" statement for automatic clean-up

    Parameters:
    ------------------------------------------------------------------------
    retval:     self.
    """
    return self


  def __exit__(self, excType, excVal, excTb):
    """ Context Manager protocol method. Allows a ModelSchedulerService instance
    to be used in a "with" statement for automatic cleanup

    Returns: False so as not to suppress the exception, if any
    """
    self._close()
    return False


  def _close(self):
    """ Release the service's resources: restore the default handlers for the
    signals of interest and close both ends of the signal pipe.

    Safe to call more than once; descriptors that were already closed are
    skipped.
    """
    self._logger.info("Closing...")

    # Unregister from signal notifications
    for sig in self._signalsOfInterest:
      signal.signal(sig, signal.SIG_DFL)

    # The descriptors are reset to None once closed, so a repeated call must
    # not hand None to os.close()
    if self._signalPipeReadFD is not None:
      os.close(self._signalPipeReadFD)
      self._signalPipeReadFD = None
    if self._signalPipeWriteFD is not None:
      os.close(self._signalPipeWriteFD)
      self._signalPipeWriteFD = None


  def run(self):
    """ Run the SwapController in a daemon thread and block until a signal
    number arrives on the signal pipe; then request the SwapController to stop
    and wait for its thread to finish.

    Returns: True if service should be restarted (the signal was SIGHUP),
      False otherwise

    Raises: AssertionError if the SwapController thread is still alive after
      _SWAP_CONTROLLER_JOIN_TIMEOUT seconds
    """
    self._logger.info("Running: pid=%s", os.getpid())
    # Read from a duplicate of the pipe's read end; the file object owns the
    # duplicate and must be closed before returning, or the descriptor leaks
    quitPipeFileObj = os.fdopen(os.dup(self._signalPipeReadFD))
    try:
      swapControllerThread = threading.Thread(
          target=self._runSwapControllerThread,
          name="%s-%s" % (self._swapController.__class__.__name__,
                          id(self._swapController)))
      swapControllerThread.setDaemon(True)
      swapControllerThread.start()

      while True:
        try:
          signalnum = int(quitPipeFileObj.readline())
        except IOError as e:
          if e.errno != errno.EINTR:
            raise

          # System call was interrupted by signal - restart it
          continue
        else:
          break
      self._logger.info("Stopping service due to signal %s", signalnum)

      # Call swap controller requestStopTS method and then join the thread
      # running its run method.
      self._swapController.requestStopTS()
      swapControllerThread.join(_SWAP_CONTROLLER_JOIN_TIMEOUT)
      assert not swapControllerThread.isAlive(), (
          "Swap controller thread did not join in the allotted time "
          "(%g seconds)." % _SWAP_CONTROLLER_JOIN_TIMEOUT)

      return signalnum == signal.SIGHUP
    finally:
      quitPipeFileObj.close()


  @abortProgramOnAnyException(
    exitCode=_ABORT_PROGRAM_ON_THREAD_EXCEPTION_EXIT_CODE, logger=_getLogger())
  def _runSwapControllerThread(self):
    """ Thread target: runs the SwapController's run() method """
    self._swapController.run()


  def _handleSignal(self, signalnum, _frame):
    """ Handle system signal; write it to the pipe so that it may be processed
    by the main thread.

    signalnum: number of the received signal
    _frame: unused
    """
    try:
      os.write(self._signalPipeWriteFD, "%s\n" % (signalnum,))
    except EnvironmentError as e:
      # os.write() reports failures as OSError, which in Python 2 is not an
      # IOError; EnvironmentError is the common base class of both
      if e.errno not in [errno.EWOULDBLOCK, errno.EAGAIN]:
        raise
      # Otherwise drop the signal: we were overwhelmed by signals to the point
      # of running out of pipe buffer (this shouldn't happen)
    def testModelPreemptionAndStop(self, slotAgentClassMock,
                                   modelSwapperInterfaceClassMock):
        # Test preemption of slots in SwapController: feed input for more
        # models than there are slots, wait for all of it to be consumed, then
        # stop the controller and check the per-slot-agent call counts.

        # Configure ModelSwapperInterface instance mock
        swapperMock = modelSwapperInterfaceClassMock.return_value
        notificationConsumer = DummyConsumer()
        swapperMock.consumeModelSchedulerNotifications.return_value = (
            notificationConsumer)
        # A model has pending input for as long as its request-batch queue is
        # non-empty (modelInputDescriptors is bound below, before the first
        # call)
        swapperMock.modelInputPending.side_effect = (
            lambda modelID: not modelInputDescriptors[
                modelID].requestBatchesQ.empty())

        # Configure SlotAgent class mock to create dummy slot agent instances and
        # add them to our list so that we can introspect them later
        concurrency = 3
        multiplier = 3
        numModels = concurrency * multiplier

        # Generate model IDs
        modelIDs = [hex(i) for i in xrange(numModels)]

        requestBatches = (
            "firstBatch",
            "secondBatch",
        )

        modelInputDescriptors = dict(
            (modelID,
             _ModelInputDescriptor(requestBatches=requestBatches,
                                   consumeSizes=[1, 1]))
            for modelID in modelIDs)

        slotAgents = []
        # list.append() returns None, so the `or` yields the agent just
        # appended
        slotAgentClassMock.side_effect = (lambda slotID: slotAgents.append(
            _DummySlotAgent(slotID, modelInputDescriptors.__getitem__)) or
                                          slotAgents[-1])

        # Run SwapController in a thread
        sc = SwapController(concurrency=concurrency)
        self.assertEqual(len(sc._slotAgents), concurrency)
        self.assertEqual(len(slotAgents), concurrency)

        def runSwapControllerThread(sc, runResultQ):
            # Publish run()'s return value, or the exception it raised.
            # Without the except-clause a failure in run() would leave the
            # queue empty and the test would fail with an unrelated
            # Queue.Empty from get_nowait() below.
            try:
                g_logger.info("Swap Controller run-thread is running")
                r = sc.run()
                runResultQ.put(r)
            except Exception as e:
                runResultQ.put(e)
                # Re-raise so that the traceback is still reported for this
                # thread
                raise
            finally:
                g_logger.info("Swap Controller run-thread is exiting")

        runResultQ = Queue.Queue()
        scThread = threading.Thread(target=runSwapControllerThread,
                                    name="runSwapControllerThread",
                                    args=(sc, runResultQ))
        scThread.setDaemon(True)
        scThread.start()

        # Prod SwapController to process all models
        for modelID in modelIDs:
            notificationConsumer.q.put(_createModelInputNotification(modelID))

        # Wait for model input queues to drain
        for modelID, desc in modelInputDescriptors.iteritems():
            g_logger.info("Waiting for model=%s inputQ to be empty", modelID)
            desc.requestBatchesQ.waitUntilEmpty(timeout=5)
            self.assertTrue(desc.requestBatchesQ.empty())
            g_logger.info("model=%s inputQ is empty", modelID)

        # Verify that all SlotAgents are occupied
        for sa in slotAgents:
            self.assertIsNotNone(sa.modelID)

        # Now stop SwapController
        g_logger.info("Requesting SwapController to stop")
        sc.requestStopTS()

        # So that the notification reader thread detects stop request and
        # exits. Name the model explicitly instead of relying on whichever
        # value the loop variable above happened to end on.
        notificationConsumer.q.put(
            _createModelInputNotification(modelIDs[-1]))

        g_logger.info("Waiting for SwapController run-thread to stop")
        scThread.join(timeout=5)
        self.assertFalse(scThread.isAlive())
        g_logger.info("SwapController run-thread stopped")

        # Verify that SwapController.run() returned without error; if it
        # raised, the queued item is the exception and this assertion reports
        # it
        self.assertIsNone(runResultQ.get_nowait())

        # Verify that all slot agents were closed
        for sa in slotAgents:
            self.assertEqual(sa.numCloseCalls, 1)

        # Verify that input data of all models was drained
        for modelID, desc in modelInputDescriptors.iteritems():
            g_logger.info("Verify empty input for model=%s", modelID)
            self.assertEqual(desc.requestBatchesQ.qsize(), 0)
            self.assertEqual(desc.requestBatchesProcessedQ.qsize(),
                             len(requestBatches))

        # Verify that all slot agents did work and were closed
        for sa in slotAgents:
            g_logger.info(
                "sa=%s: closeCalls=%s; startCalls=%s; stopCalls=%s; releaseCalls=%s",
                sa.slotID, sa.numCloseCalls, sa.numStartModelCalls,
                sa.numStopModelCalls, sa.numReleaseSlotCalls)

            self.assertEqual(sa.numCloseCalls, 1)

            self.assertEqual(sa.numStartModelCalls,
                             multiplier * len(requestBatches))
            self.assertEqual(sa.numStopModelCalls,
                             multiplier * len(requestBatches))
            self.assertEqual(sa.numReleaseSlotCalls,
                             multiplier * len(requestBatches))
    def testSimpleSingleSuccessfulModelInputAndStop(
            self, slotAgentClassMock, modelSwapperInterfaceClassMock):
        # Scenario: start SwapController on its own thread, deliver input for
        # a single model, wait for it to be consumed, then stop the controller.

        # The mocked ModelSwapperInterface instance hands out our dummy
        # consumer and always reports that no model input is pending
        swapperInstance = modelSwapperInterfaceClassMock.return_value
        inputNotifications = DummyConsumer()
        swapperInstance.consumeModelSchedulerNotifications.return_value = (
            inputNotifications)
        swapperInstance.modelInputPending.return_value = False

        # One model with two request batches, consumed in a single step
        modelID = "abcd"
        requestBatches = ("firstBatch", "secondBatch")
        modelInputDesc = _ModelInputDescriptor(requestBatches=requestBatches,
                                               consumeSizes=[2])
        modelInputDescriptors = {modelID: modelInputDesc}

        # The SlotAgent class mock builds dummy slot agents and records each
        # one so that the test can inspect them afterwards
        slotAgents = []

        def makeSlotAgent(slotID):
            agent = _DummySlotAgent(slotID,
                                    modelInputDescriptors.__getitem__)
            slotAgents.append(agent)
            return agent

        slotAgentClassMock.side_effect = makeSlotAgent

        # Run SwapController in a thread
        concurrency = 3
        controller = SwapController(concurrency=concurrency)
        self.assertEqual(len(controller._slotAgents), concurrency)
        self.assertEqual(len(slotAgents), concurrency)

        def runAndReport(target, outcomeQ):
            # Thread body: publish run()'s return value; on any error publish
            # the exception object and let it propagate
            try:
                g_logger.info("Swap Controller run-thread is running")
                outcomeQ.put(target.run())
            except:
                outcomeQ.put(sys.exc_info()[1])
                raise
            finally:
                g_logger.info("Swap Controller run-thread is exiting")

        runOutcomeQ = Queue.Queue()
        runner = threading.Thread(target=runAndReport,
                                  name="runSwapControllerThread",
                                  args=(controller, runOutcomeQ))
        runner.setDaemon(True)
        runner.start()

        # Prod SwapController to process model input
        inputNotifications.q.put(_createModelInputNotification(modelID))

        # Wait for the model input queue to drain
        g_logger.info("Waiting for model inputQ to be empty")
        pendingBatchesQ = modelInputDesc.requestBatchesQ
        pendingBatchesQ.waitUntilEmpty(timeout=5)
        self.assertTrue(modelInputDesc.requestBatchesQ.empty())
        g_logger.info("model inputQ is empty")

        # Now stop SwapController
        g_logger.info("Requesting SwapController to stop")
        controller.requestStopTS()

        # So that the notification reader thread detects stop request and exits:
        inputNotifications.q.put(_createModelInputNotification(modelID))

        g_logger.info("Waiting for SwapController run-thread to stop")
        runner.join(timeout=5)
        self.assertFalse(runner.isAlive())
        g_logger.info("SwapController run-thread stopped")

        # Verify that SwapController.run() returned without error
        self.assertIsNone(runOutcomeQ.get_nowait())

        # Verify that all slot agents were closed
        for agent in slotAgents:
            self.assertEqual(agent.numCloseCalls, 1)

        # Verify that a single slot agent handled all the input data
        targetSA = None
        for agent in slotAgents:
            if not agent.numStartModelCalls > 0:
                # Idle agent: must never have started or stopped a model
                self.assertEqual(agent.numStartModelCalls, 0)
                self.assertEqual(agent.numStopModelCalls, 0)
                continue

            # Busy agent: there may be only one of these
            self.assertIsNone(targetSA)
            targetSA = agent
            self.assertEqual(agent.numStartModelCalls, 1)
            self.assertEqual(agent.numStopModelCalls, 1)
            self.assertEqual(
                modelInputDesc.requestBatchesProcessedQ.qsize(),
                len(requestBatches))

        self.assertIsNotNone(targetSA)
Exemple #14
0
class ModelSchedulerService(object):
    """ Runs a SwapController in a background thread until one of the signals
    of interest (SIGHUP, SIGTERM, SIGINT) is received.

    The signal handler forwards each signal number through a pipe to run(),
    which then asks the SwapController to stop. Usable as a context manager so
    that the signal handlers and the pipe are released on exit.
    """

    # Error code that will be passed to os._exit when the thread that runs
    # SwapController experiences an unhandled exception
    _ABORT_PROGRAM_ON_THREAD_EXCEPTION_EXIT_CODE = 1

    def __init__(self, concurrency):
        """
        concurrency: forwarded to SwapController as its `concurrency` argument
        """
        self._logger = _getLogger()
        self._concurrency = concurrency

        self._signalPipeReadFD, self._signalPipeWriteFD = os.pipe()
        # Make the write end non-blocking to prevent accidental deadlocking of the
        # signal dispatcher
        fcntl.fcntl(
            self._signalPipeWriteFD, fcntl.F_SETFL,
            fcntl.fcntl(self._signalPipeWriteFD, fcntl.F_GETFL)
            | os.O_NONBLOCK)

        # Register for signals of interest
        self._signalsOfInterest = [
            signal.SIGHUP, signal.SIGTERM, signal.SIGINT
        ]
        for sig in self._signalsOfInterest:
            signal.signal(sig, self._handleSignal)

        # Create the slot agents and swap controller.
        self._swapController = SwapController(concurrency=concurrency)

    def __enter__(self):
        """ Context Manager protocol method. Allows a ModelSchedulerService
        instance to be used in a "with" statement for automatic clean-up

        Parameters:
        ------------------------------------------------------------------------
        retval:     self.
        """
        return self

    def __exit__(self, excType, excVal, excTb):
        """ Context Manager protocol method. Allows a ModelSchedulerService
        instance to be used in a "with" statement for automatic cleanup

        Returns: False so as not to suppress the exception, if any
        """
        self._close()
        return False

    def _close(self):
        """ Release the service's resources: restore the default handlers for
        the signals of interest and close both ends of the signal pipe.

        Safe to call more than once; descriptors that were already closed are
        skipped.
        """
        self._logger.info("Closing...")

        # Unregister from signal notifications
        for sig in self._signalsOfInterest:
            signal.signal(sig, signal.SIG_DFL)

        # The descriptors are reset to None once closed, so a repeated call
        # must not hand None to os.close()
        if self._signalPipeReadFD is not None:
            os.close(self._signalPipeReadFD)
            self._signalPipeReadFD = None
        if self._signalPipeWriteFD is not None:
            os.close(self._signalPipeWriteFD)
            self._signalPipeWriteFD = None

    def run(self):
        """ Run the SwapController in a daemon thread and block until a signal
        number arrives on the signal pipe; then request the SwapController to
        stop and wait for its thread to finish.

        Returns: True if service should be restarted (the signal was SIGHUP),
          False otherwise

        Raises: AssertionError if the SwapController thread is still alive
          after _SWAP_CONTROLLER_JOIN_TIMEOUT seconds
        """
        self._logger.info("Running: pid=%s", os.getpid())
        # Read from a duplicate of the pipe's read end; the file object owns
        # the duplicate and must be closed before returning, or the descriptor
        # leaks
        quitPipeFileObj = os.fdopen(os.dup(self._signalPipeReadFD))
        try:
            swapControllerThread = threading.Thread(
                target=self._runSwapControllerThread,
                name="%s-%s" % (self._swapController.__class__.__name__,
                                id(self._swapController)))
            swapControllerThread.setDaemon(True)
            swapControllerThread.start()

            while True:
                try:
                    signalnum = int(quitPipeFileObj.readline())
                except IOError as e:
                    if e.errno != errno.EINTR:
                        raise

                    # System call was interrupted by signal - restart it
                    continue
                else:
                    break
            self._logger.info("Stopping service due to signal %s", signalnum)

            # Call swap controller requestStopTS method and then join the
            # thread running its run method.
            self._swapController.requestStopTS()
            swapControllerThread.join(_SWAP_CONTROLLER_JOIN_TIMEOUT)
            assert not swapControllerThread.isAlive(), (
                "Swap controller thread did not join in the allotted time "
                "(%g seconds)." % _SWAP_CONTROLLER_JOIN_TIMEOUT)

            return signalnum == signal.SIGHUP
        finally:
            quitPipeFileObj.close()

    @abortProgramOnAnyException(
        exitCode=_ABORT_PROGRAM_ON_THREAD_EXCEPTION_EXIT_CODE,
        logger=_getLogger())
    def _runSwapControllerThread(self):
        """ Thread target: runs the SwapController's run() method """
        self._swapController.run()

    def _handleSignal(self, signalnum, _frame):
        """ Handle system signal; write it to the pipe so that it may be
        processed by the main thread.

        signalnum: number of the received signal
        _frame: unused
        """
        try:
            os.write(self._signalPipeWriteFD, "%s\n" % (signalnum, ))
        except EnvironmentError as e:
            # os.write() reports failures as OSError, which in Python 2 is not
            # an IOError; EnvironmentError is the common base class of both
            if e.errno not in [errno.EWOULDBLOCK, errno.EAGAIN]:
                raise
            # Otherwise drop the signal: we were overwhelmed by signals to the
            # point of running out of pipe buffer (this shouldn't happen)