Esempio n. 1
0
    def __init__(self, modelID):
        """Set up the per-model runner state.

        Reads the "model_runner"/"target_requests_per_checkpoint" and
        "debugging"/"profiling" settings from ModelSwapperConfig.

        :param modelID: model ID; string
        """
        self._logger = _getLogger()

        self._modelID = modelID

        self._swapperAPI = ModelSwapperInterface()

        self._archiver = _ModelArchiver(self._modelID)

        # "deleteModel" command handler sets this flag to force our processing
        # loop to terminate
        self._done = False

        modelSwapperConfig = ModelSwapperConfig()

        self._targetMaxRequestsPerCheckpoint = modelSwapperConfig.getint(
            "model_runner", "target_requests_per_checkpoint")

        # Profiling is on when requested by configuration or when the logger
        # is enabled for DEBUG
        self._profiling = (
            modelSwapperConfig.getboolean("debugging", "profiling")
            or self._logger.isEnabledFor(logging.DEBUG))

        # Always defined, so that reading this attribute never depends on
        # whether profiling happened to be enabled at construction time
        self._modelLoadSec = 0

        if self._profiling:
            self._logger.info("Profiling is turned on")
    def run(self):
        """Poll for autostacks with pending metric data and process them.

        Loops forever. Each pass fetches the autostack metrics that are pending
        data collection; when there are none it sleeps for
        _NOTHING_READY_SLEEP_TIME_SEC, otherwise it builds one
        AutostackMetricRequest per metric (with sequential refIDs) and passes
        the batch to _processAutostackMetricRequests.
        """
        with ModelSwapperInterface() as modelSwapper:
            engine = repository.engineFactory()
            while True:
                with engine.connect() as conn:
                    fetchPending = repository.retryOnTransientErrors(
                        repository.getAutostackMetricsPendingDataCollection)
                    pendingStacks = fetchPending(conn)

                if pendingStacks:
                    # Flatten (autostack, metrics) pairs into one request list;
                    # refID is the request's position in that list
                    requests = []
                    for autostack, metrics in pendingStacks:
                        firstRefID = len(requests)
                        for offset, metric in enumerate(metrics):
                            requests.append(
                                AutostackMetricRequest(
                                    refID=firstRefID + offset,
                                    autostack=autostack,
                                    metric=metric))

                    # Collect, aggregate, and stream metric data
                    self._processAutostackMetricRequests(
                        engine, requests, modelSwapper)
                else:
                    # Nothing is ready yet; back off before polling again
                    time.sleep(self._NOTHING_READY_SLEEP_TIME_SEC)
    def __init__(self, concurrency):
        """Initialize the controller state.

        The notification reader thread object is created here as a daemon
        thread, but it is not started by this method.

        :param concurrency: allowed number of model slots
        """
        self._logger = _getLogger()

        # Profiling is on when configured, or when the logger is enabled for
        # DEBUG
        profilingConfigured = ModelSwapperConfig().getboolean(
            "debugging", "profiling")
        self._profiling = (profilingConfigured
                           or self._logger.isEnabledFor(logging.DEBUG))

        # Allowed number of model slots
        self._concurrency = concurrency

        # Set by the input-reader thread target function once it starts
        # running, so our event loop knows that things are off to a good start
        self._notificationReaderStartedEvent = threading.Event()

        # Mutex used to guarantee that no further model input notifications
        # will be added to the main event queue once
        # self._stopNotificationReader is set
        self._notificationMutex = threading.Lock()

        # _runNotificationReaderThread will not process any more notifications
        # once it detects that this flag is true
        self._stopNotificationReader = False

        # The event loop will exit some time after an event handler sets this
        # flag to True
        self._eventLoopStopPending = False

        # (non-thread-safe) The tuple of all slot agents; must be non-empty
        slotAgents = [
            SlotAgent(slotID=slotIndex) for slotIndex in xrange(concurrency)
        ]
        self._slotAgents = tuple(slotAgents)
        assert self._slotAgents

        # Thread-safe event queue for SwapController
        self._eventQ = Queue.Queue()

        # Main event loop's ModelSwapperInterface instance. MUST NOT be used
        # from other threads.
        # NOTE(review): the original comment was cut off mid-sentence;
        # presumably ModelSwapperInterface is not thread-safe -- confirm
        self._mainSwapper = ModelSwapperInterface()

        # A (non-thread-safe) FIFO of models that are waiting to be scheduled
        # for running; there is incoming data for them that needs processing
        self._waitingModelsFIFO = []

        # A (non-thread-safe) map of modelIDs to _RunningModelInfo instances
        self._runningModelsMap = {}

        # A (non-thread-safe) list of free slot indexes into the
        # self._slotAgents tuple
        self._freeSlots = list(xrange(len(self._slotAgents)))

        # (non-thread-safe) Indexes of SlotAgents pending preemption
        self._pendingPreemptSlotsSet = set()

        readerThread = threading.Thread(
            target=self._runNotificationReaderThread,
            name="%s-input-reader-%s" % (self.__class__.__name__, id(self)))
        # Allow process to exit even if thread is still running
        readerThread.setDaemon(True)
        self._notificationReaderThread = readerThread
Esempio n. 4
0
    def testStartMultipleModelRunnersAndStopThem(self):
        # Starts several ModelRunners and stops them gracefully to confirm
        # that they can all stop without conflicting with each other.
        #
        # Background: if ModelRunnerProxy doesn't configure subprocess.Popen
        # with `close_fds=True`, graceful shutdown will fail, because the stdin
        # of some child processes will be cloned into those started after them;
        # closing stdin of an earlier ModelRunner child process then won't
        # terminate that process (other clones of the file descriptor keep it
        # from fully closing).
        #
        # TODO send commands to models and verify output

        modelIDs = tuple("abcdef" + str(index) for index in xrange(5))

        # Resolve the input queue name of every model up front
        with ModelSwapperInterface() as swapper:
            inputQueueNames = tuple(
                swapper._getModelInputQName(modelID=modelID)
                for modelID in modelIDs)

        with amqp_test_utils.managedQueueDeleter(inputQueueNames):
            with MessageBusConnector() as bus:
                for queueName in inputQueueNames:
                    bus.createMessageQueue(queueName, durable=True)

            # Start all runners first, and only then stop them one by one
            proxies = [
                slot_agent.ModelRunnerProxy(modelID=modelID,
                                            onTermination=lambda: None,
                                            logger=_LOGGER)
                for modelID in modelIDs
            ]

            exitCodes = [proxy.stopGracefully() for proxy in proxies]

        self.assertEqual(exitCodes, [0] * len(proxies))
Esempio n. 5
0
def runServer():
  """Consume messages from the metric listener queue and process them in
  batches; loops until interrupted by an exception.

  Loads the current custom metrics into the global gCustomMetrics cache, sets
  the global gProfiling flag, creates the message queue if it is missing, then
  polls for messages. A batch is handed to _handleBatch when the queue runs
  empty or MAX_MESSAGES_PER_BATCH messages have accumulated. The batch is
  acked even when _handleBatch raises, so that one bad record cannot block
  the queue forever.

  Requires the APPLICATION_CONFIG_PATH environment variable.
  """
  global gCustomMetrics
  global gProfiling

  # Get the current list of custom metrics
  appConfig = Config("application.conf",
                     os.environ["APPLICATION_CONFIG_PATH"])

  engine = repository.engineFactory(appConfig)
  now = datetime.datetime.utcnow()

  with engine.connect() as conn:
    gCustomMetrics = {
      m.name: [m, now] for m in repository.getCustomMetrics(conn)}

  queueName = appConfig.get("metric_listener", "queue_name")

  gProfiling = (appConfig.getboolean("debugging", "profiling") or
                LOGGER.isEnabledFor(logging.DEBUG))
  del appConfig

  metricStreamer = MetricStreamer()

  # Manage the model swapper as a context so that it is released when the
  # loop is torn down by an exception (e.g. CTRL+C)
  with ModelSwapperInterface() as modelSwapper, MessageBusConnector() as bus:
    if not bus.isMessageQeueuePresent(queueName):
      bus.createMessageQueue(mqName=queueName, durable=True)
    LOGGER.info("Waiting for messages. To exit, press CTRL+C")
    with bus.consume(queueName) as consumer:
      messages = []
      messageRxTimes = []
      while True:
        message = consumer.pollOneMessage()
        if message is not None:
          messages.append(message)
          if gProfiling:
            messageRxTimes.append(time.time())

        if message is None or len(messages) >= MAX_MESSAGES_PER_BATCH:
          if messages:
            # Process the batch
            try:
              _handleBatch(engine,
                           messages,
                           messageRxTimes,
                           metricStreamer,
                           modelSwapper)
            except Exception:  # pylint: disable=W0703
              LOGGER.exception("Unknown failure in processing messages.")
              # Make sure that we ack messages when there is an unexpected error
              # to avoid getting hung forever on one bad record.

            # Ack all the messages in the batch via the last one
            messages[-1].ack(multiple=True)
            # Clear the message buffer
            messages = []
            messageRxTimes = []
          else:
            # Queue is empty, wait before retrying
            time.sleep(POLL_DELAY_SEC)
 def runConsumerThread(destList, numExpected):
     """Consume result batches into destList until it holds numExpected items.

     Each batch is appended to destList, acked and logged, in that order.
     """
     with ModelSwapperInterface() as swapper, \
             swapper.consumeResults() as consumer:
         for resultBatch in consumer:
             destList.append(resultBatch)
             resultBatch.ack()
             _LOGGER.info("Got result batch=%r", resultBatch)
             if len(destList) != numExpected:
                 continue
             break
    def _runNotificationReaderThread(self):
        """ Read model data notifications and pass them to the event loop

        Signals self._notificationReaderStartedEvent, reports every model that
        already has pending input via self._newInputNotifyTS, then consumes
        model scheduler notifications until self._stopNotificationReader is
        observed to be set.

        :raises Exception: if the notification consumer loop ends on its own,
          i.e. without a stop request
        """
        self._logger.info("Notification Reader thread is running")

        # Let the main event loop know that this thread started successfully
        self._notificationReaderStartedEvent.set()

        with ModelSwapperInterface() as swapperAPI:
            # First, make sure our notification message queue exists, so we don't
            # miss any new notifications while we're checking for models with pending
            # input
            self._logger.info("SWAPPER_API: %r", swapperAPI)
            swapperAPI.initSchedulerNotification()

            # At start, notify main event loop of each model whose input is non-empty
            self._logger.info("Checking for models with pending input")

            # Pre-set the counter so that the log line below reports 0 when no
            # model has pending input (the loop body then never assigns it)
            i = 0
            for i, modelID in enumerate(swapperAPI.getModelsWithInputPending(),
                                        1):
                self._logger.debug("Input pending for model=%s", modelID)
                self._newInputNotifyTS(modelID=modelID)

            self._logger.info("%s model(s) had pending input", i)

            # Service the SwapController's input queue until stop is requested
            with swapperAPI.consumeModelSchedulerNotifications() as consumer:
                numHandledNotifications = 0
                try:
                    for notification in consumer:

                        # The stop-flag check and the forwarding happen under
                        # the same mutex, so nothing is forwarded once the stop
                        # flag has been seen
                        with self._notificationMutex:

                            if self._stopNotificationReader:
                                self._logger.info(
                                    "Notification reader exiting due to stop request"
                                )
                                break

                            self._newInputNotifyTS(modelID=notification.value)

                            # Ack only after the notification was handed over
                            notification.ack()

                            numHandledNotifications += 1
                    else:
                        # Reached only when the consumer stops yielding without
                        # the loop having been left via `break`
                        raise Exception(
                            "Unexpected termination of consumer loop in "
                            "Notification Reader")
                finally:
                    self._logger.info(
                        "Control is leaving notification reader loop after processing %s "
                        "notifications", numHandledNotifications)
def createHTMModel(modelId, params):
    """ Dispatch command to create HTM model

    Opens a ModelSwapperInterface for the duration of the call and issues a
    defineModel command tagged with a freshly generated command ID.

    :param modelId: unique identifier of the metric row

    :param params: model params for creating a scalar model per ModelSwapper
      interface
    """
    with ModelSwapperInterface() as swapper:
        commandID = createGuid()
        swapper.defineModel(modelID=modelId, args=params, commandID=commandID)
Esempio n. 9
0
    def testStartModelRunnerAndStopIt(self):
        # Simple test: start one ModelRunner, then stop it gracefully and
        # expect a zero return code
        # TODO send command to model and verify output

        modelID = "abcdef"

        with ModelSwapperInterface() as swapper:
            inputQueueName = swapper._getModelInputQName(modelID=modelID)

        # The model input queue is removed again when this context exits
        with amqp_test_utils.managedQueueDeleter(inputQueueName):
            with MessageBusConnector() as bus:
                bus.createMessageQueue(inputQueueName, durable=True)

            proxy = slot_agent.ModelRunnerProxy(
                modelID=modelID,
                onTermination=lambda: None,
                logger=_LOGGER)

            self.assertEqual(proxy.stopGracefully(), 0)
    def testModelSwapper(self):
        """Simple end-to-end test of the model swapper system.

        Starts the model scheduler subprocess, defines the same model twice,
        submits two input rows, deletes the model twice, and checks that
        exactly four result batches arrive in this order: defineModel,
        defineModel, inference results, deleteModel. Finally terminates the
        scheduler subprocess and expects it to exit with return code 0.
        """

        modelSchedulerSubprocess = self._startModelSchedulerSubprocess()
        # Safety net: kill the subprocess if it is still running when the test
        # ends (returncode is None while no exit status has been recorded)
        self.addCleanup(lambda: modelSchedulerSubprocess.kill() if
                        modelSchedulerSubprocess.returncode is None else None)

        modelID = "foobar"
        resultBatches = []

        with ModelSwapperInterface() as swapperAPI:
            possibleModels = getScalarMetricWithTimeOfDayParams(metricData=[0],
                                                                minVal=0,
                                                                maxVal=1000)

            # Submit requests including a model creation command and two data rows.
            args = possibleModels[0]
            args["inputRecordSchema"] = (
                FieldMetaInfo("c0", FieldMetaType.datetime,
                              FieldMetaSpecial.timestamp),
                FieldMetaInfo("c1", FieldMetaType.float,
                              FieldMetaSpecial.none),
            )

            # Define the model
            _LOGGER.info("Defining the model")
            swapperAPI.defineModel(modelID=modelID,
                                   args=args,
                                   commandID="defineModelCmd1")

            # Attempt to define the same model again
            _LOGGER.info("Defining the model again")
            swapperAPI.defineModel(modelID=modelID,
                                   args=args,
                                   commandID="defineModelCmd2")

            # Send input rows to the model
            inputRows = [
                ModelInputRow(
                    rowID="rowfoo",
                    data=[datetime.datetime(2013, 5, 23, 8, 13, 00), 5.3]),
                ModelInputRow(
                    rowID="rowbar",
                    data=[datetime.datetime(2013, 5, 23, 8, 13, 15), 2.4]),
            ]
            _LOGGER.info("Submitting batch of %d input rows...",
                         len(inputRows))
            swapperAPI.submitRequests(modelID=modelID, requests=inputRows)

            _LOGGER.info("These models have pending input: %s",
                         swapperAPI.getModelsWithInputPending())

            # Retrieve all results.
            # NOTE: We collect results via background thread to avoid
            # deadlocking the test runner in the event consuming blocks unexpectedly
            _LOGGER.info("Reading all batches of results...")

            # Two defineModel results plus one batch for the two input rows
            numBatchesExpected = 3
            resultBatches.extend(
                self._consumeResults(numBatchesExpected, timeout=20))

            self.assertEqual(len(resultBatches), numBatchesExpected)

            with MessageBusConnector() as bus:
                # The results message queue should be empty now
                self.assertTrue(bus.isEmpty(swapperAPI._resultsQueueName))

            # Delete the model
            _LOGGER.info("Deleting the model")
            swapperAPI.deleteModel(modelID=modelID,
                                   commandID="deleteModelCmd1")

            _LOGGER.info("Waiting for model deletion result")
            resultBatches.extend(self._consumeResults(1, timeout=20))

            self.assertEqual(len(resultBatches), 4)

            with MessageBusConnector() as bus:
                # The results message queue should be empty now
                self.assertTrue(bus.isEmpty(swapperAPI._resultsQueueName))

                # The model input queue should be deleted now
                self.assertFalse(
                    bus.isMessageQeueuePresent(
                        swapperAPI._getModelInputQName(modelID=modelID)))

            # Try deleting the model again, to make sure there are no exceptions
            # NOTE(review): this reuses commandID "deleteModelCmd1" -- confirm
            # that reusing the command ID is intentional
            _LOGGER.info("Attempting to delete the model again")
            swapperAPI.deleteModel(modelID=modelID,
                                   commandID="deleteModelCmd1")

        # Verify results

        # First result batch should be the first defineModel result
        batch = resultBatches[0]
        self.assertEqual(batch.modelID, modelID)
        self.assertEqual(len(batch.objects), 1)

        result = batch.objects[0]
        self.assertIsInstance(result, ModelCommandResult)
        self.assertEqual(result.method, "defineModel")
        self.assertEqual(result.status, htmengineerrno.SUCCESS)
        self.assertEqual(result.commandID, "defineModelCmd1")

        # The second result batch should be for the second defineModel result
        # for the same model
        batch = resultBatches[1]
        self.assertEqual(batch.modelID, modelID)
        self.assertEqual(len(batch.objects), 1)

        result = batch.objects[0]
        self.assertIsInstance(result, ModelCommandResult)
        self.assertEqual(result.method, "defineModel")
        self.assertEqual(result.status, htmengineerrno.SUCCESS)
        self.assertEqual(result.commandID, "defineModelCmd2")

        # The third batch should be for the two input rows
        batch = resultBatches[2]
        self.assertEqual(batch.modelID, modelID)
        self.assertEqual(len(batch.objects), len(inputRows))

        # Results are expected in the same order as the submitted rows
        for inputRow, result in zip(inputRows, batch.objects):
            self.assertIsInstance(result, ModelInferenceResult)
            self.assertEqual(result.status, htmengineerrno.SUCCESS)
            self.assertEqual(result.rowID, inputRow.rowID)
            self.assertIsInstance(result.anomalyScore, float)

        # The fourth batch should be for the "deleteModel"
        batch = resultBatches[3]
        self.assertEqual(batch.modelID, modelID)
        self.assertEqual(len(batch.objects), 1)

        result = batch.objects[0]
        self.assertIsInstance(result, ModelCommandResult)
        self.assertEqual(result.method, "deleteModel")
        self.assertEqual(result.status, htmengineerrno.SUCCESS)
        self.assertEqual(result.commandID, "deleteModelCmd1")

        # Signal Model Scheduler Service subprocess to shut down and wait for it
        waitResult = dict()

        # The wait happens in a helper thread so that the test can bound it
        # with a join timeout instead of blocking indefinitely
        def runWaiterThread():
            try:
                waitResult["returnCode"] = modelSchedulerSubprocess.wait()
            except:
                _LOGGER.exception(
                    "Waiting for modelSchedulerSubprocess failed")
                waitResult["exceptionInfo"] = traceback.format_exc()
                raise
            return

        modelSchedulerSubprocess.terminate()
        waiterThread = threading.Thread(target=runWaiterThread)
        waiterThread.setDaemon(True)
        waiterThread.start()
        waiterThread.join(timeout=30)
        # A thread that is still alive here means the subprocess did not exit
        # within the timeout
        self.assertFalse(waiterThread.isAlive())

        self.assertEqual(waitResult["returnCode"], 0, msg=repr(waitResult))
Esempio n. 11
0
    def testCollectAndPublishMetrics(self):
        # Start Metric Collector, create a set of Metrics, wait for it to
        # collect some metrics and to publish them to the metric_exchange, then
        # validate attributes of the published metrics.
        #
        # TODO Add more metric types
        # TODO Deeper validation of the published metrics

        # Start our own instances of the model scheduler and metric collector
        with self._startModelSchedulerSubprocess() as modelSchedulerSubprocess, \
            self._startMetricCollectorSubprocess() as metricCollectorSubprocess:
            # Create some models for metric collector to harvest
            awsRegion = "us-west-2"
            awsNamespace = "AWS/EC2"
            instanceResourceType = ResourceTypeNames.EC2_INSTANCE

            engine = repository.engineFactory()
            adapter = createCloudwatchDatasourceAdapter()

            ec2Instances = adapter.describeResources(
                region=awsRegion, resourceType=instanceResourceType)

            self.assertGreater(len(ec2Instances), 0)

            # Monitor at most 10 instances, and never more than the quota
            modelLimit = min(10, Quota.getInstanceQuota())
            ec2Instances = ec2Instances[:modelLimit]

            _LOGGER.info("Starting %d models", len(ec2Instances))
            # Checked again, since the slice above may have emptied the list
            self.assertGreater(len(ec2Instances), 0)

            monitoredMetricIDs = []
            for instance in ec2Instances:
                metricId = adapter.monitorMetric({
                    "datasource": "cloudwatch",
                    "metricSpec": {
                        "region": awsRegion,
                        "namespace": awsNamespace,
                        "metric": "CPUUtilization",
                        "dimensions": {
                            "InstanceId": instance["resID"]
                        }
                    }
                })

                with engine.connect() as conn:
                    repository.setMetricStatus(conn, metricId,
                                               MetricStatus.ACTIVE)

                monitoredMetricIDs.append(metricId)

            _LOGGER.info("Waiting for results from models...")

            receivedIDs = set()
            expectedIDs = set(monitoredMetricIDs)

            # Timer callback that keeps the test from deadlocking
            def onTimeout(resultsQueueName):
                _LOGGER.error(
                    "Timed out waiting to get results from models; numResults=%d; "
                    "expected=%d", len(receivedIDs), len(expectedIDs))

                # HACK delete model swapper results queue to abort the consumer
                try:
                    with MessageBusConnector() as bus:
                        bus.deleteMessageQueue(resultsQueueName)
                except Exception:
                    _LOGGER.exception("Failed to delete results mq=%s",
                                      resultsQueueName)
                    raise

            with ModelSwapperInterface() as modelSwapper, \
                    modelSwapper.consumeResults() as consumer:
                watchdog = threading.Timer(
                    120, onTimeout, args=[modelSwapper._resultsQueueName])
                watchdog.start()
                try:
                    for batch in consumer:
                        receivedIDs.add(batch.modelID)
                        batch.ack()
                        if receivedIDs == expectedIDs:
                            break
                    else:
                        # The consumer ended before every model reported
                        self.fail(
                            "Expected %d results, but got only %d: %s" % (
                                len(expectedIDs),
                                len(receivedIDs),
                                receivedIDs,
                            ))
                    _LOGGER.info("Got %d results from models",
                                 len(receivedIDs))
                finally:
                    watchdog.cancel()

            # Terminate metric_collector subprocess gracefully to avoid too much
            # error logging junk on the terminal
            metricCollectorSubprocess.send_signal(signal.SIGINT)

            # Likewise, terminate the model scheduler subprocess gracefully
            modelSchedulerSubprocess.send_signal(signal.SIGINT)
Esempio n. 12
0
  def run(self):
    """ Collect metric data and status for active metrics

    Loops forever. Each cycle selects the candidate metrics, collects their
    data via a worker process pool, and processes/dispatches the results on a
    separate thread fed through resultsQueue. The process pool is terminated
    and joined when the loop is left (i.e. by an exception).
    """
    # NOTE: the process pool must be created BEFORE this main (parent) process
    # creates any global or class-level shared resources (e.g., boto
    # connection) that would have undesirable consequences when
    # replicated into and used by forked child processes (e.g., the same MySQL
    # connection socket file descriptor used by multiple processes). And we
    # can't take advantage of the process Pool's maxtasksperchild feature
    # either (for the same reason)
    self._log.info("Starting grok Metric Collector")
    resultsQueue = multiprocessing.Manager().JoinableQueue()

    # One-way pipe: the dispatch thread sends its stats through sendPipe and
    # this loop reads them from recvPipe
    recvPipe, sendPipe = multiprocessing.Pipe(False)

    processPool = multiprocessing.Pool(
      processes=self._WORKER_PROCESS_POOL_SIZE,
      maxtasksperchild=None)

    try:
      with ModelSwapperInterface() as modelSwapper:
        engine = repository.engineFactory()
        while True:
          startTime = time.time()

          if startTime > self._nextCacheGarbageCollectionTime:
            # TODO: unit-test
            self._garbageCollectInfoCache()

          # Determine which metrics are due for an update
          metricsToUpdate = self._getCandidateMetrics(engine)

          filterDuration = time.time() - startTime

          if not metricsToUpdate:
            time.sleep(self._NO_PENDING_METRICS_SLEEP_SEC)
            continue

          # Collect metric data
          collectionStartTime = time.time()

          poolResults = self._collectDataForMetrics(metricsToUpdate,
                                                    processPool,
                                                    resultsQueue)

          # Process/dispatch results in parallel in another thread as results
          # become available in resultsQueue
          dispatchThread = (
            threading.Thread(target=self._processAndDispatchThreadTarget,
                             args=(engine,
                                   metricsToUpdate,
                                   resultsQueue,
                                   modelSwapper,
                                   sendPipe)))
          dispatchStartTime = time.time()
          dispatchThread.start()

          # Synchronize with processPool
          poolResults.wait() # Wait for collection tasks to complete

          metricPollDuration = time.time() - collectionStartTime

          resultsQueue.join() # Block until all tasks completed...

          # Synchronize with dispatchThread
          resultsQueue.put(self._SENTINEL) # Signal to dispatchThread that
                                           # there are no more results to
                                           # process.
          resultsQueue.join()
          numEmpty, numErrors = recvPipe.recv() # Get dispatchThread stats

          dispatchDuration = time.time() - dispatchStartTime

          self._log.info(
            "Processed numMetrics=%d; numEmpty=%d; numErrors=%d; "
            "duration=%.4fs (filter=%.4fs; query=%.4fs; dispatch=%.4fs)",
            len(metricsToUpdate), numEmpty, numErrors,
            time.time() - startTime, filterDuration,
            metricPollDuration, dispatchDuration)
    finally:
      self._log.info("Exiting Metric Collector run-loop")
      processPool.terminate()
      processPool.join()
Esempio n. 13
0
    def run(self):
        """
        Consume pending result batches and dispatch each one to the matching
        result handler.

        Model command results are handled one at a time; inference results of
        a batch are collected and handled together. Messages composed from the
        results are published to the self._modelResultsExchange exchange, and
        the batch is acked afterwards.

        :see: `_processModelCommandResult` and `_processModelInferenceResults`
        """
        persistentMode = amqp.constants.AMQPDeliveryModes.PERSISTENT_MESSAGE

        # Properties for publishing model command results on RabbitMQ exchange
        cmdResultProps = MessageProperties(
            deliveryMode=persistentMode,
            headers={"dataType": "model-cmd-result"})

        # Properties for publishing model inference results on RabbitMQ exchange
        inferenceResultProps = MessageProperties(deliveryMode=persistentMode)

        # Declare an exchange for forwarding our results
        connectionParams = amqp.connection.getRabbitmqConnectionParameters()
        with amqp.synchronous_amqp_client.SynchronousAmqpClient(
                connectionParams) as amqpClient:
            amqpClient.declareExchange(self._modelResultsExchange,
                                       exchangeType="fanout",
                                       durable=True)

        with ModelSwapperInterface() as modelSwapper, \
                MessageBusConnector() as bus:
            with modelSwapper.consumeResults() as consumer:
                for batch in consumer:
                    if self._profiling:
                        batchStartTime = time.time()

                    inferenceResults = []
                    for modelResult in batch.objects:
                        try:
                            if isinstance(modelResult, ModelCommandResult):
                                self._processModelCommandResult(
                                    batch.modelID, modelResult)
                                # Compose the command result message for
                                # consumption by downstream processes; nothing
                                # is published if composing raises
                                try:
                                    cmdMessage = (
                                        self._composeModelCommandResultMessage(
                                            modelID=batch.modelID,
                                            cmdResult=modelResult))
                                except (ObjectNotFoundError,
                                        MetricNotMonitoredError):
                                    pass
                                else:
                                    bus.publishExg(
                                        exchange=self._modelResultsExchange,
                                        routingKey="",
                                        body=self._serializeModelResult(
                                            cmdMessage),
                                        properties=cmdResultProps)
                            elif isinstance(modelResult,
                                            ModelInferenceResult):
                                inferenceResults.append(modelResult)
                            else:
                                self._log.error("Unsupported ModelResult=%r",
                                                modelResult)
                        except ObjectNotFoundError:
                            self._log.exception(
                                "Error processing result=%r from model=%s",
                                modelResult, batch.modelID)

                    if inferenceResults:
                        processed = self._processModelInferenceResults(
                            inferenceResults, metricID=batch.modelID)

                        if processed is not None:
                            # Compose the model results payload for
                            # consumption by downstream processes
                            metricRow, dataRows = processed
                            payload = self._serializeModelResult(
                                self._composeModelInferenceResultsMessage(
                                    metricRow, dataRows))

                            bus.publishExg(
                                exchange=self._modelResultsExchange,
                                routingKey="",
                                body=payload,
                                properties=inferenceResultProps)

                    batch.ack()

                    # Everything below is profiling output only
                    if not self._profiling:
                        continue

                    if not inferenceResults:
                        self._log.info(
                            "{TAG:ANOM.BATCH.CMD.DONE} model=%s; "
                            "numItems=%d; duration=%.4fs", batch.modelID,
                            len(batch.objects),
                            time.time() - batchStartTime)
                    elif processed is not None:
                        # pylint: disable=W0633
                        metricRow, rows = processed
                        if len(rows) > 1:
                            rowIdRange = "%s..%s" % (rows[0].rowid,
                                                     rows[-1].rowid)
                        else:
                            rowIdRange = str(rows[0].rowid)
                        self._log.info(
                            "{TAG:ANOM.BATCH.INF.DONE} model=%s; "
                            "numItems=%d; rows=[%s]; tailRowTS=%s; duration=%.4fs; "
                            "ds=%s; name=%s", batch.modelID,
                            len(batch.objects), rowIdRange,
                            rows[-1].timestamp.isoformat() + "Z",
                            time.time() - batchStartTime,
                            metricRow.datasource, metricRow.name)

        self._log.info("Stopped processing model results")
    def _auxTestRunModelWithFullThenIncrementalCheckpoints(
            self, classifierEnabled):
        """Run a model through one full and two incremental checkpoints.

        Scenario (every step starts the model runner through
        _startModelRunnerSubprocess and waits for it to stop):

        1. Define model "foobar" and submit two input rows; expect a
           defineModel command result batch followed by an inference result
           batch, and checkpoint attributes that list two batch IDs and have
           no input-samples attribute.
        2. Submit two more rows; expect the batch-IDs attribute to hold only
           the newly submitted batch ID and the input-samples attribute to
           decode to the data of those rows.
        3. Submit two more rows; expect the input-samples attribute to
           decode to the data of the rows from steps 2 and 3 combined.
        4. Delete the model; expect a deleteModel command result, the model
           input queue to be gone, and ModelNotFound from every checkpoint
           manager accessor.

        :param classifierEnabled: value stored under "clEnable" in the model
          params; when true, each inference result must carry a dict in
          multiStepBestPredictions, otherwise None
        """
        modelID = "foobar"
        checkpointMgr = model_checkpoint_mgr.ModelCheckpointMgr()

        archiver = model_runner._ModelArchiver
        batchIDsAttr = archiver._BATCH_IDS_CHECKPOINT_ATTR_NAME
        samplesAttr = archiver._INPUT_SAMPLES_SINCE_CHECKPOINT_ATTR_NAME

        def submitInputRows(swapperAPI, rows):
            """Log and submit rows; return submitRequests' result."""
            _LOGGER.info(
                "Submitting batch of %d input rows with ids=[%s..%s]...",
                len(rows), rows[0].rowID, rows[-1].rowID)
            return swapperAPI.submitRequests(modelID=modelID, requests=rows)

        def runModelRunner(numExpectedBatches):
            """Run model_runner until it stops; return consumed batches."""
            with self._startModelRunnerSubprocess(
                    modelID) as modelRunnerProcess:
                batches = self._consumeResults(
                    numExpectedBatches=numExpectedBatches, timeout=15)
                self._waitForProcessToStopAndCheck(modelRunnerProcess)
            return batches

        def assertResultsQueueEmpty(swapperAPI):
            """Check that no unconsumed results remain on the bus."""
            with MessageBusConnector() as bus:
                self.assertTrue(bus.isEmpty(swapperAPI._resultsQueueName))

        def assertCommandResultBatch(batch, method, commandID):
            """Check a batch holding one successful command result."""
            self.assertEqual(batch.modelID, modelID)
            self.assertEqual(len(batch.objects), 1)
            result = batch.objects[0]
            self.assertIsInstance(result, ModelCommandResult)
            self.assertEqual(result.method, method)
            self.assertEqual(result.status, htmengineerrno.SUCCESS)
            self.assertEqual(result.commandID, commandID)

        def assertInferenceResultBatch(batch, rows):
            """Check a batch holding one successful inference per row."""
            self.assertEqual(batch.modelID, modelID)
            self.assertEqual(len(batch.objects), len(rows))
            for inputRow, result in zip(rows, batch.objects):
                self.assertIsInstance(result, ModelInferenceResult)
                self.assertEqual(result.status, htmengineerrno.SUCCESS)
                self.assertEqual(result.rowID, inputRow.rowID)
                self.assertIsInstance(result.anomalyScore, float)
                if classifierEnabled:
                    self.assertIsInstance(result.multiStepBestPredictions,
                                          dict)
                else:
                    self.assertIsNone(result.multiStepBestPredictions)

        def assertIncrementalCheckpoint(lastBatchID, expectedRows):
            """Check checkpoint attributes after an incremental checkpoint.

            The batch-IDs attribute must equal [lastBatchID] and the
            input-samples attribute must decode to the data of expectedRows.
            """
            # The model itself must still be loadable; the instance is not
            # needed beyond that
            checkpointMgr.load(modelID)
            attrs = checkpointMgr.loadCheckpointAttributes(modelID)
            self.assertIn(batchIDsAttr, attrs, msg=repr(attrs))
            self.assertSequenceEqual(attrs[batchIDsAttr], [lastBatchID],
                                     msg=repr(attrs))
            self.assertIn(samplesAttr, attrs, msg=repr(attrs))
            self.assertSequenceEqual(
                archiver._decodeDataSamples(attrs[samplesAttr]),
                [row.data for row in expectedRows],
                msg=repr(attrs))

        args = getScalarMetricWithTimeOfDayAnomalyParams(metricData=[0],
                                                         minVal=0,
                                                         maxVal=1000)

        args["modelConfig"]["modelParams"]["clEnable"] = classifierEnabled

        # Input records consist of a timestamp field and a float field
        args["inputRecordSchema"] = (
            FieldMetaInfo("c0", FieldMetaType.datetime,
                          FieldMetaSpecial.timestamp),
            FieldMetaInfo("c1", FieldMetaType.float, FieldMetaSpecial.none),
        )

        with ModelSwapperInterface() as swapperAPI:
            # Define the model
            _LOGGER.info("Defining the model")
            swapperAPI.defineModel(modelID=modelID,
                                   args=args,
                                   commandID="defineModelCmd1")

            # Send input rows to the model
            inputRows = [
                ModelInputRow(
                    rowID="rowfoo",
                    data=[datetime.datetime(2014, 5, 23, 8, 13, 00), 5.3]),
                ModelInputRow(
                    rowID="rowbar",
                    data=[datetime.datetime(2014, 5, 23, 8, 13, 15), 2.4]),
            ]
            submitInputRows(swapperAPI, inputRows)

            # Run model_runner and collect results
            resultBatches = runModelRunner(numExpectedBatches=2)
            assertResultsQueueEmpty(swapperAPI)
            self.assertEqual(len(resultBatches), 2, repr(resultBatches))

            # First result batch should be the defineModel result
            assertCommandResultBatch(resultBatches[0],
                                     method="defineModel",
                                     commandID="defineModelCmd1")

            # The second result batch should be for the two input rows
            assertInferenceResultBatch(resultBatches[1], inputRows)

            # Verify model checkpoint: two batch IDs recorded and no
            # input-samples attribute yet
            checkpointMgr.load(modelID)
            attrs = checkpointMgr.loadCheckpointAttributes(modelID)
            self.assertIn(batchIDsAttr, attrs, msg=repr(attrs))
            self.assertEqual(len(attrs[batchIDsAttr]), 2, msg=repr(attrs))
            self.assertNotIn(samplesAttr, attrs, msg=repr(attrs))

            # Now, check incremental checkpointing
            inputRows2 = [
                ModelInputRow(
                    rowID=2,
                    data=[datetime.datetime(2014, 5, 23, 8, 13, 20), 2.7]),
                ModelInputRow(
                    rowID=3,
                    data=[datetime.datetime(2014, 5, 23, 8, 13, 25), 3.9]),
            ]
            inputBatchID = submitInputRows(swapperAPI, inputRows2)

            resultBatches = runModelRunner(numExpectedBatches=1)
            assertResultsQueueEmpty(swapperAPI)
            assertInferenceResultBatch(resultBatches[0], inputRows2)
            assertIncrementalCheckpoint(inputBatchID, inputRows2)

            # Final run with incremental checkpointing
            inputRows3 = [
                ModelInputRow(
                    rowID=4,
                    data=[datetime.datetime(2014, 5, 23, 8, 13, 30), 4.7]),
                ModelInputRow(
                    rowID=5,
                    data=[datetime.datetime(2014, 5, 23, 8, 13, 35), 5.9]),
            ]
            inputBatchID = submitInputRows(swapperAPI, inputRows3)

            resultBatches = runModelRunner(numExpectedBatches=1)
            assertResultsQueueEmpty(swapperAPI)
            assertInferenceResultBatch(resultBatches[0], inputRows3)
            # Input samples are expected to accumulate across both
            # incremental runs
            assertIncrementalCheckpoint(inputBatchID, inputRows2 + inputRows3)

            # Delete the model
            _LOGGER.info("Deleting the model=%s", modelID)
            swapperAPI.deleteModel(modelID=modelID,
                                   commandID="deleteModelCmd1")

            resultBatches = runModelRunner(numExpectedBatches=1)
            self.assertEqual(len(resultBatches), 1, repr(resultBatches))

            # The only result batch should be the deleteModel result
            assertCommandResultBatch(resultBatches[0],
                                     method="deleteModel",
                                     commandID="deleteModelCmd1")

            with MessageBusConnector() as bus:
                self.assertTrue(bus.isEmpty(swapperAPI._resultsQueueName))

                # The model input queue should be deleted now
                self.assertFalse(
                    bus.isMessageQeueuePresent(
                        swapperAPI._getModelInputQName(modelID=modelID)))

            # The model checkpoint should be gone too
            with self.assertRaises(model_checkpoint_mgr.ModelNotFound):
                checkpointMgr.load(modelID)
            with self.assertRaises(model_checkpoint_mgr.ModelNotFound):
                checkpointMgr.loadModelDefinition(modelID)
            with self.assertRaises(model_checkpoint_mgr.ModelNotFound):
                checkpointMgr.loadCheckpointAttributes(modelID)
            with self.assertRaises(model_checkpoint_mgr.ModelNotFound):
                checkpointMgr.remove(modelID)
def deleteHTMModel(modelId):
    """Submit a deleteModel command for the given model.

    Opens a ModelSwapperInterface for the duration of the call and issues
    deleteModel with a command ID obtained from createGuid().

    :param modelId: ID of the model to delete
    """
    with ModelSwapperInterface() as swapperAPI:
        deleteCommandID = createGuid()
        swapperAPI.deleteModel(modelID=modelId, commandID=deleteCommandID)
    def testCloneModel(self):
        """Define a model, clone it, and run input through the clone.

        With the model scheduler subprocess running, the test submits
        defineModel for "abc", cloneModel from "abc" to "def", two input rows
        for "def", and deleteModel for "def", consuming one result batch
        after each step. It then verifies the four batches and finally
        terminates the scheduler subprocess, requiring return code 0 within
        30 seconds. Only the clone is deleted by this test.
        """
        modelSchedulerSubprocess = self._startModelSchedulerSubprocess()

        def killSchedulerIfRunning():
            """Kill the scheduler subprocess unless it has already exited."""
            if modelSchedulerSubprocess.returncode is None:
                modelSchedulerSubprocess.kill()

        self.addCleanup(killSchedulerIfRunning)

        modelID = "abc"
        destModelID = "def"

        resultBatches = []

        def assertCommandResultBatch(batch, expectedModelID, method,
                                     commandID):
            """Check a batch holding one successful command result."""
            self.assertEqual(batch.modelID, expectedModelID)
            self.assertEqual(len(batch.objects), 1)

            result = batch.objects[0]
            self.assertIsInstance(result, ModelCommandResult)
            self.assertEqual(result.method, method)
            self.assertEqual(result.status, htmengineerrno.SUCCESS)
            self.assertEqual(result.commandID, commandID)

        with ModelSwapperInterface() as swapperAPI:
            args = getScalarMetricWithTimeOfDayAnomalyParams(metricData=[0],
                                                             minVal=0,
                                                             maxVal=1000)

            # Input records consist of a timestamp field and a float field
            args["inputRecordSchema"] = (
                FieldMetaInfo("c0", FieldMetaType.datetime,
                              FieldMetaSpecial.timestamp),
                FieldMetaInfo("c1", FieldMetaType.float,
                              FieldMetaSpecial.none),
            )

            # Define the model
            _LOGGER.info("Defining the model")
            swapperAPI.defineModel(modelID=modelID,
                                   args=args,
                                   commandID="defineModelCmd1")

            resultBatches.extend(self._consumeResults(1, timeout=20))
            self.assertEqual(len(resultBatches), 1)

            # Clone the just-defined model
            _LOGGER.info("Cloning model")
            swapperAPI.cloneModel(modelID,
                                  destModelID,
                                  commandID="cloneModelCmd1")

            resultBatches.extend(self._consumeResults(1, timeout=20))
            self.assertEqual(len(resultBatches), 2)

            # Send input rows to the clone
            inputRows = [
                ModelInputRow(
                    rowID="rowfoo",
                    data=[datetime.datetime(2013, 5, 23, 8, 13, 00), 5.3]),
                ModelInputRow(
                    rowID="rowbar",
                    data=[datetime.datetime(2013, 5, 23, 8, 13, 15), 2.4]),
            ]
            _LOGGER.info("Submitting batch of %d input rows...",
                         len(inputRows))
            swapperAPI.submitRequests(modelID=destModelID, requests=inputRows)

            _LOGGER.info("These models have pending input: %s",
                         swapperAPI.getModelsWithInputPending())

            resultBatches.extend(self._consumeResults(1, timeout=20))
            self.assertEqual(len(resultBatches), 3)

            with MessageBusConnector() as bus:
                # The results message queue should be empty now
                self.assertTrue(bus.isEmpty(swapperAPI._resultsQueueName))

            # Delete the clone
            _LOGGER.info("Deleting the model")
            swapperAPI.deleteModel(modelID=destModelID,
                                   commandID="deleteModelCmd1")

            _LOGGER.info("Waiting for model deletion result")
            resultBatches.extend(self._consumeResults(1, timeout=20))

            self.assertEqual(len(resultBatches), 4)

            with MessageBusConnector() as bus:
                # The results message queue should be empty now
                self.assertTrue(bus.isEmpty(swapperAPI._resultsQueueName))

                # The model input queue should be deleted now
                self.assertFalse(
                    bus.isMessageQeueuePresent(
                        swapperAPI._getModelInputQName(modelID=destModelID)))

        # Verify results

        # First result batch should be the defineModel result
        assertCommandResultBatch(resultBatches[0],
                                 expectedModelID=modelID,
                                 method="defineModel",
                                 commandID="defineModelCmd1")

        # The second result batch should be for the cloneModel result; it is
        # reported against the source model's ID
        assertCommandResultBatch(resultBatches[1],
                                 expectedModelID=modelID,
                                 method="cloneModel",
                                 commandID="cloneModelCmd1")

        # The third batch should be for the two input rows
        batch = resultBatches[2]
        self.assertEqual(batch.modelID, destModelID)
        self.assertEqual(len(batch.objects), len(inputRows))

        for inputRow, result in zip(inputRows, batch.objects):
            self.assertIsInstance(result, ModelInferenceResult)
            self.assertEqual(result.status, htmengineerrno.SUCCESS)
            self.assertEqual(result.rowID, inputRow.rowID)
            self.assertIsInstance(result.anomalyScore, float)
            self.assertIsInstance(result.multiStepBestPredictions, dict)

        # The fourth batch should be for the "deleteModel"
        assertCommandResultBatch(resultBatches[3],
                                 expectedModelID=destModelID,
                                 method="deleteModel",
                                 commandID="deleteModelCmd1")

        # Signal Model Scheduler Service subprocess to shut down and wait for it
        waitResult = dict()

        def runWaiterThread():
            """Wait for the scheduler to exit; record its return code."""
            try:
                waitResult["returnCode"] = modelSchedulerSubprocess.wait()
            except Exception:
                _LOGGER.exception(
                    "Waiting for modelSchedulerSubprocess failed")
                waitResult["exceptionInfo"] = traceback.format_exc()
                raise

        modelSchedulerSubprocess.terminate()
        # Wait on a daemon thread so that a hung subprocess can neither block
        # the test beyond the join timeout nor keep the interpreter alive
        waiterThread = threading.Thread(target=runWaiterThread)
        waiterThread.daemon = True
        waiterThread.start()
        waiterThread.join(timeout=30)
        self.assertFalse(waiterThread.is_alive())

        # Use .get() so that, if the wait itself failed, the assertion message
        # shows the captured traceback instead of an opaque KeyError
        self.assertEqual(waitResult.get("returnCode"), 0,
                         msg=repr(waitResult))