def __init__(self, modelID):
    """Initialize the runner state for a single model.

    :param modelID: model ID; string
    """
    self._modelID = modelID
    self._logger = _getLogger()

    self._swapperAPI = ModelSwapperInterface()
    self._archiver = _ModelArchiver(self._modelID)

    # Termination flag for our processing loop; the "deleteModel" command
    # handler sets it to force the loop to exit
    self._done = False

    swapperConfig = ModelSwapperConfig()

    self._targetMaxRequestsPerCheckpoint = swapperConfig.getint(
        "model_runner", "target_requests_per_checkpoint")

    # Profiling is on when the configuration enables it; otherwise it falls
    # back to whether the logger is enabled for DEBUG
    profilingConfigured = swapperConfig.getboolean("debugging", "profiling")
    self._profiling = (profilingConfigured or
                       self._logger.isEnabledFor(logging.DEBUG))
    if self._profiling:
        self._logger.info("Profiling is turned on")

    # NOTE(review): presumably accumulates model load time for profiling --
    # confirm against the code that updates it
    self._modelLoadSec = 0
def run(self):
    """Poll for autostack metrics pending data collection and process them.

    Loops indefinitely (there is no exit condition in the loop itself):
    each pass queries the repository for pending autostack metrics, sleeps
    when there are none, and otherwise turns them into a flat sequence of
    AutostackMetricRequest objects that is handed to
    _processAutostackMetricRequests.
    """
    with ModelSwapperInterface() as modelSwapper:
        engine = repository.engineFactory()

        while True:
            with engine.connect() as conn:
                fetchPending = repository.retryOnTransientErrors(
                    repository.getAutostackMetricsPendingDataCollection)
                pendingStacks = fetchPending(conn)

            if not pendingStacks:
                time.sleep(self._NOTHING_READY_SLEEP_TIME_SEC)
                continue

            # Build a sequence of autostack metric requests; each request's
            # refID is its position in the overall sequence
            requests = []
            for autostack, metrics in pendingStacks:
                for metric in metrics:
                    requests.append(
                        AutostackMetricRequest(refID=len(requests),
                                               autostack=autostack,
                                               metric=metric))

            # Collect, aggregate, and stream metric data
            self._processAutostackMetricRequests(engine, requests,
                                                 modelSwapper)
def __init__(self, concurrency):
    """Set up the controller's state and create (but do not start) the
    notification reader thread.

    :param concurrency: allowed number of model slots
    """
    self._logger = _getLogger()

    profilingConfigured = ModelSwapperConfig().getboolean(
        "debugging", "profiling")
    self._profiling = (profilingConfigured or
                       self._logger.isEnabledFor(logging.DEBUG))

    # Allowed number of model slots
    self._concurrency = concurrency

    # Set by the input-reader thread's target function once it starts
    # running, so that our event loop knows things are off to a good start
    self._notificationReaderStartedEvent = threading.Event()

    # Mutex used to guarantee that no further model input notifications
    # will be added to the main event queue once
    # self._stopNotificationReader is set
    self._notificationMutex = threading.Lock()

    # _runNotificationReaderThread will not process any more notifications
    # once it detects that this flag is true
    self._stopNotificationReader = False

    # The event loop will exit some time after an event handler sets this
    # flag to True
    self._eventLoopStopPending = False

    # (non-thread-safe) The tuple of all slot agents
    self._slotAgents = tuple(
        [SlotAgent(slotID=slotIndex) for slotIndex in xrange(concurrency)])
    assert self._slotAgents

    # Thread-safe event queue for SwapController
    self._eventQ = Queue.Queue()

    # Main event loop's ModelSwapperInterface instance; MUST NOT be used
    # from other threads
    self._mainSwapper = ModelSwapperInterface()

    # A (non-thread-safe) FIFO of models that are waiting to be scheduled
    # for running; there is incoming data for them that needs processing
    self._waitingModelsFIFO = []

    # A (non-thread-safe) map of modelIDs to _RunningModelInfo instances
    self._runningModelsMap = {}

    # A (non-thread-safe) list of free slot indexes into the
    # self._slotAgents tuple
    self._freeSlots = [
        slotIndex for slotIndex in xrange(len(self._slotAgents))]

    # (non-thread-safe) Indexes of SlotAgents pending preemption
    self._pendingPreemptSlotsSet = set()

    readerThreadName = "%s-input-reader-%s" % (self.__class__.__name__,
                                               id(self))
    self._notificationReaderThread = threading.Thread(
        target=self._runNotificationReaderThread,
        name=readerThreadName)
    # Allow process to exit even if thread is still running
    self._notificationReaderThread.daemon = True
def testStartMultipleModelRunnersAndStopThem(self):
    # Starts several ModelRunners and stops them gracefully to confirm that
    # they can all stop without conflicting with each other.
    #
    # Background: if ModelRunnerProxy doesn't configure subprocess.Popen
    # with `close_fds=True`, graceful shutdown will fail: the stdin of
    # earlier child processes gets cloned into children started after them,
    # so closing the stdin of an earlier ModelRunner child won't terminate
    # it (the other clones of that file descriptor keep it from fully
    # closing).
    #
    # TODO send commands to models and verify output
    modelIDs = tuple("abcdef" + str(index) for index in xrange(5))

    with ModelSwapperInterface() as swapper:
        modelInputMQs = tuple(
            [swapper._getModelInputQName(modelID=modelID)
             for modelID in modelIDs])

    with amqp_test_utils.managedQueueDeleter(modelInputMQs):
        with MessageBusConnector() as bus:
            for queueName in modelInputMQs:
                bus.createMessageQueue(queueName, durable=True)

        runners = [
            slot_agent.ModelRunnerProxy(modelID=modelID,
                                        onTermination=lambda: None,
                                        logger=_LOGGER)
            for modelID in modelIDs]

        returnCodes = []
        for runner in runners:
            returnCodes.append(runner.stopGracefully())

    self.assertEqual(returnCodes, [0] * len(runners))
def runServer():
    """Consume messages from the metric listener queue and process them in
    batches.

    Loads the current custom metrics into the module-level gCustomMetrics
    cache, sets the module-level gProfiling flag, makes sure the configured
    message queue exists, and then polls it forever. Messages are
    accumulated until either the queue is momentarily empty or
    MAX_MESSAGES_PER_BATCH have been collected; the batch is then handed to
    _handleBatch and acknowledged.

    The ModelSwapperInterface is entered as a context manager so that it is
    released when the loop is left (e.g., on KeyboardInterrupt), the same
    way the message bus connection is.

    :raises KeyError: if the APPLICATION_CONFIG_PATH environment variable
      is not set
    """
    global gCustomMetrics
    global gProfiling

    # Get the current list of custom metrics
    appConfig = Config("application.conf",
                       os.environ["APPLICATION_CONFIG_PATH"])

    engine = repository.engineFactory(appConfig)

    now = datetime.datetime.utcnow()
    with engine.connect() as conn:
        gCustomMetrics = {
            m.name: [m, now] for m in repository.getCustomMetrics(conn)}

    queueName = appConfig.get("metric_listener", "queue_name")

    gProfiling = (appConfig.getboolean("debugging", "profiling") or
                  LOGGER.isEnabledFor(logging.DEBUG))
    del appConfig

    metricStreamer = MetricStreamer()

    with ModelSwapperInterface() as modelSwapper, \
            MessageBusConnector() as bus:
        if not bus.isMessageQeueuePresent(queueName):
            bus.createMessageQueue(mqName=queueName, durable=True)

        LOGGER.info("Waiting for messages. To exit, press CTRL+C")

        with bus.consume(queueName) as consumer:
            messages = []
            messageRxTimes = []

            while True:
                message = consumer.pollOneMessage()
                if message is not None:
                    messages.append(message)
                    if gProfiling:
                        messageRxTimes.append(time.time())

                batchReady = (message is None or
                              len(messages) >= MAX_MESSAGES_PER_BATCH)
                if not batchReady:
                    continue

                if not messages:
                    # Queue is empty, wait before retrying
                    time.sleep(POLL_DELAY_SEC)
                    continue

                # Process the batch
                try:
                    _handleBatch(engine,
                                 messages,
                                 messageRxTimes,
                                 metricStreamer,
                                 modelSwapper)
                except Exception:  # pylint: disable=W0703
                    # Deliberately swallowed: we still ack below when there
                    # is an unexpected error to avoid getting hung forever
                    # on one bad record.
                    LOGGER.exception(
                        "Unknown failure in processing messages.")

                # Ack all the messages
                messages[-1].ack(multiple=True)

                # Clear the message buffer
                messages = []
                messageRxTimes = []
def runConsumerThread(destList, numExpected):
    """Consume model result batches into destList until it holds numExpected.

    Each batch is appended to destList, acknowledged, and logged; consuming
    stops as soon as len(destList) equals numExpected.

    :param destList: list that receives the consumed result batches
    :param numExpected: length of destList at which to stop consuming
    """
    with ModelSwapperInterface() as swapper, \
            swapper.consumeResults() as consumer:
        for resultBatch in consumer:
            destList.append(resultBatch)
            resultBatch.ack()
            _LOGGER.info("Got result batch=%r", resultBatch)

            if len(destList) != numExpected:
                continue
            break
def _runNotificationReaderThread(self):
    """ Read model data notifications and pass them to the event loop.

    Sequence of operations:
      1. Signal self._notificationReaderStartedEvent.
      2. Initialize scheduler notification on a dedicated
         ModelSwapperInterface instance.
      3. Call self._newInputNotifyTS for every model reported by
         getModelsWithInputPending().
      4. Consume scheduler notifications, forwarding each one via
         self._newInputNotifyTS and acking it, until
         self._stopNotificationReader is observed to be true.

    :raises Exception: if the notification consumer's iteration ends on its
      own, i.e., without a stop request having been detected
    """
    self._logger.info("Notification Reader thread is running")

    # Let the main event loop know that this thread started successfully
    self._notificationReaderStartedEvent.set()

    with ModelSwapperInterface() as swapperAPI:
        # First, make sure our notification message queue exists, so we
        # don't miss any new notifications while we're checking for models
        # with pending input
        self._logger.info("SWAPPER_API: %r", swapperAPI)
        swapperAPI.initSchedulerNotification()

        # At start, notify main event loop of each model whose input is
        # non-empty
        self._logger.info("Checking for models with pending input")

        # Pre-initialized so that the count logged below is 0 when there
        # are no models with pending input (enumerate starts at 1, so after
        # the loop `i` is the number of models seen)
        i = 0
        for i, modelID in enumerate(swapperAPI.getModelsWithInputPending(),
                                    1):
            self._logger.debug("Input pending for model=%s", modelID)
            self._newInputNotifyTS(modelID=modelID)

        self._logger.info("%s model(s) had pending input", i)

        # Service the SwapController's input queue until stop is requested
        with swapperAPI.consumeModelSchedulerNotifications() as consumer:
            numHandledNotifications = 0
            try:
                for notification in consumer:
                    # The stop flag is checked under the mutex, so once a
                    # stop is observed no further notification is forwarded
                    with self._notificationMutex:
                        if self._stopNotificationReader:
                            self._logger.info(
                                "Notification reader exiting due to stop request"
                            )
                            break

                        self._newInputNotifyTS(modelID=notification.value)

                        notification.ack()

                        numHandledNotifications += 1
                else:
                    # for/else: reached only when the loop was NOT left via
                    # `break`, i.e., the consumer stopped yielding
                    # notifications without a stop request
                    raise Exception(
                        "Unexpected termination of consumer loop in "
                        "Notification Reader")
            finally:
                self._logger.info(
                    "Control is leaving notification reader loop after processing %s "
                    "notifications", numHandledNotifications)
def createHTMModel(modelId, params):
    """ Dispatch command to create HTM model

    A ModelSwapperInterface is opened for the duration of the call and a
    freshly generated GUID is used as the command ID.

    :param modelId: unique identifier of the metric row
    :param params: model params for creating a scalar model per
      ModelSwapper interface
    """
    with ModelSwapperInterface() as modelSwapper:
        commandID = createGuid()
        modelSwapper.defineModel(modelID=modelId,
                                 args=params,
                                 commandID=commandID)
def testStartModelRunnerAndStopIt(self):
    # Simple test: start one ModelRunner via ModelRunnerProxy, stop it
    # gracefully, and expect a zero return code.
    #
    # TODO send command to model and verify output
    modelID = "abcdef"

    with ModelSwapperInterface() as swapper:
        inputQueueName = swapper._getModelInputQName(modelID=modelID)

    # The model's input queue is created up front and removed by
    # managedQueueDeleter when the block is left
    with amqp_test_utils.managedQueueDeleter(inputQueueName):
        with MessageBusConnector() as bus:
            bus.createMessageQueue(inputQueueName, durable=True)

        modelRunner = slot_agent.ModelRunnerProxy(
            modelID=modelID,
            onTermination=lambda: None,
            logger=_LOGGER)

        returnCode = modelRunner.stopGracefully()

    self.assertEqual(returnCode, 0)
def testModelSwapper(self):
    """Simple end-to-end test of the model swapper system.

    Starts a model scheduler subprocess, defines the same model twice,
    submits one batch of two input rows, deletes the model (twice), and
    verifies the four result batches that come back. Finally terminates the
    scheduler subprocess and checks that it exits with return code 0.
    """
    modelSchedulerSubprocess = self._startModelSchedulerSubprocess()
    # Safety net: kill the subprocess at cleanup only if it has not exited
    self.addCleanup(lambda: modelSchedulerSubprocess.kill()
                    if modelSchedulerSubprocess.returncode is None else None)

    modelID = "foobar"
    resultBatches = []

    with ModelSwapperInterface() as swapperAPI:
        possibleModels = getScalarMetricWithTimeOfDayParams(metricData=[0],
                                                            minVal=0,
                                                            maxVal=1000)

        # Submit requests including a model creation command and two data
        # rows.
        args = possibleModels[0]
        args["inputRecordSchema"] = (
            FieldMetaInfo("c0", FieldMetaType.datetime,
                          FieldMetaSpecial.timestamp),
            FieldMetaInfo("c1", FieldMetaType.float,
                          FieldMetaSpecial.none),
        )

        # Define the model
        _LOGGER.info("Defining the model")
        swapperAPI.defineModel(modelID=modelID, args=args,
                               commandID="defineModelCmd1")

        # Attempt to define the same model again
        _LOGGER.info("Defining the model again")
        swapperAPI.defineModel(modelID=modelID, args=args,
                               commandID="defineModelCmd2")

        # Send input rows to the model
        inputRows = [
            ModelInputRow(
                rowID="rowfoo",
                data=[datetime.datetime(2013, 5, 23, 8, 13, 00), 5.3]),
            ModelInputRow(
                rowID="rowbar",
                data=[datetime.datetime(2013, 5, 23, 8, 13, 15), 2.4]),
        ]

        _LOGGER.info("Submitting batch of %d input rows...", len(inputRows))
        swapperAPI.submitRequests(modelID=modelID, requests=inputRows)

        _LOGGER.info("These models have pending input: %s",
                     swapperAPI.getModelsWithInputPending())

        # Retrieve all results.
        # NOTE: We collect results via background thread to avoid
        # deadlocking the test runner in the event consuming blocks
        # unexpectedly
        _LOGGER.info("Reading all batches of results...")

        # Two defineModel results plus one inference batch
        numBatchesExpected = 3
        resultBatches.extend(
            self._consumeResults(numBatchesExpected, timeout=20))

        self.assertEqual(len(resultBatches), numBatchesExpected)

        with MessageBusConnector() as bus:
            # The results message queue should be empty now
            self.assertTrue(bus.isEmpty(swapperAPI._resultsQueueName))

        # Delete the model
        _LOGGER.info("Deleting the model")
        swapperAPI.deleteModel(modelID=modelID, commandID="deleteModelCmd1")

        _LOGGER.info("Waiting for model deletion result")
        resultBatches.extend(self._consumeResults(1, timeout=20))

        self.assertEqual(len(resultBatches), 4)

        with MessageBusConnector() as bus:
            # The results message queue should be empty now
            self.assertTrue(bus.isEmpty(swapperAPI._resultsQueueName))

            # The model input queue should be deleted now
            self.assertFalse(
                bus.isMessageQeueuePresent(
                    swapperAPI._getModelInputQName(modelID=modelID)))

        # Try deleting the model again, to make sure there are no exceptions
        _LOGGER.info("Attempting to delete the model again")
        swapperAPI.deleteModel(modelID=modelID, commandID="deleteModelCmd1")

    # Verify results

    # First result batch should be the first defineModel result
    batch = resultBatches[0]
    self.assertEqual(batch.modelID, modelID)
    self.assertEqual(len(batch.objects), 1)

    result = batch.objects[0]
    self.assertIsInstance(result, ModelCommandResult)
    self.assertEqual(result.method, "defineModel")
    self.assertEqual(result.status, htmengineerrno.SUCCESS)
    self.assertEqual(result.commandID, "defineModelCmd1")

    # The second result batch should be for the second defineModel result
    # for the same model
    batch = resultBatches[1]
    self.assertEqual(batch.modelID, modelID)
    self.assertEqual(len(batch.objects), 1)

    result = batch.objects[0]
    self.assertIsInstance(result, ModelCommandResult)
    self.assertEqual(result.method, "defineModel")
    self.assertEqual(result.status, htmengineerrno.SUCCESS)
    self.assertEqual(result.commandID, "defineModelCmd2")

    # The third batch should be for the two input rows
    batch = resultBatches[2]
    self.assertEqual(batch.modelID, modelID)
    self.assertEqual(len(batch.objects), len(inputRows))

    for inputRow, result in zip(inputRows, batch.objects):
        self.assertIsInstance(result, ModelInferenceResult)
        self.assertEqual(result.status, htmengineerrno.SUCCESS)
        self.assertEqual(result.rowID, inputRow.rowID)
        self.assertIsInstance(result.anomalyScore, float)

    # The fourth batch should be for the "deleteModel"
    batch = resultBatches[3]
    self.assertEqual(batch.modelID, modelID)
    self.assertEqual(len(batch.objects), 1)

    result = batch.objects[0]
    self.assertIsInstance(result, ModelCommandResult)
    self.assertEqual(result.method, "deleteModel")
    self.assertEqual(result.status, htmengineerrno.SUCCESS)
    self.assertEqual(result.commandID, "deleteModelCmd1")

    # Signal Model Scheduler Service subprocess to shut down and wait for it
    waitResult = dict()

    def runWaiterThread():
        # Runs in a helper thread so that the wait can be bounded by the
        # join timeout below; the outcome is reported via waitResult
        try:
            waitResult["returnCode"] = modelSchedulerSubprocess.wait()
        except:
            _LOGGER.exception(
                "Waiting for modelSchedulerSubprocess failed")
            waitResult["exceptionInfo"] = traceback.format_exc()
            raise
        return

    modelSchedulerSubprocess.terminate()
    waiterThread = threading.Thread(target=runWaiterThread)
    waiterThread.setDaemon(True)
    waiterThread.start()
    waiterThread.join(timeout=30)
    # A still-running waiter thread means the subprocess did not exit
    # within the timeout
    self.assertFalse(waiterThread.isAlive())

    self.assertEqual(waitResult["returnCode"], 0, msg=repr(waitResult))
def testCollectAndPublishMetrics(self):
    # Start Metric Collector, create a set of Metrics, wait for it to
    # collect some metrics and to publish them to the metric_exchange, then
    # validate attributes of the published metrics.
    #
    # TODO Add more metric types
    # TODO Deeper validation of the published metrics

    # Start our own instances of model scheduler and metric collector and
    # wait for data points
    with self._startModelSchedulerSubprocess() as modelSchedulerSubprocess, \
            self._startMetricCollectorSubprocess() as metricCollectorSubprocess:
        # Create some models for metric collector to harvest
        region = "us-west-2"
        namespace = "AWS/EC2"
        resourceType = ResourceTypeNames.EC2_INSTANCE

        engine = repository.engineFactory()
        adapter = createCloudwatchDatasourceAdapter()

        ec2Instances = adapter.describeResources(region=region,
                                                 resourceType=resourceType)

        self.assertGreater(len(ec2Instances), 0)

        # Cap the number of models at the smaller of maxModels and the
        # instance quota
        maxModels = 10

        ec2Instances = ec2Instances[:min(maxModels,
                                         Quota.getInstanceQuota())]

        metricInstances = []

        _LOGGER.info("Starting %d models", len(ec2Instances))
        self.assertGreater(len(ec2Instances), 0)
        for ec2Instance in ec2Instances:
            metricSpec = {
                "region": region,
                "namespace": namespace,
                "metric": "CPUUtilization",
                "dimensions": {
                    "InstanceId": ec2Instance["resID"]
                }
            }

            modelSpec = {
                "datasource": "cloudwatch",
                "metricSpec": metricSpec
            }

            metricId = adapter.monitorMetric(modelSpec)

            with engine.connect() as conn:
                repository.setMetricStatus(conn, metricId,
                                           MetricStatus.ACTIVE)

            metricInstances.append(metricId)

        _LOGGER.info("Waiting for results from models...")

        seenMetricIDs = set()
        allMetricIDs = set(metricInstances)

        # Register a timeout so we won't deadlock the test
        def onTimeout(resultsQueueName):
            _LOGGER.error(
                "Timed out waiting to get results from models; numResults=%d; "
                "expected=%d", len(seenMetricIDs), len(allMetricIDs))

            # HACK delete model swapper results queue to abort the consumer
            try:
                with MessageBusConnector() as bus:
                    bus.deleteMessageQueue(resultsQueueName)
            except Exception:
                _LOGGER.exception("Failed to delete results mq=%s",
                                  resultsQueueName)
                raise

        with ModelSwapperInterface() as modelSwapper:
            with modelSwapper.consumeResults() as consumer:
                timer = threading.Timer(
                    120, onTimeout, args=[modelSwapper._resultsQueueName])
                timer.start()
                try:
                    for batch in consumer:
                        seenMetricIDs.add(batch.modelID)
                        batch.ack()
                        if seenMetricIDs == allMetricIDs:
                            break
                    else:
                        # for/else: reached only if the consumer stopped
                        # yielding batches before every model was seen
                        self.fail(
                            "Expected %d results, but got only %d: %s"
                            % (len(allMetricIDs), len(seenMetricIDs),
                               seenMetricIDs,))
                    _LOGGER.info("Got %d results from models",
                                 len(seenMetricIDs))
                finally:
                    timer.cancel()

        # Terminate metric_collector subprocess gracefully to avoid too much
        # error logging junk on the terminal
        metricCollectorSubprocess.send_signal(signal.SIGINT)

        # Terminate model scheduler subprocess gracefully to avoid too much
        # error logging junk on the terminal
        modelSchedulerSubprocess.send_signal(signal.SIGINT)
def run(self): """ Collect metric data and status for active metrics """ # NOTE: the process pool must be created BEFORE this main (parent) process # creates any global or class-level shared resources (e.g., boto # connection) that would have undersirable consequences when # replicated into and used by forked child processes (e.g., the same MySQL # connection socket file descriptor used by multiple processes). And we # can't take advantage of the process Pool's maxtasksperchild feature # either (for the same reason) self._log.info("Starting grok Metric Collector") resultsQueue = multiprocessing.Manager().JoinableQueue() recvPipe, sendPipe = multiprocessing.Pipe(False) processPool = multiprocessing.Pool( processes=self._WORKER_PROCESS_POOL_SIZE, maxtasksperchild=None) try: with ModelSwapperInterface() as modelSwapper: engine = repository.engineFactory() while True: startTime = time.time() if startTime > self._nextCacheGarbageCollectionTime: # TODO: unit-test self._garbageCollectInfoCache() # Determine which metrics are due for an update metricsToUpdate = self._getCandidateMetrics(engine) filterDuration = time.time() - startTime if not metricsToUpdate: time.sleep(self._NO_PENDING_METRICS_SLEEP_SEC) continue # Collect metric data collectionStartTime = time.time() poolResults = self._collectDataForMetrics(metricsToUpdate, processPool, resultsQueue) # Process/dispatch results in parallel in another thread as results # become available in resultsQueue dispatchThread = ( threading.Thread(target=self._processAndDispatchThreadTarget, args=(engine, metricsToUpdate, resultsQueue, modelSwapper, sendPipe))) dispatchStartTime = time.time() dispatchThread.start() # Syncronize with processPool poolResults.wait() # Wait for collection tasks to complete metricPollDuration = time.time() - collectionStartTime resultsQueue.join() # Block until all tasks completed... 
# Syncronize with dispatchThread resultsQueue.put(self._SENTINEL) # Signal to dispatchThread that # there are no more results to # process. resultsQueue.join() numEmpty, numErrors = recvPipe.recv() # Get dispatchThread stats dispatchDuration = time.time() - dispatchStartTime self._log.info( "Processed numMetrics=%d; numEmpty=%d; numErrors=%d; " "duration=%.4fs (filter=%.4fs; query=%.4fs; dispatch=%.4fs)", len(metricsToUpdate), numEmpty, numErrors, time.time() - startTime, filterDuration, metricPollDuration, dispatchDuration) finally: self._log.info("Exiting Metric Collector run-loop") processPool.terminate() processPool.join()
def run(self):
    """ Consumes pending results. Once a result batch arrives, it is
    dispatched to the correct model command result handler, and the
    resulting messages are published to the model results exchange.

    Each batch is acked after its command results and inference results
    have been processed and published.

    :see: `_processModelCommandResult` and `_processModelInferenceResults`
    """
    # Properties for publishing model command results on RabbitMQ exchange
    modelCommandResultProperties = MessageProperties(
        deliveryMode=amqp.constants.AMQPDeliveryModes.PERSISTENT_MESSAGE,
        headers=dict(dataType="model-cmd-result"))

    # Properties for publishing model inference results on RabbitMQ exchange
    modelInferenceResultProperties = MessageProperties(
        deliveryMode=amqp.constants.AMQPDeliveryModes.PERSISTENT_MESSAGE)

    # Declare an exchange for forwarding our results
    with amqp.synchronous_amqp_client.SynchronousAmqpClient(
            amqp.connection.getRabbitmqConnectionParameters()) as amqpClient:
        amqpClient.declareExchange(self._modelResultsExchange,
                                   exchangeType="fanout",
                                   durable=True)

    with ModelSwapperInterface() as modelSwapper, MessageBusConnector() as bus:
        with modelSwapper.consumeResults() as consumer:
            for batch in consumer:
                if self._profiling:
                    batchStartTime = time.time()

                # Inference results are gathered here and processed together
                # after the per-result loop; command results are handled
                # one at a time inside the loop
                inferenceResults = []
                for result in batch.objects:
                    try:
                        if isinstance(result, ModelCommandResult):
                            self._processModelCommandResult(batch.modelID,
                                                            result)
                            # Construct model command result message for
                            # consumption by downstream processes
                            try:
                                cmdResultMessage = self._composeModelCommandResultMessage(
                                    modelID=batch.modelID, cmdResult=result)
                            except (ObjectNotFoundError,
                                    MetricNotMonitoredError):
                                # No message could be composed; skip
                                # publishing for this command result
                                pass
                            else:
                                bus.publishExg(
                                    exchange=self._modelResultsExchange,
                                    routingKey="",
                                    body=self._serializeModelResult(
                                        cmdResultMessage),
                                    properties=modelCommandResultProperties)
                        elif isinstance(result, ModelInferenceResult):
                            inferenceResults.append(result)
                        else:
                            self._log.error("Unsupported ModelResult=%r",
                                            result)
                    except ObjectNotFoundError:
                        self._log.exception("Error processing result=%r "
                                            "from model=%s", result,
                                            batch.modelID)

                if inferenceResults:
                    # NOTE: `result` is rebound here; from this point on it
                    # holds the return value of
                    # _processModelInferenceResults (None or a 2-item
                    # sequence), which the profiling code below relies on
                    result = self._processModelInferenceResults(
                        inferenceResults,
                        metricID=batch.modelID)

                    if result is not None:
                        # Construct model results payload for consumption by
                        # downstream processes
                        metricRow, dataRows = result
                        resultsMessage = self._composeModelInferenceResultsMessage(
                            metricRow, dataRows)

                        payload = self._serializeModelResult(resultsMessage)

                        bus.publishExg(
                            exchange=self._modelResultsExchange,
                            routingKey="",
                            body=payload,
                            properties=modelInferenceResultProperties)

                batch.ack()

                if self._profiling:
                    if inferenceResults:
                        if result is not None:
                            # pylint: disable=W0633
                            metricRow, rows = result
                            # NOTE(review): indexes rows[0] and rows[-1], so
                            # this assumes `rows` is non-empty -- confirm
                            # against _processModelInferenceResults
                            rowIdRange = (
                                "%s..%s" % (rows[0].rowid, rows[-1].rowid)
                                if len(rows) > 1
                                else str(rows[0].rowid))
                            self._log.info(
                                "{TAG:ANOM.BATCH.INF.DONE} model=%s; "
                                "numItems=%d; rows=[%s]; tailRowTS=%s; duration=%.4fs; "
                                "ds=%s; name=%s",
                                batch.modelID, len(batch.objects),
                                rowIdRange,
                                rows[-1].timestamp.isoformat() + "Z",
                                time.time() - batchStartTime,
                                metricRow.datasource, metricRow.name)
                    else:
                        self._log.info(
                            "{TAG:ANOM.BATCH.CMD.DONE} model=%s; "
                            "numItems=%d; duration=%.4fs",
                            batch.modelID, len(batch.objects),
                            time.time() - batchStartTime)

    self._log.info("Stopped processing model results")
def _auxTestRunModelWithFullThenIncrementalCheckpoints(
        self, classifierEnabled):
    # Shared test body. Runs a model runner subprocess four times against
    # the same model:
    #   1. defineModel + first input batch, then checks the checkpoint
    #      attributes (two batch IDs, no input samples attribute);
    #   2. a second input batch, then checks the checkpoint attributes
    #      (one batch ID, input samples == inputRows2);
    #   3. a third input batch, then checks the checkpoint attributes
    #      (one batch ID, input samples == inputRows2 + inputRows3);
    #   4. deleteModel, then checks that the input queue and the checkpoint
    #      are gone.
    #
    # :param classifierEnabled: value stored in the model params' "clEnable"
    #   entry; it also selects whether multiStepBestPredictions is expected
    #   to be a dict (truthy) or None (falsy) in the inference results
    modelID = "foobar"
    checkpointMgr = model_checkpoint_mgr.ModelCheckpointMgr()

    args = getScalarMetricWithTimeOfDayAnomalyParams(metricData=[0],
                                                     minVal=0,
                                                     maxVal=1000)

    args["modelConfig"]["modelParams"]["clEnable"] = classifierEnabled

    # Submit requests including a model creation command and two data rows.
    args["inputRecordSchema"] = (
        FieldMetaInfo("c0", FieldMetaType.datetime,
                      FieldMetaSpecial.timestamp),
        FieldMetaInfo("c1", FieldMetaType.float,
                      FieldMetaSpecial.none),
    )

    with ModelSwapperInterface() as swapperAPI:
        # Define the model
        _LOGGER.info("Defining the model")
        swapperAPI.defineModel(modelID=modelID, args=args,
                               commandID="defineModelCmd1")

        # Send input rows to the model
        inputRows = [
            ModelInputRow(
                rowID="rowfoo",
                data=[datetime.datetime(2014, 5, 23, 8, 13, 00), 5.3]),
            ModelInputRow(
                rowID="rowbar",
                data=[datetime.datetime(2014, 5, 23, 8, 13, 15), 2.4]),
        ]

        _LOGGER.info(
            "Submitting batch of %d input rows with ids=[%s..%s]...",
            len(inputRows), inputRows[0].rowID, inputRows[-1].rowID)

        swapperAPI.submitRequests(modelID=modelID, requests=inputRows)

        # Run model_runner and collect results
        with self._startModelRunnerSubprocess(
                modelID) as modelRunnerProcess:
            resultBatches = self._consumeResults(numExpectedBatches=2,
                                                 timeout=15)
            self._waitForProcessToStopAndCheck(modelRunnerProcess)

        with MessageBusConnector() as bus:
            # The results message queue should be empty now
            self.assertTrue(bus.isEmpty(swapperAPI._resultsQueueName))

        self.assertEqual(len(resultBatches), 2, repr(resultBatches))

        # First result batch should be the first defineModel result
        batch = resultBatches[0]
        self.assertEqual(batch.modelID, modelID)
        self.assertEqual(len(batch.objects), 1)

        result = batch.objects[0]
        self.assertIsInstance(result, ModelCommandResult)
        self.assertEqual(result.method, "defineModel")
        self.assertEqual(result.status, htmengineerrno.SUCCESS)
        self.assertEqual(result.commandID, "defineModelCmd1")

        # The second result batch should be for the two input rows
        batch = resultBatches[1]
        self.assertEqual(batch.modelID, modelID)
        self.assertEqual(len(batch.objects), len(inputRows))

        for inputRow, result in zip(inputRows, batch.objects):
            self.assertIsInstance(result, ModelInferenceResult)
            self.assertEqual(result.status, htmengineerrno.SUCCESS)
            self.assertEqual(result.rowID, inputRow.rowID)
            self.assertIsInstance(result.anomalyScore, float)
            if classifierEnabled:
                self.assertIsInstance(result.multiStepBestPredictions, dict)
            else:
                self.assertIsNone(result.multiStepBestPredictions)

        # Verify model checkpoint: it must load, carry two batch IDs, and
        # have no input samples attribute yet
        model = checkpointMgr.load(modelID)
        del model

        attrs = checkpointMgr.loadCheckpointAttributes(modelID)
        self.assertIn(
            model_runner._ModelArchiver._BATCH_IDS_CHECKPOINT_ATTR_NAME,
            attrs, msg=repr(attrs))
        self.assertEqual(
            len(attrs[
                model_runner._ModelArchiver._BATCH_IDS_CHECKPOINT_ATTR_NAME]),
            2, msg=repr(attrs))
        self.assertNotIn(
            model_runner._ModelArchiver.
            _INPUT_SAMPLES_SINCE_CHECKPOINT_ATTR_NAME,
            attrs, msg=repr(attrs))

        # Now, check incremental checkpointing
        inputRows2 = [
            ModelInputRow(
                rowID=2,
                data=[datetime.datetime(2014, 5, 23, 8, 13, 20), 2.7]),
            ModelInputRow(
                rowID=3,
                data=[datetime.datetime(2014, 5, 23, 8, 13, 25), 3.9]),
        ]

        _LOGGER.info(
            "Submitting batch of %d input rows with ids=[%s..%s]...",
            len(inputRows2), inputRows2[0].rowID, inputRows2[-1].rowID)

        inputBatchID = swapperAPI.submitRequests(modelID=modelID,
                                                 requests=inputRows2)

        with self._startModelRunnerSubprocess(
                modelID) as modelRunnerProcess:
            resultBatches = self._consumeResults(numExpectedBatches=1,
                                                 timeout=15)
            self._waitForProcessToStopAndCheck(modelRunnerProcess)

        with MessageBusConnector() as bus:
            self.assertTrue(bus.isEmpty(swapperAPI._resultsQueueName))

        batch = resultBatches[0]
        self.assertEqual(batch.modelID, modelID)
        self.assertEqual(len(batch.objects), len(inputRows2))

        for inputRow, result in zip(inputRows2, batch.objects):
            self.assertIsInstance(result, ModelInferenceResult)
            self.assertEqual(result.status, htmengineerrno.SUCCESS)
            self.assertEqual(result.rowID, inputRow.rowID)
            self.assertIsInstance(result.anomalyScore, float)
            if classifierEnabled:
                self.assertIsInstance(result.multiStepBestPredictions, dict)
            else:
                self.assertIsNone(result.multiStepBestPredictions)

        # The checkpoint attributes should now hold only the latest batch
        # ID and the data of the rows submitted in that batch
        model = checkpointMgr.load(modelID)
        del model

        attrs = checkpointMgr.loadCheckpointAttributes(modelID)
        self.assertIn(
            model_runner._ModelArchiver._BATCH_IDS_CHECKPOINT_ATTR_NAME,
            attrs, msg=repr(attrs))
        self.assertSequenceEqual(
            attrs[
                model_runner._ModelArchiver._BATCH_IDS_CHECKPOINT_ATTR_NAME],
            [inputBatchID], msg=repr(attrs))

        self.assertIn(
            model_runner._ModelArchiver.
            _INPUT_SAMPLES_SINCE_CHECKPOINT_ATTR_NAME,
            attrs, msg=repr(attrs))
        self.assertSequenceEqual(
            model_runner._ModelArchiver._decodeDataSamples(
                attrs[model_runner._ModelArchiver.
                      _INPUT_SAMPLES_SINCE_CHECKPOINT_ATTR_NAME]),
            [row.data for row in inputRows2], msg=repr(attrs))

        # Final run with incremental checkpointing
        inputRows3 = [
            ModelInputRow(
                rowID=4,
                data=[datetime.datetime(2014, 5, 23, 8, 13, 30), 4.7]),
            ModelInputRow(
                rowID=5,
                data=[datetime.datetime(2014, 5, 23, 8, 13, 35), 5.9]),
        ]

        _LOGGER.info(
            "Submitting batch of %d input rows with ids=[%s..%s]...",
            len(inputRows3), inputRows3[0].rowID, inputRows3[-1].rowID)

        inputBatchID = swapperAPI.submitRequests(modelID=modelID,
                                                 requests=inputRows3)

        with self._startModelRunnerSubprocess(
                modelID) as modelRunnerProcess:
            resultBatches = self._consumeResults(numExpectedBatches=1,
                                                 timeout=15)
            self._waitForProcessToStopAndCheck(modelRunnerProcess)

        with MessageBusConnector() as bus:
            self.assertTrue(bus.isEmpty(swapperAPI._resultsQueueName))

        batch = resultBatches[0]
        self.assertEqual(batch.modelID, modelID)
        self.assertEqual(len(batch.objects), len(inputRows3))

        for inputRow, result in zip(inputRows3, batch.objects):
            self.assertIsInstance(result, ModelInferenceResult)
            self.assertEqual(result.status, htmengineerrno.SUCCESS)
            self.assertEqual(result.rowID, inputRow.rowID)
            self.assertIsInstance(result.anomalyScore, float)
            if classifierEnabled:
                self.assertIsInstance(result.multiStepBestPredictions, dict)
            else:
                self.assertIsNone(result.multiStepBestPredictions)

        # The input samples attribute should now cover the rows of both
        # incremental batches, while the batch IDs hold only the latest one
        model = checkpointMgr.load(modelID)
        del model

        attrs = checkpointMgr.loadCheckpointAttributes(modelID)
        self.assertIn(
            model_runner._ModelArchiver._BATCH_IDS_CHECKPOINT_ATTR_NAME,
            attrs, msg=repr(attrs))
        self.assertSequenceEqual(
            attrs[
                model_runner._ModelArchiver._BATCH_IDS_CHECKPOINT_ATTR_NAME],
            [inputBatchID], msg=repr(attrs))

        self.assertIn(
            model_runner._ModelArchiver.
            _INPUT_SAMPLES_SINCE_CHECKPOINT_ATTR_NAME,
            attrs, msg=repr(attrs))
        self.assertSequenceEqual(
            model_runner._ModelArchiver._decodeDataSamples(
                attrs[model_runner._ModelArchiver.
                      _INPUT_SAMPLES_SINCE_CHECKPOINT_ATTR_NAME]),
            [row.data for row in itertools.chain(inputRows2, inputRows3)],
            msg=repr(attrs))

        # Delete the model
        _LOGGER.info("Deleting the model=%s", modelID)
        swapperAPI.deleteModel(modelID=modelID, commandID="deleteModelCmd1")

        with self._startModelRunnerSubprocess(
                modelID) as modelRunnerProcess:
            resultBatches = self._consumeResults(numExpectedBatches=1,
                                                 timeout=15)
            self._waitForProcessToStopAndCheck(modelRunnerProcess)

        self.assertEqual(len(resultBatches), 1, repr(resultBatches))

        # The only result batch should be the deleteModel result
        batch = resultBatches[0]
        self.assertEqual(batch.modelID, modelID)
        self.assertEqual(len(batch.objects), 1)

        result = batch.objects[0]
        self.assertIsInstance(result, ModelCommandResult)
        self.assertEqual(result.method, "deleteModel")
        self.assertEqual(result.status, htmengineerrno.SUCCESS)
        self.assertEqual(result.commandID, "deleteModelCmd1")

        with MessageBusConnector() as bus:
            self.assertTrue(bus.isEmpty(swapperAPI._resultsQueueName))

            # The model input queue should be deleted now
            self.assertFalse(
                bus.isMessageQeueuePresent(
                    swapperAPI._getModelInputQName(modelID=modelID)))

        # The model checkpoint should be gone too
        with self.assertRaises(model_checkpoint_mgr.ModelNotFound):
            checkpointMgr.load(modelID)

        with self.assertRaises(model_checkpoint_mgr.ModelNotFound):
            checkpointMgr.loadModelDefinition(modelID)

        with self.assertRaises(model_checkpoint_mgr.ModelNotFound):
            checkpointMgr.loadCheckpointAttributes(modelID)

        with self.assertRaises(model_checkpoint_mgr.ModelNotFound):
            checkpointMgr.remove(modelID)
def deleteHTMModel(modelId):
    """ Dispatch command to delete HTM model

    A ModelSwapperInterface is opened for the duration of the call and a
    freshly generated GUID is used as the command ID.

    :param modelId: ID of the model to delete; passed to the model swapper
      as `modelID`
    """
    with ModelSwapperInterface() as modelSwapper:
        commandID = createGuid()
        modelSwapper.deleteModel(modelID=modelId, commandID=commandID)
def testCloneModel(self):
    # End-to-end clone test: starts a model scheduler subprocess, defines a
    # model, clones it, sends two input rows to the clone, deletes the
    # clone, and verifies the four result batches. Finally terminates the
    # scheduler subprocess and checks that it exits with return code 0.
    modelSchedulerSubprocess = self._startModelSchedulerSubprocess()
    # Safety net: kill the subprocess at cleanup only if it has not exited
    self.addCleanup(lambda: modelSchedulerSubprocess.kill()
                    if modelSchedulerSubprocess.returncode is None else None)

    modelID = "abc"
    destModelID = "def"

    resultBatches = []

    with ModelSwapperInterface() as swapperAPI:
        args = getScalarMetricWithTimeOfDayAnomalyParams(metricData=[0],
                                                         minVal=0,
                                                         maxVal=1000)

        # Submit requests including a model creation command and two data
        # rows.
        args["inputRecordSchema"] = (
            FieldMetaInfo("c0", FieldMetaType.datetime,
                          FieldMetaSpecial.timestamp),
            FieldMetaInfo("c1", FieldMetaType.float,
                          FieldMetaSpecial.none),
        )

        # Define the model
        _LOGGER.info("Defining the model")
        swapperAPI.defineModel(modelID=modelID, args=args,
                               commandID="defineModelCmd1")

        resultBatches.extend(self._consumeResults(1, timeout=20))
        self.assertEqual(len(resultBatches), 1)

        # Clone the just-defined model
        _LOGGER.info("Cloning model")
        swapperAPI.cloneModel(modelID, destModelID,
                              commandID="cloneModelCmd1")

        resultBatches.extend(self._consumeResults(1, timeout=20))
        self.assertEqual(len(resultBatches), 2)

        # Send input rows to the clone
        inputRows = [
            ModelInputRow(
                rowID="rowfoo",
                data=[datetime.datetime(2013, 5, 23, 8, 13, 00), 5.3]),
            ModelInputRow(
                rowID="rowbar",
                data=[datetime.datetime(2013, 5, 23, 8, 13, 15), 2.4]),
        ]

        _LOGGER.info("Submitting batch of %d input rows...", len(inputRows))
        swapperAPI.submitRequests(modelID=destModelID, requests=inputRows)

        _LOGGER.info("These models have pending input: %s",
                     swapperAPI.getModelsWithInputPending())

        resultBatches.extend(self._consumeResults(1, timeout=20))
        self.assertEqual(len(resultBatches), 3)

        with MessageBusConnector() as bus:
            # The results message queue should be empty now
            self.assertTrue(bus.isEmpty(swapperAPI._resultsQueueName))

        # Delete the model
        _LOGGER.info("Deleting the model")
        swapperAPI.deleteModel(modelID=destModelID,
                               commandID="deleteModelCmd1")

        _LOGGER.info("Waiting for model deletion result")
        resultBatches.extend(self._consumeResults(1, timeout=20))

        self.assertEqual(len(resultBatches), 4)

        with MessageBusConnector() as bus:
            # The results message queue should be empty now
            self.assertTrue(bus.isEmpty(swapperAPI._resultsQueueName))

            # The model input queue should be deleted now
            self.assertFalse(
                bus.isMessageQeueuePresent(
                    swapperAPI._getModelInputQName(modelID=destModelID)))

    # Verify results

    # First result batch should be the defineModel result
    batch = resultBatches[0]
    self.assertEqual(batch.modelID, modelID)
    self.assertEqual(len(batch.objects), 1)

    result = batch.objects[0]
    self.assertIsInstance(result, ModelCommandResult)
    self.assertEqual(result.method, "defineModel")
    self.assertEqual(result.status, htmengineerrno.SUCCESS)
    self.assertEqual(result.commandID, "defineModelCmd1")

    # The second result batch should be for the cloneModel result; it is
    # expected under the source model's ID
    batch = resultBatches[1]
    self.assertEqual(batch.modelID, modelID)
    self.assertEqual(len(batch.objects), 1)

    result = batch.objects[0]
    self.assertIsInstance(result, ModelCommandResult)
    self.assertEqual(result.method, "cloneModel")
    self.assertEqual(result.status, htmengineerrno.SUCCESS)
    self.assertEqual(result.commandID, "cloneModelCmd1")

    # The third batch should be for the two input rows
    batch = resultBatches[2]
    self.assertEqual(batch.modelID, destModelID)
    self.assertEqual(len(batch.objects), len(inputRows))

    for inputRow, result in zip(inputRows, batch.objects):
        self.assertIsInstance(result, ModelInferenceResult)
        self.assertEqual(result.status, htmengineerrno.SUCCESS)
        self.assertEqual(result.rowID, inputRow.rowID)
        self.assertIsInstance(result.anomalyScore, float)
        self.assertIsInstance(result.multiStepBestPredictions, dict)

    # The fourth batch should be for the "deleteModel"
    batch = resultBatches[3]
    self.assertEqual(batch.modelID, destModelID)
    self.assertEqual(len(batch.objects), 1)

    result = batch.objects[0]
    self.assertIsInstance(result, ModelCommandResult)
    self.assertEqual(result.method, "deleteModel")
    self.assertEqual(result.status, htmengineerrno.SUCCESS)
    self.assertEqual(result.commandID, "deleteModelCmd1")

    # Signal Model Scheduler Service subprocess to shut down and wait for it
    waitResult = dict()

    def runWaiterThread():
        # Runs in a helper thread so that the wait can be bounded by the
        # join timeout below; the outcome is reported via waitResult
        try:
            waitResult["returnCode"] = modelSchedulerSubprocess.wait()
        except:
            _LOGGER.exception(
                "Waiting for modelSchedulerSubprocess failed")
            waitResult["exceptionInfo"] = traceback.format_exc()
            raise
        return

    modelSchedulerSubprocess.terminate()
    waiterThread = threading.Thread(target=runWaiterThread)
    waiterThread.setDaemon(True)
    waiterThread.start()
    waiterThread.join(timeout=30)
    # A still-running waiter thread means the subprocess did not exit
    # within the timeout
    self.assertFalse(waiterThread.isAlive())

    self.assertEqual(waitResult["returnCode"], 0, msg=repr(waitResult))