def GET(self):
  """ Get model data stats

  ::

      GET /_models/data/stats

  Returns:

  ::

      {
          "processing_time_remaining": 37
      }
  """
  # Count the metric data rows that have not yet been processed
  with repository.engineFactory().connect() as conn:
    pendingRecordCount = repository.getUnprocessedModelDataCount(conn)

  # Estimate remaining wall-clock time from the per-record processing cost,
  # rounded up to a whole number of seconds
  secondsRemaining = int(
      math.ceil(pendingRecordCount * _PROCESSING_TIME_PER_RECORD))

  self.addStandardHeaders()
  return utils.jsonEncode({"processing_time_remaining": secondsRemaining})
def GET(self):
  """ Get model data stats

  ::

      GET /_models/data/stats

  Returns:

  ::

      {
          "processing_time_remaining": 37
      }
  """
  with repository.engineFactory().connect() as conn:
    # Rows awaiting processing, converted to an estimated number of seconds
    # (rounded up) via the per-record cost constant
    backlog = repository.getUnprocessedModelDataCount(conn)
    estimatedSeconds = int(math.ceil(backlog * _PROCESSING_TIME_PER_RECORD))

  self.addStandardHeaders()
  return utils.jsonEncode({"processing_time_remaining": estimatedSeconds})
def checkAndReport(verbose, warningsAsErrors):
  """Run checks and report findings

  :param bool verbose: True for verbose mode
  :param bool warningsAsErrors: True to treat warnings as errors, returning
    non-zero result code on warnings just as for errors.
  """
  # Load metric records from the engine's repository
  sqlEngine = repository.engineFactory()
  if verbose:
    g_log.info("Accessing Taurus Engine repository via %s", sqlEngine)
  with sqlEngine.connect() as conn:  # pylint: disable=E1101
    engineMetrics = repository.getAllMetrics(conn).fetchall()

  # Load metric records from the dynamodb taurus.metric.<environment> table
  dynamodbMetrics = _getMetricsFromDynamodb(verbose=verbose)

  # Run the full battery of checks against both sources
  warnings, errors = _runAllChecks(engineMetrics=engineMetrics,
                                   dynamodbMetrics=dynamodbMetrics,
                                   verbose=verbose)

  # Emit findings; stay silent when there is nothing to report and the caller
  # did not ask for verbose output
  if warnings or errors or verbose:
    for caption, details in warnings:
      g_log.warn("%s\n%s", caption, details)
    for caption, details in errors:
      g_log.error("%s\n%s", caption, details)

    g_log.info("--------- SUMMARY ---------")

    for caption, _ in warnings:
      g_log.warn(caption)
    for caption, _ in errors:
      g_log.error(caption)

    if warnings:
      g_log.warn("Warnings: %s", len(warnings))
    elif verbose:
      g_log.info("Warnings: 0")

    if errors:
      g_log.error("Errors: %s", len(errors))
    elif verbose:
      g_log.info("Errors: 0")

  # Warnings escalate to a failing exit status only on request
  return 1 if errors or (warnings and warningsAsErrors) else 0
def formatMetricRowProxy(metricObj):
  """Convert a metric row proxy into a plain dict restricted to the metric
  display fields, adding a human-friendly display name and decoded parameters.
  """
  # Prefer "tag_name (server)" when a non-empty tag name is present
  tagName = metricObj.tag_name
  if tagName is not None and len(tagName) > 0:
    displayName = "%s (%s)" % (tagName, metricObj.server)
  else:
    displayName = metricObj.server

  # Decode JSON-encoded parameters; pass through anything already decoded.
  # NOTE(review): when the attribute is absent the else-branch still
  # dereferences it — presumably row proxies always expose it; confirm.
  if (hasattr(metricObj, "parameters")
      and isinstance(metricObj.parameters, basestring)):
    parameters = json.loads(metricObj.parameters)
  else:
    parameters = metricObj.parameters

  engine = repository.engineFactory()

  # Keep only columns that belong to the metric display field set
  permittedColumns = set(col.name for col in getMetricDisplayFields(engine))
  metricDict = dict((name, getattr(metricObj, name))
                    for name in metricObj.keys()
                    if name in permittedColumns)

  metricDict["display_name"] = displayName
  metricDict["parameters"] = parameters

  return metricDict
def _connect():
  """ Explicitly checks out a connection from the sqlalchemy engine for use
  inside web handler via web.ctx
  """
  engine = repository.engineFactory()
  # Stash the bound connect callable so handlers can open connections lazily
  web.ctx.connFactory = engine.connect
def replayMetricDataToModelResultsExchange(messageBus,
                                           chunksize=DEFAULT_CHUNKSIZE):
  """ Reads metric data and synthesizes model inference result messages to the
  "model results" exchange, simulating the end result of the AnomalyService.
  This will afford the dynamodb service an opportunity to backfill older data

  :param messageBus: message bus connection
  :type messageBus: nta.utils.message_bus_connector.MessageBusConnector

  :param int chunksize: maximum number of metric data rows per synthesized
    inference results message
  """
  engine = repository.engineFactory()

  twoWeeksAgo = datetime.datetime.utcnow() - datetime.timedelta(days=14)

  # Properties for publishing model command results on RabbitMQ exchange
  # (same as AnomalyService)
  modelCommandResultProperties = MessageProperties(
      deliveryMode=amqp.constants.AMQPDeliveryModes.PERSISTENT_MESSAGE,
      headers=dict(dataType="model-cmd-result"))

  # Properties for publishing model inference results on RabbitMQ exchange
  # (same as AnomalyService)
  modelInferenceResultProperties = MessageProperties(
      deliveryMode=amqp.constants.AMQPDeliveryModes.PERSISTENT_MESSAGE)

  g_log.info("Getting metric data...")
  result = repository.getMetricData(
      engine,
      score=0,
      fromTimestamp=twoWeeksAgo,
      sort=[metric_data.c.uid, metric_data.c.rowid.asc()])
  numMetricDataRows = result.rowcount
  g_log.info("Got %d rows", numMetricDataRows)

  numModels = 0
  # Rows are sorted by uid, so groupby yields one contiguous group per model
  for uid, group in groupby(result, key=lambda x: x.uid):

    @retryOnTransientErrors
    def _getMetric():
      return repository.getMetric(engine, uid)

    metricObj = _getMetric()

    # Send defineModel command to ensure that the metric table entry is created
    numModels += 1
    modelCommandResult = {
        "status": htmengineerrno.SUCCESS,
        "method": "defineModel",
        "modelId": uid,
        "modelInfo": {
            "metricName": metricObj.name,
            "resource": metricObj.server,
            "modelSpec": json.loads(metricObj.parameters)
        }
    }

    # Serialize
    payload = anomaly_service.AnomalyService._serializeModelResult(
        modelCommandResult)

    g_log.info("Sending `defineModel` command: %r", repr(modelCommandResult))
    messageBus.publishExg(
        exchange=config.get("metric_streamer", "results_exchange_name"),
        routingKey="",
        body=payload,
        properties=modelCommandResultProperties)

    metricInfo = dict(
        uid=metricObj.uid,
        name=metricObj.name,
        description=metricObj.description,
        resource=metricObj.server,
        location=metricObj.location,
        datasource=metricObj.datasource,
        spec=json.loads(metricObj.parameters)["metricSpec"])

    # izip_longest grouper idiom: batch the group into chunksize-row chunks,
    # padding the final chunk with None
    args = [iter(group)] * chunksize
    for num, chunk in enumerate(izip_longest(fillvalue=None, *args)):
      # Create
      inferenceResultsMessage = dict(
          metric=metricInfo,
          results=[dict(rowid=row.rowid,
                        ts=epochFromNaiveUTCDatetime(row.timestamp),
                        value=row.metric_value,
                        rawAnomaly=row.raw_anomaly_score,
                        anomaly=row.anomaly_score)
                   for row in chunk if row is not None])

      # Serialize
      payload = anomaly_service.AnomalyService._serializeModelResult(
          inferenceResultsMessage)

      # BUGFIX: results entries are plain dicts, so timestamps must be read
      # by key; the previous attribute access ([0].ts / [-1].timestamp)
      # raised AttributeError
      g_log.info(
          "uid=%s chunk=%d rows=%d payload_size=%d bytes from %s to %s",
          uid,
          num,
          len(inferenceResultsMessage["results"]),
          sys.getsizeof(payload),
          datetime.datetime.utcfromtimestamp(
              inferenceResultsMessage["results"][0]["ts"]),
          datetime.datetime.utcfromtimestamp(
              inferenceResultsMessage["results"][-1]["ts"]))

      messageBus.publishExg(
          exchange=config.get("metric_streamer", "results_exchange_name"),
          routingKey="",
          body=payload,
          properties=modelInferenceResultProperties)

  g_log.info("Done! numMetricDataRows=%d; numModels=%d",
             numMetricDataRows, numModels)
def replayMetricDataToModelResultsExchange(messageBus, chunksize=DEFAULT_CHUNKSIZE):
    """Reads metric data and synthesizes model inference result messages to the
    "model results" exchange, simulating the end result of the AnomalyService.
    This will afford the dynamodb service an opportunity to backfill older data

    :param messageBus: message bus connection
    :type messageBus: nta.utils.message_bus_connector.MessageBusConnector

    :param int chunksize: maximum number of metric data rows per synthesized
        inference results message
    """
    engine = repository.engineFactory()

    twoWeeksAgo = datetime.datetime.utcnow() - datetime.timedelta(days=14)

    # Properties for publishing model command results on RabbitMQ exchange
    # (same as AnomalyService)
    modelCommandResultProperties = MessageProperties(
        deliveryMode=amqp.constants.AMQPDeliveryModes.PERSISTENT_MESSAGE,
        headers=dict(dataType="model-cmd-result"),
    )

    # Properties for publishing model inference results on RabbitMQ exchange
    # (same as AnomalyService)
    modelInferenceResultProperties = MessageProperties(
        deliveryMode=amqp.constants.AMQPDeliveryModes.PERSISTENT_MESSAGE
    )

    g_log.info("Getting metric data...")
    result = repository.getMetricData(
        engine,
        score=0,
        fromTimestamp=twoWeeksAgo,
        sort=[metric_data.c.uid, metric_data.c.rowid.asc()],
    )
    numMetricDataRows = result.rowcount
    g_log.info("Got %d rows", numMetricDataRows)

    numModels = 0
    # Rows are sorted by uid, so groupby yields one contiguous group per model
    for uid, group in groupby(result, key=lambda x: x.uid):

        @retryOnTransientErrors
        def _getMetric():
            return repository.getMetric(engine, uid)

        metricObj = _getMetric()

        # Send defineModel command to ensure that the metric table entry is created
        numModels += 1
        modelCommandResult = {
            "status": htmengineerrno.SUCCESS,
            "method": "defineModel",
            "modelId": uid,
            "modelInfo": {
                "metricName": metricObj.name,
                "resource": metricObj.server,
                "modelSpec": json.loads(metricObj.parameters),
            },
        }

        # Serialize
        payload = anomaly_service.AnomalyService._serializeModelResult(modelCommandResult)

        g_log.info("Sending `defineModel` command: %r", repr(modelCommandResult))
        messageBus.publishExg(
            exchange=config.get("metric_streamer", "results_exchange_name"),
            routingKey="",
            body=payload,
            properties=modelCommandResultProperties,
        )

        metricInfo = dict(
            uid=metricObj.uid,
            name=metricObj.name,
            description=metricObj.description,
            resource=metricObj.server,
            location=metricObj.location,
            datasource=metricObj.datasource,
            spec=json.loads(metricObj.parameters)["metricSpec"],
        )

        # izip_longest grouper idiom: batch the group into chunksize-row
        # chunks, padding the final chunk with None
        args = [iter(group)] * chunksize
        for num, chunk in enumerate(izip_longest(fillvalue=None, *args)):
            # Create
            inferenceResultsMessage = dict(
                metric=metricInfo,
                results=[
                    dict(
                        rowid=row.rowid,
                        ts=epochFromNaiveUTCDatetime(row.timestamp),
                        value=row.metric_value,
                        rawAnomaly=row.raw_anomaly_score,
                        anomaly=row.anomaly_score,
                    )
                    for row in chunk
                    if row is not None
                ],
            )

            # Serialize
            payload = anomaly_service.AnomalyService._serializeModelResult(inferenceResultsMessage)

            # BUGFIX: results entries are plain dicts, so timestamps must be
            # read by key; the previous attribute access ([0].ts /
            # [-1].timestamp) raised AttributeError
            g_log.info(
                "uid=%s chunk=%d rows=%d payload_size=%d bytes from %s to %s",
                uid,
                num,
                len(inferenceResultsMessage["results"]),
                sys.getsizeof(payload),
                datetime.datetime.utcfromtimestamp(inferenceResultsMessage["results"][0]["ts"]),
                datetime.datetime.utcfromtimestamp(inferenceResultsMessage["results"][-1]["ts"]),
            )

            messageBus.publishExg(
                exchange=config.get("metric_streamer", "results_exchange_name"),
                routingKey="",
                body=payload,
                properties=modelInferenceResultProperties,
            )

    g_log.info("Done! numMetricDataRows=%d; numModels=%d", numMetricDataRows, numModels)