def startMonitoring(conn, metricId, swarmParams, logger):
    """ Start monitoring an UNMONITORED metric.

    NOTE: typically called either inside a transaction and/or with locked
    tables

    Starts the CLA model if provided non-None swarmParams; otherwise defers
    model creation to a later time and places the metric in
    MetricStatus.PENDING_DATA state.

    :param conn: SQLAlchemy Connection object for executing SQL
    :type conn: sqlalchemy.engine.Connection

    :param metricId: unique identifier of the metric row

    :param swarmParams: swarmParams generated via
      scalar_metric_utils.generateSwarmParams() or None.

    :param logger: logger object

    :returns: True if model was started; False if not

    :raises AssertionError: if the metric is not in the
      MetricStatus.UNMONITORED state when this function is called

    :raises htmengine.exceptions.ObjectNotFoundError: if metric with the
      referenced metric uid doesn't exist

    :raises htmengine.exceptions.MetricStatusChangedError: if Metric status
      was changed by someone else (most likely another process) before this
      operation could complete
    """
    startTime = time.time()

    metricObj = repository.getMetric(conn, metricId)

    # Raised explicitly instead of via an `assert` statement: asserts are
    # removed when Python runs with -O, and without this guard the
    # PENDING_DATA branch below would use the metric's current (non-
    # UNMONITORED) status as its reference status. AssertionError is kept so
    # that the exception type seen by callers is unchanged.
    if metricObj.status != MetricStatus.UNMONITORED:
        raise AssertionError(
            "startMonitoring: metric=%s is already monitored; status=%s" % (
                metricId, metricObj.status,))

    if swarmParams is not None:
        # We have swarmParams, so start the model
        return _startModelHelper(conn=conn,
                                 metricObj=metricObj,
                                 swarmParams=swarmParams,
                                 logger=logger)

    # No swarmParams: put the metric into the PENDING_DATA state until enough
    # data arrives for stats
    refStatus = metricObj.status

    repository.setMetricStatus(conn,
                               metricId,
                               MetricStatus.PENDING_DATA,
                               refStatus=refStatus)

    # Re-read the status to confirm that the transition actually took effect
    metricStatus = repository.getMetric(
        conn,
        metricId,
        fields=[schema.metric.c.status]).status

    if metricStatus != MetricStatus.PENDING_DATA:
        raise app_exceptions.MetricStatusChangedError(
            "startMonitoring: unable to promote metric=%s to model as "
            "PENDING_DATA; metric status morphed from %s to %s" % (
                metricId, refStatus, metricStatus,))

    logger.info(
        "startMonitoring: promoted metric to model in PENDING_DATA; "
        "metric=%s; duration=%.4fs", metricId, time.time() - startTime)

    # Model creation was deferred, so no model was started
    return False
def _startModelHelper(conn, metricObj, swarmParams, logger):
    """ Start the model

    :param conn: SQLAlchemy Connection object for executing SQL
    :type conn: sqlalchemy.engine.Connection

    :param metricObj: metric, freshly-loaded
    :type metricObj: sqlalchemy.engine.RowProxy (see repository.getMetric())

    :param swarmParams: non-None swarmParams generated via
      scalar_metric_utils.generateSwarmParams().

    :param logger: logger object

    :returns: True if model was started; False if not

    :raises ValueError: if swarmParams is None, or if the metric is in a
      status from which a model cannot be started

    :raises htmengine.exceptions.ObjectNotFoundError: if the metric doesn't
      exist; this may happen if it got deleted by another process in the
      meantime.

    :raises htmengine.exceptions.MetricStatusChangedError: if Metric status
      was changed by someone else (most likely another process) before this
      operation could complete
    """
    if swarmParams is None:
        raise ValueError(
            "startModel: 'swarmParams' must be non-None: metric=%s"
            % (metricObj.uid,))

    # Status at entry; also serves as the reference status for the
    # conditional column update below
    refStatus = metricObj.status

    if refStatus not in (MetricStatus.UNMONITORED,
                         MetricStatus.PENDING_DATA):
        if refStatus in (MetricStatus.CREATE_PENDING, MetricStatus.ACTIVE):
            # Nothing to start for a metric in one of these states
            return False

        logger.error("Unexpected metric status; metric=%r", metricObj)
        raise ValueError("startModel: unexpected metric status; metric=%r"
                         % (metricObj,))

    startTime = time.time()

    # Save swarm parameters and update metric status
    columnUpdates = {
        "status": MetricStatus.CREATE_PENDING,
        "model_params": htmengine.utils.jsonEncode(swarmParams),
    }
    repository.updateMetricColumnsForRefStatus(conn,
                                               metricObj.uid,
                                               refStatus,
                                               columnUpdates)

    # Reload uid and status to verify that the update took effect
    refreshedMetric = repository.getMetric(
        conn,
        metricObj.uid,
        fields=[schema.metric.c.uid, schema.metric.c.status])

    if refreshedMetric.status != MetricStatus.CREATE_PENDING:
        raise app_exceptions.MetricStatusChangedError(
            "startModel: unable to start model=%s; "
            "metric status morphed from %s to %s"
            % (refreshedMetric.uid, refStatus, refreshedMetric.status,))

    # Request to create the CLA model
    try:
        model_swapper_utils.createHTMModel(refreshedMetric.uid, swarmParams)
    except Exception as exc:
        logger.exception("startModel: createHTMModel failed.")
        # Record the failure on the metric row, then let the original error
        # propagate to the caller
        repository.setMetricStatus(conn,
                                   refreshedMetric.uid,
                                   status=MetricStatus.ERROR,
                                   message=repr(exc))
        raise

    logger.info("startModel: started model=%r; duration=%.4fs",
                refreshedMetric, time.time() - startTime)

    return True