def generateSwarmParams(stats):
    """ Build the swarm parameters used to create a model

    :param stats: dict with "min", "max" and optional "minResolution"; values
      must be integer, float or None.

    :returns: None when either the "min" or the "max" statistic is None (or
      missing); otherwise the first candidate produced by
      getScalarMetricWithTimeOfDayParams, extended with an "inputRecordSchema"
      entry. The result is intended for passing to startMonitoring and
      startModel.
    """
    lowerBound = stats.get("min")
    upperBound = stats.get("max")

    # Without both bounds there is nothing to derive parameters from
    if lowerBound is None or upperBound is None:
        return None

    # Derive candidate swarm parameters from the metric statistics and keep
    # the first candidate
    candidates = getScalarMetricWithTimeOfDayParams(
        metricData=[0],
        minVal=lowerBound,
        maxVal=upperBound,
        minResolution=stats.get("minResolution"))
    swarmParams = candidates[0]

    # Two-column input schema: "c0" is the timestamp, "c1" the scalar value
    timestampField = fieldmeta.FieldMetaInfo(
        "c0",
        fieldmeta.FieldMetaType.datetime,
        fieldmeta.FieldMetaSpecial.timestamp)
    valueField = fieldmeta.FieldMetaInfo(
        "c1",
        fieldmeta.FieldMetaType.float,
        fieldmeta.FieldMetaSpecial.none)
    swarmParams["inputRecordSchema"] = (timestampField, valueField)

    return swarmParams
def generateSwarmParamsFromCompleteModelParams(modelSpec):
    """ Build a "swarm" parameter structure for model creation from a complete
    set of user-specified model parameters.

    :param modelSpec: Model specification structure as defined by
      'htmengine/adapters/datasource/model_spec_schema.json'
    :type modelSpec: dict

    :returns: swarmParams dict holding the "modelConfig" and "inferenceArgs"
      taken from modelSpec["completeModelParams"], plus an "inputRecordSchema"
      built from its "timestampFieldName" and "valueFieldName"; intended for
      passing to startMonitoring() and startModel()
    :rtype: dict

    :raises ValueError: if modelSpec also contains "modelParams"; if
      "inferenceArgs", "timestampFieldName" or "valueFieldName" is missing from
      the complete model params; or if inferenceArgs["predictedField"] differs
      from "valueFieldName"
    :raises KeyError: if "completeModelParams" is missing from modelSpec, or if
      "predictedField" or "modelConfig" is missing from the complete model
      params
    """
    # 'completeModelParams' and 'modelParams' are mutually exclusive
    if "modelParams" in modelSpec:
        raise ValueError(
            "{} modelSpec={}".format(_MUTEX_MODEL_SPEC_MSG, modelSpec))

    params = modelSpec["completeModelParams"]

    # Every one of these keys must accompany 'completeModelParams'; they are
    # checked in this order and the first missing one is reported
    requiredKeys = (
        ("inferenceArgs", _NO_INFERENCE_ARGS_MSG),
        ("timestampFieldName", _NO_TIMESTAMP_FIELD_NAME_MSG),
        ("valueFieldName", _NO_VALUE_FIELD_NAME_MSG),
    )
    for requiredKey, errorMsg in requiredKeys:
        if requiredKey not in params:
            raise ValueError("{} modelSpec={}".format(errorMsg, modelSpec))

    # The predicted field has to be the value field
    inferenceArgs = params["inferenceArgs"]
    valueFieldName = params["valueFieldName"]
    if inferenceArgs["predictedField"] != valueFieldName:
        raise ValueError(_INCONSISTENT_PREDICTED_FIELD_NAME_MSG)

    swarmParams = {
        "modelConfig": params["modelConfig"],
        "inferenceArgs": inferenceArgs,
    }
    swarmParams["inputRecordSchema"] = (
        fieldmeta.FieldMetaInfo(params["timestampFieldName"],
                                fieldmeta.FieldMetaType.datetime,
                                fieldmeta.FieldMetaSpecial.timestamp),
        fieldmeta.FieldMetaInfo(valueFieldName,
                                fieldmeta.FieldMetaType.float,
                                fieldmeta.FieldMetaSpecial.none),
    )
    return swarmParams
def initializeAggregator(aggSpec, modelSpec):
    """ Create an aggregator.Aggregator for a timestamp/value record stream

    :param aggSpec: aggregation specification providing "func" and
      "windowSize", or None; with None the aggregator is configured with no
      aggregated fields and a window of 0 seconds
    :param modelSpec: mapping providing "timestampFieldName" and
      "valueFieldName"

    :returns: the newly created aggregator.Aggregator instance
    """
    timestampField = fieldmeta.FieldMetaInfo(
        modelSpec["timestampFieldName"],
        fieldmeta.FieldMetaType.datetime,
        fieldmeta.FieldMetaSpecial.timestamp)
    valueFieldName = modelSpec["valueFieldName"]
    valueField = fieldmeta.FieldMetaInfo(
        valueFieldName,
        fieldmeta.FieldMetaType.float,
        fieldmeta.FieldMetaSpecial.none)

    if aggSpec is None:
        # No aggregation requested
        aggregationInfo = dict(fields=[], seconds=0)
    else:
        aggregationInfo = dict(
            fields=[(valueFieldName, aggSpec["func"])],
            seconds=aggSpec["windowSize"])

    return aggregator.Aggregator(
        aggregationInfo=aggregationInfo,
        inputFields=(timestampField, valueField))
def __init__(self, inputFileObj, inputSpec, aggSpec, modelSpec):
    """ Store the specifications and build the aggregator, record encoder,
    model, anomaly likelihood helper and CSV reader.

    :param inputFileObj: A file-like object that contains input metric data
    :param dict inputSpec: Input data specification per input_opt_schema.json
    :param dict aggSpec: Optional aggregation specification per
      agg_opt_schema.json or None if no aggregation is requested
    :param dict modelSpec: Model specification per model_opt_schema.json
    """
    self._inputSpec = inputSpec
    self._aggSpec = aggSpec
    self._modelSpec = modelSpec

    # "modelId" is optional in the model spec
    self._modelId = modelSpec.get("modelId", "Unknown")

    timestampField = fieldmeta.FieldMetaInfo(
        modelSpec["timestampFieldName"],
        fieldmeta.FieldMetaType.datetime,
        fieldmeta.FieldMetaSpecial.timestamp)
    valueFieldName = modelSpec["valueFieldName"]
    valueField = fieldmeta.FieldMetaInfo(
        valueFieldName,
        fieldmeta.FieldMetaType.float,
        fieldmeta.FieldMetaSpecial.none)
    recordSchema = (timestampField, valueField)

    if aggSpec is None:
        # No aggregation requested: no aggregated fields, 0-second window
        aggregationInfo = dict(fields=[], seconds=0)
    else:
        aggregationInfo = dict(
            fields=[(valueFieldName, aggSpec["func"])],
            seconds=aggSpec["windowSize"])

    self._aggregator = aggregator.Aggregator(
        aggregationInfo=aggregationInfo,
        inputFields=recordSchema)

    self._modelRecordEncoder = record_stream.ModelRecordEncoder(
        fields=recordSchema)

    self._model = self._createModel(modelSpec=modelSpec)

    self._anomalyLikelihood = AnomalyLikelihood()

    self._csvReader = self._createCsvReader(inputFileObj)
def getFieldInfo(self):
    """Returns the metadata specifying the format of the model's output.

    The result may be different than the list of
    nupic.data.fieldmeta.FieldMetaInfo objects supplied at initialization due
    to the transcoding of some input fields into meta-fields, such as
    datetime -> dayOfWeek, timeOfDay, etc.

    :returns: tuple of fieldmeta.FieldMetaInfo, one per (name, type) pair
      drawn from self._fieldNames and self._fieldTypes, each with
      fieldmeta.FieldMetaSpecial.none as its special attribute
    """
    # Built-in zip exists on both Python 2 and Python 3 (itertools.izip is
    # Python 2 only) and, like izip, stops at the shorter of the two sequences.
    noSpecial = fieldmeta.FieldMetaSpecial.none
    return tuple(
        fieldmeta.FieldMetaInfo(fieldName, fieldType, noSpecial)
        for fieldName, fieldType in zip(self._fieldNames, self._fieldTypes))
def generateSwarmParams(stats, classifierEnabled=False):
    """ Build the swarm parameters used to create a model

    :param stats: dict with "min", "max" and optional "minResolution"; values
      must be integer, float or None.

    :param classifierEnabled: Boolean stored in the 'clEnable' property of
      'modelParams'. As the classifier generates multi-step best predictions,
      passing True allows multi-step best predictions to be populated in the
      metric_data table for the associated metric of the model.

    :returns: None when either the "min" or the "max" statistic is None (or
      missing); otherwise the parameters produced by
      getScalarMetricWithTimeOfDayAnomalyParams, with 'clEnable' set and an
      "inputRecordSchema" entry added. The result is intended for passing to
      startMonitoring and startModel.
    """
    lowerBound = stats.get("min")
    upperBound = stats.get("max")

    # Without both bounds there is nothing to derive parameters from
    if lowerBound is None or upperBound is None:
        return None

    # Derive swarm parameters from the metric statistics
    swarmParams = getScalarMetricWithTimeOfDayAnomalyParams(
        metricData=[0],
        minVal=lowerBound,
        maxVal=upperBound,
        minResolution=stats.get("minResolution"))

    # Classifier must be enabled to obtain predicted values
    modelParams = swarmParams["modelConfig"]["modelParams"]
    modelParams["clEnable"] = classifierEnabled

    # Two-column input schema: "c0" is the timestamp, "c1" the scalar value
    timestampField = fieldmeta.FieldMetaInfo(
        "c0",
        fieldmeta.FieldMetaType.datetime,
        fieldmeta.FieldMetaSpecial.timestamp)
    valueField = fieldmeta.FieldMetaInfo(
        "c1",
        fieldmeta.FieldMetaType.float,
        fieldmeta.FieldMetaSpecial.none)
    swarmParams["inputRecordSchema"] = (timestampField, valueField)

    return swarmParams
class _ModelRunner(object):
    """ Use OPF Model to process metric data samples from stdin and emit
    anomaly likelihood results to stdout
    """

    # Input column meta info compatible with parameters generated by
    # getScalarMetricWithTimeOfDayParams
    # of htmengine.algorithms.selection.clusterParams
    _INPUT_RECORD_SCHEMA = (
        fieldmeta.FieldMetaInfo("c0", fieldmeta.FieldMetaType.datetime,
                                fieldmeta.FieldMetaSpecial.timestamp),
        fieldmeta.FieldMetaInfo("c1", fieldmeta.FieldMetaType.float,
                                fieldmeta.FieldMetaSpecial.none),
    )

    def __init__(self, modelId, stats):
        """
        :param str modelId: model identifier
        :param dict stats: Metric data stats per stats_schema.json in the
          unicorn_backend package.
        """
        self._modelId = modelId

        # NOTE: ModelRecordEncoder is implemented in the pull request
        # https://github.com/numenta/nupic/pull/2432 that is not yet in master.
        self._modelRecordEncoder = record_stream.ModelRecordEncoder(
            fields=self._INPUT_RECORD_SCHEMA)

        self._model = self._createModel(stats=stats)

        self._anomalizer = _Anomalizer()

    @classmethod
    def _createModel(cls, stats):
        """Instantiate and configure an OPF model

        :param dict stats: Metric data stats per stats_schema.json in the
          unicorn_backend package; "min" and "max" are required,
          "minResolution" is optional.
        :returns: OPF Model instance with learning and inference enabled
        """
        # NOTE: a temporary DummyModel stand-in used to live here behind a
        # disabled `if False:` guard, as a workaround for ILLEGAL INSTRUCTION
        # crashes in nupic.bindings on Mac OS Yosemite. That branch could never
        # execute, so only the production path remains.

        # Generate swarm params and keep the first candidate
        possibleModels = getScalarMetricWithTimeOfDayParams(
            metricData=[0],
            minVal=stats["min"],
            maxVal=stats["max"],
            minResolution=stats.get("minResolution"))

        swarmParams = possibleModels[0]

        model = ModelFactory.create(modelConfig=swarmParams["modelConfig"])
        model.enableLearning()
        model.enableInference(swarmParams["inferenceArgs"])

        return model

    @classmethod
    def _readInputMessages(cls):
        """Create a generator that waits for and yields input messages from
        stdin

        yields two-tuple (<timestamp>, <scalar-value>), where <timestamp> is
        the `datetime.datetime` timestamp of the metric data sample and
        <scalar-value> is the floating point value of the metric data sample.
        Each input line is a JSON array [<epoch-timestamp>, <scalar-value>].
        """
        while True:
            message = sys.stdin.readline()

            if not message:
                # Front End closed the pipe (or died)
                break

            timestamp, scalarValue = json.loads(message)
            yield (datetime.utcfromtimestamp(timestamp), scalarValue)

    @classmethod
    def _emitOutputMessage(cls, rowIndex, anomalyLikelihood):
        """Emit output message to stdout

        The message is a single line holding the JSON array
        [rowIndex, anomalyLikelihood]; stdout is flushed after each message.

        :param int rowIndex: 0-based index of corresponding input sample
        :param float anomalyLikelihood: computed anomaly likelihood value
        """
        message = "%s\n" % (json.dumps([rowIndex, anomalyLikelihood]),)

        sys.stdout.write(message)
        sys.stdout.flush()

    def _computeAnomalyLikelihood(self, inputRow):
        """ Compute anomaly likelihood

        :param tuple inputRow: Two-tuple input metric data row
          (<datetime-timestamp>, <float-scalar>)

        :returns: Anomaly likelihood
        :rtype: float
        """
        # Generate raw anomaly score
        inputRecord = self._modelRecordEncoder.encode(inputRow)
        rawAnomalyScore = (
            self._model.run(inputRecord).inferences["anomalyScore"])

        # Generate anomaly likelihood
        return self._anomalizer.process(
            timestamp=inputRow[0],
            metricValue=inputRow[1],
            rawAnomalyScore=rawAnomalyScore)

    def run(self):
        """ Run the model: ingest and process the input metric data and emit
        output messages containing anomaly scores
        """
        g_log.info("Processing model=%s", self._modelId)

        for rowIndex, inputRow in enumerate(self._readInputMessages()):
            anomalyLikelihood = self._computeAnomalyLikelihood(inputRow)

            self._emitOutputMessage(rowIndex=rowIndex,
                                    anomalyLikelihood=anomalyLikelihood)
class _ModelRunner(object):
    """ Use OPF Model to process metric data samples from stdin and emit
    anomaly likelihood results to stdout
    """

    # Input column meta info compatible with parameters generated by
    # getScalarMetricWithTimeOfDayAnomalyParams
    _INPUT_RECORD_SCHEMA = (
        fieldmeta.FieldMetaInfo("c0", fieldmeta.FieldMetaType.datetime,
                                fieldmeta.FieldMetaSpecial.timestamp),
        fieldmeta.FieldMetaInfo("c1", fieldmeta.FieldMetaType.float,
                                fieldmeta.FieldMetaSpecial.none),
    )

    def __init__(self, modelId, stats):
        """
        :param str modelId: model identifier
        :param dict stats: Metric data stats per stats_schema.json in the
          unicorn_backend package.
        """
        self._modelId = modelId

        # NOTE: ModelRecordEncoder is implemented in the pull request
        # https://github.com/numenta/nupic/pull/2432 that is not yet in master.
        self._modelRecordEncoder = record_stream.ModelRecordEncoder(
            fields=self._INPUT_RECORD_SCHEMA)

        self._model = self._createModel(stats=stats)

        self._anomalyLikelihood = AnomalyLikelihood()

    @classmethod
    def _createModel(cls, stats):
        """Instantiate and configure an OPF model

        :param dict stats: Metric data stats per stats_schema.json in the
          unicorn_backend package; "min" and "max" are required,
          "minResolution" is optional.
        :returns: OPF Model instance with learning and inference enabled
        """
        # Generate swarm params from the metric statistics
        generatedParams = getScalarMetricWithTimeOfDayAnomalyParams(
            metricData=[0],
            minVal=stats["min"],
            maxVal=stats["max"],
            minResolution=stats.get("minResolution"))

        opfModel = ModelFactory.create(
            modelConfig=generatedParams["modelConfig"])
        opfModel.enableLearning()
        opfModel.enableInference(generatedParams["inferenceArgs"])
        return opfModel

    @classmethod
    def _readInputMessages(cls):
        """Create a generator that waits for and yields input messages from
        stdin

        yields two-tuple (<timestamp>, <scalar-value>), where <timestamp> is
        the `datetime.datetime` timestamp of the metric data sample and
        <scalar-value> is the floating point value of the metric data sample.
        Each input line is a JSON array [<epoch-timestamp>, <scalar-value>].
        """
        # readline() returns "" once the Front End closes the pipe (or dies);
        # the sentinel form of iter() ends the loop at that point.
        for line in iter(sys.stdin.readline, ""):
            epochSeconds, value = json.loads(line)
            yield (datetime.utcfromtimestamp(epochSeconds), value)

    @classmethod
    def _emitOutputMessage(cls, rowIndex, anomalyProbability):
        """Emit output message to stdout

        The message is a single line holding the JSON array
        [rowIndex, anomalyProbability]; stdout is flushed after each message.

        :param int rowIndex: 0-based index of corresponding input sample
        :param float anomalyProbability: computed anomaly probability value
        """
        sys.stdout.write(json.dumps([rowIndex, anomalyProbability]) + "\n")
        sys.stdout.flush()

    def _computeAnomalyProbability(self, inputRow):
        """ Compute anomaly log likelihood score

        :param tuple inputRow: Two-tuple input metric data row
          (<datetime-timestamp>, <float-scalar>)

        :returns: Log-scaled anomaly probability
        :rtype: float
        """
        # Raw anomaly score from the model
        encodedRecord = self._modelRecordEncoder.encode(inputRow)
        modelResult = self._model.run(encodedRecord)
        rawScore = modelResult.inferences["anomalyScore"]

        # Anomaly probability from the likelihood helper, then log-scaled
        likelihood = self._anomalyLikelihood
        probability = likelihood.anomalyProbability(
            value=inputRow[1],
            anomalyScore=rawScore,
            timestamp=inputRow[0])
        return likelihood.computeLogLikelihood(probability)

    def run(self):
        """ Run the model: ingest and process the input metric data and emit
        output messages containing anomaly scores
        """
        g_log.info("Processing model=%s", self._modelId)

        for rowIndex, inputRow in enumerate(self._readInputMessages()):
            self._emitOutputMessage(
                rowIndex=rowIndex,
                anomalyProbability=self._computeAnomalyProbability(inputRow))