Example #1
def generateSwarmParams(stats):
    """ Generate parameters for creating a model

  :param stats: dict with "min", "max" and optional "minResolution"; values must
    be integer, float or None.

  :returns: if either minVal or maxVal is None, returns None; otherwise returns
    swarmParams object that is suitable for passing to startMonitoring and
    startModel
  """
    minVal = stats.get("min")
    maxVal = stats.get("max")
    minResolution = stats.get("minResolution")
    if minVal is None or maxVal is None:
        return None

    # Create possible swarm parameters based on metric data
    possibleModels = getScalarMetricWithTimeOfDayParams(
        metricData=[0],
        minVal=minVal,
        maxVal=maxVal,
        minResolution=minResolution)

    swarmParams = possibleModels[0]

    swarmParams["inputRecordSchema"] = (
        fieldmeta.FieldMetaInfo("c0", fieldmeta.FieldMetaType.datetime,
                                fieldmeta.FieldMetaSpecial.timestamp),
        fieldmeta.FieldMetaInfo("c1", fieldmeta.FieldMetaType.float,
                                fieldmeta.FieldMetaSpecial.none),
    )

    return swarmParams
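
A minimal usage sketch; the stats values below are illustrative, not taken from the source:

stats = {"min": 0.0, "max": 5000.0, "minResolution": 0.5}  # hypothetical metric stats
swarmParams = generateSwarmParams(stats)
if swarmParams is not None:
    # modelConfig, inferenceArgs and inputRecordSchema are now ready to be
    # handed to startMonitoring()/startModel()
    modelConfig = swarmParams["modelConfig"]
    inferenceArgs = swarmParams["inferenceArgs"]
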
Example #2
def generateSwarmParamsFromCompleteModelParams(modelSpec):
    """ Generates a "swarm" parameter structure for model creation based on a
  complete set of user-specified model parameters.

  :param modelSpec: Model specification structure as defined by
    'htmengine/adapters/datasource/model_spec_schema.json'
  :type modelSpec: dict

  :returns: If a valid set of complete model params is present, returns a
    swarmParams object suitable for passing to startMonitoring() and
    startModel(); otherwise, an empty dict is returned
  :rtype dict
  """
    # 'completeModelParams' and 'modelParams' are mutually exclusive
    if "modelParams" in modelSpec:
        raise ValueError("{} modelSpec={}".format(_MUTEX_MODEL_SPEC_MSG,
                                                  modelSpec))

    # 'completeModelParams', 'inferenceArgs', 'timestampFieldName', and
    # 'valueFieldName' must all be present together
    completeModelParams = modelSpec["completeModelParams"]
    if "inferenceArgs" not in completeModelParams:
        raise ValueError("{} modelSpec={}".format(_NO_INFERENCE_ARGS_MSG,
                                                  modelSpec))
    if "timestampFieldName" not in completeModelParams:
        raise ValueError("{} modelSpec={}".format(_NO_TIMESTAMP_FIELD_NAME_MSG,
                                                  modelSpec))
    if "valueFieldName" not in completeModelParams:
        raise ValueError("{} modelSpec={}".format(_NO_VALUE_FIELD_NAME_MSG,
                                                  modelSpec))

    # check consistency in predicted field naming
    if (completeModelParams["inferenceArgs"]["predictedField"] !=
            completeModelParams["valueFieldName"]):
        raise ValueError(_INCONSISTENT_PREDICTED_FIELD_NAME_MSG)

    swarmParams = dict()
    swarmParams["modelConfig"] = completeModelParams["modelConfig"]
    swarmParams["inferenceArgs"] = completeModelParams["inferenceArgs"]

    inputRecordSchema = (
        fieldmeta.FieldMetaInfo(completeModelParams["timestampFieldName"],
                                fieldmeta.FieldMetaType.datetime,
                                fieldmeta.FieldMetaSpecial.timestamp),
        fieldmeta.FieldMetaInfo(completeModelParams["valueFieldName"],
                                fieldmeta.FieldMetaType.float,
                                fieldmeta.FieldMetaSpecial.none),
    )
    swarmParams["inputRecordSchema"] = inputRecordSchema

    return swarmParams
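
A sketch of a qualifying modelSpec with placeholder contents; the real modelConfig would follow model_spec_schema.json and is elided here:

modelSpec = {
    "completeModelParams": {
        "modelConfig": {},  # placeholder; a complete OPF model configuration goes here
        "inferenceArgs": {"predictedField": "value"},
        "timestampFieldName": "timestamp",
        "valueFieldName": "value",  # must match inferenceArgs["predictedField"]
    },
    # a "modelParams" key must NOT be present alongside "completeModelParams"
}
swarmParams = generateSwarmParamsFromCompleteModelParams(modelSpec)
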
Example #3
def initializeAggregator(aggSpec, modelSpec):
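    """ Initialize a metric data aggregator for the given metric

    :param dict aggSpec: Aggregation specification with "func" and "windowSize"
      (in seconds), or None if no aggregation is requested
    :param dict modelSpec: Model specification containing "timestampFieldName"
      and "valueFieldName"
    :returns: nupic aggregator.Aggregator instance configured accordingly
    """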
    inputRecordSchema = (
        fieldmeta.FieldMetaInfo(modelSpec["timestampFieldName"],
                                fieldmeta.FieldMetaType.datetime,
                                fieldmeta.FieldMetaSpecial.timestamp),
        fieldmeta.FieldMetaInfo(modelSpec["valueFieldName"],
                                fieldmeta.FieldMetaType.float,
                                fieldmeta.FieldMetaSpecial.none),
    )

    dataAggregator = aggregator.Aggregator(
        aggregationInfo=dict(
            fields=([(modelSpec["valueFieldName"], aggSpec["func"])]
                    if aggSpec is not None else []),
            seconds=aggSpec["windowSize"] if aggSpec is not None else 0),
        inputFields=inputRecordSchema)
    return dataAggregator
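
A usage sketch with illustrative specs; the field names and aggregation settings are hypothetical:

modelSpec = {"timestampFieldName": "timestamp", "valueFieldName": "value"}
aggSpec = {"func": "mean", "windowSize": 300}  # mean over 5-minute windows
dataAggregator = initializeAggregator(aggSpec, modelSpec)
# Passing aggSpec=None instead configures a pass-through aggregator
# (no aggregated fields, 0-second window).
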
Example #4
  def __init__(self, inputFileObj, inputSpec, aggSpec, modelSpec):
    """
    :param inputFileObj: A file-like object that contains input metric data
    :param dict inputSpec: Input data specification per input_opt_schema.json
    :param dict aggSpec: Optional aggregation specification per
      agg_opt_schema.json or None if no aggregation is requested
    :param dict modelSpec: Model specification per model_opt_schema.json
    """
    self._inputSpec = inputSpec

    self._aggSpec = aggSpec

    self._modelSpec = modelSpec

    if "modelId" in modelSpec:
      self._modelId = modelSpec["modelId"]
    else:
      self._modelId = "Unknown"


    inputRecordSchema = (
      fieldmeta.FieldMetaInfo(modelSpec["timestampFieldName"],
                              fieldmeta.FieldMetaType.datetime,
                              fieldmeta.FieldMetaSpecial.timestamp),
      fieldmeta.FieldMetaInfo(modelSpec["valueFieldName"],
                              fieldmeta.FieldMetaType.float,
                              fieldmeta.FieldMetaSpecial.none),
    )

    self._aggregator = aggregator.Aggregator(
      aggregationInfo=dict(
        fields=([(modelSpec["valueFieldName"], aggSpec["func"])]
                if aggSpec is not None else []),
        seconds=aggSpec["windowSize"] if aggSpec is not None else 0
      ),
      inputFields=inputRecordSchema)

    self._modelRecordEncoder = record_stream.ModelRecordEncoder(
      fields=inputRecordSchema)

    self._model = self._createModel(modelSpec=modelSpec)

    self._anomalyLikelihood = AnomalyLikelihood()

    self._csvReader = self._createCsvReader(inputFileObj)
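
A sketch of the constructor arguments with hypothetical contents; the enclosing class is not shown in this snippet, and the spec dicts follow the schemas named in the docstring:

inputSpec = {}  # per input_opt_schema.json; contents not shown here
aggSpec = {"func": "mean", "windowSize": 300}  # or None to disable aggregation
modelSpec = {"modelId": "metric-1",
             "timestampFieldName": "timestamp",
             "valueFieldName": "value"}  # plus model params per model_opt_schema.json
# These, together with an open file-like object holding the metric CSV data,
# are what the __init__ above expects.
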
Example #5
    def getFieldInfo(self):
        """Returns the metadata specifying the format of the model's output.

    The result may be different than the list of
    nupic.data.fieldmeta.FieldMetaInfo objects supplied at initialization due
    to the transcoding of some input fields into meta- fields, such as
    datetime -> dayOfWeek, timeOfDay, etc.
    """
        return tuple(
            fieldmeta.FieldMetaInfo(*args) for args in itertools.izip(
                self._fieldNames, self._fieldTypes,
                itertools.repeat(fieldmeta.FieldMetaSpecial.none)))
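
For illustration, assuming hypothetical internal state _fieldNames = ("c0", "c1") with corresponding datetime and float _fieldTypes, the returned tuple would be equivalent to:

(fieldmeta.FieldMetaInfo("c0", fieldmeta.FieldMetaType.datetime,
                         fieldmeta.FieldMetaSpecial.none),
 fieldmeta.FieldMetaInfo("c1", fieldmeta.FieldMetaType.float,
                         fieldmeta.FieldMetaSpecial.none))
# Note that every field is tagged FieldMetaSpecial.none here, including the
# datetime field, unlike the input schemas in the other examples.
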
Example #6
def generateSwarmParams(stats, classifierEnabled=False):
    """ Generate parameters for creating a model

  :param stats: dict with "min", "max" and optional "minResolution"; values must
    be integer, float or None.
  :param classifierEnabled: A Boolean value to be given to the 'clEnable'
    property of 'modelParams'. As the classifier generates multi-step best
    predictions, setting this value to True will allow multi-step best
    predictions to be populated in the metric_data table for the associated
    metric of the model.

  :returns: if either minVal or maxVal is None, returns None; otherwise returns
    swarmParams object that is suitable for passing to startMonitoring and
    startModel
  """
    minVal = stats.get("min")
    maxVal = stats.get("max")
    minResolution = stats.get("minResolution")
    if minVal is None or maxVal is None:
        return None

    # Create possible swarm parameters based on metric data
    swarmParams = getScalarMetricWithTimeOfDayAnomalyParams(
        metricData=[0],
        minVal=minVal,
        maxVal=maxVal,
        minResolution=minResolution)

    # Classifier must be enabled to obtain predicted values
    swarmParams["modelConfig"]["modelParams"]["clEnable"] = classifierEnabled

    swarmParams["inputRecordSchema"] = (
        fieldmeta.FieldMetaInfo("c0", fieldmeta.FieldMetaType.datetime,
                                fieldmeta.FieldMetaSpecial.timestamp),
        fieldmeta.FieldMetaInfo("c1", fieldmeta.FieldMetaType.float,
                                fieldmeta.FieldMetaSpecial.none),
    )

    return swarmParams
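
A minimal usage sketch with the classifier turned on; stats values are illustrative:

stats = {"min": 0.0, "max": 5000.0}
swarmParams = generateSwarmParams(stats, classifierEnabled=True)
if swarmParams is not None:
    # the classifier is enabled, so the model will also produce multi-step
    # best predictions
    assert swarmParams["modelConfig"]["modelParams"]["clEnable"] is True
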
Example #7
class _ModelRunner(object):
  """ Use OPF Model to process metric data samples from stdin and and emit
  anomaly likelihood results to stdout
  """

  # Input column meta info compatible with parameters generated by
  # getScalarMetricWithTimeOfDayParams
  # of htmengine.algorithms.selection.clusterParams
  _INPUT_RECORD_SCHEMA = (
    fieldmeta.FieldMetaInfo("c0", fieldmeta.FieldMetaType.datetime,
                            fieldmeta.FieldMetaSpecial.timestamp),
    fieldmeta.FieldMetaInfo("c1", fieldmeta.FieldMetaType.float,
                            fieldmeta.FieldMetaSpecial.none),
  )


  def __init__(self, modelId, stats):
    """
    :param str modelId: model identifier
    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package.
    """
    self._modelId = modelId

    # NOTE: ModelRecordEncoder is implemented in the pull request
    # https://github.com/numenta/nupic/pull/2432 that is not yet in master.
    self._modelRecordEncoder = record_stream.ModelRecordEncoder(
      fields=self._INPUT_RECORD_SCHEMA)

    self._model = self._createModel(stats=stats)

    self._anomalizer = _Anomalizer()


  @classmethod
  def _createModel(cls, stats):
    """Instantiate and configure an OPF model

    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package.
    :returns: OPF Model instance
    """

    # TODO remove the "DummyModel" code path once the ILLEGAL INSTRUCTION issue
    # in nupic is resolved;
    # Create a dummy model instead of a real one temporarily, while we're
    # having trouble with the latest nupic builds on the Mac OS Yosemite that
    # result in ILLEGAL INSTRUCTION in nupic.bindings. This is good enough for
    # now to enable FrontEnd development.
    if False:
      class DummyModel(object):
        class Result(object):
          def __init__(self, inferences):
            self.inferences = inferences

        def run(self, inputRecord):
          inputRecord = inputRecord
          return self.Result(dict(anomalyScore=0.9999))

      return DummyModel()

    else:
      # THIS IS THE CORRECT PRODUCTION CODE that is failing with ILLEGAL
      # INSTRUCTION in  ModelFactory.create on my Mac OS Yosemite laptop.

      # Generate swarm params
      possibleModels = getScalarMetricWithTimeOfDayParams(
        metricData=[0],
        minVal=stats["min"],
        maxVal=stats["max"],
        minResolution=stats.get("minResolution"))

      swarmParams = possibleModels[0]

      model = ModelFactory.create(modelConfig=swarmParams["modelConfig"])
      model.enableLearning()
      model.enableInference(swarmParams["inferenceArgs"])

      return model


  @classmethod
  def _readInputMessages(cls):
    """Create a generator that waits for and yields input messages from
    stdin

    Yields a two-tuple (<timestamp>, <scalar-value>), where <timestamp> is the
    `datetime.datetime` timestamp of the metric data sample and <scalar-value>
    is the floating point value of the metric data sample.
    """
    while True:
      message = sys.stdin.readline()

      if message:
        timestamp, scalarValue = json.loads(message)
        yield (datetime.utcfromtimestamp(timestamp), scalarValue)
      else:
        # Front End closed the pipe (or died)
        break


  @classmethod
  def _emitOutputMessage(cls, rowIndex, anomalyLikelihood):
    """Emit output message to stdout

    :param int rowIndex: 0-based index of corresponding input sample
    :param float anomalyLikelihood: computed anomaly likelihood value
    """
    message = "%s\n" % (json.dumps([rowIndex, anomalyLikelihood]),)

    sys.stdout.write(message)
    sys.stdout.flush()


  def _computeAnomalyLikelihood(self, inputRow):
    """ Compute anomaly likelihood

    :param tuple inputRow: Two-tuple input metric data row
      (<datetime-timestamp>, <float-scalar>)

    :returns: Anomaly likelihood
    :rtype: float
    """
    # Generate raw anomaly score
    inputRecord = self._modelRecordEncoder.encode(inputRow)
    rawAnomalyScore = self._model.run(inputRecord).inferences["anomalyScore"]

    # Generate anomaly likelihood
    return self._anomalizer.process(
      timestamp=inputRow[0],
      metricValue=inputRow[1],
      rawAnomalyScore=rawAnomalyScore)


  def run(self):
    """ Run the model: ingest and process the input metric data and emit output
    messages containing anomaly scores
    """
    g_log.info("Processing model=%s", self._modelId)

    for rowIndex, inputRow in enumerate(self._readInputMessages()):
      anomalyLikelihood = self._computeAnomalyLikelihood(inputRow)

      self._emitOutputMessage(rowIndex=rowIndex,
                              anomalyLikelihood=anomalyLikelihood)
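
A sketch of the line protocol implemented by _readInputMessages and _emitOutputMessage, with illustrative values:

import json
from datetime import datetime

# One input line, as the Front End writes it: JSON [unix-timestamp, metric-value]
inputLine = json.dumps([1438649711, 35.7]) + "\n"
timestamp, scalarValue = json.loads(inputLine)
sample = (datetime.utcfromtimestamp(timestamp), scalarValue)  # what _readInputMessages yields

# One output line, as _emitOutputMessage writes it: JSON [rowIndex, anomalyLikelihood]
outputLine = json.dumps([0, 0.83]) + "\n"
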
Example #8
class _ModelRunner(object):
  """ Use OPF Model to process metric data samples from stdin and and emit
  anomaly likelihood results to stdout
  """

  # Input column meta info compatible with parameters generated by
  # getScalarMetricWithTimeOfDayAnomalyParams
  _INPUT_RECORD_SCHEMA = (
    fieldmeta.FieldMetaInfo("c0", fieldmeta.FieldMetaType.datetime,
                            fieldmeta.FieldMetaSpecial.timestamp),
    fieldmeta.FieldMetaInfo("c1", fieldmeta.FieldMetaType.float,
                            fieldmeta.FieldMetaSpecial.none),
  )


  def __init__(self, modelId, stats):
    """
    :param str modelId: model identifier
    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package.
    """
    self._modelId = modelId

    # NOTE: ModelRecordEncoder is implemented in the pull request
    # https://github.com/numenta/nupic/pull/2432 that is not yet in master.
    self._modelRecordEncoder = record_stream.ModelRecordEncoder(
      fields=self._INPUT_RECORD_SCHEMA)

    self._model = self._createModel(stats=stats)

    self._anomalyLikelihood = AnomalyLikelihood()


  @classmethod
  def _createModel(cls, stats):
    """Instantiate and configure an OPF model

    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package.
    :returns: OPF Model instance
    """
    # Generate swarm params
    swarmParams = getScalarMetricWithTimeOfDayAnomalyParams(
      metricData=[0],
      minVal=stats["min"],
      maxVal=stats["max"],
      minResolution=stats.get("minResolution"))

    model = ModelFactory.create(modelConfig=swarmParams["modelConfig"])
    model.enableLearning()
    model.enableInference(swarmParams["inferenceArgs"])

    return model


  @classmethod
  def _readInputMessages(cls):
    """Create a generator that waits for and yields input messages from
    stdin

    Yields a two-tuple (<timestamp>, <scalar-value>), where <timestamp> is the
    `datetime.datetime` timestamp of the metric data sample and <scalar-value>
    is the floating point value of the metric data sample.
    """
    while True:
      message = sys.stdin.readline()

      if message:
        timestamp, scalarValue = json.loads(message)
        yield (datetime.utcfromtimestamp(timestamp), scalarValue)
      else:
        # Front End closed the pipe (or died)
        break


  @classmethod
  def _emitOutputMessage(cls, rowIndex, anomalyProbability):
    """Emit output message to stdout

    :param int rowIndex: 0-based index of corresponding input sample
    :param float anomalyProbability: computed anomaly probability value
    """
    message = "%s\n" % (json.dumps([rowIndex, anomalyProbability]),)

    sys.stdout.write(message)
    sys.stdout.flush()


  def _computeAnomalyProbability(self, inputRow):
    """ Compute anomaly log likelihood score

    :param tuple inputRow: Two-tuple input metric data row
      (<datetime-timestamp>, <float-scalar>)

    :returns: Log-scaled anomaly probability
    :rtype: float
    """
    # Generate raw anomaly score
    inputRecord = self._modelRecordEncoder.encode(inputRow)
    rawAnomalyScore = self._model.run(inputRecord).inferences["anomalyScore"]

    # Generate anomaly likelihood score
    anomalyProbability = self._anomalyLikelihood.anomalyProbability(
      value=inputRow[1],
      anomalyScore=rawAnomalyScore,
      timestamp=inputRow[0])

    return self._anomalyLikelihood.computeLogLikelihood(anomalyProbability)


  def run(self):
    """ Run the model: ingest and process the input metric data and emit output
    messages containing anomaly scores
    """
    g_log.info("Processing model=%s", self._modelId)

    for rowIndex, inputRow in enumerate(self._readInputMessages()):
      anomalyProbability = self._computeAnomalyProbability(inputRow)

      self._emitOutputMessage(rowIndex=rowIndex,
                              anomalyProbability=anomalyProbability)
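
A minimal driver sketch, assuming stats carries the metric's observed range per stats_schema.json:

runner = _ModelRunner(modelId="metric-1",
                      stats={"min": 0.0, "max": 100.0})  # hypothetical stats
runner.run()  # blocks: reads JSON [timestamp, value] lines from stdin and
              # writes JSON [rowIndex, anomalyLogLikelihood] lines to stdout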