Example #1
import numpy as np
from nupic.algorithms.anomaly_likelihood import AnomalyLikelihood  # NuPIC's anomaly likelihood helper


def compute_scores(y_test, y_pred, normalize=False):
    # Errors
    errors = np.array((y_test - y_pred)**2)
    if normalize:
        errors = errors / float(errors.max() - errors.min())

    # Log likelihood.
    log_likelihoods = []
    anomaly_likelihood = AnomalyLikelihood()
    for i in range(len(y_test)):
        likelihood = anomaly_likelihood.anomalyProbability(y_test[i],
                                                           errors[i],
                                                           timestamp=None)
        log_likelihood = anomaly_likelihood.computeLogLikelihood(likelihood)
        log_likelihoods.append(log_likelihood)

    # Anomaly thresholds:
    # - HIGH: log_likelihood >= 0.5
    # - MEDIUM: 0.5 > log_likelihood >= 0.4
    N = len(log_likelihoods)
    anomalies = {'high': np.zeros(N), 'medium': np.zeros(N)}
    x = np.array(log_likelihoods)
    high_idx = x >= 0.5
    anomalies['high'][high_idx] = 1
    medium_idx = np.logical_and(x >= 0.4, x < 0.5)
    anomalies['medium'][medium_idx] = 1

    return errors, log_likelihoods, anomalies
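
A minimal driver for compute_scores, with synthetic observed and predicted arrays invented for illustration; the HIGH/MEDIUM flags follow the thresholds documented in the comments above:

import numpy as np

y_test = np.array([10.0, 10.2, 9.9, 25.0, 10.1])   # observed values (synthetic)
y_pred = np.array([10.1, 10.1, 10.0, 10.2, 10.0])  # predicted values (synthetic)

errors, log_likelihoods, anomalies = compute_scores(y_test, y_pred, normalize=True)
print(anomalies['high'])    # 1 where log_likelihood >= 0.5
print(anomalies['medium'])  # 1 where 0.4 <= log_likelihood < 0.5
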
Example #2
class buildmodel:
    def __init__(self):
        #self.model_params = getScalarMetricWithTimeOfDayAnomalyParams(metricData=[0],tmImplementation="cpp")
        with open("model_params.json") as fp:
            self.model_params = json.load(fp)
        print self.model_params
        self.newmodel = ModelFactory.create(self.model_params)
        self.newmodel.enableLearning()
        self.newmodel.enableInference({"predictedField": "value"})
        self.DATE_FORMAT = "%d/%m/%Y %H:%M"
        self.anomalylikelihood = AnomalyLikelihood()

    def processdata(self, data):
        timestamp = datetime.datetime.strptime(data[0], self.DATE_FORMAT)
        ce = float(data[1])
        result = self.newmodel.run({"dttm": timestamp, "value": ce})
        #print result
        anomalyScore = result.inferences["anomalyScore"]
        anomaly = self.anomalylikelihood.anomalyProbability(
            ce, anomalyScore, timestamp)
        logLikelihood = self.anomalylikelihood.computeLogLikelihood(anomaly)
        logLikelihood = logLikelihood * 100
        print logLikelihood
        # Earlier thresholds on the raw likelihood (kept for reference):
        # if anomaly > 0.999:
        #     print "Detected high level anomaly at " + str(timestamp)
        # elif anomaly > 0.958:
        #     print "Detected medium level anomaly at " + str(timestamp)
        if logLikelihood > 20:
            print "Detected high level anomaly at " + str(timestamp)
        elif logLikelihood > 15:
            print "Detected medium level anomaly at " + str(timestamp)
Example #3
def runAvogadroAnomaly(metric, options):
    """
  Create a new HTM Model, fetch the data from the local DB, process it in NuPIC,
  and save the results to a new CSV output file.

  :param metric: AvogadroAgent metric class
  :param options: CLI Options
  """
    model = createModel(metric)
    model.enableInference({"predictedField": metric.name})

    fetched = metric.fetch(prefix=options.prefix, start=None)

    resultFile = open(
        os.path.join(options.prefix, metric.name + "-result.csv"), "wb")
    csvWriter = csv.writer(resultFile)
    csvWriter.writerow([
        "timestamp", metric.name, "raw_anomaly_score", "anomaly_likelihood",
        "color"
    ])

    headers = ("timestamp", metric.name)

    anomalyLikelihood = AnomalyLikelihood()

    for (ts, value) in fetched:
        try:
            value = float(value)
        except (ValueError, TypeError):
            continue

        if not math.isnan(value):
            modelInput = dict(zip(headers, (ts, value)))
            modelInput[metric.name] = float(value)
            modelInput["timestamp"] = datetime.datetime.fromtimestamp(
                float(modelInput["timestamp"]))
            result = model.run(modelInput)
            anomalyScore = result.inferences["anomalyScore"]

            likelihood = anomalyLikelihood.anomalyProbability(
                modelInput[metric.name], anomalyScore, modelInput["timestamp"])
            logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)

            if logLikelihood > .5:
                color = "red"
            elif logLikelihood > .4 and logLikelihood <= .5:
                color = "yellow"
            else:
                color = "green"

            csvWriter.writerow([
                modelInput["timestamp"],
                float(value), anomalyScore, logLikelihood, color
            ])

    else:
        resultFile.flush()
Example #4
def runAvogadroAnomaly(metric, options):
  """
  Create a new HTM Model, fetch the data from the local DB, process it in NuPIC,
  and save the results to a new CSV output file.

  :param metric: AvogadroAgent metric class
  :param options: CLI Options
  """
  model = createModel(metric)
  model.enableInference({"predictedField": metric.name})

  fetched = metric.fetch(prefix=options.prefix, start=None)

  resultFile = open(os.path.join(options.prefix, metric.name + "-result.csv"),
                    "wb")
  csvWriter = csv.writer(resultFile)
  csvWriter.writerow(["timestamp", metric.name, "raw_anomaly_score",
                      "anomaly_likelihood", "color"])

  headers = ("timestamp", metric.name)

  anomalyLikelihood = AnomalyLikelihood()

  for (ts, value) in fetched:
    try:
      value = float(value)
    except (ValueError, TypeError):
      continue

    if not math.isnan(value):
      modelInput = dict(zip(headers, (ts, value)))
      modelInput[metric.name] = float(value)
      modelInput["timestamp"] = datetime.datetime.fromtimestamp(
        float(modelInput["timestamp"]))
      result = model.run(modelInput)
      anomalyScore = result.inferences["anomalyScore"]

      likelihood = anomalyLikelihood.anomalyProbability(
        modelInput[metric.name], anomalyScore, modelInput["timestamp"])
      logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)

      if logLikelihood > .5:
        color = "red"
      elif logLikelihood > .4 and logLikelihood <= .5:
        color = "yellow"
      else:
        color = "green"

      csvWriter.writerow([modelInput["timestamp"], float(value),
                          anomalyScore, logLikelihood, color])

  else:
    resultFile.flush()
Example #5
def runAnomaly(options):
    """
    Create and run a CLA Model on the given dataset (based on the hotgym anomaly
    client in NuPIC).
    """
    global g_ps_count_dict_unsorted
    global g_abnomal_data_dict_unsorted
    # Load the model params JSON
    with open("model_params.json") as fp:
        modelParams = json.load(fp)

    if options.oswpsDir != "":
        # Get PS dictionary
        osw = OSWData(options.oswpsDir, PS)
        osw.traverse_dir()
        g_ps_count_dict_unsorted = osw.get_ps_dict()
        options.max = ps_max_value = max(g_ps_count_dict_unsorted.values())
        options.min = ps_min_value = min(g_ps_count_dict_unsorted.values())
        print("Min value:" + str(ps_min_value) + ', ' + "Max value:" +
              str(ps_max_value))

    # Update the resolution value for the encoder
    sensorParams = modelParams['modelParams']['sensorParams']
    numBuckets = modelParams['modelParams']['sensorParams']['encoders'][
        'value'].pop('numBuckets')
    resolution = options.resolution
    if resolution is None:
        resolution = max(0.001, (options.max - options.min) / numBuckets)
    print("Using resolution value: {0}".format(resolution))
    sensorParams['encoders']['value']['resolution'] = resolution

    model = ModelFactory.create(modelParams)
    model.enableInference({'predictedField': 'value'})
    if options.inputFile != "":
        with open(options.inputFile) as fin:
            # Open file and setup headers
            # Here we write the log likelihood value as the 'anomaly score'
            # The actual CLA outputs are labeled 'raw anomaly score'
            reader = csv.reader(fin)
            csvWriter = csv.writer(open(options.outputFile, "wb"))
            csvWriter.writerow([
                "timestamp", "value", "_raw_score", "likelihood_score",
                "log_likelihood_score"
            ])
            headers = reader.next()

            # The anomaly likelihood object
            anomalyLikelihood = AnomalyLikelihood()

            # Iterate through each record in the CSV file
            print "Starting processing at", datetime.datetime.now()
            for i, record in enumerate(reader, start=1):

                # Convert input data to a dict so we can pass it into the model
                inputData = dict(zip(headers, record))
                inputData["value"] = float(inputData["value"])
                inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])
                #inputData["dttm"] = datetime.datetime.now()

                # Send it to the CLA and get back the raw anomaly score
                result = model.run(inputData)
                anomalyScore = result.inferences['anomalyScore']

                # Compute the Anomaly Likelihood
                likelihood = anomalyLikelihood.anomalyProbability(
                    inputData["value"], anomalyScore, inputData["dttm"])
                logLikelihood = anomalyLikelihood.computeLogLikelihood(
                    likelihood)
                if likelihood > 0.9999:
                    print "Anomaly detected:", inputData['dttm'], inputData[
                        'value'], likelihood

                # Write results to the output CSV file
                csvWriter.writerow([
                    inputData["dttm"], inputData["value"], anomalyScore,
                    likelihood, logLikelihood
                ])

                # Progress report
                if (i % 1000) == 0:
                    print i, "records processed"
    elif options.oswpsDir != "":
        if options.use_rtm == True:
            rtm_sensitivity = 2
            rtm = LinearRegressionTemoporalMemory(window=10,
                                                  interval=10,
                                                  min_=options.min,
                                                  max_=options.max,
                                                  boost=rtm_sensitivity,
                                                  leak_detection=0,
                                                  critical_region="right_tail",
                                                  debug=0)
            g_abnomal_data_dict_unsorted = rtm.analyze(
                g_ps_count_dict_unsorted)
        else:
            csvWriter = csv.writer(open(options.outputFile, "wb"))
            csvWriter.writerow([
                "timestamp", "value", "_raw_score", "likelihood_score",
                "log_likelihood_score"
            ])
            ps_od = collections.OrderedDict(
                sorted(g_ps_count_dict_unsorted.items()))

            # The anomaly likelihood object
            anomalyLikelihood = AnomalyLikelihood()

            # Iterate through each record in the CSV file
            print "Starting processing at", datetime.datetime.now()
            for i, timestamp in enumerate(ps_od):
                ps_count = ps_od[timestamp]

                inputData = {}
                inputData["value"] = float(ps_count)
                inputData["dttm"] = dateutil.parser.parse(timestamp)
                #inputData["dttm"] = datetime.datetime.now()

                # Send it to the CLA and get back the raw anomaly score
                result = model.run(inputData)
                anomalyScore = result.inferences['anomalyScore']

                # Compute the Anomaly Likelihood
                likelihood = anomalyLikelihood.anomalyProbability(
                    inputData["value"], anomalyScore, inputData["dttm"])
                logLikelihood = anomalyLikelihood.computeLogLikelihood(
                    likelihood)
                if likelihood > 0.9999:
                    print "Anomaly detected:", inputData['dttm'], inputData[
                        'value'], likelihood
                    g_abnomal_data_dict_unsorted[timestamp] = ps_count

                # Write results to the output CSV file
                csvWriter.writerow([
                    inputData["dttm"], inputData["value"], anomalyScore,
                    likelihood, logLikelihood
                ])

                # Progress report
                if (i % 1000) == 0:
                    print i, "records processed"

            print "Completed processing", i, "records at", datetime.datetime.now(
            )
    print "Anomaly scores for", options.inputFile,
    print "have been written to", options.outputFile
Example #6
class _ModelRunner(object):
  """ Use OPF Model to process metric data samples from stdin and and emit
  anomaly likelihood results to stdout
  """


  def __init__(self, inputFileObj, inputSpec, aggSpec, modelSpec):
    """
    :param inputFileObj: A file-like object that contains input metric data
    :param dict inputSpec: Input data specification per input_opt_schema.json
    :param dict aggSpec: Optional aggregation specification per
      agg_opt_schema.json or None if no aggregation is requested
    :param dict modelSpec: Model specification per model_opt_schema.json
    """
    self._inputSpec = inputSpec

    self._aggSpec = aggSpec

    self._modelSpec = modelSpec

    if "modelId" in modelSpec:
      self._modelId = modelSpec["modelId"]
    else:
      self._modelId = "Unknown"


    inputRecordSchema = (
      fieldmeta.FieldMetaInfo(modelSpec["timestampFieldName"],
                              fieldmeta.FieldMetaType.datetime,
                              fieldmeta.FieldMetaSpecial.timestamp),
      fieldmeta.FieldMetaInfo(modelSpec["valueFieldName"],
                              fieldmeta.FieldMetaType.float,
                              fieldmeta.FieldMetaSpecial.none),
    )

    self._aggregator = aggregator.Aggregator(
      aggregationInfo=dict(
        fields=([(modelSpec["valueFieldName"], aggSpec["func"])]
                if aggSpec is not None else []),
        seconds=aggSpec["windowSize"] if aggSpec is not None else 0
      ),
      inputFields=inputRecordSchema)

    self._modelRecordEncoder = record_stream.ModelRecordEncoder(
      fields=inputRecordSchema)

    self._model = self._createModel(modelSpec=modelSpec)

    self._anomalyLikelihood = AnomalyLikelihood()

    self._csvReader = self._createCsvReader(inputFileObj)


  @staticmethod
  def _createModel(modelSpec):
    """Instantiate and configure an OPF model

    :param dict modelSpec: Model specification per model_opt_schema.json

    :returns: OPF Model instance
    """

    model = ModelFactory.create(modelConfig=modelSpec["modelConfig"])
    model.enableLearning()
    model.enableInference(modelSpec["inferenceArgs"])

    return model


  @staticmethod
  def _createCsvReader(fileObj):
    # We'll be operating on csvs with arbitrarily long fields
    csv.field_size_limit(2**27)

    # Make sure readline() works on windows too
    os.linesep = "\n"

    return csv.reader(fileObj, dialect="excel")


  @classmethod
  def _emitOutputMessage(cls, dataRow, anomalyProbability):
    """Emit output message to stdout

    :param list dataRow: the two-tuple data row on which anomalyProbability was
      computed, whose first element is datetime timestamp and second element is
      the float scalar value
    :param float anomalyProbability: computed anomaly probability value
    """

    message = "%s\n" % (json.dumps([dataRow[0].isoformat(), dataRow[1], anomalyProbability]),)

    sys.stdout.write(message)
    sys.stdout.flush()


  def _computeAnomalyProbability(self, fields):
    """ Compute anomaly log likelihood score

    :param tuple fields: Two-tuple input metric data row
      (<datetime-timestamp>, <float-scalar>)

    :returns: Log-scaled anomaly probability
    :rtype: float
    """
    # Generate raw anomaly score
    inputRecord = self._modelRecordEncoder.encode(fields)
    rawAnomalyScore = self._model.run(inputRecord).inferences["anomalyScore"]

    # Generate anomaly likelihood score
    anomalyProbability = self._anomalyLikelihood.anomalyProbability(
      value=fields[1],
      anomalyScore=rawAnomalyScore,
      timestamp=fields[0])

    return self._anomalyLikelihood.computeLogLikelihood(anomalyProbability)


  def run(self):
    """ Run the model: ingest and process the input metric data and emit output
    messages containing anomaly scores
    """

    numRowsToSkip = self._inputSpec["rowOffset"]
    datetimeFormat = self._inputSpec["datetimeFormat"]
    inputRowTimestampIndex = self._inputSpec["timestampIndex"]
    inputRowValueIndex = self._inputSpec["valueIndex"]

    g_log.info("Processing model=%s", self._modelId)

    for inputRow in self._csvReader:
      g_log.debug("Got inputRow=%r", inputRow)

      if numRowsToSkip > 0:
        numRowsToSkip -= 1
        g_log.debug("Skipping header row %s; %s rows left to skip",
                    inputRow, numRowsToSkip)
        continue

      # Extract timestamp and value
      # NOTE: the order must match the `inputFields` that we passed to the
      # Aggregator constructor
      fields = [
        date_time_utils.parseDatetime(inputRow[inputRowTimestampIndex],
                                      datetimeFormat),
        float(inputRow[inputRowValueIndex])
      ]

      # Aggregate
      aggRow, _ = self._aggregator.next(fields, None)
      g_log.debug("Aggregator returned %s for %s", aggRow, fields)
      if aggRow is not None:
        self._emitOutputMessage(
          dataRow=aggRow,
          anomalyProbability=self._computeAnomalyProbability(aggRow))


    # Reap remaining data from aggregator
    aggRow, _ = self._aggregator.next(None, curInputBookmark=None)
    g_log.debug("Aggregator reaped %s in final call", aggRow)
    if aggRow is not None:
      self._emitOutputMessage(
        dataRow=aggRow,
        anomalyProbability=self._computeAnomalyProbability(aggRow))
Example #7
def runAnomaly(options):
    """
    Create and run a CLA Model on the given dataset (based on the hotgym anomaly
    client in NuPIC).
    """
    # Load the model params JSON
    with open("model_params.json") as fp:
        modelParams = json.load(fp)

    # Update the resolution value for the encoder
    sensorParams = modelParams['modelParams']['sensorParams']
    numBuckets = modelParams['modelParams']['sensorParams']['encoders'][
        'value'].pop('numBuckets')
    resolution = options.resolution
    if resolution is None:
        resolution = max(0.001, (options.max - options.min) / numBuckets)
    print "Using resolution value: {0}".format(resolution)
    sensorParams['encoders']['value']['resolution'] = resolution

    model = ModelFactory.create(modelParams)
    model.enableInference({'predictedField': 'value'})
    with open(options.inputFile) as fin:

        # Open file and setup headers
        # Here we write the log likelihood value as the 'anomaly score'
        # The actual CLA outputs are labeled 'raw anomaly score'
        reader = csv.reader(fin)
        csvWriter = csv.writer(open(options.outputFile, "wb"))
        csvWriter.writerow([
            "timestamp", "value", "_raw_score", "likelihood_score",
            "log_likelihood_score"
        ])
        headers = reader.next()

        # The anomaly likelihood object
        anomalyLikelihood = AnomalyLikelihood()

        # Iterate through each record in the CSV file
        print "Starting processing at", datetime.datetime.now()
        for i, record in enumerate(reader, start=1):

            # Convert input data to a dict so we can pass it into the model
            inputData = dict(zip(headers, record))
            inputData["value"] = float(inputData["value"])
            inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])
            #inputData["dttm"] = datetime.datetime.now()

            # Send it to the CLA and get back the raw anomaly score
            result = model.run(inputData)
            anomalyScore = result.inferences['anomalyScore']

            # Compute the Anomaly Likelihood
            likelihood = anomalyLikelihood.anomalyProbability(
                inputData["value"], anomalyScore, inputData["dttm"])
            logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
            if likelihood > 0.9999:
                print "Anomaly detected:", inputData['dttm'], inputData[
                    'value'], likelihood

            # Write results to the output CSV file
            csvWriter.writerow([
                inputData["dttm"], inputData["value"], anomalyScore,
                likelihood, logLikelihood
            ])

            # Progress report
            if (i % 1000) == 0: print i, "records processed"

    print "Completed processing", i, "records at", datetime.datetime.now()
    print "Anomaly scores for", options.inputFile,
    print "have been written to", options.outputFile
Example #8
class _ModelRunner(object):
  """ Use OPF Model to process metric data samples from stdin and and emit
  anomaly likelihood results to stdout
  """

  # Input column meta info compatible with parameters generated by
  # getScalarMetricWithTimeOfDayAnomalyParams
  _INPUT_RECORD_SCHEMA = (
    fieldmeta.FieldMetaInfo("c0", fieldmeta.FieldMetaType.datetime,
                            fieldmeta.FieldMetaSpecial.timestamp),
    fieldmeta.FieldMetaInfo("c1", fieldmeta.FieldMetaType.float,
                            fieldmeta.FieldMetaSpecial.none),
  )


  def __init__(self, modelId, stats):
    """
    :param str modelId: model identifier
    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package.
    """
    self._modelId = modelId

    # NOTE: ModelRecordEncoder is implemented in the pull request
    # https://github.com/numenta/nupic/pull/2432 that is not yet in master.
    self._modelRecordEncoder = record_stream.ModelRecordEncoder(
      fields=self._INPUT_RECORD_SCHEMA)

    self._model = self._createModel(stats=stats)

    self._anomalyLikelihood = AnomalyLikelihood()


  @classmethod
  def _createModel(cls, stats):
    """Instantiate and configure an OPF model

    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package.
    :returns: OPF Model instance
    """
    # Generate swarm params
    swarmParams = getScalarMetricWithTimeOfDayAnomalyParams(
      metricData=[0],
      minVal=stats["min"],
      maxVal=stats["max"],
      minResolution=stats.get("minResolution"))

    model = ModelFactory.create(modelConfig=swarmParams["modelConfig"])
    model.enableLearning()
    model.enableInference(swarmParams["inferenceArgs"])

    return model


  @classmethod
  def _readInputMessages(cls):
    """Create a generator that waits for and yields input messages from
    stdin

    yields two-tuple (<timestamp>, <scalar-value>), where <timestamp> is the
    `datetime.datetime` timestamp of the metric data sample and <scalar-value>
    is the floating point value of the metric data sample.
    """
    while True:
      message = sys.stdin.readline()

      if message:
        timestamp, scalarValue = json.loads(message)
        yield (datetime.utcfromtimestamp(timestamp), scalarValue)
      else:
        # Front End closed the pipe (or died)
        break


  @classmethod
  def _emitOutputMessage(cls, rowIndex, anomalyProbability):
    """Emit output message to stdout

    :param int rowIndex: 0-based index of corresponding input sample
    :param float anomalyProbability: computed anomaly probability value
    """
    message = "%s\n" % (json.dumps([rowIndex, anomalyProbability]),)

    sys.stdout.write(message)
    sys.stdout.flush()


  def _computeAnomalyProbability(self, inputRow):
    """ Compute anomaly log likelihood score

    :param tuple inputRow: Two-tuple input metric data row
      (<datetime-timestamp>, <float-scalar>)

    :returns: Log-scaled anomaly probability
    :rtype: float
    """
    # Generate raw anomaly score
    inputRecord = self._modelRecordEncoder.encode(inputRow)
    rawAnomalyScore = self._model.run(inputRecord).inferences["anomalyScore"]

    # Generate anomaly likelihood score
    anomalyProbability = self._anomalyLikelihood.anomalyProbability(
      value=inputRow[1],
      anomalyScore=rawAnomalyScore,
      timestamp=inputRow[0])

    return self._anomalyLikelihood.computeLogLikelihood(anomalyProbability)


  def run(self):
    """ Run the model: ingest and process the input metric data and emit output
    messages containing anomaly scores
    """
    g_log.info("Processing model=%s", self._modelId)

    for rowIndex, inputRow in enumerate(self._readInputMessages()):
      anomalyProbability = self._computeAnomalyProbability(inputRow)

      self._emitOutputMessage(rowIndex=rowIndex,
                              anomalyProbability=anomalyProbability)
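
_readInputMessages and _emitOutputMessage define a line-oriented JSON protocol: every line on stdin is a [unix-timestamp, value] pair, and for each processed sample the runner writes a [rowIndex, logLikelihood] line to stdout. A sketch of the framing from the parent process's point of view, with invented sample values:

import json

# One metric sample per line written to the runner's stdin:
request = json.dumps([1438649459, 35.7]) + "\n"

# One result per processed sample read back from the runner's stdout:
rowIndex, logLikelihood = json.loads('[0, 0.5]')
print("row %d -> log likelihood %.4f" % (rowIndex, logLikelihood))
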
Example #9
def runAnomaly(options):
  """
  Create and run a CLA Model on the given dataset (based on the hotgym anomaly
  client in NuPIC).
  """
  # Load the model params JSON
  with open("model_params.json") as fp:
    modelParams = json.load(fp)

  # Update the resolution value for the encoder
  sensorParams = modelParams['modelParams']['sensorParams']
  numBuckets = modelParams['modelParams']['sensorParams']['encoders']['value'].pop('numBuckets')
  resolution = options.resolution
  if resolution is None:
    resolution = max(0.001,
                     (options.max - options.min) / numBuckets)
  print "Using resolution value: {0}".format(resolution)
  sensorParams['encoders']['value']['resolution'] = resolution

  model = ModelFactory.create(modelParams)
  model.enableInference({'predictedField': 'value'})
  with open(options.inputFile) as fin:
    
    # Open file and setup headers
    # Here we write the log likelihood value as the 'anomaly score'
    # The actual CLA outputs are labeled 'raw anomaly score'
    reader = csv.reader(fin)
    csvWriter = csv.writer(open(options.outputFile,"wb"))
    csvWriter.writerow(["timestamp", "value",
                        "_raw_score", "likelihood_score", "log_likelihood_score"])
    headers = reader.next()
    
    # The anomaly likelihood object
    anomalyLikelihood = AnomalyLikelihood()
    
    # Iterate through each record in the CSV file
    print "Starting processing at",datetime.datetime.now()
    for i, record in enumerate(reader, start=1):
      
      # Convert input data to a dict so we can pass it into the model
      inputData = dict(zip(headers, record))
      inputData["value"] = float(inputData["value"])
      inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])
      #inputData["dttm"] = datetime.datetime.now()
      
      # Send it to the CLA and get back the raw anomaly score
      result = model.run(inputData)
      anomalyScore = result.inferences['anomalyScore']
      
      # Compute the Anomaly Likelihood
      likelihood = anomalyLikelihood.anomalyProbability(
        inputData["value"], anomalyScore, inputData["dttm"])
      logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
      if likelihood > 0.9999:
        print "Anomaly detected:",inputData['dttm'],inputData['value'],likelihood

      # Write results to the output CSV file
      csvWriter.writerow([inputData["dttm"], inputData["value"],
                          anomalyScore, likelihood, logLikelihood])

      # Progress report
      if (i%1000) == 0: print i,"records processed"

  print "Completed processing",i,"records at",datetime.datetime.now()
  print "Anomaly scores for",options.inputFile,
  print "have been written to",options.outputFile
Example #10
class _ModelRunner(object):
  """ Use OPF Model to process metric data samples from stdin and and emit
  anomaly likelihood results to stdout
  """

  # Input column meta info compatible with parameters generated by
  # getScalarMetricWithTimeOfDayAnomalyParams
  _INPUT_RECORD_SCHEMA = (
    fieldmeta.FieldMetaInfo("c0", fieldmeta.FieldMetaType.datetime,
                            fieldmeta.FieldMetaSpecial.timestamp),
    fieldmeta.FieldMetaInfo("c1", fieldmeta.FieldMetaType.float,
                            fieldmeta.FieldMetaSpecial.none),
  )


  def __init__(self, modelId, stats):
    """
    :param str modelId: model identifier
    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package.
    """
    self._modelId = modelId

    # NOTE: ModelRecordEncoder is implemented in the pull request
    # https://github.com/numenta/nupic/pull/2432 that is not yet in master.
    self._modelRecordEncoder = record_stream.ModelRecordEncoder(
      fields=self._INPUT_RECORD_SCHEMA)

    self._model = self._createModel(stats=stats)

    self._anomalyLikelihood = AnomalyLikelihood()


  @classmethod
  def _createModel(cls, stats):
    """Instantiate and configure an OPF model

    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package.
    :returns: OPF Model instance
    """
    # Generate swarm params
    swarmParams = getScalarMetricWithTimeOfDayAnomalyParams(
      metricData=[0],
      minVal=stats["min"],
      maxVal=stats["max"],
      minResolution=stats.get("minResolution"))

    model = ModelFactory.create(modelConfig=swarmParams["modelConfig"])
    model.enableLearning()
    model.enableInference(swarmParams["inferenceArgs"])

    return model


  @classmethod
  def _readInputMessages(cls):
    """Create a generator that waits for and yields input messages from
    stdin

    yields two-tuple (<timestamp>, <scalar-value>), where <timestamp> is the
    `datetime.datetime` timestamp of the metric data sample and <scalar-value>
    is the floating point value of the metric data sample.
    """
    while True:
      message = sys.stdin.readline()

      if message:
        timestamp, scalarValue = json.loads(message)
        yield (datetime.utcfromtimestamp(timestamp), scalarValue)
      else:
        # Front End closed the pipe (or died)
        break


  @classmethod
  def _emitOutputMessage(cls, rowIndex, anomalyProbability):
    """Emit output message to stdout

    :param int rowIndex: 0-based index of corresponding input sample
    :param float anomalyProbability: computed anomaly probability value
    """
    message = "%s\n" % (json.dumps([rowIndex, anomalyProbability]),)

    sys.stdout.write(message)
    sys.stdout.flush()


  def _computeAnomalyProbability(self, inputRow):
    """ Compute anomaly log likelihood score

    :param tuple inputRow: Two-tuple input metric data row
      (<datetime-timestamp>, <float-scalar>)

    :returns: Log-scaled anomaly probability
    :rtype: float
    """
    # Generate raw anomaly score
    inputRecord = self._modelRecordEncoder.encode(inputRow)
    rawAnomalyScore = self._model.run(inputRecord).inferences["anomalyScore"]

    # Generate anomaly likelihood score
    anomalyProbability = self._anomalyLikelihood.anomalyProbability(
      value=inputRow[1],
      anomalyScore=rawAnomalyScore,
      timestamp=inputRow[0])

    return self._anomalyLikelihood.computeLogLikelihood(anomalyProbability)


  def run(self):
    """ Run the model: ingest and process the input metric data and emit output
    messages containing anomaly scores
    """
    g_log.info("Processing model=%s", self._modelId)

    for rowIndex, inputRow in enumerate(self._readInputMessages()):
      anomalyProbability = self._computeAnomalyProbability(inputRow)

      self._emitOutputMessage(rowIndex=rowIndex,
                              anomalyProbability=anomalyProbability)
Example #11
      anomalyScore, likelihood, logLikelihood = 'None', 'None', 'None'
      pred_result = shifter.shift(result)
      if result.inferences["multiStepBestPredictions"][1]:
        prediction = result.inferences["multiStepBestPredictions"][1]
        print prediction
      else:
        prediction = 'None'


    if not PREDICT or prediction == 'None':
      # Anomaly-Stats: 
      anomalyScore = result.inferences["anomalyScore"]
      AnomalyScores.append(anomalyScore)
      # By default 0.5 for the first 600 iterations! TODO: Still not quite sure if that's alright...
      likelihood = anomalyLikelihood.anomalyProbability(event[0] + numpy.array([event[1]]), anomalyScore, modelInput["timestamp"])
      logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
      LikelihoodScores.append([modelInput["timestamp"], modelInput["event"], likelihood])
      prediction = 'None'


    # NOTE: change mag to scalar -more general! -Typecasting for DB
    data = {"eventType": str(event.type),
            "lat": float(event.latitude),
            "lng": float(event.longitude),
            "depth": float(event.depth),
            "scalar": float(event.mag),
            "timestamp": str(event.time),
            "AnomalyScore": float(anomalyScore),
            "Anomaly_mean": (float(numpy.mean(AnomalyScores)), WINDOWSIZE),
            "AnomalyLikelihood": float(likelihood),
            "logLikelihood": float(logLikelihood),
Example #12
                                               "%Y-%m-%dT%H:%M:%S.%fZ")
        # input_event = (timestamp, input_event)
        modelInput = {}
        modelInput["event"] = input_event
        modelInput["timestamp"] = (timestamp)
        result = model.run(modelInput)
        model.save(MODELSTATE)
        # print result

        if not PREDICT:
            # Anomaly-Stats:
            anomalyScore = result.inferences["anomalyScore"]
            # By default 0.5 for the first 600 iterations!
            likelihood = anomalyLikelihood.anomalyProbability(
                modelInput["event"], anomalyScore, modelInput["timestamp"])
            logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
            AnomalyScores.append(anomalyScore)
            LikelihoodScores.append(
                [modelInput["timestamp"], modelInput["event"], likelihood])
            prediction = 'None'

        if PREDICT:
            # Handle Anomaly:
            anomalyScore, likelihood, logLikelihood = 'None', 'None', 'None'
            pred_result = shifter.shift(result)
            if result.inferences["multiStepBestPredictions"][1]:
                prediction = result.inferences["multiStepBestPredictions"][1]
                print prediction
            else:
                prediction = 'None'
Example #13
    ####################################################

    sdr_output = np.zeros(N_COLUMNS)
    sp.compute(encoder_output, True, sdr_output)
    active_columns = np.nonzero(sdr_output)[0]

    ####################################################

    tm.compute(active_columns, learn=True)

    ####################################################

    anom_score[i] = anomaly_score.compute(tm.getActiveCells(),
                                          tm.getPredictiveCells())

    anom_logscore[i] = anomaly_likelihood.computeLogLikelihood(anom_score[i])

    if i % 100 == 0:

        print(i)

anom_score_futuro = np.zeros((5000 + 1, ))     # remove this later
anom_logscore_futuro = np.zeros((5000 + 1, ))  # remove this later

# This loop runs over the test slice of the file to check whether the anomalies
# can be found; remove this loop later.
for i, linha in enumerate(sign[7220000:7225000, :]):
    scalar_encoder.encodeIntoArray(linha[1], bits_scalar)
    time_encoder.encodeIntoArray(linha[0], bits_time)
Example #14
def run_model(model, a, b, save=True, aggregate=False, string=''):
    """Runs the HTM model and generates the anomaly scores.
    Arguments:
        :model: the model created with create_model().
        :a: the beginning of the analyzed signal.
        :b: the end of the analyzed signal.
        :save: if True then the anomaly outputs will be saved as .txt.
        :aggregate: if True the signal is aggregated with aggregate_() before processing.
        :string: the string to differentiate the names of the saved .txt files.
    """

    ######################### open the signs ###########################################
    if aggregate:
        signal, time_vect = aggregate_(a, b)
        print("the size of signal is: {i}".format(i=np.size(signal)))

    else:
        signal = open_signs()
        signal = signal[a:b, 1]
    #-----------------------------------------------------------------------------------

    ##################### declare the anomalies lists ##################################
    anom_scores = []
    anom_likelihood = []
    anom_loglikelihood = []
    #-----------------------------------------------------------------------------------

    ##################### declare the predicted list ###################################
    predictions_1 = []
    predictions_5 = []
    predictions_1.append(0)
    # The prediction is always made one step ahead, so the first predicted value
    # corresponds to index 1 and there is no prediction for index 0. Likewise, after
    # seeing the last value "A" the model predicts "A+1", which has no matching
    # entry in the signal array.
    for i in range(5):
        predictions_5.append(0)
    #-----------------------------------------------------------------------------------

    ################ declare the Anom likelihood class #################################
    likelihood = AnomalyLikelihood(learningPeriod=300)
    #-----------------------------------------------------------------------------------

    # Iterate over each value in the signal array; the counter is used for debugging.
    for counter, value in enumerate(signal):

        ############ declare the dict which will be passed to the model ###############
        # The model only accepts data as a dict in a specific format,
        # keyed by the encoder field name ('c1').
        inputRecords = {}
        inputRecords['c1'] = float(value)
        #-------------------------------------------------------------------------------

        ############ run the HTM model over the inputRecords dict ######################
        result = model.run(inputRecords)
        #-------------------------------------------------------------------------------

        ############ compute the anomaly likelihood and loglikelihood ###################
        current_likelihood = likelihood.anomalyProbability(
            value, result.inferences["anomalyScore"], timestamp=None)
        current_loglikelihood = likelihood.computeLogLikelihood(
            current_likelihood)
        #--------------------------------------------------------------------------------
        ################################ PREDICTIONS ####################################
        # Obtain the predicted values from the inferences dict and append them
        # to the prediction lists.
        bestPredictions = result.inferences["multiStepBestPredictions"]
        predictions_1.append(bestPredictions[1])
        predictions_5.append(bestPredictions[5])

        #--------------------------------------------------------------------------------

        ########### add the anomaly values to the respective list #######################
        anom_scores.append(result.inferences["anomalyScore"])
        anom_likelihood.append(current_likelihood)
        anom_loglikelihood.append(current_loglikelihood)
        #--------------------------------------------------------------------------------
        ################# print the input and prediction, for debugging purposes ########
        if counter % 1 == 0:
            #print("Actual input [%d]: %f" % (counter, value))
            print(
                'prediction of [{0}]:(input) {1:8} (1-step) {2:8} (5-step) {3:8}'
                .format(counter, value, predictions_1[counter],
                        predictions_5[counter]))
            #print("Input[%d]: %f" % (counter+1,signal[counter+1]))
            #print("Multi Step Predictions: %s" % (result.inferences["multiStepPredictions"]))
            #print("\n")
        #--------------------------------------------------------------------------------

    ################# save the anomaly and prediction array #########################
    if save:
        # The "string" suffix differentiates the training outputs from the
        # online-learning outputs.
        np.savetxt("anom_score_" + string + ".txt", anom_scores, delimiter=',')

        np.savetxt("anom_likelihood_" + string + ".txt",
                   anom_likelihood,
                   delimiter=',')

        np.savetxt("anom_logscore_" + string + ".txt",
                   anom_loglikelihood,
                   delimiter=',')

        np.savetxt("anom_prediction_1" + string + ".txt",
                   predictions_1,
                   delimiter=',')

        np.savetxt("anom_prediction_5" + string + ".txt",
                   predictions_5,
                   delimiter=',')
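
A hedged usage sketch for run_model; create_model() is referenced in the docstring but not shown here, and the slice bounds and file-name suffixes are invented:

model = create_model()
run_model(model, a=0, b=5000, save=True, string='training')                      # first pass over the signal
run_model(model, a=5000, b=10000, save=True, aggregate=False, string='online')  # continue learning online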