Ejemplo n.º 1
0
def compute_scores(y_test, y_pred, normalize=False):
    """Score predictions by squared error and anomaly log-likelihood.

    :param y_test: observed values; must support subtraction with ``y_pred``,
        ``len()`` and integer indexing.
    :param y_pred: predicted values aligned with ``y_test``.
    :param normalize: when true, divide the squared errors by their range
        (``max - min``) before they are scored.
    :returns: a 3-tuple ``(errors, log_likelihoods, anomalies)`` where
        ``errors`` is the array of (optionally scaled) squared errors,
        ``log_likelihoods`` is a list with one value per sample, and
        ``anomalies`` is a dict with ``'high'`` and ``'medium'`` 0/1 arrays.
    """
    squared_errors = np.array((y_test - y_pred)**2)
    if normalize:
        error_range = float(squared_errors.max() - squared_errors.min())
        squared_errors = squared_errors / error_range

    # One stateful scorer is fed every sample in order.
    scorer = AnomalyLikelihood()
    log_likelihoods = []
    for idx in range(len(y_test)):
        probability = scorer.anomalyProbability(y_test[idx],
                                                squared_errors[idx],
                                                timestamp=None)
        log_likelihoods.append(scorer.computeLogLikelihood(probability))

    # A sample is flagged HIGH when its log likelihood is >= 0.5.
    # The MEDIUM band (0.4 <= log likelihood < 0.5) is not evaluated here, so
    # the 'medium' array is returned as all zeros.
    sample_count = len(log_likelihoods)
    high_flags = np.zeros(sample_count)
    high_flags[np.array(log_likelihoods) >= 0.5] = 1
    anomalies = {'high': high_flags, 'medium': np.zeros(sample_count)}

    return squared_errors, log_likelihoods, anomalies
Ejemplo n.º 2
0
class buildmodel:
    """Wrap a model built from ``model_params.json`` plus an anomaly
    likelihood tracker, and report anomalies for incoming records."""

    def __init__(self):
        """Load the model parameters from disk and create the model.

        Reads ``model_params.json`` from the current working directory,
        prints the loaded parameters, then creates the model with learning
        enabled and inference configured for the ``"value"`` field.
        """
        # The parameters come from the JSON file rather than from
        # getScalarMetricWithTimeOfDayAnomalyParams().
        with open("model_params.json") as params_file:
            self.model_params = json.load(params_file)
        # A single parenthesised argument prints the same under the
        # Python 2 print statement.
        print(self.model_params)
        self.newmodel = model = ModelFactory.create(self.model_params)
        model.enableLearning()
        model.enableInference({"predictedField": "value"})
        self.DATE_FORMAT = "%d/%m/%Y %H:%M"
        self.anomalylikelihood = AnomalyLikelihood()

    def processdata(self, data):
        """Run one record through the model and print any anomaly found.

        :param data: indexable record; ``data[0]`` is a timestamp string in
            ``self.DATE_FORMAT`` and ``data[1]`` is convertible to float.
        :returns: None; the scaled log likelihood and any alert are printed.
        """
        when = datetime.datetime.strptime(data[0], self.DATE_FORMAT)
        reading = float(data[1])
        output = self.newmodel.run({"dttm": when, "value": reading})
        raw_score = output.inferences["anomalyScore"]
        probability = self.anomalylikelihood.anomalyProbability(
            reading, raw_score, when)
        # The log likelihood is scaled by 100 before thresholding.
        scaled = self.anomalylikelihood.computeLogLikelihood(probability) * 100
        print(scaled)
        # Thresholds apply to the scaled log likelihood, not to the raw
        # anomaly probability.
        if scaled > 20:
            print("Detected high level anomaly at " + str(when))
        elif scaled > 15:
            print("Detected medium level anomaly at " + str(when))
Ejemplo n.º 3
0
def runAvogadroAnomaly(metric, options):
    """
    Create a new HTM Model, fetch the data from the local DB, process it in
    NuPIC, and save the results to a new CSV output file.

    The output file is ``<options.prefix>/<metric.name>-result.csv``. Rows
    whose value cannot be converted to float, or is NaN, are skipped.

    :param metric: AvogadroAgent metric class
    :param options: CLI Options
    :returns: None
    """
    model = createModel(metric)
    model.enableInference({"predictedField": metric.name})

    fetched = metric.fetch(prefix=options.prefix, start=None)
    resultPath = os.path.join(options.prefix, metric.name + "-result.csv")

    # The context manager guarantees the file is flushed and closed, even if
    # the model raises part-way through. Mode "wb" is kept unchanged.
    with open(resultPath, "wb") as resultFile:
        csvWriter = csv.writer(resultFile)
        # NOTE: the "anomaly_likelihood" column holds the log likelihood.
        csvWriter.writerow([
            "timestamp", metric.name, "raw_anomaly_score",
            "anomaly_likelihood", "color"
        ])

        anomalyLikelihood = AnomalyLikelihood()

        for (ts, value) in fetched:
            try:
                value = float(value)
            except (ValueError, TypeError):
                continue
            if math.isnan(value):
                continue

            timestamp = datetime.datetime.fromtimestamp(float(ts))
            modelInput = {metric.name: value}
            modelInput["timestamp"] = timestamp

            result = model.run(modelInput)
            anomalyScore = result.inferences["anomalyScore"]

            likelihood = anomalyLikelihood.anomalyProbability(
                value, anomalyScore, timestamp)
            logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)

            # red: > 0.5, yellow: (0.4, 0.5], green: everything else.
            if logLikelihood > .5:
                color = "red"
            elif logLikelihood > .4:
                color = "yellow"
            else:
                color = "green"

            csvWriter.writerow(
                [timestamp, value, anomalyScore, logLikelihood, color])
def runAvogadroAnomaly(metric, options):
  """
  Create a new HTM Model, fetch the data from the local DB, process it in NuPIC,
  and save the results to a new CSV output file.

  The output file is ``<options.prefix>/<metric.name>-result.csv``. Rows whose
  value cannot be converted to float, or is NaN, are skipped.

  :param metric: AvogadroAgent metric class
  :param options: CLI Options
  :returns: None
  """
  model = createModel(metric)
  model.enableInference({"predictedField": metric.name})

  fetched = metric.fetch(prefix=options.prefix, start=None)
  resultPath = os.path.join(options.prefix, metric.name + "-result.csv")

  # The context manager guarantees the file is flushed and closed, even if the
  # model raises part-way through. Mode "wb" is kept unchanged.
  with open(resultPath, "wb") as resultFile:
    csvWriter = csv.writer(resultFile)
    # NOTE: the "anomaly_likelihood" column holds the log likelihood.
    csvWriter.writerow(["timestamp", metric.name, "raw_anomaly_score",
                        "anomaly_likelihood", "color"])

    anomalyLikelihood = AnomalyLikelihood()

    for (ts, value) in fetched:
      try:
        value = float(value)
      except (ValueError, TypeError):
        continue
      if math.isnan(value):
        continue

      timestamp = datetime.datetime.fromtimestamp(float(ts))
      modelInput = {metric.name: value}
      modelInput["timestamp"] = timestamp

      result = model.run(modelInput)
      anomalyScore = result.inferences["anomalyScore"]

      likelihood = anomalyLikelihood.anomalyProbability(
        value, anomalyScore, timestamp)
      logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)

      # red: > 0.5, yellow: (0.4, 0.5], green: everything else.
      if logLikelihood > .5:
        color = "red"
      elif logLikelihood > .4:
        color = "yellow"
      else:
        color = "green"

      csvWriter.writerow([timestamp, value, anomalyScore, logLikelihood,
                          color])
    def testLikelihoodValues(self):
        """Check that the region reproduces AnomalyLikelihood's output.

        Every data row of ``_INPUT_DATA_FILE`` is fed both to a standalone
        AnomalyLikelihood and to an AnomalyLikelihoodRegion; the two
        likelihoods must be equal for each row.
        """
        region = AnomalyLikelihoodRegion()
        reference = AnomalyLikelihood()

        # The spec dictionaries double as the region's input/output buffers.
        inputs = AnomalyLikelihoodRegion.getSpec()['inputs']
        outputs = AnomalyLikelihoodRegion.getSpec()['outputs']
        with open(_INPUT_DATA_FILE) as dataFile:
            rows = csv.reader(dataFile)
            next(rows)  # skip the first row (presumably the CSV header)
            for row in rows:
                metricValue = float(row[1])
                rawScore = float(row[2])
                expected = reference.anomalyProbability(metricValue, rawScore)

                inputs['rawAnomalyScore'] = numpy.array([rawScore])
                inputs['metricValue'] = numpy.array([metricValue])
                region.compute(inputs, outputs)
                actual = outputs['anomalyLikelihood'][0]

                self.assertEqual(expected, actual)
Ejemplo n.º 6
0
  def testLikelihoodValues(self):
    """Check that the region reproduces AnomalyLikelihood's output.

    Every data row of ``_INPUT_DATA_FILE`` is fed both to a standalone
    AnomalyLikelihood and to an AnomalyLikelihoodRegion; the two likelihoods
    must be equal for each row.
    """
    region = AnomalyLikelihoodRegion()
    reference = AnomalyLikelihood()

    # The spec dictionaries double as the region's input/output buffers.
    inputs = AnomalyLikelihoodRegion.getSpec()['inputs']
    outputs = AnomalyLikelihoodRegion.getSpec()['outputs']
    with open(_INPUT_DATA_FILE) as dataFile:
      rows = csv.reader(dataFile)
      next(rows)  # skip the first row (presumably the CSV header)
      for row in rows:
        metricValue = float(row[1])
        rawScore = float(row[2])
        expected = reference.anomalyProbability(metricValue, rawScore)

        inputs['rawAnomalyScore'] = numpy.array([rawScore])
        inputs['metricValue'] = numpy.array([metricValue])
        region.compute(inputs, outputs)
        actual = outputs['anomalyLikelihood'][0]

        self.assertEqual(expected, actual)
Ejemplo n.º 7
0
def runAnomaly(options):
  """
  Create and run a CLA Model on the given dataset (based on the hotgym anomaly
  client in NuPIC).

  Model parameters are read from ``model_params.json`` in the current working
  directory. One output row is written per input record, containing the
  timestamp, value, raw anomaly score, likelihood and log likelihood.

  :param options: object providing ``resolution``, ``min``, ``max``,
      ``inputFile`` and ``outputFile``. When ``resolution`` is None it is
      derived from ``(max - min) / numBuckets`` with a floor of 0.001.
  :returns: None
  """
  # Load the model params JSON
  with open("model_params.json") as fp:
    modelParams = json.load(fp)

  # Update the resolution value for the encoder
  valueEncoder = modelParams['modelParams']['sensorParams']['encoders']['value']
  numBuckets = valueEncoder.pop('numBuckets')
  resolution = options.resolution
  if resolution is None:
    # float() avoids truncating integer division when min/max are ints.
    resolution = max(0.001,
                     float(options.max - options.min) / numBuckets)
  # Every print below passes one pre-formatted string in parentheses, so the
  # output is the same whether print is a statement or a function.
  print("Using resolution value: {0}".format(resolution))
  valueEncoder['resolution'] = resolution

  model = ModelFactory.create(modelParams)
  model.enableInference({'predictedField': 'value'})

  # Stays 0 when the input has no data rows, so the summary below still works.
  recordCount = 0

  # Both files are context-managed so the output is flushed and closed even if
  # processing fails part-way through.
  with open(options.inputFile) as fin, open(options.outputFile, "wb") as fout:

    # Open file and setup headers
    # Here we write the log likelihood value as the 'anomaly score'
    # The actual CLA outputs are labeled 'raw anomaly score'
    reader = csv.reader(fin)
    csvWriter = csv.writer(fout)
    csvWriter.writerow(["timestamp", "value",
                        "_raw_score", "likelihood_score", "log_likelihood_score"])
    headers = next(reader)

    # The anomaly likelihood object
    anomalyLikelihood = AnomalyLikelihood()

    # Iterate through each record in the CSV file
    print("Starting processing at %s" % (datetime.datetime.now(),))
    for recordCount, record in enumerate(reader, start=1):

      # Convert input data to a dict so we can pass it into the model
      inputData = dict(zip(headers, record))
      inputData["value"] = float(inputData["value"])
      inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])

      # Send it to the CLA and get back the raw anomaly score
      result = model.run(inputData)
      anomalyScore = result.inferences['anomalyScore']

      # Compute the Anomaly Likelihood
      likelihood = anomalyLikelihood.anomalyProbability(
        inputData["value"], anomalyScore, inputData["dttm"])
      logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
      if likelihood > 0.9999:
        print("Anomaly detected: %s %s %s" % (
          inputData['dttm'], inputData['value'], likelihood))

      # Write results to the output CSV file
      csvWriter.writerow([inputData["dttm"], inputData["value"],
                          anomalyScore, likelihood, logLikelihood])

      # Progress report
      if recordCount % 1000 == 0:
        print("%s records processed" % (recordCount,))

  print("Completed processing %s records at %s" % (
    recordCount, datetime.datetime.now()))
  print("Anomaly scores for %s have been written to %s" % (
    options.inputFile, options.outputFile))
Ejemplo n.º 8
0
class Anomaly(object):
  """Utility class for generating anomaly scores in different ways.

  :param slidingWindowSize: [optional] - how many elements are summed up;
      enables moving average on final anomaly score; int >= 0

  :param mode: (string) [optional] how to compute anomaly, one of:

      - :const:`nupic.algorithms.anomaly.Anomaly.MODE_PURE`
      - :const:`nupic.algorithms.anomaly.Anomaly.MODE_LIKELIHOOD`
      - :const:`nupic.algorithms.anomaly.Anomaly.MODE_WEIGHTED`

  :param binaryAnomalyThreshold: [optional] if set, must be a float strictly
       between 0 and 1; the anomaly score will then be discretized to 1/0
       (1 if >= binaryAnomalyThreshold).
       The transformation is applied after moving average is computed.

  :raises ValueError: if ``mode`` is not a supported mode, or if
       ``binaryAnomalyThreshold`` is neither None nor a float in (0, 1).
  """


  # anomaly modes supported
  MODE_PURE = "pure"
  """
  Default mode. The raw anomaly score as computed by
  :func:`~.anomaly_likelihood.computeRawAnomalyScore`
  """
  MODE_LIKELIHOOD = "likelihood"
  """
  Uses the :class:`~.anomaly_likelihood.AnomalyLikelihood` class, which models
  probability of receiving this value and anomalyScore
  """
  MODE_WEIGHTED = "weighted"
  """
  Multiplies the likelihood result with the raw anomaly score that was used to
  generate the likelihood (anomaly * likelihood)
  """

  _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)


  def __init__(self,
               slidingWindowSize=None,
               mode=MODE_PURE,
               binaryAnomalyThreshold=None):
    self._mode = mode
    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    # Only the likelihood-based modes need a probabilistic anomaly model.
    if self._mode in (Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED):
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly
    else:
      self._likelihood = None

    if self._mode not in self._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

    self._binaryThreshold = binaryAnomalyThreshold
    # The isinstance test comes first so non-float values (ints, strings) are
    # rejected before any numeric comparison is attempted.
    if binaryAnomalyThreshold is not None and (
          not isinstance(binaryAnomalyThreshold, float) or
          binaryAnomalyThreshold >= 1.0  or
          binaryAnomalyThreshold <= 0.0 ):
      raise ValueError("Anomaly: binaryAnomalyThreshold must be from (0,1) "
                       "or None if disabled.")


  def compute(self, activeColumns, predictedColumns,
              inputValue=None, timestamp=None):
    """Compute the anomaly score as the percent of active columns not predicted.

    :param activeColumns: array of active column indices
    :param predictedColumns: array of columns indices predicted in this step
                             (used for anomaly in step T+1)
    :param inputValue: (optional) value of current input to encoders
                                  (eg "cat" for category encoder)
                                  (used in anomaly-likelihood)
    :param timestamp: (optional) date timestamp when the sample occured
                                 (used in anomaly-likelihood)
    :returns: the computed anomaly score; float 0..1
    :raises ValueError: in MODE_LIKELIHOOD when ``inputValue`` is None
    """
    # Start by computing the raw anomaly score.
    anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

    # Compute final anomaly based on selected mode.
    if self._mode == Anomaly.MODE_PURE:
      score = anomalyScore
    elif self._mode == Anomaly.MODE_LIKELIHOOD:
      if inputValue is None:
        raise ValueError("Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' "
                 "requires 'inputValue' as parameter to compute() method. ")

      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      # low likelihood -> hi anomaly
      score = 1 - probability
    elif self._mode == Anomaly.MODE_WEIGHTED:
      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      score = anomalyScore * (1 - probability)

    # Last, do moving-average if windowSize was specified.
    if self._movingAverage is not None:
      score = self._movingAverage.next(score)

    # apply binary discretization if required
    if self._binaryThreshold is not None:
      if score >= self._binaryThreshold:
        score = 1.0
      else:
        score = 0.0

    return score


  def __str__(self):
    windowSize = 0
    if self._movingAverage is not None:
      windowSize = self._movingAverage.windowSize
    return "Anomaly:\tmode=%s\twindowSize=%r" % (self._mode, windowSize)


  def __eq__(self, other):
    return (isinstance(other, Anomaly) and
            other._mode == self._mode and
            other._binaryThreshold == self._binaryThreshold and
            other._movingAverage == self._movingAverage and
            other._likelihood == self._likelihood)


  def __ne__(self, other):
    # Defined explicitly so that ``!=`` is always the negation of ``==``;
    # Python 2 does not derive ``__ne__`` from ``__eq__``.
    return not self == other


  def __setstate__(self, state):
    """deserialization

    Restores the instance from ``state`` and fills in defaults for any
    attribute the serialized state does not carry.
    """
    self.__dict__.update(state)

    if not hasattr(self, '_mode'):
      self._mode = Anomaly.MODE_PURE
    if not hasattr(self, '_movingAverage'):
      self._movingAverage = None
    if not hasattr(self, '_binaryThreshold'):
      self._binaryThreshold = None
    # __eq__ reads _likelihood, so it must exist after restoring any state.
    if not hasattr(self, '_likelihood'):
      self._likelihood = None
Ejemplo n.º 9
0
class _ModelRunner(object):
  """ Feed metric data samples read from stdin through an OPF Model and write
  the resulting anomaly likelihood values to stdout
  """

  # Input column meta info compatible with parameters generated by
  # getScalarMetricWithTimeOfDayAnomalyParams
  _INPUT_RECORD_SCHEMA = (
    fieldmeta.FieldMetaInfo("c0",
                            fieldmeta.FieldMetaType.datetime,
                            fieldmeta.FieldMetaSpecial.timestamp),
    fieldmeta.FieldMetaInfo("c1",
                            fieldmeta.FieldMetaType.float,
                            fieldmeta.FieldMetaSpecial.none),
  )


  def __init__(self, modelId, stats):
    """
    :param str modelId: model identifier
    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package.
    """
    self._modelId = modelId

    # NOTE: ModelRecordEncoder is implemented in the pull request
    # https://github.com/numenta/nupic/pull/2432 that is not yet in master.
    encoder = record_stream.ModelRecordEncoder(
      fields=self._INPUT_RECORD_SCHEMA)
    self._modelRecordEncoder = encoder

    self._model = self._createModel(stats=stats)

    self._anomalyLikelihood = AnomalyLikelihood()


  @classmethod
  def _createModel(cls, stats):
    """Build an OPF model with learning and inference enabled

    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package; ``min`` and ``max`` are required and
      ``minResolution`` is optional.
    :returns: OPF Model instance
    """
    # Generate swarm params
    params = getScalarMetricWithTimeOfDayAnomalyParams(
      metricData=[0],
      minVal=stats["min"],
      maxVal=stats["max"],
      minResolution=stats.get("minResolution"))

    opfModel = ModelFactory.create(modelConfig=params["modelConfig"])
    opfModel.enableLearning()
    opfModel.enableInference(params["inferenceArgs"])

    return opfModel


  @classmethod
  def _readInputMessages(cls):
    """Generator that blocks on stdin and yields one sample per input line

    Each line is a JSON two-element array. Yields two-tuple
    (<timestamp>, <scalar-value>), where <timestamp> is the
    `datetime.datetime` built from the first element via
    ``utcfromtimestamp`` and <scalar-value> is the second element. The
    generator finishes when stdin returns an empty read.
    """
    while True:
      line = sys.stdin.readline()

      if not line:
        # Front End closed the pipe (or died)
        return

      epochSeconds, sample = json.loads(line)
      yield (datetime.utcfromtimestamp(epochSeconds), sample)


  @classmethod
  def _emitOutputMessage(cls, rowIndex, anomalyProbability):
    """Write one JSON result line to stdout and flush it

    :param int rowIndex: 0-based index of corresponding input sample
    :param float anomalyProbability: computed anomaly probability value
    """
    payload = json.dumps([rowIndex, anomalyProbability])

    sys.stdout.write("%s\n" % (payload,))
    sys.stdout.flush()


  def _computeAnomalyProbability(self, inputRow):
    """ Compute the anomaly log likelihood score for one sample

    :param tuple inputRow: Two-tuple input metric data row
      (<datetime-timestamp>, <float-scalar>)

    :returns: Log-scaled anomaly probability
    :rtype: float
    """
    # Generate raw anomaly score
    encodedRecord = self._modelRecordEncoder.encode(inputRow)
    modelResult = self._model.run(encodedRecord)
    rawScore = modelResult.inferences["anomalyScore"]

    # Generate anomaly likelihood score
    likelihood = self._anomalyLikelihood.anomalyProbability(
      value=inputRow[1],
      anomalyScore=rawScore,
      timestamp=inputRow[0])

    return self._anomalyLikelihood.computeLogLikelihood(likelihood)


  def run(self):
    """ Run the model: consume every input sample and emit one output message
    with its anomaly score per sample
    """
    g_log.info("Processing model=%s", self._modelId)

    for rowIndex, inputRow in enumerate(self._readInputMessages()):
      self._emitOutputMessage(
        rowIndex=rowIndex,
        anomalyProbability=self._computeAnomalyProbability(inputRow))
Ejemplo n.º 10
0
def run_model(model, a, b, save=True, aggregate=False, string=''):
    """Run the HTM model over a signal and generate the anomaly scores.

    Nothing is returned: each step is printed and, when ``save`` is set, the
    collected series are written to text files in the working directory.

    Arguments:
        :model: the model created with create_model().
        :a: the beginning of the analyzed signal.
        :b: the end of the analyzed signal.
        :save: if true, the anomaly and prediction series are saved as .txt.
        :aggregate: if true, the signal comes from ``aggregate_(a, b)``;
            otherwise it is column 1 of ``open_signs()`` sliced to ``[a:b]``.
        :string: the string to differentiate the name of the saved .txt files.
    """
    # ---- load the signal ----
    if aggregate:
        # aggregate_ also returns a time vector, which is not needed here.
        signal, _ = aggregate_(a, b)
        print("the size of signal is: {i}".format(i=np.size(signal)))
    else:
        signal = open_signs()[a:b, 1]

    # ---- per-step anomaly outputs ----
    anom_scores = []
    anom_likelihood = []
    anom_loglikelihood = []

    # ---- predictions ----
    # A k-step prediction made at index i refers to index i + k, so each list
    # is padded with k zeros to line predictions_k[i] up with the input at
    # index i. The lists therefore end up k entries longer than the signal.
    predictions_1 = [0]
    predictions_5 = [0] * 5

    likelihood = AnomalyLikelihood(learningPeriod=300)

    for counter, value in enumerate(signal):
        # The model takes its input as a dict keyed by field name.
        result = model.run({'c1': float(value)})
        inferences = result.inferences
        anomaly_score = inferences["anomalyScore"]

        # Anomaly likelihood and its log-scaled form.
        current_likelihood = likelihood.anomalyProbability(
            value, anomaly_score, timestamp=None)
        current_loglikelihood = likelihood.computeLogLikelihood(
            current_likelihood)

        # Best predictions 1 and 5 steps ahead.
        best_predictions = inferences["multiStepBestPredictions"]
        predictions_1.append(best_predictions[1])
        predictions_5.append(best_predictions[5])

        anom_scores.append(anomaly_score)
        anom_likelihood.append(current_likelihood)
        anom_loglikelihood.append(current_loglikelihood)

        # Print the input and the predictions aligned with it, for debugging.
        print(
            'prediction of [{0}]:(input) {1:8} (1-step) {2:8} (5-step) {3:8}'
            .format(counter, value, predictions_1[counter],
                    predictions_5[counter]))

    # ---- save the anomaly and prediction arrays ----
    if save:
        # "string" distinguishes e.g. training output from online-learning
        # output. The file name prefixes are kept exactly as they were.
        outputs = (
            ("anom_score_", anom_scores),
            ("anom_likelihood_", anom_likelihood),
            ("anom_logscore_", anom_loglikelihood),
            ("anom_prediction_1", predictions_1),
            ("anom_prediction_5", predictions_5),
        )
        for prefix, values in outputs:
            np.savetxt(prefix + string + ".txt", values, delimiter=',')
Ejemplo n.º 11
0
class AnomalyLikelihoodRegion(PyRegion):
  """Region for computing the anomaly likelihoods."""


  @classmethod
  def getSpec(cls):
    """Return the region spec: inputs, outputs and tunable parameters.

    A new dictionary is built on every call.
    """
    return {
      "description": ("Region that computes anomaly likelihoods for \
                       temporal memory."),
      "singleNodeOnly": True,
      "inputs": {
        "rawAnomalyScore": {
          "description": "The anomaly score whose \
                          likelihood is to be computed",
          "dataType": "Real32",
          "count": 1,
          "required": True,
          "isDefaultInput": False
        },
        "metricValue": {
          "description": "The input metric value",
          "dataType": "Real32",
          "count": 1,
          "required": True,
          "isDefaultInput": False
        },
      },
      "outputs": {
        "anomalyLikelihood": {
          "description": "The resultant anomaly likelihood",
          "dataType": "Real32",
          "count": 1,
          "isDefaultOutput": True,
        },
      },
      "parameters": {
        "learningPeriod": {
          "description": "The number of iterations required for the\
                          algorithm to learn the basic patterns in the dataset\
                          and for the anomaly score to 'settle down'.",
          "dataType": "UInt32",
          "count": 1,
          "constraints": "",
          "defaultValue": 288,
          "accessMode": "ReadWrite"
        },
        "estimationSamples": {
          "description": "The number of reasonable anomaly scores\
                           required for the initial estimate of the\
                           Gaussian.",
          "dataType": "UInt32",
          "count": 1,
          "constraints": "",
          "defaultValue": 100,
          "accessMode": "ReadWrite"
        },
        "historicWindowSize": {
          "description": "Size of sliding window of historical data\
                          points to maintain for periodic reestimation\
                          of the Gaussian.",
          "dataType": "UInt32",
          "count": 1,
          "constraints": "",
          "defaultValue": 8640,
          "accessMode": "ReadWrite"
        },
        "reestimationPeriod": {
          "description": "How often we re-estimate the Gaussian\
                          distribution.",
          "dataType": "UInt32",
          "count": 1,
          "constraints": "",
          "defaultValue": 100,
          "accessMode": "ReadWrite"
        },
      },
      "commands": {
      },
    }


  def __init__(self,
               learningPeriod = 288,
               estimationSamples = 100,
               historicWindowSize = 8640,
               reestimationPeriod = 100):
    """Create the wrapped AnomalyLikelihood with the given settings.

    The four arguments are passed straight through to AnomalyLikelihood;
    their defaults match the ``defaultValue`` entries in ``getSpec()``.
    """
    self.anomalyLikelihood = AnomalyLikelihood(
      learningPeriod = learningPeriod,
      estimationSamples = estimationSamples,
      historicWindowSize = historicWindowSize,
      reestimationPeriod = reestimationPeriod)

  def __eq__(self, other):
    # Two regions are equal when their wrapped likelihood objects are equal.
    # The isinstance guard makes comparison with an unrelated object (or
    # None) evaluate to False instead of raising AttributeError.
    return (isinstance(other, AnomalyLikelihoodRegion) and
            self.anomalyLikelihood == other.anomalyLikelihood)


  def __ne__(self, other):
    return not self == other


  @classmethod
  def read(cls, proto):
    """Deserialize a region from ``proto``.

    ``__init__`` is bypassed; the wrapped likelihood object is restored with
    ``AnomalyLikelihood.read``.
    """
    anomalyLikelihoodRegion = object.__new__(cls)
    anomalyLikelihoodRegion.anomalyLikelihood = AnomalyLikelihood.read(proto)

    return anomalyLikelihoodRegion


  def write(self, proto):
    """Serialize the wrapped likelihood object into ``proto``."""
    self.anomalyLikelihood.write(proto)


  def initialize(self):
    """No-op; all setup happens in ``__init__``."""
    pass


  def compute(self, inputs, outputs):
    """Compute the anomaly likelihood for one step.

    Reads element 0 of ``inputs["rawAnomalyScore"]`` and
    ``inputs["metricValue"]`` and stores the resulting probability in
    element 0 of ``outputs["anomalyLikelihood"]``.
    """
    anomalyScore = inputs["rawAnomalyScore"][0]
    value = inputs["metricValue"][0]
    anomalyProbability = self.anomalyLikelihood.anomalyProbability(
      value, anomalyScore)
    outputs["anomalyLikelihood"][0] = anomalyProbability
Ejemplo n.º 12
0
class Monitor(object):
    """ A NuPIC model that saves results to Redis. """

    def __init__(self, config):
        """Build the model, logger and Redis connection for one stream.

        `config` is indexed with the keys 'resolution', 'nupic_model_params',
        'stream', 'seconds_per_request', 'webhook', 'anomaly_threshold',
        'likelihood_threshold' and 'domain'.
        """
        import copy

        # Instantiate NuPIC model.
        # Work on a private copy: MODEL_PARAMS is a module-level dict, and
        # editing it in place would leak this monitor's resolution/overrides
        # into every other Monitor created in the same process.
        model_params = copy.deepcopy(base_model_params.MODEL_PARAMS)

        # Set resolution
        model_params['modelParams']['sensorParams']['encoders']['value']['resolution'] = config['resolution']

        # Override other Nupic parameters:
        model_params['modelParams'] = update_dict(model_params['modelParams'], config['nupic_model_params'])

        # Create model and enable inference on it
        self.model = ModelFactory.create(model_params)
        self.model.enableInference({'predictedField': 'value'})

        # The shifter is used to bring the predictions to the actual time frame
        self.shifter = InferenceShifter()

        # The anomaly likelihood object
        self.anomalyLikelihood = AnomalyLikelihood()

        # Set stream source
        self.stream = config['stream']

        # Setup class variables
        self.db = redis.Redis('localhost')
        self.seconds_per_request = config['seconds_per_request']
        self.webhook = config['webhook']
        self.anomaly_threshold = config['anomaly_threshold']
        self.likelihood_threshold = config['likelihood_threshold']
        self.domain = config['domain']
        self.alert = False # Toggled when we get above threshold

        # Setup logging.
        # NOTE(review): `logger` is not defined in this class; presumably a
        # module-level logger -- confirm.
        self.logger =  logger or logging.getLogger(__name__)
        handler = logging.handlers.RotatingFileHandler(os.environ['LOG_DIR']+"/monitor_%s.log" % self.stream.name,
                                                       maxBytes=1024*1024,
                                                       backupCount=4,
                                                      )

        handler.setFormatter(logging.Formatter('[%(levelname)s/%(processName)s][%(asctime)s] %(name)s %(message)s'))
        handler.setLevel(logging.INFO)
        self.logger.addHandler(handler)
        self.logger.setLevel(logging.INFO)

        self.logger.info("=== Settings ===")
        self.logger.info("Webhook: %s", self.webhook)
        self.logger.info("Domain: %s", self.domain)
        self.logger.info("Seconds per request: %d", self.seconds_per_request)

        # Write stream metadata to Redis (best effort: failures are logged only)
        try:
            self.db.set('name:%s' % self.stream.id, self.stream.name)
            self.db.set('value_label:%s' % self.stream.id, self.stream.value_label)
            self.db.set('value_unit:%s' % self.stream.id, self.stream.value_unit)
        except Exception:
            self.logger.warning("Could not write results to redis.", exc_info=True)

    def train(self):
        """Feed the stream's historic data to the model without posting."""
        data = self.stream.historic_data()

        for model_input in data:
            self.update(model_input, False) # Don't post anomalies in training

    def loop(self):
        """Poll the stream forever, sleeping seconds_per_request between polls."""
        while True:
            data = self.stream.new_data()

            for model_input in data:
                self.update(model_input, True) # Post anomalies when online

            sleep(self.seconds_per_request)

    def update(self, model_input, is_to_post):
        """Run one record through the model, store the result, maybe notify.

        `model_input` is indexed with 'value', 'time' and 'raw_value'.
        `is_to_post` enables the webhook notification for this record.

        Returns a dict with the keys 'likelihood', 'anomalous',
        'anomalyScore' and 'predicted'.
        """
        # Pass the input to the model
        result = self.model.run(model_input)

        # Shift results
        result = self.shifter.shift(result)

        # Save multi step predictions
        inference = result.inferences['multiStepPredictions']

        # Take the anomaly_score
        anomaly_score = result.inferences['anomalyScore']

        # Compute the Anomaly Likelihood
        likelihood = self.anomalyLikelihood.anomalyProbability(model_input['value'],
                                                               anomaly_score,
                                                               model_input['time'])

        # Get the predicted value for reporting
        predicted = result.inferences['multiStepBestPredictions'][1]

        # Get timestamp from datetime
        timestamp = calendar.timegm(model_input['time'].timetuple())

        self.logger.info("Processing: %s", strftime("%Y-%m-%d %H:%M:%S", model_input['time'].timetuple()))

        # Save results to Redis
        if inference[1]:
            try:
                # Save in redis with key = 'results:monitor_id' and
                # value = 'time, raw_value, actual, prediction, anomaly, likelihood'
                # * actual: is the value processed by the NuPIC model, which can be
                #           an average of raw_values
                # * prediction: prediction based on 'actual' values.
                results_key = 'results:%s' % self.stream.id
                self.db.rpush(results_key,
                              '%s,%.5f,%.5f,%.5f,%.5f,%.5f' % (timestamp,
                                                          model_input['raw_value'],
                                                          result.rawInput['value'],
                                                          predicted,
                                                          anomaly_score,
                                                          likelihood))
                # Keep only the newest max_items entries. Negative indices
                # count from the tail, so one LTRIM caps the list without a
                # separate (racy) LLEN round trip; it is a no-op while the
                # list is still shorter than max_items.
                max_items = 10000
                self.db.ltrim(results_key, -max_items, -1)
            except Exception:
                self.logger.warning("Could not write results to redis.", exc_info=True)

        # See if above threshold (in which case anomalous is True)
        anomalous = False
        if self.anomaly_threshold is not None:
            if anomaly_score >= self.anomaly_threshold:
                anomalous = True
        if self.likelihood_threshold is not None:
            if likelihood >= self.likelihood_threshold:
                anomalous = True

        # Post if webhook is not None
        if is_to_post and self.webhook is not None:
            # Check if it was in alert state in previous time step
            was_alerted = self.alert
            # Update alert state
            self.alert = anomalous

            # Notify only on the transition into the anomalous state (was not
            # alerted before and is alerted now). Leaving the anomalous state
            # does not trigger a post.
            if not was_alerted and self.alert:
                report = {'anomaly_score': anomaly_score,
                          'likelihood': likelihood,
                          'model_input': {'time': model_input['time'].isoformat(),
                                          'value': model_input['raw_value']}}
                self._send_post(report)

        # Return anomalous state
        return {"likelihood" : likelihood,  "anomalous" : anomalous, "anomalyScore" : anomaly_score, "predicted" : predicted}

    def delete(self):
        """ Remove this monitor from redis """

        self.db.delete("results:%s" % self.stream.id)
        self.db.delete('name:%s' % self.stream.id)
        self.db.delete('value_label:%s' % self.stream.id)
        self.db.delete('value_unit:%s' % self.stream.id)

    def _send_post(self, report):
        """ Send HTTP POST notification.

        A Slack-shaped payload is built when the webhook URL contains
        "hooks.slack.com"; otherwise a generic JSON report is sent. Failures
        are logged and swallowed.
        """

        if "hooks.slack.com" not in self.webhook:
            payload = {'sent_at': datetime.utcnow().isoformat(),
                       'report': report,
                       'monitor': self.stream.name,
                       'source': type(self.stream).__name__,
                       'metric': '%s (%s)' % (self.stream.value_label, self.stream.value_unit),
                       'chart': 'http://%s?id=%s' % (self.domain, self.stream.id)}
        else:
            payload = {'username': '******',
                       'icon_url': 'https://rawgithub.com/cloudwalkio/omg-monitor/slack-integration/docs/images/post_icon.png',
                       'text':  'Anomalous state in *%s* from _%s_:' % (self.stream.name, type(self.stream).__name__),
                       'attachments': [{'color': 'warning',
                                        'fields': [{'title': 'Chart',
                                                    'value':  'http://%s?id=%s' % (self.domain, self.stream.id),
                                                    'short': False},
                                                   {'title': 'Metric',
                                                    'value': self.stream.value_label,
                                                    'short': True},
                                                   {'title': 'Value',
                                                    'value': str(report['model_input']['value']) + ' ' + self.stream.value_unit,
                                                    'short': True}]}]}

        headers = {'Content-Type': 'application/json'}
        try:
            response = requests.post(self.webhook, data=json.dumps(payload), headers=headers)
        except Exception:
            self.logger.warning('Failed to post anomaly.', exc_info=True)
            return

        self.logger.info('Anomaly posted with status code %d: %s', response.status_code, response.text)
        return
Ejemplo n.º 13
0
class Anomaly(object):
    """Helper that produces anomaly scores in one of several modes.

    Supported modes:
      MODE_PURE - the raw score returned by computeRawAnomalyScore
      MODE_LIKELIHOOD - the raw score post-processed by AnomalyLikelihood
      MODE_WEIGHTED - the raw score multiplied by the likelihood-derived term
    """

    # anomaly modes supported
    MODE_PURE = "pure"
    MODE_LIKELIHOOD = "likelihood"
    MODE_WEIGHTED = "weighted"
    _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)

    def __init__(self,
                 slidingWindowSize=None,
                 mode=MODE_PURE,
                 binaryAnomalyThreshold=None):
        """
        @param slidingWindowSize (optional) - window size handed to
            MovingAverage; when None no moving average is applied
        @param mode (optional) - (string) one of "pure" (default),
            "likelihood" or "weighted"; anything else raises ValueError
        @param binaryAnomalyThreshold (optional) - float strictly between 0
            and 1, or None; when set, the final score is discretized to
            1.0 (score >= threshold) or 0.0 after the moving average
        """
        self._mode = mode
        self._movingAverage = (
            None if slidingWindowSize is None
            else MovingAverage(windowSize=slidingWindowSize))

        needsLikelihood = (self._mode == Anomaly.MODE_LIKELIHOOD
                           or self._mode == Anomaly.MODE_WEIGHTED)
        if needsLikelihood:
            self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
        if self._mode not in Anomaly._supportedModes:
            raise ValueError("Invalid anomaly mode; only supported modes are: "
                             "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                             "Anomaly.MODE_WEIGHTED; you used: %r" %
                             self._mode)

        self._binaryThreshold = binaryAnomalyThreshold
        if binaryAnomalyThreshold is not None:
            thresholdIsInvalid = (
                not isinstance(binaryAnomalyThreshold, float)
                or binaryAnomalyThreshold >= 1.0
                or binaryAnomalyThreshold <= 0.0)
            if thresholdIsInvalid:
                raise ValueError(
                    "Anomaly: binaryAnomalyThreshold must be from (0,1) "
                    "or None if disabled.")

    def compute(self,
                activeColumns,
                predictedColumns,
                inputValue=None,
                timestamp=None):
        """Return the anomaly score for one time step.

        @param activeColumns: passed to computeRawAnomalyScore
        @param predictedColumns: passed to computeRawAnomalyScore
        @param inputValue: (optional) current input value; forwarded to the
            likelihood object, required (non-None) in "likelihood" mode
        @param timestamp: (optional) forwarded to the likelihood object
        @return the score after mode handling, optional moving average and
            optional binary thresholding
        """
        rawScore = computeRawAnomalyScore(activeColumns, predictedColumns)
        mode = self._mode

        # Turn the raw score into the mode-specific score.
        if mode == Anomaly.MODE_PURE:
            score = rawScore
        elif mode == Anomaly.MODE_LIKELIHOOD:
            if inputValue is None:
                raise ValueError(
                    "Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' "
                    "requires 'inputValue' as parameter to compute() method. ")
            # low likelihood -> hi anomaly
            score = 1 - self._likelihood.anomalyProbability(
                inputValue, rawScore, timestamp)
        elif mode == Anomaly.MODE_WEIGHTED:
            complement = 1 - self._likelihood.anomalyProbability(
                inputValue, rawScore, timestamp)
            score = rawScore * complement

        # Smooth, if a window was configured.
        smoother = self._movingAverage
        if smoother is not None:
            score = smoother.next(score)

        # Discretize, if a threshold was configured.
        threshold = self._binaryThreshold
        if threshold is not None:
            score = 1.0 if score >= threshold else 0.0

        return score

    def __str__(self):
        smoother = self._movingAverage
        windowSize = 0 if smoother is None else smoother.windowSize
        return "Anomaly:\tmode=%s\twindowSize=%r" % (self._mode, windowSize)

    def __setstate__(self, state):
        """Restore pickled state, filling in attributes missing from `state`."""
        self.__dict__.update(state)

        fallbacks = (
            ('_mode', Anomaly.MODE_PURE),
            ('_movingAverage', None),
            ('_binaryThreshold', None),
        )
        for attrName, fallback in fallbacks:
            if not hasattr(self, attrName):
                setattr(self, attrName, fallback)
Ejemplo n.º 14
0
def foreach_batch_function(df, epoch_id):
    """Score every record of one micro-batch and report anomalies.

    For each row returned by ``df.collect()`` the log level is normalized to
    'info' / 'error' / 'warning', the record is run through the module-level
    ``model``, and the resulting anomaly score is appended to the module-level
    ``record_anomalies`` history. A likelihood is derived from the difference
    between the short-window mean (last ``history`` scores) and the overall
    mean, scaled by the overall standard deviation; values >= 0.85 are
    reported as anomalies.

    @param df: batch object exposing ``collect()``; rows are indexed with
        'level' and '@timestamp'
    @param epoch_id: batch identifier (unused)
    """
    # `nb` is rebound below; without this declaration Python treats it as a
    # local name and the first `nb + 1` raises UnboundLocalError.
    global nb

    rows = df.collect()
    print("Size of Batch")
    print(len(rows))

    for entry in rows:
        nb = nb + 1
        level = entry['level']
        timestamp = entry['@timestamp']
        level = level.encode("utf-8")

        # Collapse the level to one of three categories; anything that is not
        # info/error is treated as a warning.
        if level in ('INFO', 'info'):
            level = 'info'
        elif level in ('ERROR', 'error'):
            level = 'error'
        else:
            level = 'warning'

        record = {"timestamp": timestamp, "level": level}
        print(record)
        result = model.run(record)
        anom = result.inferences['anomalyScore']
        record_anomalies.append(anom)

        mean_anomalies = np.mean(record_anomalies)
        std_anomalies = np.std(record_anomalies)
        if std_anomalies == 0:
            # Avoid dividing by zero while all scores are still identical.
            std_anomalies = 0.00001
        mean_anomalies_short_window = np.mean(
            record_anomalies[-int(history):])

        likelihood_test_test = 1 - qfunction(
            (mean_anomalies_short_window - mean_anomalies) / std_anomalies)
        print("Likelihood")
        print(likelihood_test_test)

        # NOTE(review): a new AnomalyLikelihood is built for every record, so
        # it cannot accumulate history between records, and its result is
        # not used below -- confirm whether a shared instance was intended.
        anomalyLikelihood = AnomalyLikelihood()
        anomalyProbability = anomalyLikelihood.anomalyProbability(
            record['level'], anom, record['timestamp'])

        # NOTE(review): x= / y= are passed straight to FuncAnimation as
        # keyword arguments -- confirm it accepts them (fargs may be meant).
        ani = animation.FuncAnimation(fig,
                                      animate,
                                      interval=1000,
                                      x=nb,
                                      y=likelihood_test_test)
        plt.show()
        if likelihood_test_test >= 0.85:
            print("Anomaly detected!")
            print("Probability od being abnormal %s" % likelihood_test_test)
Ejemplo n.º 15
0
    # Build the model input: a (coordinates array, radius) tuple plus the
    # event time parsed from its ISO-style string form.
    input_event = (numpy.array([x, y, z]), radius)
    timestamp = datetime.datetime.strptime(event.time, "%Y-%m-%dT%H:%M:%S.%fZ")
    # input_event = (timestamp, input_event)
    modelInput = {}
    modelInput["event"] = input_event
    modelInput["timestamp"] = (timestamp)
    # Run the record through the model, then save the model to MODELSTATE.
    result = model.run(modelInput)
    model.save(MODELSTATE)
    # print result

    
    # Anomaly mode: derive likelihood statistics from the raw anomaly score
    # and record them; no prediction is produced.
    if not PREDICT:
      # Anomaly-Stats: 
      anomalyScore = result.inferences["anomalyScore"]
      # By default 0.5 for the first 600 iterations!
      likelihood = anomalyLikelihood.anomalyProbability(modelInput["event"], anomalyScore, modelInput["timestamp"])
      logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
      AnomalyScores.append(anomalyScore)
      LikelihoodScores.append([modelInput["timestamp"], modelInput["event"], likelihood])
      prediction = 'None'


    # Prediction mode: the anomaly statistics are set to the placeholder
    # string 'None' and only the 1-step best prediction is extracted.
    if PREDICT:
      # Handle Anomaly:
      anomalyScore, likelihood, logLikelihood = 'None', 'None', 'None'
      # NOTE(review): the shifted result is stored in pred_result, but the
      # prediction below is read from the unshifted `result` -- confirm.
      pred_result = shifter.shift(result)
      if result.inferences["multiStepBestPredictions"][1]:
        prediction = result.inferences["multiStepBestPredictions"][1]
        print prediction
      else:
        prediction = 'None'
Ejemplo n.º 16
0
    #print "prediction: ", result.inferences["multiStepBestPredictions"][1]
    # Append the new raw anomaly score to the running history and recompute
    # the statistics over the whole history.
    anom = result.inferences['anomalyScore']
    #print "anomaly score: ", anom
    record_anomalies.append(anom)
    mean_anomalies = np.mean(record_anomalies)
    std_anomalies = np.std(record_anomalies)
    var_anomalies = np.var(record_anomalies)
    # Short window = the last int(0.05 * i) scores. When that length is 0 the
    # slice is [-0:], which selects the whole history.
    mean_anomalies_short_window = np.mean(record_anomalies[-int(0.05 * i):])
    #likelihood = 1-((norm.cdf(anom, mean_anomalies_short_window-mean_anomalies, std_anomalies))-(norm.cdf(0, mean_anomalies_short_window-mean_anomalies, std_anomalies)))
    # likelihood = 1 - (cdf(anom) - cdf(0)) of a normal distribution whose
    # loc is (short-window mean - overall mean) and scale is the overall std.
    likelihood = 1 - ((norm.cdf(
        anom, mean_anomalies_short_window - mean_anomalies, std_anomalies)) -
                      (norm.cdf(0, mean_anomalies_short_window -
                                mean_anomalies, std_anomalies)))
    #print "likelihood score: ", likelihood
    # NOTE(review): a new AnomalyLikelihood is constructed each time this code
    # runs, so it keeps no history between records, and anomalyProbability is
    # not used below -- confirm whether a shared instance was intended.
    anomalyLikelihood = AnomalyLikelihood()
    anomalyProbability = anomalyLikelihood.anomalyProbability(
        record['level'], anom, record['timestamp'])
    #print "anomalyProbability: ", anomalyProbability
    # Count and report the record only when the likelihood reaches 1.
    if likelihood >= 1:
        cpt = cpt + 1
        print i
        print "Anomaly detected!"

print "Total nb of anomalies", cpt
"""
data = getData()
for _ in xrange(5):
    print data.next()

with open('export_dataframe_df2.csv') as inputFile:
    print
    for _ in xrange(8):
Ejemplo n.º 17
0
      # Handle Anomaly:
      # Anomaly statistics start as the placeholder string 'None'; they are
      # overwritten below if no prediction is produced.
      anomalyScore, likelihood, logLikelihood = 'None', 'None', 'None'
      # NOTE(review): the shifted result is stored in pred_result, but the
      # prediction below is read from the unshifted `result` -- confirm.
      pred_result = shifter.shift(result)
      if result.inferences["multiStepBestPredictions"][1]:
        prediction = result.inferences["multiStepBestPredictions"][1]
        print prediction
      else:
        prediction = 'None'


    # Compute anomaly statistics when not predicting, or when the prediction
    # step above produced no value.
    if not PREDICT or prediction == 'None':
      # Anomaly-Stats: 
      anomalyScore = result.inferences["anomalyScore"]
      AnomalyScores.append(anomalyScore)
      # By default 0.5 for the first 600 iterations! TODO: Still not quite sure if that's alright...
      # NOTE(review): the value passed to anomalyProbability is
      # event[0] + numpy.array([event[1]]) -- confirm this sum is the
      # intended metric value.
      likelihood = anomalyLikelihood.anomalyProbability(event[0] + numpy.array([event[1]]), anomalyScore, modelInput["timestamp"])
      logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
      LikelihoodScores.append([modelInput["timestamp"], modelInput["event"], likelihood])
      prediction = 'None'


    # NOTE: change mag to scalar -more general! -Typecasting for DB
    data = {"eventType": str(event.type),
            "lat": float(event.latitude),
            "lng": float(event.longitude),
            "depth": float(event.depth),
            "scalar": float(event.mag),
            "timestamp": str(event.time),
            "AnomalyScore": float(anomalyScore),
            "Anomaly_mean": (float(numpy.mean(AnomalyScores)), WINDOWSIZE),
            "AnomalyLikelihood": float(likelihood),
Ejemplo n.º 18
0
class Anomaly(object):
  """Utility class for generating anomaly scores in different ways.

  Supported modes:
    MODE_PURE - the raw anomaly score as computed by computeRawAnomalyScore
    MODE_LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw
        anomaly scores
    MODE_WEIGHTED - multiplies the likelihood result with the raw anomaly score
        that was used to generate the likelihood
  """


  # anomaly modes supported
  MODE_PURE = "pure"
  MODE_LIKELIHOOD = "likelihood"
  MODE_WEIGHTED = "weighted"
  _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)


  def __init__(self, slidingWindowSize = None, mode=MODE_PURE):
    """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; int > 0, or None to
        disable the moving average
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, how much anomal the value is;
              float 0..1 where 1=totally unexpected
          - "likelihood" - uses the anomaly_likelihood code;
              models probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    @raise TypeError if slidingWindowSize is given but is not > 0
    @raise ValueError if mode is not one of the supported modes
    """
    self._mode = mode
    # Test for None explicitly: `None > 0` is False only by accident on
    # Python 2 and raises TypeError on Python 3.
    self._useMovingAverage = (slidingWindowSize is not None
                              and slidingWindowSize > 0)
    self._buf = None
    self._i = None

    # Using cumulative anomaly, sliding window
    if self._useMovingAverage:
      self._windowSize = slidingWindowSize
      # Sliding window buffer. The builtin `float` is used as dtype because
      # the `numpy.float` alias has been removed from recent NumPy releases.
      self._buf = numpy.array([0] * self._windowSize, dtype=float)
      self._i = 0 # index pointer to actual position
    elif slidingWindowSize is not None:
      raise TypeError(
          "Anomaly: if you define slidingWindowSize, it has to be an "
          "integer > 0;  slidingWindowSize=%r" % slidingWindowSize)

    if not self._mode in Anomaly._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)
    # Both likelihood-based modes read self._likelihood in
    # computeAnomalyScore, so it must exist for MODE_WEIGHTED as well.
    if self._mode in (Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED):
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly


  def computeAnomalyScore(self, activeColumns, predictedColumns, value=None,
                          timestamp=None):
    """Compute the anomaly score as the percent of active columns not predicted.

    @param activeColumns: array of active column indices
    @param predictedColumns: array of columns indices predicted in this step
                             (used for anomaly in step T+1)
    @param value: (optional) metric value of current input
                              (used in anomaly-likelihood)
    @param timestamp: (optional) date timestamp when the sample occured
                              (used in anomaly-likelihood)
    @return the computed anomaly score; float 0..1
    """
    # Start by computing the raw anomaly score.
    anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

    # Compute final anomaly based on selected mode.
    if self._mode == Anomaly.MODE_PURE:
      score = anomalyScore
    elif self._mode == Anomaly.MODE_LIKELIHOOD:
      # TODO add tests for likelihood modes
      probability = self._likelihood.anomalyProbability(
          value, anomalyScore, timestamp)
      score = probability
    elif self._mode == Anomaly.MODE_WEIGHTED:
      probability = self._likelihood.anomalyProbability(
          value, anomalyScore, timestamp)
      score = anomalyScore * probability

    # Last, do moving-average if windowSize was specified.
    if self._useMovingAverage:
      score = self._movingAverage(score)

    return score


  def _movingAverage(self, newElement=None):
    """moving average

    The buffer is circular: a new element overwrites the oldest slot. The sum
    is always divided by the full window size, including while some slots
    still hold their initial 0.

    @param newElement (optional) add a new element before computing the avg
    @return moving average of self._windowSize last elements
    """
    if newElement is not None:
      self._buf[self._i]= newElement
      self._i = (self._i + 1) % self._windowSize
    return self._buf.sum() / float(self._windowSize)  # normalize to 0..1
Ejemplo n.º 19
0
class Anomaly(object):
    """Utility class for generating anomaly scores in different ways.

    Supported modes:
      MODE_PURE - the raw anomaly score as computed by computeRawAnomalyScore
      MODE_LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw
          anomaly scores
      MODE_WEIGHTED - multiplies the likelihood result with the raw anomaly
          score that was used to generate the likelihood
    """

    # anomaly modes supported
    MODE_PURE = "pure"
    MODE_LIKELIHOOD = "likelihood"
    MODE_WEIGHTED = "weighted"
    _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)

    def __init__(self, slidingWindowSize=None, mode=MODE_PURE):
        """
        @param slidingWindowSize (optional) - how many elements are summed up;
            enables moving average on final anomaly score; int > 0, or None
            to disable the moving average
        @param mode (optional) - (string) how to compute anomaly;
            possible values are:
              - "pure" - the default, how much anomal the value is;
                  float 0..1 where 1=totally unexpected
              - "likelihood" - uses the anomaly_likelihood code;
                  models probability of receiving this value and anomalyScore
              - "weighted" - "pure" anomaly weighted by "likelihood"
                  (anomaly * likelihood)
        @raise TypeError if slidingWindowSize is given but is not > 0
        @raise ValueError if mode is not one of the supported modes
        """
        self._mode = mode
        # Test for None explicitly: `None > 0` is False only by accident on
        # Python 2 and raises TypeError on Python 3.
        self._useMovingAverage = (slidingWindowSize is not None
                                  and slidingWindowSize > 0)
        self._buf = None
        self._i = None

        # Using cumulative anomaly, sliding window
        if self._useMovingAverage:
            self._windowSize = slidingWindowSize
            # Sliding window buffer. The builtin `float` is used as dtype
            # because the `numpy.float` alias has been removed from recent
            # NumPy releases.
            self._buf = numpy.array([0] * self._windowSize, dtype=float)
            self._i = 0  # index pointer to actual position
        elif slidingWindowSize is not None:
            raise TypeError(
                "Anomaly: if you define slidingWindowSize, it has to be an "
                "integer > 0;  slidingWindowSize=%r" % slidingWindowSize)

        if not self._mode in Anomaly._supportedModes:
            raise ValueError("Invalid anomaly mode; only supported modes are: "
                             "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                             "Anomaly.MODE_WEIGHTED; you used: %r" %
                             self._mode)
        # Both likelihood-based modes read self._likelihood in
        # computeAnomalyScore, so it must exist for MODE_WEIGHTED as well.
        if self._mode in (Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED):
            self._likelihood = AnomalyLikelihood()  # probabilistic anomaly

    def computeAnomalyScore(self,
                            activeColumns,
                            predictedColumns,
                            value=None,
                            timestamp=None):
        """Compute the anomaly score as the percent of active columns not predicted.

        @param activeColumns: array of active column indices
        @param predictedColumns: array of columns indices predicted in this
                                 step (used for anomaly in step T+1)
        @param value: (optional) metric value of current input
                                  (used in anomaly-likelihood)
        @param timestamp: (optional) date timestamp when the sample occured
                                  (used in anomaly-likelihood)
        @return the computed anomaly score; float 0..1
        """
        # Start by computing the raw anomaly score.
        anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

        # Compute final anomaly based on selected mode.
        if self._mode == Anomaly.MODE_PURE:
            score = anomalyScore
        elif self._mode == Anomaly.MODE_LIKELIHOOD:
            # TODO add tests for likelihood modes
            probability = self._likelihood.anomalyProbability(
                value, anomalyScore, timestamp)
            score = probability
        elif self._mode == Anomaly.MODE_WEIGHTED:
            probability = self._likelihood.anomalyProbability(
                value, anomalyScore, timestamp)
            score = anomalyScore * probability

        # Last, do moving-average if windowSize was specified.
        if self._useMovingAverage:
            score = self._movingAverage(score)

        return score

    def _movingAverage(self, newElement=None):
        """moving average

        The buffer is circular: a new element overwrites the oldest slot. The
        sum is always divided by the full window size, including while some
        slots still hold their initial 0.

        @param newElement (optional) add a new element before computing the avg
        @return moving average of self._windowSize last elements
        """
        if newElement is not None:
            self._buf[self._i] = newElement
            self._i = (self._i + 1) % self._windowSize
        return self._buf.sum() / float(self._windowSize)  # normalize to 0..1
def runAnomaly(options):
    """
    Create and run a CLA Model on the given dataset (based on the hotgym
    anomaly client in NuPIC).

    @param options: object with the attributes `resolution`, `min`, `max`,
        `inputFile` and `outputFile`. When `resolution` is None it is derived
        from (max - min) / numBuckets, with a floor of 0.001.

    Reads model parameters from "model_params.json", feeds every row of the
    input CSV (columns "dttm" and "value") to the model, and writes timestamp,
    value, raw anomaly score, likelihood and log-likelihood to the output CSV.
    """
    # Load the model params JSON
    with open("model_params.json") as fp:
        modelParams = json.load(fp)

    # Update the resolution value for the encoder
    sensorParams = modelParams['modelParams']['sensorParams']
    valueEncoder = sensorParams['encoders']['value']
    numBuckets = valueEncoder.pop('numBuckets')
    resolution = options.resolution
    if resolution is None:
        # float() keeps the division from truncating when min/max and
        # numBuckets are all integers under Python 2.
        resolution = max(0.001,
                         float(options.max - options.min) / numBuckets)
    print("Using resolution value: {0}".format(resolution))
    valueEncoder['resolution'] = resolution

    model = ModelFactory.create(modelParams)
    model.enableInference({'predictedField': 'value'})

    # The anomaly likelihood object
    anomalyLikelihood = AnomalyLikelihood()

    # Stays 0 when the input has a header but no data rows, so the summary
    # line below cannot hit an unbound name.
    i = 0

    with open(options.inputFile) as fin:
        # Both files are context-managed so the output is flushed and closed
        # even if processing fails part-way. "wb" is the mode the Python 2
        # csv module expects.
        with open(options.outputFile, "wb") as fout:

            # Here we write the log likelihood value as the 'anomaly score'
            # The actual CLA outputs are labeled 'raw anomaly score'
            reader = csv.reader(fin)
            csvWriter = csv.writer(fout)
            csvWriter.writerow([
                "timestamp", "value", "_raw_score", "likelihood_score",
                "log_likelihood_score"
            ])
            headers = next(reader)

            # Iterate through each record in the CSV file
            print("Starting processing at %s" % datetime.datetime.now())
            for i, record in enumerate(reader, start=1):

                # Convert input data to a dict so we can pass it into the model
                inputData = dict(zip(headers, record))
                inputData["value"] = float(inputData["value"])
                inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])

                # Send it to the CLA and get back the raw anomaly score
                result = model.run(inputData)
                anomalyScore = result.inferences['anomalyScore']

                # Compute the Anomaly Likelihood
                likelihood = anomalyLikelihood.anomalyProbability(
                    inputData["value"], anomalyScore, inputData["dttm"])
                logLikelihood = anomalyLikelihood.computeLogLikelihood(
                    likelihood)
                if likelihood > 0.9999:
                    print("Anomaly detected: %s %s %s" % (
                        inputData['dttm'], inputData['value'], likelihood))

                # Write results to the output CSV file
                csvWriter.writerow([
                    inputData["dttm"], inputData["value"], anomalyScore,
                    likelihood, logLikelihood
                ])

                # Progress report
                if (i % 1000) == 0:
                    print("%s records processed" % i)

    print("Completed processing %s records at %s" % (
        i, datetime.datetime.now()))
    print("Anomaly scores for %s have been written to %s" % (
        options.inputFile, options.outputFile))
Ejemplo n.º 21
0
class Anomaly(object):
  """Utility class for generating anomaly scores in different ways.

  Supported modes:
    MODE_PURE - the raw anomaly score as computed by computeRawAnomalyScore
    MODE_LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw
        anomaly scores
    MODE_WEIGHTED - multiplies the likelihood result with the raw anomaly score
        that was used to generate the likelihood
  """


  # anomaly modes supported
  MODE_PURE = "pure"
  MODE_LIKELIHOOD = "likelihood"
  MODE_WEIGHTED = "weighted"
  _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)


  def __init__(self, slidingWindowSize = None, mode=MODE_PURE):
    """
    @param slidingWindowSize (optional) - window size handed to MovingAverage;
        when given, the final anomaly score is passed through that moving
        average; when None no averaging is done
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, the raw anomaly score
          - "likelihood" - uses the anomaly_likelihood code;
              score is (1 - anomalyProbability)
          - "weighted" - "pure" anomaly weighted by "likelihood";
              score is anomalyScore * (1 - anomalyProbability)
    @raise ValueError if mode is not one of the supported modes
    """
    # Validate first so that an invalid mode never leaves a half-built object.
    if mode not in Anomaly._supportedModes:
      # (mode,) keeps the formatting safe even when a tuple is passed as mode.
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % (mode,))
    self._mode = mode

    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    # Both the likelihood and the weighted mode query the likelihood model in
    # compute(), so both need it; creating it only for MODE_LIKELIHOOD made
    # MODE_WEIGHTED fail with AttributeError on the first compute() call.
    if self._mode in (Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED):
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly


  def compute(self, activeColumns, predictedColumns,
              inputValue=None, timestamp=None):
    """Compute the anomaly score as the percent of active columns not predicted.

    @param activeColumns: array of active column indices
    @param predictedColumns: array of columns indices predicted in this step
                             (used for anomaly in step T+1)
    @param inputValue: (optional) value of current input to encoders
                             (eg "cat" for category encoder)
                             (used in anomaly-likelihood)
    @param timestamp: (optional) date timestamp when the sample occured
                             (used in anomaly-likelihood)
    @return the computed anomaly score; float 0..1
    @raise ValueError in MODE_LIKELIHOOD when inputValue is None
    """
    # Start by computing the raw anomaly score.
    anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

    # Compute final anomaly based on selected mode.
    if self._mode == Anomaly.MODE_PURE:
      score = anomalyScore
    elif self._mode == Anomaly.MODE_LIKELIHOOD:
      if inputValue is None:
        raise ValueError("Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' "
                 "requires 'inputValue' as parameter to compute() method. ")

      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      # low likelihood -> hi anomaly
      score = 1 - probability
    elif self._mode == Anomaly.MODE_WEIGHTED:
      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      score = anomalyScore * (1 - probability)

    # Last, do moving-average if windowSize was specified.
    if self._movingAverage is not None:
      score = self._movingAverage.next(score)

    return score
Ejemplo n.º 22
0
def runAnomaly(options):
    global g_ps_count_dict_unsorted
    global g_abnomal_data_dict_unsorted
    """
    Create and run a CLA Model on the given dataset (based on the hotgym anomaly
    client in NuPIC).
    """
    # Load the model params JSON
    with open("model_params.json") as fp:
        modelParams = json.load(fp)

    if options.oswpsDir != "":
        # Get PS dictionary
        osw = OSWData(options.oswpsDir, PS)
        osw.traverse_dir()
        g_ps_count_dict_unsorted = osw.get_ps_dict()
        options.max = ps_max_value = max(g_ps_count_dict_unsorted.values())
        options.min = ps_min_value = min(g_ps_count_dict_unsorted.values())
        print("Min value:" + str(ps_min_value) + ', ' + "Max value:" +
              str(ps_max_value))

    # Update the resolution value for the encoder
    sensorParams = modelParams['modelParams']['sensorParams']
    numBuckets = modelParams['modelParams']['sensorParams']['encoders'][
        'value'].pop('numBuckets')
    resolution = options.resolution
    if resolution is None:
        resolution = max(0.001, (options.max - options.min) / numBuckets)
    print("Using resolution value: {0}".format(resolution))
    sensorParams['encoders']['value']['resolution'] = resolution

    model = ModelFactory.create(modelParams)
    model.enableInference({'predictedField': 'value'})
    if options.inputFile != "":
        with open(options.inputFile) as fin:
            # Open file and setup headers
            # Here we write the log likelihood value as the 'anomaly score'
            # The actual CLA outputs are labeled 'raw anomaly score'
            reader = csv.reader(fin)
            csvWriter = csv.writer(open(options.outputFile, "wb"))
            csvWriter.writerow([
                "timestamp", "value", "_raw_score", "likelihood_score",
                "log_likelihood_score"
            ])
            headers = reader.next()

            # The anomaly likelihood object
            anomalyLikelihood = AnomalyLikelihood()

            # Iterate through each record in the CSV file
            print "Starting processing at", datetime.datetime.now()
            for i, record in enumerate(reader, start=1):

                # Convert input data to a dict so we can pass it into the model
                inputData = dict(zip(headers, record))
                inputData["value"] = float(inputData["value"])
                inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])
                #inputData["dttm"] = datetime.datetime.now()

                # Send it to the CLA and get back the raw anomaly score
                result = model.run(inputData)
                anomalyScore = result.inferences['anomalyScore']

                # Compute the Anomaly Likelihood
                likelihood = anomalyLikelihood.anomalyProbability(
                    inputData["value"], anomalyScore, inputData["dttm"])
                logLikelihood = anomalyLikelihood.computeLogLikelihood(
                    likelihood)
                if likelihood > 0.9999:
                    print "Anomaly detected:", inputData['dttm'], inputData[
                        'value'], likelihood

                # Write results to the output CSV file
                csvWriter.writerow([
                    inputData["dttm"], inputData["value"], anomalyScore,
                    likelihood, logLikelihood
                ])

                # Progress report
                if (i % 1000) == 0:
                    print i, "records processed"
    elif options.oswpsDir != "":
        if options.use_rtm == True:
            rtm_sensitivity = 2
            rtm = LinearRegressionTemoporalMemory(window=10,
                                                  interval=10,
                                                  min_=options.min,
                                                  max_=options.max,
                                                  boost=rtm_sensitivity,
                                                  leak_detection=0,
                                                  critical_region="right_tail",
                                                  debug=0)
            g_abnomal_data_dict_unsorted = rtm.analyze(
                g_ps_count_dict_unsorted)
        else:
            csvWriter = csv.writer(open(options.outputFile, "wb"))
            csvWriter.writerow([
                "timestamp", "value", "_raw_score", "likelihood_score",
                "log_likelihood_score"
            ])
            ps_od = collections.OrderedDict(
                sorted(g_ps_count_dict_unsorted.items()))

            # The anomaly likelihood object
            anomalyLikelihood = AnomalyLikelihood()

            # Iterate through each record in the CSV file
            print "Starting processing at", datetime.datetime.now()
            for i, timestamp in enumerate(ps_od):
                ps_count = ps_od[timestamp]

                inputData = {}
                inputData["value"] = float(ps_count)
                inputData["dttm"] = dateutil.parser.parse(timestamp)
                #inputData["dttm"] = datetime.datetime.now()

                # Send it to the CLA and get back the raw anomaly score
                result = model.run(inputData)
                anomalyScore = result.inferences['anomalyScore']

                # Compute the Anomaly Likelihood
                likelihood = anomalyLikelihood.anomalyProbability(
                    inputData["value"], anomalyScore, inputData["dttm"])
                logLikelihood = anomalyLikelihood.computeLogLikelihood(
                    likelihood)
                if likelihood > 0.9999:
                    print "Anomaly detected:", inputData['dttm'], inputData[
                        'value'], likelihood
                    g_abnomal_data_dict_unsorted[timestamp] = ps_count

                # Write results to the output CSV file
                csvWriter.writerow([
                    inputData["dttm"], inputData["value"], anomalyScore,
                    likelihood, logLikelihood
                ])

                # Progress report
                if (i % 1000) == 0:
                    print i, "records processed"

            print "Completed processing", i, "records at", datetime.datetime.now(
            )
    print "Anomaly scores for", options.inputFile,
    print "have been written to", options.outputFile
Ejemplo n.º 23
0
class Monitor(object):
    """ A NuPIC model that saves results to Redis. """
    def __init__(self, config):
        """
        Build the model, logging and Redis connection for one stream.

        Keys read from `config`: 'resolution', 'stream', 'seconds_per_request',
        'webhook', 'anomaly_threshold', 'likelihood_threshold' and 'domain'.
        The log file is created under the directory named by the LOG_DIR
        environment variable.
        """
        # Local import keeps this block self-contained.
        import copy

        # Instantiate NuPIC model. Work on a private copy: writing the
        # resolution into the shared module-level MODEL_PARAMS would leak one
        # monitor's setting into every other user of that dict.
        model_params = copy.deepcopy(base_model_params.MODEL_PARAMS)
        model_params['modelParams']['sensorParams']['encoders']['value'][
            'resolution'] = config['resolution']

        self.model = ModelFactory.create(model_params)

        self.model.enableInference({'predictedField': 'value'})

        # The shifter is used to bring the predictions to the actual time frame
        self.shifter = InferenceShifter()

        # The anomaly likelihood object
        self.anomalyLikelihood = AnomalyLikelihood()

        # Set stream source
        self.stream = config['stream']

        # Setup class variables
        self.db = redis.Redis('localhost')
        self.seconds_per_request = config['seconds_per_request']
        self.webhook = config['webhook']
        self.anomaly_threshold = config['anomaly_threshold']
        self.likelihood_threshold = config['likelihood_threshold']
        self.domain = config['domain']
        self.alert = False  # Toggled when we get above threshold

        # Setup logging
        # NOTE(review): `logger` is not a parameter of this method; presumably
        # it is a module-level name defined elsewhere in the file - confirm.
        self.logger = logger or logging.getLogger(__name__)
        handler = logging.handlers.RotatingFileHandler(
            os.environ['LOG_DIR'] + "/monitor_%s.log" % self.stream.name,
            maxBytes=1024 * 1024,
            backupCount=4,
        )

        handler.setFormatter(
            logging.Formatter(
                '[%(levelname)s/%(processName)s][%(asctime)s] %(name)s %(message)s'
            ))
        handler.setLevel(logging.INFO)
        self.logger.addHandler(handler)
        self.logger.setLevel(logging.INFO)

        self.logger.info("=== Settings ===")
        self.logger.info("Webhook: %s", self.webhook)
        self.logger.info("Domain: %s", self.domain)
        self.logger.info("Seconds per request: %d", self.seconds_per_request)

        # Write metadata to Redis (best effort: failures are only logged)
        try:
            stream_id = self.stream.id
            self.db.set('name:%s' % stream_id, self.stream.name)
            self.db.set('value_label:%s' % stream_id,
                        self.stream.value_label)
            self.db.set('value_unit:%s' % stream_id,
                        self.stream.value_unit)
        except Exception:
            self.logger.warning("Could not write results to redis.",
                                exc_info=True)

    def train(self):
        """ Feed the stream's historic data to the model without posting. """
        data = self.stream.historic_data()

        for model_input in data:
            self.update(model_input, False)  # Don't post anomalies in training

    def loop(self):
        """ Poll the stream forever, sleeping between requests. """
        while True:
            data = self.stream.new_data()

            for model_input in data:
                self.update(model_input, True)  # Post anomalies when online

            sleep(self.seconds_per_request)

    def update(self, model_input, is_to_post):
        """
        Run one input through the model, store the result and maybe notify.

        `model_input` is indexed with 'value', 'raw_value' and 'time' (an
        object providing timetuple() and isoformat()). When `is_to_post` is
        true and a webhook is configured, a notification is sent on the
        transition into the anomalous state.

        Returns a dict with keys "likelihood", "anomalous", "anomalyScore"
        and "predicted".
        """
        # Pass the input to the model
        result = self.model.run(model_input)

        # Shift results
        result = self.shifter.shift(result)

        # Save multi step predictions
        inference = result.inferences['multiStepPredictions']

        # Take the anomaly_score
        anomaly_score = result.inferences['anomalyScore']

        # Compute the Anomaly Likelihood
        likelihood = self.anomalyLikelihood.anomalyProbability(
            model_input['value'], anomaly_score, model_input['time'])

        # Get the predicted value for reporting
        predicted = result.inferences['multiStepBestPredictions'][1]

        # Get timestamp from datetime
        timestamp = calendar.timegm(model_input['time'].timetuple())

        self.logger.info(
            "Processing: %s",
            strftime("%Y-%m-%d %H:%M:%S", model_input['time'].timetuple()))

        # Save results to Redis
        if inference[1]:
            try:
                # Save in redis with key = 'results:monitor_id' and value =
                # 'time, raw_value, actual, prediction, anomaly, likelihood'
                # * actual: is the value processed by the NuPIC model, which
                #           can be an average of raw_values
                # * prediction: prediction based on 'actual' values.
                results_key = 'results:%s' % self.stream.id
                self.db.rpush(
                    results_key,
                    '%s,%.5f,%.5f,%.5f,%.5f,%.5f' %
                    (timestamp, model_input['raw_value'],
                     result.rawInput['value'], predicted, anomaly_score,
                     likelihood))
                # Cap the stored history to the most recent entries
                max_items = 10000
                ln = self.db.llen(results_key)
                if ln > max_items:
                    self.db.ltrim(results_key, ln - max_items, ln)
            except Exception:
                self.logger.warning("Could not write results to redis.",
                                    exc_info=True)

        # See if above threshold (in which case anomalous is True)
        anomalous = False
        if self.anomaly_threshold is not None:
            if anomaly_score >= self.anomaly_threshold:
                anomalous = True
        if self.likelihood_threshold is not None:
            if likelihood >= self.likelihood_threshold:
                anomalous = True

        # Post if webhook is not None
        if is_to_post and self.webhook is not None:
            # Check if it was in alert state in previous time step
            was_alerted = self.alert
            # Update alert state
            self.alert = anomalous

            # Send notification only when entering the anomalous state
            # (was not alerted before and is alerted now).
            if not was_alerted and self.alert:
                report = {
                    'anomaly_score': anomaly_score,
                    'likelihood': likelihood,
                    'model_input': {
                        'time': model_input['time'].isoformat(),
                        'value': model_input['raw_value']
                    }
                }
                self._send_post(report)

        # Return anomalous state
        return {
            "likelihood": likelihood,
            "anomalous": anomalous,
            "anomalyScore": anomaly_score,
            "predicted": predicted
        }

    def delete(self):
        """ Remove this monitor from redis """

        self.db.delete("results:%s" % self.stream.id)
        self.db.delete('name:%s' % self.stream.id)
        self.db.delete('value_label:%s' % self.stream.id)
        self.db.delete('value_unit:%s' % self.stream.id)

    def _send_post(self, report):
        """ Send HTTP POST notification.

        Builds a Slack-style payload when the webhook URL contains
        "hooks.slack.com", a plain JSON report otherwise. Failures to post
        are logged and swallowed.
        """

        chart_url = 'http://%s?id=%s' % (self.domain, self.stream.id)
        if "hooks.slack.com" not in self.webhook:
            payload = {
                'sent_at':
                datetime.utcnow().isoformat(),
                'report':
                report,
                'monitor':
                self.stream.name,
                'source':
                type(self.stream).__name__,
                'metric':
                '%s (%s)' % (self.stream.value_label, self.stream.value_unit),
                'chart':
                chart_url
            }
        else:
            payload = {
                'username':
                '******',
                'icon_url':
                'https://rawgithub.com/cloudwalkio/omg-monitor/slack-integration/docs/images/post_icon.png',
                'text':
                'Anomalous state in *%s* from _%s_:' %
                (self.stream.name, type(self.stream).__name__),
                'attachments': [{
                    'color':
                    'warning',
                    'fields': [{
                        'title': 'Chart',
                        'value': chart_url,
                        'short': False
                    }, {
                        'title': 'Metric',
                        'value': self.stream.value_label,
                        'short': True
                    }, {
                        'title':
                        'Value',
                        'value':
                        str(report['model_input']['value']) + ' ' +
                        self.stream.value_unit,
                        'short':
                        True
                    }]
                }]
            }

        headers = {'Content-Type': 'application/json'}
        try:
            response = requests.post(self.webhook,
                                     data=json.dumps(payload),
                                     headers=headers)
        except Exception:
            self.logger.warning('Failed to post anomaly.', exc_info=True)
            return

        self.logger.info('Anomaly posted with status code %d: %s',
                         response.status_code, response.text)
        return
Ejemplo n.º 24
0
class Anomaly(object):
  """Basic class that computes anomaly scores.

     Anomaly is used to detect strange patterns/behaviors (outliers)
     by a trained CLA model.
  """



  # anomaly modes supported
  MODE_PURE = "pure"
  MODE_LIKELIHOOD = "likelihood"
  MODE_WEIGHTED = "weighted"
  _supportedModes = [MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED]


  def __init__(self, slidingWindowSize = None, anomalyMode=MODE_PURE,
               shiftPredicted=False):
    """
    @param (optional) slidingWindowSize -- enables moving average on final
                      anomaly score; how many elements are summed up,
                      sliding window size; int > 0, or None to disable
    @param (optional) anomalyMode -- (string) how to compute anomaly;
                      possible values are:
                         -- "pure" -- the default, the raw anomaly score
                         -- "likelihood" -- uses the anomaly_likelihood code;
                                      models probability of receiving this
                                      value and anomalyScore; used in Grok
                         -- "weighted" -- "pure" anomaly weighted by
                                      "likelihood" (anomaly * likelihood)
    @param shiftPredicted (optional) -- boolean [default=False];
                                      normally active vs predicted are compared
                          if shiftPredicted=True: predicted(T-1) vs active(T)
                             are compared (eg from TP, CLAModel)
    @raise ValueError if slidingWindowSize is given but not > 0, or if
                      anomalyMode is not a supported mode
    """

    # using cumulative anomaly, sliding window
    # Test for None first: comparing None with an int is only tolerated by
    # Python 2 and raises TypeError on Python 3.
    if slidingWindowSize is not None:
      if not slidingWindowSize > 0:
        raise ValueError("Anomaly: if you define slidingWindowSize, "
                         "it has to be an integer > 0; "
                         "slidingWindowSize=%r" % (slidingWindowSize,))
      self._windowSize = slidingWindowSize
      # sliding window buffer; builtin float is what the removed numpy.float
      # alias pointed to
      self._buf = numpy.array([0] * self._windowSize, dtype=float)
      self._i = 0 # index pointer to actual position

    # mode
    if anomalyMode not in Anomaly._supportedModes:
      # %r keeps this a ValueError even when the mode is not a string
      raise ValueError('Invalid anomaly mode; only supported modes are: '
                       '"Anomaly.MODE_PURE", "Anomaly.MODE_LIKELIHOOD", '
                       '"Anomaly.MODE_WEIGHTED"; you used: %r' % (anomalyMode,))
    self._mode = anomalyMode
    # The weighted mode also queries the likelihood model in
    # computeAnomalyScore(), so it needs the object as well; without it the
    # weighted mode failed with AttributeError.
    if self._mode in (Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED):
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly

    # The attribute only exists when shifting is enabled;
    # computeAnomalyScore() checks for its presence.
    if shiftPredicted:
      self._prevPredictedColumns = numpy.array([])


  def computeAnomalyScore(self, activeColumns, predictedColumns, value=None,
                          timestamp=None):
    """Compute the anomaly score as the percent of active columns not predicted

    @param activeColumns: array of active column indices
    @param predictedColumns: array of columns indices predicted in this step
                             (used for anomaly in step T+1)
    @param value: (optional) input value, that is what activeColumns represent
                              (used in anomaly-likelihood)
    @param timestamp: (optional) date timestamp when the sample occured
                              (used in anomaly-likelihood)
    @return the computed anomaly score; float 0..1
    """

    if hasattr(self, "_prevPredictedColumns"): # shiftPredicted==True
      prevPredictedColumns = self._prevPredictedColumns
      self._prevPredictedColumns = predictedColumns # to be used in step T+1
    else:
      prevPredictedColumns = predictedColumns

    # 1. here is the 'classic' anomaly score
    anomalyScore = computeRawAnomalyScore(activeColumns, prevPredictedColumns)

    # compute final anomaly based on selected mode
    if self._mode == Anomaly.MODE_PURE:
      score = anomalyScore
    elif self._mode == Anomaly.MODE_LIKELIHOOD:
      score = self._likelihood.anomalyProbability(value, anomalyScore,
                                                  timestamp)
    elif self._mode == Anomaly.MODE_WEIGHTED:
      probability = self._likelihood.anomalyProbability(value, anomalyScore,
                                                        timestamp)
      score = anomalyScore * probability

    # last, do moving-average if windowSize is set
    if hasattr(self, "_windowSize"):
      score = self._movingAverage(score)

    return score


  def _movingAverage(self, newElement=None):
    """moving average

    @param newElement (optional) add a new element before computing the avg
    @return sum of the window buffer divided by self._windowSize; slots not
            yet written count as 0
    """
    if newElement is not None:
      # overwrite the oldest slot, then advance the ring-buffer index
      self._buf[self._i]= newElement
      self._i = (self._i + 1) % self._windowSize
    return self._buf.sum()/float(self._windowSize) # normalize to 0..1
Ejemplo n.º 25
0
class _ModelRunner(object):
  """ Feed metric data samples read from stdin through an OPF model and write
  the resulting anomaly likelihood values to stdout
  """

  # Input column meta info compatible with parameters generated by
  # getScalarMetricWithTimeOfDayAnomalyParams
  _INPUT_RECORD_SCHEMA = (
    fieldmeta.FieldMetaInfo("c0", fieldmeta.FieldMetaType.datetime,
                            fieldmeta.FieldMetaSpecial.timestamp),
    fieldmeta.FieldMetaInfo("c1", fieldmeta.FieldMetaType.float,
                            fieldmeta.FieldMetaSpecial.none),
  )


  def __init__(self, modelId, stats):
    """
    :param str modelId: model identifier
    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package.
    """
    self._modelId = modelId

    # NOTE: ModelRecordEncoder is implemented in the pull request
    # https://github.com/numenta/nupic/pull/2432 that is not yet in master.
    encoder = record_stream.ModelRecordEncoder(
      fields=self._INPUT_RECORD_SCHEMA)
    self._modelRecordEncoder = encoder

    self._model = self._createModel(stats=stats)

    self._anomalyLikelihood = AnomalyLikelihood()


  @classmethod
  def _createModel(cls, stats):
    """Build an OPF model with learning and inference switched on

    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package; "min" and "max" are required, "minResolution"
      is optional.
    :returns: OPF Model instance
    """
    lowest = stats["min"]
    highest = stats["max"]

    # Generate swarm params
    params = getScalarMetricWithTimeOfDayAnomalyParams(
      metricData=[0],
      minVal=lowest,
      maxVal=highest,
      minResolution=stats.get("minResolution"))

    opfModel = ModelFactory.create(modelConfig=params["modelConfig"])
    opfModel.enableLearning()
    opfModel.enableInference(params["inferenceArgs"])
    return opfModel


  @classmethod
  def _readInputMessages(cls):
    """Generator that blocks on stdin and yields one sample per input line

    Each line is decoded as JSON into a (timestamp, value) pair. Yields
    two-tuple (<timestamp>, <scalar-value>), where <timestamp> is the
    `datetime.datetime` built from the epoch timestamp with
    `utcfromtimestamp` and <scalar-value> is the decoded metric value.
    Iteration ends when readline returns an empty string.
    """
    # An empty string from readline means the Front End closed the pipe
    # (or died)
    for line in iter(sys.stdin.readline, ""):
      epochSeconds, value = json.loads(line)
      yield (datetime.utcfromtimestamp(epochSeconds), value)


  @classmethod
  def _emitOutputMessage(cls, rowIndex, anomalyProbability):
    """Write one JSON-encoded result line to stdout and flush it

    :param int rowIndex: 0-based index of corresponding input sample
    :param float anomalyProbability: computed anomaly probability value
    """
    payload = json.dumps([rowIndex, anomalyProbability])
    out = sys.stdout
    out.write("%s\n" % (payload,))
    out.flush()


  def _computeAnomalyProbability(self, inputRow):
    """ Compute anomaly log likelihood score

    :param tuple inputRow: Two-tuple input metric data row
      (<datetime-timestamp>, <float-scalar>)

    :returns: Log-scaled anomaly probability
    :rtype: float
    """
    # Raw anomaly score straight from the model
    encodedRow = self._modelRecordEncoder.encode(inputRow)
    modelResult = self._model.run(encodedRow)
    rawScore = modelResult.inferences["anomalyScore"]

    # Anomaly likelihood for this sample, then its log-scaled form
    likelihoodHelper = self._anomalyLikelihood
    probability = likelihoodHelper.anomalyProbability(
      value=inputRow[1],
      anomalyScore=rawScore,
      timestamp=inputRow[0])
    return likelihoodHelper.computeLogLikelihood(probability)


  def run(self):
    """ Run the model: consume every input sample and emit one output message
    with the anomaly score for each
    """
    g_log.info("Processing model=%s", self._modelId)

    for index, sample in enumerate(self._readInputMessages()):
      self._emitOutputMessage(
        rowIndex=index,
        anomalyProbability=self._computeAnomalyProbability(sample))
Ejemplo n.º 26
0
def runAnomaly(options):

    #define local params :

    inputArray = []  #holds all input data
    anomalyArray = []  #holds all output data
    inputThreshold = float(10)  #how many percent of intial samples to ignore
    anomCounter = 0  #counts number of anomalies

    [timeDataFinal, yvalues
     ] = interpolateFunction(inputFileNameInterpol,
                             inputFileNameLocal)  #interpolate the function

    with open("model_params.json") as fp:
        modelParams = json.load(fp)
        #pprint(modelParams)

    #JSON handling
    sensorParams = modelParams['modelParams']['sensorParams']
    numBuckets = modelParams['modelParams']['sensorParams']['encoders'][
        'value'].pop('numBuckets')
    #print numBuckets
    resolution = options.resolution

    #f**k is resolution
    if resolution is None:
        resolution = max(0.001, (options.max - options.min) / numBuckets)
        print "Using resolution value: {0}".format(resolution)
    sensorParams['encoders']['value']['resolution'] = resolution
    #print resolution

    model = ModelFactory.create(modelParams)
    model.enableInference({'predictedField': 'value'})
    with open(options.inputFile) as fin:

        #Open files
        #Setup headers
        reader = csv.reader(fin)
        headers = reader.next()

        # The anomaly likelihood object
        anomalyLikelihood = AnomalyLikelihood()

        #Iterate through each record in the CSV
        print "Starting processing at", datetime.datetime.now()
        for i, record in enumerate(reader, start=1):

            # Convert input data to a dict so we can pass it into the model
            inputData = dict(zip(headers, record))
            #print(inputData)
            inputData["value"] = float(inputData["value"])
            inputArray.append(inputData["value"])
            inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])
            #print inputData

            # Send it to the CLA and get back the raw anomaly score
            result = model.run(inputData)

            #inferences call from nupic
            anomalyScore = result.inferences['anomalyScore']
            anomalyArray.append(anomalyScore)

            #comput likelihood - nupic call
            likelihood = anomalyLikelihood.anomalyProbability(
                inputData["value"], anomalyScore, inputData["dttm"])

        myPlotFunction(inputArray, anomalyArray,
                       inputThreshold)  #plot the output

        #print file
        interpolBool = False
        writeFunction(outputFileName, timeDataFinal, anomalyArray,
                      interpolBool)
Ejemplo n.º 27
0
class _ModelRunner(object):
  """Drive an OPF model over metric data samples read from an input stream and
  write the resulting anomaly likelihood records to stdout.
  """


  def __init__(self, inputFileObj, inputSpec, aggSpec, modelSpec):
    """
    :param inputFileObj: File-like object supplying the input metric data
    :param dict inputSpec: Input data specification per input_opt_schema.json
    :param dict aggSpec: Aggregation specification per agg_opt_schema.json, or
      None when no aggregation is requested
    :param dict modelSpec: Model specification per model_opt_schema.json
    """
    self._inputSpec = inputSpec
    self._aggSpec = aggSpec
    self._modelSpec = modelSpec

    # The model id is optional in the spec; it is only used for logging.
    self._modelId = modelSpec.get("modelId", "Unknown")

    # Schema shared by the aggregator and the record encoder: every row is
    # (timestamp, value), in that order.
    timestampField = fieldmeta.FieldMetaInfo(
      modelSpec["timestampFieldName"],
      fieldmeta.FieldMetaType.datetime,
      fieldmeta.FieldMetaSpecial.timestamp)
    valueField = fieldmeta.FieldMetaInfo(
      modelSpec["valueFieldName"],
      fieldmeta.FieldMetaType.float,
      fieldmeta.FieldMetaSpecial.none)
    inputRecordSchema = (timestampField, valueField)

    # Without an aggregation spec the aggregator is configured with no fields
    # and a window of zero seconds.
    if aggSpec is None:
      aggregationInfo = dict(fields=[], seconds=0)
    else:
      aggregationInfo = dict(
        fields=[(modelSpec["valueFieldName"], aggSpec["func"])],
        seconds=aggSpec["windowSize"])

    self._aggregator = aggregator.Aggregator(
      aggregationInfo=aggregationInfo,
      inputFields=inputRecordSchema)

    self._modelRecordEncoder = record_stream.ModelRecordEncoder(
      fields=inputRecordSchema)

    self._model = self._createModel(modelSpec=modelSpec)

    self._anomalyLikelihood = AnomalyLikelihood()

    self._csvReader = self._createCsvReader(inputFileObj)


  @staticmethod
  def _createModel(modelSpec):
    """Build an OPF model with learning and inference switched on

    :param dict modelSpec: Model specification per model_opt_schema.json; its
      "modelConfig" and "inferenceArgs" entries are used here

    :returns: OPF Model instance
    """
    opfModel = ModelFactory.create(modelConfig=modelSpec["modelConfig"])
    opfModel.enableLearning()
    opfModel.enableInference(modelSpec["inferenceArgs"])
    return opfModel


  @staticmethod
  def _createCsvReader(fileObj):
    """Wrap the input stream in an excel-dialect csv reader

    NOTE: this changes process-wide state: it raises the csv field size limit
    and overwrites os.linesep.

    :param fileObj: File-like object that yields the csv rows

    :returns: csv reader over fileObj
    """
    # Input csvs may carry arbitrarily long fields
    csv.field_size_limit(2**27)

    # Make sure readline() works on windows too
    os.linesep = "\n"

    return csv.reader(fileObj, dialect="excel")


  @classmethod
  def _emitOutputMessage(cls, dataRow, anomalyProbability):
    """Write one result record to stdout as a single JSON line and flush

    :param list dataRow: the two-tuple data row on which anomalyProbability was
      computed, whose first element is datetime timestamp and second element is
      the float scalar value
    :param float anomalyProbability: computed anomaly probability value
    """
    record = [dataRow[0].isoformat(), dataRow[1], anomalyProbability]

    sys.stdout.write(json.dumps(record) + "\n")
    sys.stdout.flush()


  def _computeAnomalyProbability(self, fields):
    """ Run one row through the model and compute its anomaly log likelihood

    :param tuple fields: Two-tuple input metric data row
      (<datetime-timestamp>, <float-scalar>)

    :returns: Log-scaled anomaly probability
    :rtype: float
    """
    timestamp, value = fields[0], fields[1]

    # Raw anomaly score from the model
    modelResult = self._model.run(self._modelRecordEncoder.encode(fields))
    rawAnomalyScore = modelResult.inferences["anomalyScore"]

    # Anomaly likelihood derived from the raw score, then log-scaled
    likelihood = self._anomalyLikelihood.anomalyProbability(
      value=value,
      anomalyScore=rawAnomalyScore,
      timestamp=timestamp)

    return self._anomalyLikelihood.computeLogLikelihood(likelihood)


  def _emitAggregatedRow(self, aggRow):
    """ Score and emit one aggregated row; do nothing when the aggregator
    produced no row

    :param aggRow: Row returned by the aggregator, or None
    """
    if aggRow is None:
      return

    logLikelihood = self._computeAnomalyProbability(aggRow)
    self._emitOutputMessage(dataRow=aggRow, anomalyProbability=logLikelihood)


  def run(self):
    """ Run the model: ingest and process the input metric data and emit output
    messages containing anomaly scores
    """
    rowsLeftToSkip = self._inputSpec["rowOffset"]
    datetimeFormat = self._inputSpec["datetimeFormat"]
    timestampIndex = self._inputSpec["timestampIndex"]
    valueIndex = self._inputSpec["valueIndex"]

    g_log.info("Processing model=%s", self._modelId)

    for inputRow in self._csvReader:
      g_log.debug("Got inputRow=%r", inputRow)

      # The first `rowOffset` rows are headers and are not processed
      if rowsLeftToSkip > 0:
        rowsLeftToSkip -= 1
        g_log.debug("Skipping header row %s; %s rows left to skip",
                    inputRow, rowsLeftToSkip)
        continue

      # NOTE: timestamp first, value second; this order must match the
      # `inputFields` schema given to the Aggregator constructor
      fields = [
        date_time_utils.parseDatetime(inputRow[timestampIndex],
                                      datetimeFormat),
        float(inputRow[valueIndex])
      ]

      aggRow, _ = self._aggregator.next(fields, None)
      g_log.debug("Aggregator returned %s for %s", aggRow, fields)
      self._emitAggregatedRow(aggRow)

    # Input exhausted: passing None makes the aggregator hand back whatever it
    # is still holding
    aggRow, _ = self._aggregator.next(None, curInputBookmark=None)
    g_log.debug("Aggregator reaped %s in final call", aggRow)
    self._emitAggregatedRow(aggRow)
Ejemplo n.º 28
0
        # Build the model input record: the event is an
        # (ndarray([x, y, z]), radius) tuple and the timestamp is parsed from
        # event.time.
        input_event = (numpy.array([x, y, z]), radius)
        timestamp = datetime.datetime.strptime(event.time,
                                               "%Y-%m-%dT%H:%M:%S.%fZ")
        # input_event = (timestamp, input_event)
        modelInput = {}
        modelInput["event"] = input_event
        modelInput["timestamp"] = (timestamp)
        result = model.run(modelInput)
        # The model is saved to MODELSTATE each time this code runs, right
        # after the record has been processed.
        model.save(MODELSTATE)
        # print result

        if not PREDICT:
            # Anomaly mode: derive likelihood statistics from the raw score.
            anomalyScore = result.inferences["anomalyScore"]
            # Original author's note: the likelihood is 0.5 by default for the
            # first 600 iterations (not verifiable from this fragment).
            likelihood = anomalyLikelihood.anomalyProbability(
                modelInput["event"], anomalyScore, modelInput["timestamp"])
            logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
            AnomalyScores.append(anomalyScore)
            LikelihoodScores.append(
                [modelInput["timestamp"], modelInput["event"], likelihood])
            # No prediction is produced in this mode; the string 'None' (not
            # the None object) is used as the placeholder.
            prediction = 'None'

        if PREDICT:
            # Prediction mode: the anomaly statistics are filled with the
            # string placeholder 'None'.
            anomalyScore, likelihood, logLikelihood = 'None', 'None', 'None'
            # NOTE(review): pred_result is assigned but the lines below read
            # `result`, not `pred_result`; confirm whether the shifted result
            # was meant to be used.
            pred_result = shifter.shift(result)
            # Use the 1-step-ahead best prediction when it is truthy.
            if result.inferences["multiStepBestPredictions"][1]:
                prediction = result.inferences["multiStepBestPredictions"][1]
                print prediction
            else:
                prediction = 'None'
Ejemplo n.º 29
0
class AnomalyLikelihoodRegion(PyRegion):
    """Region for computing the anomaly likelihoods.

    Each compute step reads one raw anomaly score and one metric value, feeds
    them to an AnomalyLikelihood instance, and writes the resulting
    probability to the "anomalyLikelihood" output.
    """
    @classmethod
    def getSpec(cls):
        """Return the region spec: inputs, outputs and parameters.

        The default values listed under "parameters" are the same as the
        defaults of the constructor arguments.
        """
        return {
            "description": ("Region that computes anomaly likelihoods for \
                       temporal memory."),
            "singleNodeOnly":
            True,
            "inputs": {
                "rawAnomalyScore": {
                    "description": "The anomaly score whose \
                          likelihood is to be computed",
                    "dataType": "Real32",
                    "count": 1,
                    "required": True,
                    "isDefaultInput": False
                },
                "metricValue": {
                    "description": "The input metric value",
                    "dataType": "Real32",
                    "count": 1,
                    "required": True,
                    "isDefaultInput": False
                },
            },
            "outputs": {
                "anomalyLikelihood": {
                    "description": "The resultant anomaly likelihood",
                    "dataType": "Real32",
                    "count": 1,
                    "isDefaultOutput": True,
                },
            },
            "parameters": {
                "learningPeriod": {
                    "description": "The number of iterations required for the\
                          algorithm to learn the basic patterns in the dataset\
                          and for the anomaly score to 'settle down'.",
                    "dataType": "UInt32",
                    "count": 1,
                    "constraints": "",
                    "defaultValue": 288,
                    "accessMode": "ReadWrite"
                },
                "estimationSamples": {
                    "description": "The number of reasonable anomaly scores\
                           required for the initial estimate of the\
                           Gaussian.",
                    "dataType": "UInt32",
                    "count": 1,
                    "constraints": "",
                    "defaultValue": 100,
                    "accessMode": "ReadWrite"
                },
                "historicWindowSize": {
                    "description": "Size of sliding window of historical data\
                          points to maintain for periodic reestimation\
                          of the Gaussian.",
                    "dataType": "UInt32",
                    "count": 1,
                    "constraints": "",
                    "defaultValue": 8640,
                    "accessMode": "ReadWrite"
                },
                "reestimationPeriod": {
                    "description": "How often we re-estimate the Gaussian\
                          distribution.",
                    "dataType": "UInt32",
                    "count": 1,
                    "constraints": "",
                    "defaultValue": 100,
                    "accessMode": "ReadWrite"
                },
            },
            "commands": {},
        }

    def __init__(self,
                 learningPeriod=288,
                 estimationSamples=100,
                 historicWindowSize=8640,
                 reestimationPeriod=100):
        """Create the wrapped AnomalyLikelihood.

        All four arguments are forwarded unchanged to AnomalyLikelihood; see
        the "parameters" section of getSpec() for their descriptions.
        """
        self.anomalyLikelihood = AnomalyLikelihood(
            learningPeriod=learningPeriod,
            estimationSamples=estimationSamples,
            historicWindowSize=historicWindowSize,
            reestimationPeriod=reestimationPeriod)

    def __eq__(self, other):
        """Two regions are equal when their AnomalyLikelihood objects are.

        Returns NotImplemented for objects that are not regions of this class
        so that comparing against an unrelated value (for example None)
        evaluates to unequal instead of raising AttributeError.
        """
        if not isinstance(other, AnomalyLikelihoodRegion):
            return NotImplemented
        return self.anomalyLikelihood == other.anomalyLikelihood

    def __ne__(self, other):
        """Inverse of __eq__."""
        return not self == other

    @classmethod
    def read(cls, proto):
        """Rebuild a region from `proto`.

        The instance is allocated without running __init__; its only state,
        the AnomalyLikelihood, comes from AnomalyLikelihood.read(proto).
        """
        anomalyLikelihoodRegion = object.__new__(cls)
        anomalyLikelihoodRegion.anomalyLikelihood = AnomalyLikelihood.read(
            proto)

        return anomalyLikelihoodRegion

    def write(self, proto):
        """Serialize the wrapped AnomalyLikelihood into `proto`."""
        self.anomalyLikelihood.write(proto)

    def initialize(self):
        """Nothing to initialize; all state is created in the constructor."""
        pass

    def compute(self, inputs, outputs):
        """Compute the anomaly likelihood for the current inputs.

        Reads the first element of inputs["rawAnomalyScore"] and
        inputs["metricValue"] and stores the computed probability in the
        first element of outputs["anomalyLikelihood"].
        """
        anomalyScore = inputs["rawAnomalyScore"][0]
        value = inputs["metricValue"][0]
        anomalyProbability = self.anomalyLikelihood.anomalyProbability(
            value, anomalyScore)
        outputs["anomalyLikelihood"][0] = anomalyProbability
# Command-line interface. --algo is required because its value is appended to
# 'results/' further down; leaving it out used to yield None and fail with a
# TypeError on that concatenation instead of a usage message.
parser = argparse.ArgumentParser(description='Add to existing name')
parser.add_argument(
    '--algo',
    required=True,
    help='add to existing name especially if I am testing some new feature.')
args = parser.parse_args()
algo = args.algo


def get_all_files_path(root):
    """Return the paths of all files found under `root`, recursively.

    Paths are built by joining each directory reported by os.walk with the
    file names it contains, in the order os.walk yields them. A `root` that
    os.walk cannot traverse gives an empty list.
    """
    files = []
    for dirpath, _dirnames, filenames in os.walk(root):
        files.extend(os.path.join(dirpath, name) for name in filenames)
    return files


# Rewrite every CSV under results/<algo>: the anomaly_score column is replaced
# by the anomaly probability computed from the file's value, anomaly_score and
# timestamp columns, and the file is saved in place.
files = get_all_files_path('results/' + algo)
for path in files:
    # Files whose path contains '_score' are left untouched.
    if '_score' in path:
        continue
    print(path)
    df = pd.read_csv(path)
    # One fresh estimator per file, fed the rows in file order.
    estimator = AnomalyLikelihood()
    df['anomaly_score'] = [
        estimator.anomalyProbability(value, raw_score, timestamp)
        for value, raw_score, timestamp in zip(
            df.value.values, df.anomaly_score.values, df.timestamp.values)
    ]
    df.to_csv(path, index=False)
Ejemplo n.º 31
0
  try:
    # The event pairs the coordinate array with the magnitude scaled by 10 and
    # truncated to an int.
    event = (numpy.array([x, y]), int(10*float(earthquake.mag)))
    modelInput = {}
    modelInput["event"] = event
    modelInput["timestamp"] = (
      datetime.datetime.strptime(earthquake.time, "%Y-%m-%dT%H:%M:%S.%fZ"))

    result = model.run(modelInput)
    anomalyScore = result.inferences["anomalyScore"]
    scores.append(anomalyScore)
    # NOTE(review): despite its name, likelihoodScores receives the raw
    # anomaly score here, not the likelihood computed below; confirm intent.
    likelihoodScores.append([modelInput["timestamp"],
                             modelInput["event"],
                             anomalyScore])

    # The value passed to the likelihood is the coordinate array with the
    # scaled magnitude added to it (a length-1 array added via NumPy
    # broadcasting; assumes x and y are scalars, TODO confirm).
    likelihood = anomalyLikelihood.anomalyProbability(
      event[0] + numpy.array([event[1]]),
      anomalyScore,
      modelInput["timestamp"])

    # Message payload. "mean" is a tuple of the mean over all collected scores
    # and WINDOWSIZE.
    data = {"lat": earthquake.latitude,
            "lng": earthquake.longitude,
            "score": anomalyScore,
            "mag": earthquake.mag,
            "mean": (numpy.mean(scores), WINDOWSIZE),
            "timestamp": earthquake.time,
            "likelihood": likelihood}

    # Publish the JSON-encoded payload on the "nupic" channel
    # (`r` is presumably a Redis client; verify against the full script).
    r.publish("nupic", json.dumps(data))
    print data

  except ValueError:
    # NOTE(review): any ValueError raised above (for example by float() or
    # strptime() on malformed input) silently drops the record.
    pass
Ejemplo n.º 32
0
class Anomaly(object):
    """Utility class for generating anomaly scores in different ways.

    Supported modes:
      MODE_PURE - the raw anomaly score as computed by computeRawAnomalyScore
      MODE_LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw
          anomaly scores
      MODE_WEIGHTED - multiplies the raw anomaly score by
          (1 - anomaly probability), where the probability comes from the
          AnomalyLikelihood class
    """

    # anomaly modes supported
    MODE_PURE = "pure"
    MODE_LIKELIHOOD = "likelihood"
    MODE_WEIGHTED = "weighted"
    _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)

    def __init__(self, slidingWindowSize=None, mode=MODE_PURE):
        """
        @param slidingWindowSize (optional) - how many elements are summed up;
            enables moving average on final anomaly score; int >= 0
        @param mode (optional) - (string) how to compute anomaly;
            possible values are:
              - "pure" - the default, how anomalous the value is;
                  float 0..1 where 1=totally unexpected
              - "likelihood" - uses the anomaly_likelihood code;
                  models probability of receiving this value and anomalyScore
              - "weighted" - "pure" anomaly weighted by "likelihood"
                  (anomaly * (1 - probability))
        @raises ValueError if mode is not one of the supported modes
        """
        # Validate before allocating any helper. The mode is wrapped in a
        # 1-tuple so that a tuple-valued mode is reported through ValueError
        # rather than breaking the %-formatting with a TypeError.
        if mode not in Anomaly._supportedModes:
            raise ValueError("Invalid anomaly mode; only supported modes are: "
                             "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                             "Anomaly.MODE_WEIGHTED; you used: %r" % (mode,))
        self._mode = mode

        if slidingWindowSize is not None:
            self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
        else:
            self._movingAverage = None

        # Both the likelihood and the weighted mode call
        # self._likelihood.anomalyProbability() in compute(), so both need the
        # estimator. Previously it was created for MODE_LIKELIHOOD only, which
        # made MODE_WEIGHTED fail with AttributeError.
        if mode in (Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED):
            self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
        else:
            self._likelihood = None

    def compute(self,
                activeColumns,
                predictedColumns,
                inputValue=None,
                timestamp=None):
        """Compute the anomaly score as the percent of active columns not predicted.

        @param activeColumns: array of active column indices
        @param predictedColumns: array of columns indices predicted in this
            step (used for anomaly in step T+1)
        @param inputValue: (optional) value of current input to encoders
            (eg "cat" for category encoder) (used in anomaly-likelihood)
        @param timestamp: (optional) date timestamp when the sample occurred
            (used in anomaly-likelihood)
        @return the computed anomaly score; float 0..1
        @raises ValueError in MODE_LIKELIHOOD when inputValue is None
        """
        # Start by computing the raw anomaly score.
        anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

        # Compute final anomaly based on selected mode.
        if self._mode == Anomaly.MODE_PURE:
            score = anomalyScore
        elif self._mode == Anomaly.MODE_LIKELIHOOD:
            if inputValue is None:
                raise ValueError(
                    "Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' "
                    "requires 'inputValue' as parameter to compute() method. ")

            probability = self._likelihood.anomalyProbability(
                inputValue, anomalyScore, timestamp)
            # low likelihood -> hi anomaly
            score = 1 - probability
        elif self._mode == Anomaly.MODE_WEIGHTED:
            probability = self._likelihood.anomalyProbability(
                inputValue, anomalyScore, timestamp)
            score = anomalyScore * (1 - probability)

        # Last, do moving-average if windowSize was specified.
        if self._movingAverage is not None:
            score = self._movingAverage.next(score)

        return score