Example 1
def compute_scores(y_test, y_pred, normalize=False):
    # Errors
    errors = np.array((y_test - y_pred)**2)
    if normalize:
        errors = errors / float(errors.max() - errors.min())

    # Log likelihood.
    log_likelihoods = []
    anomaly_likelihood = AnomalyLikelihood()
    for i in range(len(y_test)):
        likelihood = anomaly_likelihood.anomalyProbability(y_test[i],
                                                           errors[i],
                                                           timestamp=None)
        log_likelihood = anomaly_likelihood.computeLogLikelihood(likelihood)
        log_likelihoods.append(log_likelihood)

    # Anomaly thresholds:
    # - HIGH: log_likelihood >= 0.5
    # - MEDIUM: 0.5 > log_likelihood >= 0.4
    N = len(log_likelihoods)
    anomalies = {'high': np.zeros(N), 'medium': np.zeros(N)}
    x = np.array(log_likelihoods)
    high_idx = x >= 0.5
    anomalies['high'][high_idx] = 1
    # medium_idx = np.logical_and(x >= 0.4, x < 0.5)
    # anomalies['medium'][medium_idx] = 1

    return errors, log_likelihoods, anomalies
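
A minimal driver for the function above, sketched under two assumptions: NuPIC is installed (the import path below is NuPIC's standard one) and compute_scores is in scope. The input arrays are made-up illustration data; with this few samples the likelihood estimator is still inside its learning period, so real runs need hundreds of records before the thresholds become meaningful.

import numpy as np
from nupic.algorithms.anomaly_likelihood import AnomalyLikelihood

# Made-up ground truth and predictions; the fourth point is an obvious outlier.
y_test = np.array([10.0, 11.0, 10.5, 42.0, 10.2])
y_pred = np.array([10.1, 10.9, 10.6, 10.4, 10.3])

errors, log_likelihoods, anomalies = compute_scores(y_test, y_pred, normalize=True)
print(anomalies['high'])  # 1.0 wherever the log likelihood reached 0.5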
Example 2
class buildmodel:
    def __init__(self):
        #self.model_params = getScalarMetricWithTimeOfDayAnomalyParams(metricData=[0],tmImplementation="cpp")
        with open("model_params.json") as fp:
            self.model_params = json.load(fp)
        print self.model_params
        self.newmodel = ModelFactory.create(self.model_params)
        self.newmodel.enableLearning()
        self.newmodel.enableInference({"predictedField": "value"})
        self.DATE_FORMAT = "%d/%m/%Y %H:%M"
        self.anomalylikelihood = AnomalyLikelihood()

    def processdata(self, data):
        timestamp = datetime.datetime.strptime(data[0], self.DATE_FORMAT)
        ce = float(data[1])
        result = self.newmodel.run({"dttm": timestamp, "value": ce})
        #print result
        anomalyScore = result.inferences["anomalyScore"]
        anomaly = self.anomalylikelihood.anomalyProbability(
            ce, anomalyScore, timestamp)
        logLikelihood = self.anomalylikelihood.computeLogLikelihood(anomaly)
        logLikelihood = logLikelihood * 100
        print logLikelihood
        '''
        if anomaly > 0.999:
            print "Detected high level anomaly at " + str(timestamp)
        elif anomaly > 0.958:
            print "Detected medium level anomaly at " + str(timestamp)
        '''
        if logLikelihood > 20:
            print "Detected high level anomaly at " + str(timestamp)
        elif logLikelihood > 15:
            print "Detected medium level anomaly at " + str(timestamp)
Example 3
    def __init__(self,
                 slidingWindowSize=None,
                 mode=MODE_PURE,
                 binaryAnomalyThreshold=None):
        self._mode = mode
        if slidingWindowSize is not None:
            self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
        else:
            self._movingAverage = None

        if (self._mode == Anomaly.MODE_LIKELIHOOD
                or self._mode == Anomaly.MODE_WEIGHTED):
            self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
        else:
            self._likelihood = None

        if not self._mode in self._supportedModes:
            raise ValueError("Invalid anomaly mode; only supported modes are: "
                             "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                             "Anomaly.MODE_WEIGHTED; you used: %r" %
                             self._mode)

        self._binaryThreshold = binaryAnomalyThreshold
        if binaryAnomalyThreshold is not None and (
                not isinstance(binaryAnomalyThreshold, float)
                or binaryAnomalyThreshold >= 1.0
                or binaryAnomalyThreshold <= 0.0):
            raise ValueError(
                "Anomaly: binaryAnomalyThreshold must be from (0,1) "
                "or None if disabled.")
Example 4
  def __init__(self, slidingWindowSize=None, mode=MODE_PURE, binaryAnomalyThreshold=None):
    """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; int >= 0
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, how much anomal the value is;
              float 0..1 where 1=totally unexpected
          - "likelihood" - uses the anomaly_likelihood code;
              models probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    @param binaryAnomalyThreshold (optional) - if set, the anomaly score
         will be discretized to 1/0 (1 if >= binaryAnomalyThreshold);
         must be a float in the open interval (0, 1).
         The transformation is applied after the moving average is computed and updated.
    """
    self._mode = mode
    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    if self._mode == Anomaly.MODE_LIKELIHOOD or self._mode == Anomaly.MODE_WEIGHTED:
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly
    if not self._mode in Anomaly._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)
    self._binaryThreshold = binaryAnomalyThreshold
    if binaryAnomalyThreshold is not None and ( 
          not isinstance(binaryAnomalyThreshold, float) or
          binaryAnomalyThreshold >= 1.0  or 
          binaryAnomalyThreshold <= 0.0 ):
      raise ValueError("Anomaly: binaryAnomalyThreshold must be from (0,1) "
                       "or None if disabled.")
Example 5
  def __init__(self, slidingWindowSize = None, mode=MODE_PURE):
    """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; int >= 0
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, how much anomal the value is;
              float 0..1 where 1=totally unexpected
          - "likelihood" - uses the anomaly_likelihood code;
              models probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    """
    self._mode = mode
    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    if self._mode == Anomaly.MODE_LIKELIHOOD or self._mode == Anomaly.MODE_WEIGHTED:
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly
    if not self._mode in Anomaly._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)
Example 6
def runAvogadroAnomaly(metric, options):
    """
  Create a new HTM Model, fetch the data from the local DB, process it in NuPIC,
  and save the results to a new CSV output file.

  :param metric: AvogadroAgent metric class
  :param options: CLI Options
  """
    model = createModel(metric)
    model.enableInference({"predictedField": metric.name})

    fetched = metric.fetch(prefix=options.prefix, start=None)

    resultFile = open(
        os.path.join(options.prefix, metric.name + "-result.csv"), "wb")
    csvWriter = csv.writer(resultFile)
    csvWriter.writerow([
        "timestamp", metric.name, "raw_anomaly_score", "anomaly_likelihood",
        "color"
    ])

    headers = ("timestamp", metric.name)

    anomalyLikelihood = AnomalyLikelihood()

    for (ts, value) in fetched:
        try:
            value = float(value)
        except (ValueError, TypeError):
            continue

        if not math.isnan(value):
            modelInput = dict(zip(headers, (ts, value)))
            modelInput[metric.name] = float(value)
            modelInput["timestamp"] = datetime.datetime.fromtimestamp(
                float(modelInput["timestamp"]))
            result = model.run(modelInput)
            anomalyScore = result.inferences["anomalyScore"]

            likelihood = anomalyLikelihood.anomalyProbability(
                modelInput[metric.name], anomalyScore, modelInput["timestamp"])
            logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)

            if logLikelihood > .5:
                color = "red"
            elif logLikelihood > .4 and logLikelihood <= .5:
                color = "yellow"
            else:
                color = "green"

            csvWriter.writerow([
                modelInput["timestamp"],
                float(value), anomalyScore, logLikelihood, color
            ])

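    # for/else: runs once the loop exhausts fetched (there is no break above).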
    else:
        resultFile.flush()
Example 7
 def __init__(self):
     #self.model_params = getScalarMetricWithTimeOfDayAnomalyParams(metricData=[0],tmImplementation="cpp")
     with open("model_params.json") as fp:
         self.model_params = json.load(fp)
     print self.model_params
     self.newmodel = ModelFactory.create(self.model_params)
     self.newmodel.enableLearning()
     self.newmodel.enableInference({"predictedField": "value"})
     self.DATE_FORMAT = "%d/%m/%Y %H:%M"
     self.anomalylikelihood = AnomalyLikelihood()
Example 8
 def __init__(self,
              learningPeriod=288,
              estimationSamples=100,
              historicWindowSize=8640,
              reestimationPeriod=100):
     self.anomalyLikelihood = AnomalyLikelihood(
         learningPeriod=learningPeriod,
         estimationSamples=estimationSamples,
         historicWindowSize=historicWindowSize,
         reestimationPeriod=reestimationPeriod)
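Example 9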
def runAvogadroAnomaly(metric, options):
  """
  Create a new HTM Model, fetch the data from the local DB, process it in NuPIC,
  and save the results to a new CSV output file.

  :param metric: AvogadroAgent metric class
  :param options: CLI Options
  """
  model = createModel(metric)
  model.enableInference({"predictedField": metric.name})

  fetched = metric.fetch(prefix=options.prefix, start=None)

  resultFile = open(os.path.join(options.prefix, metric.name + "-result.csv"),
                    "wb")
  csvWriter = csv.writer(resultFile)
  csvWriter.writerow(["timestamp", metric.name, "raw_anomaly_score",
                      "anomaly_likelihood", "color"])

  headers = ("timestamp", metric.name)

  anomalyLikelihood = AnomalyLikelihood()

  for (ts, value) in fetched:
    try:
      value = float(value)
    except (ValueError, TypeError):
      continue

    if not math.isnan(value):
      modelInput = dict(zip(headers, (ts, value)))
      modelInput[metric.name] = float(value)
      modelInput["timestamp"] = datetime.datetime.fromtimestamp(
        float(modelInput["timestamp"]))
      result = model.run(modelInput)
      anomalyScore = result.inferences["anomalyScore"]

      likelihood = anomalyLikelihood.anomalyProbability(
        modelInput[metric.name], anomalyScore, modelInput["timestamp"])
      logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)

      if logLikelihood > .5:
        color = "red"
      elif logLikelihood > .4 and logLikelihood <= .5:
        color = "yellow"
      else:
        color = "green"

      csvWriter.writerow([modelInput["timestamp"], float(value),
                          anomalyScore, logLikelihood, color])

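  # for/else: runs once the loop exhausts fetched (there is no break above).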
  else:
    resultFile.flush()
Example 10
  def __init__(self, modelId, stats, replaceParams=()):
    """
    :param str modelId: model identifier
    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package.
    :param sequence replaceParams: Parameter replacement PATH REPLACEMENT pairs
    """
    self._modelId = modelId

    self._modelRecordEncoder = record_stream.ModelRecordEncoder(
      fields=self._INPUT_RECORD_SCHEMA)

    self._model = self._createModel(stats=stats, replaceParams=replaceParams)

    self._anomalyLikelihood = AnomalyLikelihood()
Example 11
  def __init__(self, slidingWindowSize = None, mode=MODE_PURE):
    """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; int >= 0
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, how much anomal the value is;
              float 0..1 where 1=totally unexpected
          - "likelihood" - uses the anomaly_likelihood code;
              models probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    """
    self._mode = mode
    self._useMovingAverage = slidingWindowSize > 0
    self._buf = None
    self._i = None

    # Using cumulative anomaly, sliding window
    if self._useMovingAverage:
      self._windowSize = slidingWindowSize
      # Sliding window buffer
      self._buf = numpy.array([0] * self._windowSize, dtype=numpy.float)
      self._i = 0 # index pointer to actual position
    elif slidingWindowSize is not None:
      raise TypeError(
          "Anomaly: if you define slidingWindowSize, it has to be an "
          "integer > 0;  slidingWindowSize=%r" % slidingWindowSize)

    if self._mode == Anomaly.MODE_LIKELIHOOD:
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly
    if not self._mode in Anomaly._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)
Example 12
  def __init__(self, modelId, stats):
    """
    :param str modelId: model identifier
    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package.
    """
    self._modelId = modelId

    # NOTE: ModelRecordEncoder is implemented in the pull request
    # https://github.com/numenta/nupic/pull/2432 that is not yet in master.
    self._modelRecordEncoder = record_stream.ModelRecordEncoder(
      fields=self._INPUT_RECORD_SCHEMA)

    self._model = self._createModel(stats=stats)

    self._anomalyLikelihood = AnomalyLikelihood()
Example 13
  def __init__(self, slidingWindowSize = None, mode=MODE_PURE):
    """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; int >= 0
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, how much anomal the value is;
              float 0..1 where 1=totally unexpected
          - "likelihood" - uses the anomaly_likelihood code;
              models probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    """
    self._mode = mode
    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    if self._mode == Anomaly.MODE_LIKELIHOOD:
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly
    if not self._mode in Anomaly._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)
Example 14
  def __init__(self,
               slidingWindowSize=None,
               mode=MODE_PURE,
               binaryAnomalyThreshold=None):
    self._mode = mode
    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    if (self._mode == Anomaly.MODE_LIKELIHOOD or
        self._mode == Anomaly.MODE_WEIGHTED):
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly
    else:
      self._likelihood = None

    if not self._mode in self._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

    self._binaryThreshold = binaryAnomalyThreshold
    if binaryAnomalyThreshold is not None and (
          not isinstance(binaryAnomalyThreshold, float) or
          binaryAnomalyThreshold >= 1.0  or
          binaryAnomalyThreshold <= 0.0 ):
      raise ValueError("Anomaly: binaryAnomalyThreshold must be from (0,1) "
                       "or None if disabled.")
Example 15
    def __init__(self, config):

        # Instantiate NuPIC model
        model_params = base_model_params.MODEL_PARAMS

        # Set resolution
        model_params['modelParams']['sensorParams']['encoders']['value']['resolution'] = config['resolution']

        # Override other Nupic parameters:
        model_params['modelParams'] = update_dict(model_params['modelParams'], config['nupic_model_params'])

        # Create model and enable inference on it
        self.model = ModelFactory.create(model_params)
        self.model.enableInference({'predictedField': 'value'})

        # The shifter is used to bring the predictions to the actual time frame
        self.shifter = InferenceShifter()

        # The anomaly likelihood object
        self.anomalyLikelihood = AnomalyLikelihood()

        # Set stream source
        self.stream = config['stream']

        # Setup class variables
        self.db = redis.Redis('localhost')
        self.seconds_per_request = config['seconds_per_request']
        self.webhook = config['webhook']
        self.anomaly_threshold = config['anomaly_threshold']
        self.likelihood_threshold = config['likelihood_threshold']
        self.domain = config['domain']
        self.alert = False  # Toggled when we get above threshold

        # Setup logging
        self.logger = logger or logging.getLogger(__name__)
        handler = logging.handlers.RotatingFileHandler(os.environ['LOG_DIR']+"/monitor_%s.log" % self.stream.name,
                                                       maxBytes=1024*1024,
                                                       backupCount=4,
                                                      )

        handler.setFormatter(logging.Formatter('[%(levelname)s/%(processName)s][%(asctime)s] %(name)s %(message)s'))
        handler.setLevel(logging.INFO)
        self.logger.addHandler(handler)
        self.logger.setLevel(logging.INFO)

        self.logger.info("=== Settings ===")
        self.logger.info("Webhook: %s", self.webhook)
        self.logger.info("Domain: %s", self.domain)
        self.logger.info("Seconds per request: %d", self.seconds_per_request)

        # Write metadata to Redis
        try:
            # Save in redis with key = 'results:monitor_id' and value = 'time, status, actual, prediction, anomaly'
            self.db.set('name:%s' % self.stream.id, self.stream.name)
            self.db.set('value_label:%s' % self.stream.id, self.stream.value_label)
            self.db.set('value_unit:%s' % self.stream.id, self.stream.value_unit)
        except Exception:
            self.logger.warn("Could not write results to redis.", exc_info=True)
Example 16
  def __init__(self, inputFileObj, inputSpec, aggSpec, modelSpec):
    """
    :param inputFileObj: A file-like object that contains input metric data
    :param dict inputSpec: Input data specification per input_opt_schema.json
    :param dict aggSpec: Optional aggregation specification per
      agg_opt_schema.json or None if no aggregation is requested
    :param dict modelSpec: Model specification per model_opt_schema.json
    """
    self._inputSpec = inputSpec

    self._aggSpec = aggSpec

    self._modelSpec = modelSpec

    if "modelId" in modelSpec:
      self._modelId = modelSpec["modelId"]
    else:
      self._modelId = "Unknown"


    inputRecordSchema = (
      fieldmeta.FieldMetaInfo(modelSpec["timestampFieldName"],
                              fieldmeta.FieldMetaType.datetime,
                              fieldmeta.FieldMetaSpecial.timestamp),
      fieldmeta.FieldMetaInfo(modelSpec["valueFieldName"],
                              fieldmeta.FieldMetaType.float,
                              fieldmeta.FieldMetaSpecial.none),
    )

    self._aggregator = aggregator.Aggregator(
      aggregationInfo=dict(
        fields=([(modelSpec["valueFieldName"], aggSpec["func"])]
                if aggSpec is not None else []),
        seconds=aggSpec["windowSize"] if aggSpec is not None else 0
      ),
      inputFields=inputRecordSchema)

    self._modelRecordEncoder = record_stream.ModelRecordEncoder(
      fields=inputRecordSchema)

    self._model = self._createModel(modelSpec=modelSpec)

    self._anomalyLikelihood = AnomalyLikelihood()

    self._csvReader = self._createCsvReader(inputFileObj)
Example 17
 def create_anomaly_likelihood_calc_from_disk(self, metric):
     anomaly_likelihood_calculators_path = self.__model_storage_manager.get_save_path(
         metric["metric_name"],
         path_element="anomaly_likelihood_calculator")
     with open(
             os.path.join(anomaly_likelihood_calculators_path,
                          self.__anomaly_likelihood_calculator_filename),
             "rb") as anomaly_likelihood_calc_file:
         return AnomalyLikelihood.readFromFile(anomaly_likelihood_calc_file)
Example 18
    def get_anomaly_likelihood_calc(self, metric,
                                    models_number_below_configured_limit):
        anomaly_likelihood_calc = None
        if not self.__loaded_models.anomaly_calc_exists(metric["metric_name"]):
            anomaly_likelihood_calculators_path = self.__model_storage_manager.get_save_path(
                metric["metric_name"],
                path_element="anomaly_likelihood_calculator")

            if os.path.isfile(
                    os.path.join(
                        anomaly_likelihood_calculators_path,
                        self._anomaly_likelihood_calculator_filename)):
                if models_number_below_configured_limit:
                    try:
                        self.__loaded_models.add_anomaly_calc_for_metric(
                            metric["metric_name"],
                            self.__anomaly_likelihood_calculator_factory.
                            create_anomaly_likelihood_calc_from_disk(metric))
                        self._logger.debug(
                            "get_anomaly_likelihood_calc",
                            "LOADED ANOMALY_LIKELIHOOD_CALC FROM FILE",
                            metric=str(metric["metric_name"]))
                    except Exception as ex:
                        self.__loaded_models.add_anomaly_calc_for_metric(
                            metric["metric_name"], AnomalyLikelihood())
                        self._logger.warn(
                            "get_anomaly_likelihood_calc",
                            "Failed to create an anomaly likelihood calc from disk",
                            metric=str(metric["metric_name"]),
                            exception_type=str(type(ex).__name__),
                            exception_message=str(ex.message))

            else:
                if models_number_below_configured_limit:
                    self.__loaded_models.add_anomaly_calc_for_metric(
                        metric["metric_name"], AnomalyLikelihood())
        if self.__loaded_models.anomaly_calc_exists(metric["metric_name"]):
            anomaly_likelihood_calc = self.__loaded_models.get_anomaly_calc(
                metric["metric_name"])
        return anomaly_likelihood_calc
Example 19
 def __init__(self,
              learningPeriod = 288,
              estimationSamples = 100,
              historicWindowSize = 8640,
              reestimationPeriod = 100):
   self.anomalyLikelihood = AnomalyLikelihood(
     learningPeriod = learningPeriod,
     estimationSamples = estimationSamples,
     historicWindowSize = historicWindowSize,
     reestimationPeriod = reestimationPeriod)
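Example 20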
    def testLikelihoodValues(self):
        """ test to see if the region keeps track of state correctly and produces
        the same likelihoods as the AnomalyLikelihood module """
        anomalyLikelihoodRegion = AnomalyLikelihoodRegion()
        anomalyLikelihood = AnomalyLikelihood()

        inputs = AnomalyLikelihoodRegion.getSpec()['inputs']
        outputs = AnomalyLikelihoodRegion.getSpec()['outputs']
        with open(_INPUT_DATA_FILE) as f:
            reader = csv.reader(f)
            reader.next()
            for record in reader:
                consumption = float(record[1])
                anomalyScore = float(record[2])
                likelihood1 = anomalyLikelihood.anomalyProbability(
                    consumption, anomalyScore)

                inputs['rawAnomalyScore'] = numpy.array([anomalyScore])
                inputs['metricValue'] = numpy.array([consumption])
                anomalyLikelihoodRegion.compute(inputs, outputs)
                likelihood2 = outputs['anomalyLikelihood'][0]

                self.assertEqual(likelihood1, likelihood2)
Example 21
  def testLikelihoodValues(self):
    """ test to see if the region keeps track of state correctly and produces
        the same likelihoods as the AnomalyLikelihood module """
    anomalyLikelihoodRegion = AnomalyLikelihoodRegion()
    anomalyLikelihood = AnomalyLikelihood()

    inputs = AnomalyLikelihoodRegion.getSpec()['inputs']
    outputs = AnomalyLikelihoodRegion.getSpec()['outputs']
    with open (_INPUT_DATA_FILE) as f:
      reader = csv.reader(f)
      reader.next()
      for record in reader:
        consumption = float(record[1])
        anomalyScore = float(record[2])
        likelihood1 = anomalyLikelihood.anomalyProbability(
          consumption, anomalyScore)

        inputs['rawAnomalyScore'] = numpy.array([anomalyScore])
        inputs['metricValue'] = numpy.array([consumption])
        anomalyLikelihoodRegion.compute(inputs, outputs)
        likelihood2 = outputs['anomalyLikelihood'][0]

        self.assertEqual(likelihood1, likelihood2)
Example 22
def definir_AnomDetect(N_DATA):

    """ 
    retorna as classes de anom_score, a classe de anom_likelihood, e os arrays que guardarão a anom_score e a anom_likelihood
    """

    anom_score_txt = np.zeros((N_DATA+1,))
    anom_logscore_txt = np.zeros((N_DATA+1,))

    anomaly_score = Anomaly(slidingWindowSize=25)

    anomaly_likelihood = AnomalyLikelihood(learningPeriod=600, historicWindowSize=313)

    return anomaly_score, anomaly_likelihood, anom_score_txt, anom_logscore_txt
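
A sketch of a driver loop for the returned objects; only the likelihood object is exercised here. The metric values and per-step raw anomaly scores would normally come from an HTM model as in the other examples; both are random stand-ins so the snippet runs on its own:

import numpy as np

values = np.random.normal(10, 1, 1000)       # stand-in metric stream
raw_scores = np.random.uniform(0, 1, 1000)   # stand-in for model anomaly scores

anomaly_score, anomaly_likelihood, score_txt, logscore_txt = definir_AnomDetect(len(values))

for i, (value, raw) in enumerate(zip(values, raw_scores)):
    likelihood = anomaly_likelihood.anomalyProbability(value, raw)
    score_txt[i] = raw
    logscore_txt[i] = anomaly_likelihood.computeLogLikelihood(likelihood)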
Example 23
    def __init__(self, slidingWindowSize=None, mode=MODE_PURE):
        """
        @param slidingWindowSize (optional) - how many elements are summed up;
            enables moving average on final anomaly score; int >= 0
        @param mode (optional) - (string) how to compute anomaly;
            possible values are:
              - "pure" - the default; how anomalous the value is;
                  float 0..1 where 1=totally unexpected
              - "likelihood" - uses the anomaly_likelihood code;
                  models probability of receiving this value and anomalyScore
              - "weighted" - "pure" anomaly weighted by "likelihood"
                  (anomaly * likelihood)
        """
        self._mode = mode
        self._useMovingAverage = slidingWindowSize > 0
        self._buf = None
        self._i = None

        # Using cumulative anomaly, sliding window
        if self._useMovingAverage:
            self._windowSize = slidingWindowSize
            # Sliding window buffer
            self._buf = numpy.array([0] * self._windowSize, dtype=numpy.float)
            self._i = 0  # index pointer to actual position
        elif slidingWindowSize is not None:
            raise TypeError(
                "Anomaly: if you define slidingWindowSize, it has to be an "
                "integer > 0;  slidingWindowSize=%r" % slidingWindowSize)

        if self._mode == Anomaly.MODE_LIKELIHOOD:
            self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
        if not self._mode in Anomaly._supportedModes:
            raise ValueError("Invalid anomaly mode; only supported modes are: "
                             "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                             "Anomaly.MODE_WEIGHTED; you used: %r" %
                             self._mode)
Example 24
  def __init__(self, modelId, stats, replaceParams):
    """
    :param str modelId: model identifier
    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package.
    :param sequence replaceParams: Parameter replacement PATH REPLACEMENT pairs
    """
    self._modelId = modelId

    self._modelRecordEncoder = record_stream.ModelRecordEncoder(
      fields=self._INPUT_RECORD_SCHEMA)

    self._model = self._createModel(stats=stats, replaceParams=replaceParams)

    self._anomalyLikelihood = AnomalyLikelihood()
Example 25
  def __init__(self, modelId, stats):
    """
    :param str modelId: model identifier
    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package.
    """
    self._modelId = modelId

    # NOTE: ModelRecordEncoder is implemented in the pull request
    # https://github.com/numenta/nupic/pull/2432 that is not yet in master.
    self._modelRecordEncoder = record_stream.ModelRecordEncoder(
      fields=self._INPUT_RECORD_SCHEMA)

    self._model = self._createModel(stats=stats)

    self._anomalyLikelihood = AnomalyLikelihood()
Example 26
    def _send_predictions(self, metric_id, metric_envelope):
        if metric_id not in self._models:
            self._models[metric_id] = ModelFactory.create(self.model_params)
            self._models[metric_id].enableInference(
                {'predictedField': 'value'})
            self._shifters[metric_id] = InferenceShifter()
            self._anomaly_likelihood[metric_id] = AnomalyLikelihood()

        model = self._models[metric_id]
        shifter = self._shifters[metric_id]

        modelInput = {
            # 'dttm': value['metric']['timestamp'],
            'dttm': datetime.datetime.now(),
            'value': metric_envelope['metric']['value']
        }

        result = shifter.shift(model.run(modelInput))
        inferences = result.inferences
        inference = inferences['multiStepBestPredictions'][5]

        metric = metric_envelope['metric']
        metric_name = metric['name']

        if inference is not None:
            metric['name'] = metric_name + '.nupic.predicted'
            metric['value'] = inference
            str_value = simplejson.dumps(metric_envelope)
            self._producer.send_messages(self._topic, str_value)

        if 'anomalyScore' in inferences:
            metric['name'] = metric_name + '.nupic.anomaly_score'
            metric['value'] = inferences['anomalyScore']
            str_value = simplejson.dumps(metric_envelope)
            self._producer.send_messages(self._topic, str_value)

            anomalyLikelihood = self._anomaly_likelihood[metric_id]
            likelihood = anomalyLikelihood.anomalyProbability(
                modelInput['value'], inferences['anomalyScore'],
                datetime.datetime.now())

            metric['name'] = metric_name + '.nupic.anomaly_likelihood'
            metric['value'] = likelihood
            str_value = simplejson.dumps(metric_envelope)
            self._producer.send_messages(self._topic, str_value)
Example 27
  def __init__(self, slidingWindowSize = None, anomalyMode=MODE_PURE, 
               shiftPredicted=False):
    """
    @param (optional) slidingWindowSize -- enables moving average on final 
                      anomaly score; how many elements are summed up, 
                      sliding window size; int >= 0
    @param (optional) anomalyMode -- (string) how to compute anomaly;
                      possible values are: 
                         -- "pure" -- the default, how much anomal the value is; 
                                      float 0..1 where 1=totally unexpected
                         -- "likelihood" -- uses the anomaly_likelihood code; 
                                      models probability of receiving this 
                                      value and anomalyScore; used in Grok
                         -- "weighted" -- "pure" anomaly weighted by "likelihood" (anomaly * likelihood)
    @param shiftPredicted (optional) -- boolean [default=False]; 
                                      normally active vs predicted are compared
                          if shiftPredicted=True: predicted(T-1) vs active(T) 
                             are compared (eg from TP, CLAModel)
    """

    # using cumulative anomaly, sliding window
    if slidingWindowSize > 0:
      self._windowSize = slidingWindowSize
      #sliding window buffer
      self._buf = numpy.array([0] * self._windowSize, dtype=numpy.float)
      self._i = 0 # index pointer to actual position
    elif slidingWindowSize is not None:
      raise Exception("Anomaly: if you define slidingWindowSize, \
      it has to be an integer > 0; \
      slidingWindowSize="+str(slidingWindowSize))

    # mode
    self._mode = anomalyMode
    if self._mode == Anomaly.MODE_LIKELIHOOD:
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly
    if not self._mode in Anomaly._supportedModes:
      raise ValueError('Invalid anomaly mode; only supported modes are: \
                       "Anomaly.MODE_PURE", "Anomaly.MODE_LIKELIHOOD", \
                       "Anomaly.MODE_WEIGHTED"; you used:' +self._mode)

    if shiftPredicted:
      self._prevPredictedColumns = numpy.array([])
Example 28
def main():
    # cluster similar inputs together in SDR space
    s = SpatialPooler()
    print(type(s))

    # powerful sequence memory in SDR space
    t = TemporalMemory()
    print(type(t))

    # computes rolling Gaussian based on raw anomaly scores and then their
    # likelihood
    a = AnomalyLikelihood()
    print(type(a))

    # temporally groups active cell sets from TM
    u = UnionTemporalPooler()
    print(type(u))

    # learning pairings of Union representations and labeled classes
    c = SDRClassifier()
    print(type(c))
Example 29
class _ModelRunner(object):
  """ Use OPF Model to process metric data samples from stdin and and emit
  anomaly likelihood results to stdout
  """

  # Input column meta info compatible with parameters generated by
  # getScalarMetricWithTimeOfDayAnomalyParams
  _INPUT_RECORD_SCHEMA = (
    fieldmeta.FieldMetaInfo("c0", fieldmeta.FieldMetaType.datetime,
                            fieldmeta.FieldMetaSpecial.timestamp),
    fieldmeta.FieldMetaInfo("c1", fieldmeta.FieldMetaType.float,
                            fieldmeta.FieldMetaSpecial.none),
  )


  def __init__(self, modelId, stats):
    """
    :param str modelId: model identifier
    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package.
    """
    self._modelId = modelId

    # NOTE: ModelRecordEncoder is implemented in the pull request
    # https://github.com/numenta/nupic/pull/2432 that is not yet in master.
    self._modelRecordEncoder = record_stream.ModelRecordEncoder(
      fields=self._INPUT_RECORD_SCHEMA)

    self._model = self._createModel(stats=stats)

    self._anomalyLikelihood = AnomalyLikelihood()


  @classmethod
  def _createModel(cls, stats):
    """Instantiate and configure an OPF model

    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package.
    :returns: OPF Model instance
    """
    # Generate swarm params
    swarmParams = getScalarMetricWithTimeOfDayAnomalyParams(
      metricData=[0],
      minVal=stats["min"],
      maxVal=stats["max"],
      minResolution=stats.get("minResolution"))

    model = ModelFactory.create(modelConfig=swarmParams["modelConfig"])
    model.enableLearning()
    model.enableInference(swarmParams["inferenceArgs"])

    return model


  @classmethod
  def _readInputMessages(cls):
    """Create a generator that waits for and yields input messages from
    stdin

    yields two-tuple (<timestamp>, <scalar-value>), where <timestamp> is the
    `datetime.datetime` timestamp of the metric data sample and <scalar-value>
    is the floating point value of the metric data sample.
    """
    while True:
      message = sys.stdin.readline()

      if message:
        timestamp, scalarValue = json.loads(message)
        yield (datetime.utcfromtimestamp(timestamp), scalarValue)
      else:
        # Front End closed the pipe (or died)
        break


  @classmethod
  def _emitOutputMessage(cls, rowIndex, anomalyProbability):
    """Emit output message to stdout

    :param int rowIndex: 0-based index of corresponding input sample
    :param float anomalyProbability: computed anomaly probability value
    """
    message = "%s\n" % (json.dumps([rowIndex, anomalyProbability]),)

    sys.stdout.write(message)
    sys.stdout.flush()


  def _computeAnomalyProbability(self, inputRow):
    """ Compute anomaly log likelihood score

    :param tuple inputRow: Two-tuple input metric data row
      (<datetime-timestamp>, <float-scalar>)

    :returns: Log-scaled anomaly probability
    :rtype: float
    """
    # Generate raw anomaly score
    inputRecord = self._modelRecordEncoder.encode(inputRow)
    rawAnomalyScore = self._model.run(inputRecord).inferences["anomalyScore"]

    # Generate anomaly likelihood score
    anomalyProbability = self._anomalyLikelihood.anomalyProbability(
      value=inputRow[1],
      anomalyScore=rawAnomalyScore,
      timestamp=inputRow[0])

    return self._anomalyLikelihood.computeLogLikelihood(anomalyProbability)


  def run(self):
    """ Run the model: ingest and process the input metric data and emit output
    messages containing anomaly scores
    """
    g_log.info("Processing model=%s", self._modelId)

    for rowIndex, inputRow in enumerate(self._readInputMessages()):
      anomalyProbability = self._computeAnomalyProbability(inputRow)

      self._emitOutputMessage(rowIndex=rowIndex,
                              anomalyProbability=anomalyProbability)
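
The runner above speaks a line-oriented JSON protocol on stdin/stdout. A sketch of starting it in-process; the stats values are placeholders (the real schema is stats_schema.json in the unicorn_backend package), and the module-level imports and g_log logger are assumed to be set up:

runner = _ModelRunner(modelId="model-1", stats={"min": 0.0, "max": 100.0})
runner.run()
# stdin:  one JSON list per line, e.g.  [1438649711, 35.7]  (unix timestamp, value)
# stdout: one JSON list per line, e.g.  [0, 0.31]           (row index, log likelihood)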
Example 30
class AnomalyLikelihoodRegion(PyRegion):
  """Region for computing the anomaly likelihoods."""


  @classmethod
  def getSpec(cls):
    return {
      "description": ("Region that computes anomaly likelihoods for \
                       temporal memory."),
      "singleNodeOnly": True,
      "inputs": {
        "rawAnomalyScore": {
          "description": "The anomaly score whose \
                          likelihood is to be computed",
          "dataType": "Real32",
          "count": 1,
          "required": True,
          "isDefaultInput": False
        },
        "metricValue": {
          "description": "The input metric value",
          "dataType": "Real32",
          "count": 1,
          "required": True,
          "isDefaultInput": False
        },
      },
      "outputs": {
        "anomalyLikelihood": {
          "description": "The resultant anomaly likelihood",
          "dataType": "Real32",
          "count": 1,
          "isDefaultOutput": True,
        },
      },
      "parameters": {
        "learningPeriod": {
          "description": "The number of iterations required for the\
                          algorithm to learn the basic patterns in the dataset\
                          and for the anomaly score to 'settle down'.",
          "dataType": "UInt32",
          "count": 1,
          "constraints": "",
          "defaultValue": 288,
          "accessMode": "ReadWrite"
        },
        "estimationSamples": {
          "description": "The number of reasonable anomaly scores\
                           required for the initial estimate of the\
                           Gaussian.",
          "dataType": "UInt32",
          "count": 1,
          "constraints": "",
          "defaultValue": 100,
          "accessMode": "ReadWrite"
        },
        "historicWindowSize": {
          "description": "Size of sliding window of historical data\
                          points to maintain for periodic reestimation\
                          of the Gaussian.",
          "dataType": "UInt32",
          "count": 1,
          "constraints": "",
          "defaultValue": 8640,
          "accessMode": "ReadWrite"
        },
        "reestimationPeriod": {
          "description": "How often we re-estimate the Gaussian\
                          distribution.",
          "dataType": "UInt32",
          "count": 1,
          "constraints": "",
          "defaultValue": 100,
          "accessMode": "ReadWrite"
        },
      },
      "commands": {
      },
    }


  def __init__(self,
               learningPeriod = 288,
               estimationSamples = 100,
               historicWindowSize = 8640,
               reestimationPeriod = 100):
    self.anomalyLikelihood = AnomalyLikelihood(
      learningPeriod = learningPeriod,
      estimationSamples = estimationSamples,
      historicWindowSize = historicWindowSize,
      reestimationPeriod = reestimationPeriod)

  def __eq__(self, other):
    return self.anomalyLikelihood == other.anomalyLikelihood


  def __ne__(self, other):
    return not self == other


  @classmethod
  def read(cls, proto):
    anomalyLikelihoodRegion = object.__new__(cls)
    anomalyLikelihoodRegion.anomalyLikelihood = AnomalyLikelihood.read(proto)

    return anomalyLikelihoodRegion


  def write(self, proto):
    self.anomalyLikelihood.write(proto)


  def initialize(self):
    pass


  def compute(self, inputs, outputs):
    anomalyScore = inputs["rawAnomalyScore"][0]
    value = inputs["metricValue"][0]
    anomalyProbability = self.anomalyLikelihood.anomalyProbability(
      value, anomalyScore)
    outputs["anomalyLikelihood"][0] = anomalyProbability
Example 31
class Monitor(object):
    """ A NuPIC model that saves results to Redis. """

    def __init__(self, config):

        # Instantiate NuPIC model
        model_params = base_model_params.MODEL_PARAMS

        # Set resolution
        model_params['modelParams']['sensorParams']['encoders']['value']['resolution'] = config['resolution']

        # Override other Nupic parameters:
        model_params['modelParams'] = update_dict(model_params['modelParams'], config['nupic_model_params'])

        # Create model and enable inference on it
        self.model = ModelFactory.create(model_params)
        self.model.enableInference({'predictedField': 'value'})

        # The shifter is used to bring the predictions to the actual time frame
        self.shifter = InferenceShifter()

        # The anomaly likelihood object
        self.anomalyLikelihood = AnomalyLikelihood()

        # Set stream source
        self.stream = config['stream']

        # Setup class variables
        self.db = redis.Redis('localhost')
        self.seconds_per_request = config['seconds_per_request']
        self.webhook = config['webhook']
        self.anomaly_threshold = config['anomaly_threshold']
        self.likelihood_threshold = config['likelihood_threshold']
        self.domain = config['domain']
        self.alert = False  # Toggled when we get above threshold

        # Setup logging
        self.logger = logger or logging.getLogger(__name__)
        handler = logging.handlers.RotatingFileHandler(os.environ['LOG_DIR']+"/monitor_%s.log" % self.stream.name,
                                                       maxBytes=1024*1024,
                                                       backupCount=4,
                                                      )

        handler.setFormatter(logging.Formatter('[%(levelname)s/%(processName)s][%(asctime)s] %(name)s %(message)s'))
        handler.setLevel(logging.INFO)
        self.logger.addHandler(handler)
        self.logger.setLevel(logging.INFO)

        self.logger.info("=== Settings ===")
        self.logger.info("Webhook: %s", self.webhook)
        self.logger.info("Domain: %s", self.domain)
        self.logger.info("Seconds per request: %d", self.seconds_per_request)

        # Write metadata to Redis
        try:
            # Save in redis with key = 'results:monitor_id' and value = 'time, status, actual, prediction, anomaly'
            self.db.set('name:%s' % self.stream.id, self.stream.name)
            self.db.set('value_label:%s' % self.stream.id, self.stream.value_label)
            self.db.set('value_unit:%s' % self.stream.id, self.stream.value_unit)
        except Exception:
            self.logger.warn("Could not write results to redis.", exc_info=True)

    def train(self):
        data = self.stream.historic_data()

        for model_input in data:
            self.update(model_input, False) # Don't post anomalies in training

    def loop(self):
        while True:
            data = self.stream.new_data()

            for model_input in data:
                self.update(model_input, True) # Post anomalies when online

            sleep(self.seconds_per_request)

    def update(self, model_input, is_to_post):
        # Pass the input to the model
        result = self.model.run(model_input)

        # Shift results
        result = self.shifter.shift(result)

        # Save multi step predictions
        inference = result.inferences['multiStepPredictions']

        # Take the anomaly_score
        anomaly_score = result.inferences['anomalyScore']

        # Compute the Anomaly Likelihood
        likelihood = self.anomalyLikelihood.anomalyProbability(model_input['value'],
                                                               anomaly_score,
                                                               model_input['time'])

        # Get the predicted value for reporting
        predicted = result.inferences['multiStepBestPredictions'][1]

        # Get timestamp from datetime
        timestamp = calendar.timegm(model_input['time'].timetuple())

        self.logger.info("Processing: %s", strftime("%Y-%m-%d %H:%M:%S", model_input['time'].timetuple()))

        # Save results to Redis
        if inference[1]:
            try:
                # Save in redis with key = 'results:monitor_id' and value = 'time, raw_value, actual, prediction, anomaly'
                # * actual: the value processed by the NuPIC model, which can be
                #           an average of raw_values
                # * prediction: prediction based on 'actual' values.
                self.db.rpush('results:%s' % self.stream.id,
                              '%s,%.5f,%.5f,%.5f,%.5f,%.5f' % (timestamp,
                                                          model_input['raw_value'],
                                                          result.rawInput['value'],
                                                          predicted,
                                                          anomaly_score,
                                                          likelihood))
                max_items = 10000
                ln = self.db.llen('results:%s' % self.stream.id)
                if ln > max_items:
                    self.db.ltrim('results:%s' % self.stream.id, ln - max_items, ln)
            except Exception:
                self.logger.warn("Could not write results to redis.", exc_info=True)

        # See if above threshold (in which case anomalous is True)
        anomalous = False
        if self.anomaly_threshold is not None:
            if anomaly_score >= self.anomaly_threshold:
                anomalous = True
        if self.likelihood_threshold is not None:
            if likelihood >= self.likelihood_threshold:
                anomalous = True

        # Post if webhook is not None
        if is_to_post and self.webhook is not None:
            # Check if it was in alert state in previous time step
            was_alerted = self.alert
            # Update alert state
            self.alert = anomalous

            # Send notification if webhook is set and if:
            # was not alerted before and is alerted now (entered anomalous state)
            # or
            # was alerted before and is not alerted now (left anomalous state)
            if not was_alerted and self.alert:
                report = {'anomaly_score': anomaly_score,
                          'likelihood': likelihood,
                          'model_input': {'time': model_input['time'].isoformat(),
                                          'value': model_input['raw_value']}}
                self._send_post(report)

        # Return anomalous state
        return {"likelihood" : likelihood,  "anomalous" : anomalous, "anomalyScore" : anomaly_score, "predicted" : predicted}

    def delete(self):
        """ Remove this monitor from redis """

        self.db.delete("results:%s" % self.stream.id)
        self.db.delete('name:%s' % self.stream.id)
        self.db.delete('value_label:%s' % self.stream.id)
        self.db.delete('value_unit:%s' % self.stream.id)

    def _send_post(self, report):
        """ Send HTTP POST notification. """

        if "hooks.slack.com" not in self.webhook:
            payload = {'sent_at': datetime.utcnow().isoformat(),
                       'report': report,
                       'monitor': self.stream.name,
                       'source': type(self.stream).__name__,
                       'metric': '%s (%s)' % (self.stream.value_label, self.stream.value_unit),
                       'chart': 'http://%s?id=%s' % (self.domain, self.stream.id)}
        else:
            payload = {'username': '******',
                       'icon_url': 'https://rawgithub.com/cloudwalkio/omg-monitor/slack-integration/docs/images/post_icon.png',
                       'text':  'Anomalous state in *%s* from _%s_:' % (self.stream.name, type(self.stream).__name__),
                       'attachments': [{'color': 'warning',
                                        'fields': [{'title': 'Chart',
                                                    'value':  'http://%s?id=%s' % (self.domain, self.stream.id),
                                                    'short': False},
                                                   {'title': 'Metric',
                                                    'value': self.stream.value_label,
                                                    'short': True},
                                                   {'title': 'Value',
                                                    'value': str(report['model_input']['value']) + ' ' + self.stream.value_unit,
                                                    'short': True}]}]}

        headers = {'Content-Type': 'application/json'}
        try:
            response = requests.post(self.webhook, data=json.dumps(payload), headers=headers)
        except Exception:
            self.logger.warn('Failed to post anomaly.', exc_info=True)
            return

        self.logger.info('Anomaly posted with status code %d: %s', response.status_code, response.text)
        return
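
A sketch of wiring up the Monitor above. Every config value is a placeholder, and some_stream stands for a hypothetical stream object exposing the id/name/value_label/value_unit attributes and the historic_data()/new_data() methods the class relies on:

config = {
    'resolution': 1.0,
    'nupic_model_params': {},
    'stream': some_stream,
    'seconds_per_request': 60,
    'webhook': None,
    'anomaly_threshold': None,
    'likelihood_threshold': 0.9,
    'domain': 'localhost',
}
monitor = Monitor(config)
monitor.train()  # replay historic data without posting alerts
monitor.loop()   # poll for new data and post anomalies when online

Example 32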
parser = argparse.ArgumentParser(description='Add to existing name')
parser.add_argument(
    '--algo',
    help='add to existing name especially if I am testing some new feature.')
args = parser.parse_args()
algo = args.algo


def get_all_files_path(root):
    files = [
        val for sublist in [[os.path.join(i[0], j) for j in i[2]]
                            for i in os.walk(root)] for val in sublist
    ]
    return files


files = get_all_files_path('results/' + algo)
for f in files:
    if (not ('_score' in f)):
        print(f)
        df = pd.read_csv(f)
        a = []
        al = AnomalyLikelihood()
        for i in range(len(df)):
            a.append(
                al.anomalyProbability(df.value.values[i],
                                      df.anomaly_score.values[i],
                                      df.timestamp.values[i]))
        df['anomaly_score'] = a
        df.to_csv(f, index=False)
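
Invoked as, e.g., python rescore.py --algo myalgo (the script name is hypothetical), this walks results/<algo>, skips files whose names contain '_score', and rewrites each remaining CSV in place, replacing its anomaly_score column with the computed likelihoods.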
Example 33
class Anomaly(object):
  """Basic class that computes anomaly scores.

     Anomaly is used to detect strange patterns/behaviors (outliers)
     by a trained CLA model.
  """



  # anomaly modes supported
  MODE_PURE = "pure"
  MODE_LIKELIHOOD = "likelihood"
  MODE_WEIGHTED = "weighted"
  _supportedModes = [MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED]


  def __init__(self, slidingWindowSize = None, anomalyMode=MODE_PURE, 
               shiftPredicted=False):
    """
    @param (optional) slidingWindowSize -- enables moving average on final 
                      anomaly score; how many elements are summed up, 
                      sliding window size; int >= 0
    @param (optional) anomalyMode -- (string) how to compute anomaly;
                      possible values are: 
                         -- "pure" -- the default, how much anomal the value is; 
                                      float 0..1 where 1=totally unexpected
                         -- "likelihood" -- uses the anomaly_likelihood code; 
                                      models probability of receiving this 
                                      value and anomalyScore; used in Grok
                         -- "weighted" -- "pure" anomaly weighted by "likelihood" (anomaly * likelihood)
    @param shiftPredicted (optional) -- boolean [default=False]; 
                                      normally active vs predicted are compared
                          if shiftPredicted=True: predicted(T-1) vs active(T) 
                             are compared (eg from TP, CLAModel)
    """

    # using cumulative anomaly, sliding window
    if slidingWindowSize > 0:
      self._windowSize = slidingWindowSize
      #sliding window buffer
      self._buf = numpy.array([0] * self._windowSize, dtype=numpy.float)
      self._i = 0 # index pointer to actual position
    elif slidingWindowSize is not None:
      raise Exception("Anomaly: if you define slidingWindowSize, \
      it has to be an integer > 0; \
      slidingWindowSize="+str(slidingWindowSize))

    # mode
    self._mode = anomalyMode
    if self._mode == Anomaly.MODE_LIKELIHOOD:
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly
    if not self._mode in Anomaly._supportedModes:
      raise ValueError('Invalid anomaly mode; only supported modes are: \
                       "Anomaly.MODE_PURE", "Anomaly.MODE_LIKELIHOOD", \
                       "Anomaly.MODE_WEIGHTED"; you used:' +self._mode)

    if shiftPredicted:
      self._prevPredictedColumns = numpy.array([])


  def computeAnomalyScore(self, activeColumns, predictedColumns, value=None, 
                          timestamp=None):
    """Compute the anomaly score as the percent of active columns not predicted
  
    @param activeColumns: array of active column indices
    @param predictedColumns: array of columns indices predicted in this step 
                             (used for anomaly in step T+1)
    @param value: (optional) input value, that is what activeColumns represent
                              (used in anomaly-likelihood)
    @param timestamp: (optional) date timestamp when the sample occurred
                              (used in anomaly-likelihood)
    @return the computed anomaly score; float 0..1
    """

    if hasattr(self, "_prevPredictedColumns"): # shiftPredicted==True
      prevPredictedColumns = self._prevPredictedColumns
      self._prevPredictedColumns = predictedColumns # to be used in step T+1
    else:
      prevPredictedColumns = predictedColumns

    # 1. here is the 'classic' anomaly score
    anomalyScore = computeRawAnomalyScore(activeColumns, prevPredictedColumns)

    # compute final anomaly based on selected mode
    if self._mode == Anomaly.MODE_PURE:
      score = anomalyScore
    elif self._mode == Anomaly.MODE_LIKELIHOOD:
      probability = self._likelihood.anomalyProbability(value, anomalyScore, timestamp)
      score = probability
    elif self._mode == Anomaly.MODE_WEIGHTED:
      probability = self._likelihood.anomalyProbability(value, anomalyScore, timestamp)
      score = anomalyScore * probability

    # last, do moving-average if windowSize is set
    if hasattr(self, "_windowSize"):
      score = self._movingAverage(score)

    return score


  def _movingAverage(self, newElement=None):
    """Moving average.

    @param newElement (optional) add a new element before computing the avg
    @return moving average of the last self._windowSize elements
    """
    if newElement is not None:
      self._buf[self._i] = newElement
      self._i = (self._i + 1) % self._windowSize
    return self._buf.sum() / float(self._windowSize)  # normalize to 0..1
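
A short usage sketch for this variant (the column indices are invented; assumes the constructor documented above, i.e. Anomaly(slidingWindowSize, anomalyMode, shiftPredicted)):

import numpy

active = numpy.array([2, 5, 7])      # hypothetical active columns at time T
predicted = numpy.array([2, 5, 9])   # hypothetical predicted columns for time T

detector = Anomaly(slidingWindowSize=5, anomalyMode=Anomaly.MODE_PURE)
score = detector.computeAnomalyScore(active, predicted)
print(score)  # raw score is 1/3 (one unpredicted active column), averaged over the 5-wide window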
Example No. 34
def runAnomaly(options):
    """
    Create and run a CLA Model on the given dataset (based on the hotgym anomaly
    client in NuPIC).
    """
    global g_ps_count_dict_unsorted
    global g_abnomal_data_dict_unsorted
    # Load the model params JSON
    with open("model_params.json") as fp:
        modelParams = json.load(fp)

    if options.oswpsDir != "":
        # Get PS dictionary
        osw = OSWData(options.oswpsDir, PS)
        osw.traverse_dir()
        g_ps_count_dict_unsorted = osw.get_ps_dict()
        options.max = ps_max_value = max(g_ps_count_dict_unsorted.values())
        options.min = ps_min_value = min(g_ps_count_dict_unsorted.values())
        print("Min value:" + str(ps_min_value) + ', ' + "Max value:" +
              str(ps_max_value))

    # Update the resolution value for the encoder
    sensorParams = modelParams['modelParams']['sensorParams']
    numBuckets = modelParams['modelParams']['sensorParams']['encoders'][
        'value'].pop('numBuckets')
    resolution = options.resolution
    if resolution is None:
        resolution = max(0.001, (options.max - options.min) / numBuckets)
    print("Using resolution value: {0}".format(resolution))
    sensorParams['encoders']['value']['resolution'] = resolution

    model = ModelFactory.create(modelParams)
    model.enableInference({'predictedField': 'value'})
    if options.inputFile != "":
        with open(options.inputFile) as fin:
            # Open file and setup headers
            # Here we write the log likelihood value as the 'anomaly score'
            # The actual CLA outputs are labeled 'raw anomaly score'
            reader = csv.reader(fin)
            csvWriter = csv.writer(open(options.outputFile, "wb"))
            csvWriter.writerow([
                "timestamp", "value", "_raw_score", "likelihood_score",
                "log_likelihood_score"
            ])
            headers = reader.next()

            # The anomaly likelihood object
            anomalyLikelihood = AnomalyLikelihood()

            # Iterate through each record in the CSV file
            print "Starting processing at", datetime.datetime.now()
            for i, record in enumerate(reader, start=1):

                # Convert input data to a dict so we can pass it into the model
                inputData = dict(zip(headers, record))
                inputData["value"] = float(inputData["value"])
                inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])
                #inputData["dttm"] = datetime.datetime.now()

                # Send it to the CLA and get back the raw anomaly score
                result = model.run(inputData)
                anomalyScore = result.inferences['anomalyScore']

                # Compute the Anomaly Likelihood
                likelihood = anomalyLikelihood.anomalyProbability(
                    inputData["value"], anomalyScore, inputData["dttm"])
                logLikelihood = anomalyLikelihood.computeLogLikelihood(
                    likelihood)
                if likelihood > 0.9999:
                    print "Anomaly detected:", inputData['dttm'], inputData[
                        'value'], likelihood

                # Write results to the output CSV file
                csvWriter.writerow([
                    inputData["dttm"], inputData["value"], anomalyScore,
                    likelihood, logLikelihood
                ])

                # Progress report
                if (i % 1000) == 0:
                    print i, "records processed"
    elif options.oswpsDir != "":
        if options.use_rtm:
            rtm_sensitivity = 2
            rtm = LinearRegressionTemoporalMemory(window=10,
                                                  interval=10,
                                                  min_=options.min,
                                                  max_=options.max,
                                                  boost=rtm_sensitivity,
                                                  leak_detection=0,
                                                  critical_region="right_tail",
                                                  debug=0)
            g_abnomal_data_dict_unsorted = rtm.analyze(
                g_ps_count_dict_unsorted)
        else:
            csvWriter = csv.writer(open(options.outputFile, "wb"))
            csvWriter.writerow([
                "timestamp", "value", "_raw_score", "likelihood_score",
                "log_likelihood_score"
            ])
            ps_od = collections.OrderedDict(
                sorted(g_ps_count_dict_unsorted.items()))

            # The anomaly likelihood object
            anomalyLikelihood = AnomalyLikelihood()

            # Iterate through each record in the CSV file
            print "Starting processing at", datetime.datetime.now()
            for i, timestamp in enumerate(ps_od):
                ps_count = ps_od[timestamp]

                inputData = {}
                inputData["value"] = float(ps_count)
                inputData["dttm"] = dateutil.parser.parse(timestamp)
                #inputData["dttm"] = datetime.datetime.now()

                # Send it to the CLA and get back the raw anomaly score
                result = model.run(inputData)
                anomalyScore = result.inferences['anomalyScore']

                # Compute the Anomaly Likelihood
                likelihood = anomalyLikelihood.anomalyProbability(
                    inputData["value"], anomalyScore, inputData["dttm"])
                logLikelihood = anomalyLikelihood.computeLogLikelihood(
                    likelihood)
                if likelihood > 0.9999:
                    print "Anomaly detected:", inputData['dttm'], inputData[
                        'value'], likelihood
                    g_abnomal_data_dict_unsorted[timestamp] = ps_count

                # Write results to the output CSV file
                csvWriter.writerow([
                    inputData["dttm"], inputData["value"], anomalyScore,
                    likelihood, logLikelihood
                ])

                # Progress report
                if (i % 1000) == 0:
                    print i, "records processed"

            print "Completed processing", i, "records at", datetime.datetime.now(
            )
    print "Anomaly scores for", options.inputFile,
    print "have been written to", options.outputFile
Example No. 35
class _ModelRunner(object):
  """ Use OPF Model to process metric data samples from stdin and and emit
  anomaly likelihood results to stdout
  """


  def __init__(self, inputFileObj, inputSpec, aggSpec, modelSpec):
    """
    :param inputFileObj: A file-like object that contains input metric data
    :param dict inputSpec: Input data specification per input_opt_schema.json
    :param dict aggSpec: Optional aggregation specification per
      agg_opt_schema.json or None if no aggregation is requested
    :param dict modelSpec: Model specification per model_opt_schema.json
    """
    self._inputSpec = inputSpec

    self._aggSpec = aggSpec

    self._modelSpec = modelSpec

    if "modelId" in modelSpec:
      self._modelId = modelSpec["modelId"]
    else:
      self._modelId = "Unknown"


    inputRecordSchema = (
      fieldmeta.FieldMetaInfo(modelSpec["timestampFieldName"],
                              fieldmeta.FieldMetaType.datetime,
                              fieldmeta.FieldMetaSpecial.timestamp),
      fieldmeta.FieldMetaInfo(modelSpec["valueFieldName"],
                              fieldmeta.FieldMetaType.float,
                              fieldmeta.FieldMetaSpecial.none),
    )

    self._aggregator = aggregator.Aggregator(
      aggregationInfo=dict(
        fields=([(modelSpec["valueFieldName"], aggSpec["func"])]
                if aggSpec is not None else []),
        seconds=aggSpec["windowSize"] if aggSpec is not None else 0
      ),
      inputFields=inputRecordSchema)

    self._modelRecordEncoder = record_stream.ModelRecordEncoder(
      fields=inputRecordSchema)

    self._model = self._createModel(modelSpec=modelSpec)

    self._anomalyLikelihood = AnomalyLikelihood()

    self._csvReader = self._createCsvReader(inputFileObj)


  @staticmethod
  def _createModel(modelSpec):
    """Instantiate and configure an OPF model

    :param dict modelSpec: Model specification per model_opt_schema.json

    :returns: OPF Model instance
    """

    model = ModelFactory.create(modelConfig=modelSpec["modelConfig"])
    model.enableLearning()
    model.enableInference(modelSpec["inferenceArgs"])

    return model


  @staticmethod
  def _createCsvReader(fileObj):
    # We'll be operating on csvs with arbitrarily long fields
    csv.field_size_limit(2**27)

    # Make sure readline() works on windows too
    os.linesep = "\n"

    return csv.reader(fileObj, dialect="excel")


  @classmethod
  def _emitOutputMessage(cls, dataRow, anomalyProbability):
    """Emit output message to stdout

    :param list dataRow: the two-tuple data row on which anomalyProbability was
      computed, whose first element is datetime timestamp and second element is
      the float scalar value
    :param float anomalyProbability: computed anomaly probability value
    """

    message = "%s\n" % (json.dumps([dataRow[0].isoformat(), dataRow[1], anomalyProbability]),)

    sys.stdout.write(message)
    sys.stdout.flush()


  def _computeAnomalyProbability(self, fields):
    """ Compute anomaly log likelihood score

    :param tuple fields: Two-tuple input metric data row
      (<datetime-timestamp>, <float-scalar>)

    :returns: Log-scaled anomaly probability
    :rtype: float
    """
    # Generate raw anomaly score
    inputRecord = self._modelRecordEncoder.encode(fields)
    rawAnomalyScore = self._model.run(inputRecord).inferences["anomalyScore"]

    # Generate anomaly likelihood score
    anomalyProbability = self._anomalyLikelihood.anomalyProbability(
      value=fields[1],
      anomalyScore=rawAnomalyScore,
      timestamp=fields[0])

    return self._anomalyLikelihood.computeLogLikelihood(anomalyProbability)


  def run(self):
    """ Run the model: ingest and process the input metric data and emit output
    messages containing anomaly scores
    """

    numRowsToSkip = self._inputSpec["rowOffset"]
    datetimeFormat = self._inputSpec["datetimeFormat"]
    inputRowTimestampIndex = self._inputSpec["timestampIndex"]
    inputRowValueIndex = self._inputSpec["valueIndex"]

    g_log.info("Processing model=%s", self._modelId)

    for inputRow in self._csvReader:
      g_log.debug("Got inputRow=%r", inputRow)

      if numRowsToSkip > 0:
        numRowsToSkip -= 1
        g_log.debug("Skipping header row %s; %s rows left to skip",
                    inputRow, numRowsToSkip)
        continue

      # Extract timestamp and value
      # NOTE: the order must match the `inputFields` that we passed to the
      # Aggregator constructor
      fields = [
        date_time_utils.parseDatetime(inputRow[inputRowTimestampIndex],
                                      datetimeFormat),
        float(inputRow[inputRowValueIndex])
      ]

      # Aggregate
      aggRow, _ = self._aggregator.next(fields, None)
      g_log.debug("Aggregator returned %s for %s", aggRow, fields)
      if aggRow is not None:
        self._emitOutputMessage(
          dataRow=aggRow,
          anomalyProbability=self._computeAnomalyProbability(aggRow))


    # Reap remaining data from aggregator
    aggRow, _ = self._aggregator.next(None, curInputBookmark=None)
    g_log.debug("Aggregator reaped %s in final call", aggRow)
    if aggRow is not None:
      self._emitOutputMessage(
        dataRow=aggRow,
        anomalyProbability=self._computeAnomalyProbability(aggRow))
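
For reference, a minimal sketch of consuming this runner's stdout (each line is the three-element JSON array built by _emitOutputMessage above; the 0.5 threshold is purely illustrative):

import json
import sys

# Each line: [iso-timestamp, aggregated-value, log-scaled anomaly probability]
for line in sys.stdin:
    timestamp, value, logLikelihood = json.loads(line)
    if logLikelihood > 0.5:  # illustrative threshold, not part of the runner
        print("anomaly at %s: value=%s score=%s" % (timestamp, value, logLikelihood))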
Example No. 36
# FIFO
events = reversed(events)

if PREDICT: import PREDICTmodel_params as model_params

else: import model_params as model_params


if LOAD: model = ModelFactory.loadFromCheckpoint(MODELSTATE)  
else: model = ModelFactory.create(model_params.MODEL_PARAMS)
if VISUALIZE: Patcher().patchCLAModel(model)
model.enableInference({"predictedField": "event"})
print "Model created!\n"

# Get the Model-Classes:
anomalyLikelihood = AnomalyLikelihood()
if PREDICT: 
  from nupic.data.inference_shifter import InferenceShifter
  shifter = InferenceShifter()

if WINDOWSIZE is not None: AnomalyScores = deque(numpy.ones(WINDOWSIZE), maxlen=WINDOWSIZE)
else: AnomalyScores = deque() # numpy.ones(len(events)), maxlen=len(events) ?
LikelihoodScores = deque()

r = redis.Redis(host=os.environ.get("REDIS_HOST", "127.0.0.1"),
                port=int(os.environ.get("REDIS_PORT", 6379)),
                db=int(os.environ.get("REDIS_DB", 0)))


# Feed data into the model:
print "Start Data-Feed...\n"
Example No. 37
def runAnomaly(options):
  """
  Create and run a CLA Model on the given dataset (based on the hotgym anomaly
  client in NuPIC).
  """
  # Load the model params JSON
  with open("model_params.json") as fp:
    modelParams = json.load(fp)

  # Update the resolution value for the encoder
  sensorParams = modelParams['modelParams']['sensorParams']
  numBuckets = modelParams['modelParams']['sensorParams']['encoders']['value'].pop('numBuckets')
  resolution = options.resolution
  if resolution is None:
    resolution = max(0.001,
                     (options.max - options.min) / numBuckets)
  print "Using resolution value: {0}".format(resolution)
  sensorParams['encoders']['value']['resolution'] = resolution

  model = ModelFactory.create(modelParams)
  model.enableInference({'predictedField': 'value'})
  with open(options.inputFile) as fin:
    
    # Open file and setup headers
    # Here we write the log likelihood value as the 'anomaly score'
    # The actual CLA outputs are labeled 'raw anomaly score'
    reader = csv.reader(fin)
    csvWriter = csv.writer(open(options.outputFile,"wb"))
    csvWriter.writerow(["timestamp", "value",
                        "_raw_score", "likelihood_score", "log_likelihood_score"])
    headers = reader.next()
    
    # The anomaly likelihood object
    anomalyLikelihood = AnomalyLikelihood()
    
    # Iterate through each record in the CSV file
    print "Starting processing at",datetime.datetime.now()
    for i, record in enumerate(reader, start=1):
      
      # Convert input data to a dict so we can pass it into the model
      inputData = dict(zip(headers, record))
      inputData["value"] = float(inputData["value"])
      inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])
      #inputData["dttm"] = datetime.datetime.now()
      
      # Send it to the CLA and get back the raw anomaly score
      result = model.run(inputData)
      anomalyScore = result.inferences['anomalyScore']
      
      # Compute the Anomaly Likelihood
      likelihood = anomalyLikelihood.anomalyProbability(
        inputData["value"], anomalyScore, inputData["dttm"])
      logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
      if likelihood > 0.9999:
        print "Anomaly detected:",inputData['dttm'],inputData['value'],likelihood

      # Write results to the output CSV file
      csvWriter.writerow([inputData["dttm"], inputData["value"],
                          anomalyScore, likelihood, logLikelihood])

      # Progress report
      if (i%1000) == 0: print i,"records processed"

  print "Completed processing",i,"records at",datetime.datetime.now()
  print "Anomaly scores for",options.inputFile,
  print "have been written to",options.outputFile
Example No. 38
def foreach_batch_function(df, epoch_id):
    # Transform and write batchDF
    global nb  # module-level record counter; without this, 'nb = nb + 1' raises UnboundLocalError
    row = df.collect()
    print "Size of Batch"
    print(len(row))
    if len(row) != 0:
        for x in range(len(row)):
            nb = nb + 1
            record = {}
            level = row[x]['level']
            #print(type(level))
            timestamp = row[x]['@timestamp']
            #print(type(timestamp))
            #print(timestamp)
            #timestamp = timestamp.encode("utf-8")
            level = level.encode("utf-8")

            if level == 'INFO' or level == 'info':
                level = 'info'
            elif level == 'ERROR' or level == 'error':
                level = 'error'
            else:
                level = 'warning'
            #print 'step 2'
            record = {"timestamp": timestamp, "level": level}
            print(record)
            result = model.run(record)
            anom = result.inferences['anomalyScore']
            #print(anom)
            record_anomalies.append(anom)
            #print "Lengths of record anomalies"
            #print(len(record_anomalies))
            mean_anomalies = np.mean(record_anomalies)
            std_anomalies = np.std(record_anomalies)
            if std_anomalies == 0:
                std_anomalies = 0.00001
            var_anomalies = np.var(record_anomalies)
            mean_anomalies_short_window = np.mean(
                record_anomalies[-int(history):])

            likelihood = 1 - (
                (norm.cdf(anom, mean_anomalies_short_window - mean_anomalies,
                          std_anomalies)) -
                (norm.cdf(0, mean_anomalies_short_window - mean_anomalies,
                          std_anomalies)))

            likelihood_test = 1 - (
                anom -
                (mean_anomalies_short_window - mean_anomalies)) / std_anomalies
            likelihood_test_test = 1 - qfunction(
                (mean_anomalies_short_window - mean_anomalies) / std_anomalies)
            print "Likelihood"
            print(likelihood_test_test)
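            # NOTE: constructing AnomalyLikelihood anew on every record resets
            # its internal history; it is normally created once, outside the loop.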
            anomalyLikelihood = AnomalyLikelihood()
            anomalyProbability = anomalyLikelihood.anomalyProbability(
                record['level'], anom, record['timestamp'])
            # matplotlib's FuncAnimation has no 'x'/'y' kwargs; extra arguments
            # for the animate callback must be passed through 'fargs'.
            ani = animation.FuncAnimation(fig,
                                          animate,
                                          interval=1000,
                                          fargs=(nb, likelihood_test_test))
            plt.show()
            if likelihood_test_test >= 0.85:
                print "Anomaly detected!"
                print "Probability od being abnormal", likelihood_test_test
                #ibefore = i
                #if ibefore - iafter == 1:
                #    region = region + 1
                #    if region == 20:
                #        print i-20
                #        print 'Anomaly detcted!'
                #        print 'Probability of being abnormal', likelihood_test_test
                #        print 'Probability of being abnormal (nupic)', anomalyProbability
                #        region_anomaly = region_anomaly + 1
                #else :
                #    region = 0

                #iafter = ibefore
    pass
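
The hand-rolled likelihood above standardizes the short-window mean shift and takes a Gaussian tail probability (the role of qfunction, an assumed helper). A self-contained sketch of the same calculation, with invented scores:

import numpy as np
from scipy.stats import norm

def tail_likelihood(scores, history=10):
    """Likelihood that the short-window mean anomaly score is unusually high."""
    mean_all = np.mean(scores)
    std_all = np.std(scores) or 0.00001   # same zero-std guard as above
    mean_short = np.mean(scores[-history:])
    z = (mean_short - mean_all) / std_all
    # 1 - Q(z): Q is the Gaussian upper-tail (survival) function, so this is
    # just the normal CDF of the standardized short-window shift.
    return 1.0 - norm.sf(z)

print(tail_likelihood([0.1, 0.1, 0.2, 0.1, 0.9, 0.95, 1.0], history=3))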
Example No. 39
# FIFO
events = reversed(events)

if PREDICT: import PREDICTmodel_params as model_params

else: import model_params as model_params

if LOAD: model = ModelFactory.loadFromCheckpoint(MODELSTATE)
else: model = ModelFactory.create(model_params.MODEL_PARAMS)
if VISUALIZE: Patcher().patchCLAModel(model)
model.enableInference({"predictedField": "event"})
print "Model created!\n"

# Get the Model-Classes:
anomalyLikelihood = AnomalyLikelihood()
if PREDICT:
    from nupic.data.inference_shifter import InferenceShifter
    shifter = InferenceShifter()

if WINDOWSIZE is not None:
    AnomalyScores = deque(numpy.ones(WINDOWSIZE), maxlen=WINDOWSIZE)
else:
    AnomalyScores = deque()  # numpy.ones(len(events)), maxlen=len(events) ?
LikelihoodScores = deque()

r = redis.Redis(host=os.environ.get("REDIS_HOST", "127.0.0.1"),
                port=int(os.environ.get("REDIS_PORT", 6379)),
                db=int(os.environ.get("REDIS_DB", 0)))

# Feed data into the model:
Example No. 40
class Anomaly(object):
  """Utility class for generating anomaly scores in different ways.

  :param slidingWindowSize: [optional] - how many elements are summed up;
      enables moving average on final anomaly score; int >= 0

  :param mode: (string) [optional] how to compute anomaly, one of:

      - :const:`nupic.algorithms.anomaly.Anomaly.MODE_PURE`
      - :const:`nupic.algorithms.anomaly.Anomaly.MODE_LIKELIHOOD`
      - :const:`nupic.algorithms.anomaly.Anomaly.MODE_WEIGHTED`

  :param binaryAnomalyThreshold: [optional] if set [0,1] anomaly score
       will be discretized to 1/0 (1 if >= binaryAnomalyThreshold)
       The transformation is applied after moving average is computed.

  """


  # anomaly modes supported
  MODE_PURE = "pure"
  """
  Default mode. The raw anomaly score as computed by
  :func:`~.anomaly_likelihood.computeRawAnomalyScore`
  """
  MODE_LIKELIHOOD = "likelihood"
  """
  Uses the :class:`~.anomaly_likelihood.AnomalyLikelihood` class, which models
  probability of receiving this value and anomalyScore
  """
  MODE_WEIGHTED = "weighted"
  """
  Multiplies the likelihood result with the raw anomaly score that was used to
  generate the likelihood (anomaly * likelihood)
  """

  _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)


  def __init__(self,
               slidingWindowSize=None,
               mode=MODE_PURE,
               binaryAnomalyThreshold=None):
    self._mode = mode
    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    if (self._mode == Anomaly.MODE_LIKELIHOOD or
        self._mode == Anomaly.MODE_WEIGHTED):
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly
    else:
      self._likelihood = None

    if self._mode not in self._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

    self._binaryThreshold = binaryAnomalyThreshold
    if binaryAnomalyThreshold is not None and (
          not isinstance(binaryAnomalyThreshold, float) or
          binaryAnomalyThreshold >= 1.0  or
          binaryAnomalyThreshold <= 0.0 ):
      raise ValueError("Anomaly: binaryAnomalyThreshold must be from (0,1) "
                       "or None if disabled.")


  def compute(self, activeColumns, predictedColumns,
              inputValue=None, timestamp=None):
    """Compute the anomaly score as the percent of active columns not predicted.

    :param activeColumns: array of active column indices
    :param predictedColumns: array of columns indices predicted in this step
                             (used for anomaly in step T+1)
    :param inputValue: (optional) value of current input to encoders
                                  (eg "cat" for category encoder)
                                  (used in anomaly-likelihood)
    :param timestamp: (optional) date timestamp when the sample occurred
                                 (used in anomaly-likelihood)
    :returns: the computed anomaly score; float 0..1
    """
    # Start by computing the raw anomaly score.
    anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

    # Compute final anomaly based on selected mode.
    if self._mode == Anomaly.MODE_PURE:
      score = anomalyScore
    elif self._mode == Anomaly.MODE_LIKELIHOOD:
      if inputValue is None:
        raise ValueError("Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' "
                 "requires 'inputValue' as parameter to compute() method. ")

      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      # low likelihood -> hi anomaly
      score = 1 - probability
    elif self._mode == Anomaly.MODE_WEIGHTED:
      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      score = anomalyScore * (1 - probability)

    # Last, do moving-average if windowSize was specified.
    if self._movingAverage is not None:
      score = self._movingAverage.next(score)

    # apply binary discretization if required
    if self._binaryThreshold is not None:
      if score >= self._binaryThreshold:
        score = 1.0
      else:
        score = 0.0

    return score


  def __str__(self):
    windowSize = 0
    if self._movingAverage is not None:
      windowSize = self._movingAverage.windowSize
    return "Anomaly:\tmode=%s\twindowSize=%r" % (self._mode, windowSize)


  def __eq__(self, other):
    return (isinstance(other, Anomaly) and
            other._mode == self._mode and
            other._binaryThreshold == self._binaryThreshold and
            other._movingAverage == self._movingAverage and
            other._likelihood == self._likelihood)


  def __setstate__(self, state):
    """deserialization"""
    self.__dict__.update(state)

    if not hasattr(self, '_mode'):
      self._mode = Anomaly.MODE_PURE
    if not hasattr(self, '_movingAverage'):
      self._movingAverage = None
    if not hasattr(self, '_binaryThreshold'):
      self._binaryThreshold = None
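
A brief usage sketch for this class (the column indices and metric value are invented; note that MODE_LIKELIHOOD requires inputValue, as enforced in compute() above):

import datetime
import numpy

detector = Anomaly(mode=Anomaly.MODE_LIKELIHOOD)

active = numpy.array([1, 4, 6])      # hypothetical active columns
predicted = numpy.array([1, 4, 8])   # hypothetical predicted columns

score = detector.compute(active, predicted,
                         inputValue=42.0,  # required in MODE_LIKELIHOOD
                         timestamp=datetime.datetime(2014, 1, 1, 0, 0))
print(score)  # stays near 0.5 until the likelihood model has accumulated history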
Example No. 41
      minLatitude = lat

    if float(earthquake.longitude) > maxLongitude:
      maxLongitude = lng

    if float(earthquake.longitude) < minLongitude:
      minLongitude = lng

  inp.seek(0)
  next(inp)

# Create Model
model = ModelFactory.create(model_params.MODEL_PARAMS)
model.enableInference({"predictedField": "event"})

anomalyLikelihood = AnomalyLikelihood()

scores = deque(numpy.ones(WINDOWSIZE), maxlen=WINDOWSIZE)
likelihoodScores = deque(maxlen=100)

r = redis.Redis(host=os.environ.get("REDIS_HOST", "127.0.0.1"),
                port=int(os.environ.get("REDIS_PORT", 6379)),
                db=int(os.environ.get("REDIS_DB", 0)))

for n, earthquake in enumerate(reversed(earthquakes)):

  x = int(10000 * abs(float(earthquake.longitude) - minLongitude))
  y = int(10000 * abs(float(earthquake.latitude) - minLatitude))

  try:
    event = (numpy.array([x, y]), int(10*float(earthquake.mag)))
Example No. 42
class _ModelRunner(object):
  """ Use OPF Model to process metric data samples from stdin and and emit
  anomaly likelihood results to stdout
  """

  # Input column meta info compatible with parameters generated by
  # getScalarMetricWithTimeOfDayAnomalyParams
  _INPUT_RECORD_SCHEMA = (
    fieldmeta.FieldMetaInfo("c0", fieldmeta.FieldMetaType.datetime,
                            fieldmeta.FieldMetaSpecial.timestamp),
    fieldmeta.FieldMetaInfo("c1", fieldmeta.FieldMetaType.float,
                            fieldmeta.FieldMetaSpecial.none),
  )


  def __init__(self, modelId, stats):
    """
    :param str modelId: model identifier
    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package.
    """
    self._modelId = modelId

    # NOTE: ModelRecordEncoder is implemented in the pull request
    # https://github.com/numenta/nupic/pull/2432 that is not yet in master.
    self._modelRecordEncoder = record_stream.ModelRecordEncoder(
      fields=self._INPUT_RECORD_SCHEMA)

    self._model = self._createModel(stats=stats)

    self._anomalyLikelihood = AnomalyLikelihood()


  @classmethod
  def _createModel(cls, stats):
    """Instantiate and configure an OPF model

    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package.
    :returns: OPF Model instance
    """
    # Generate swarm params
    swarmParams = getScalarMetricWithTimeOfDayAnomalyParams(
      metricData=[0],
      minVal=stats["min"],
      maxVal=stats["max"],
      minResolution=stats.get("minResolution"))

    model = ModelFactory.create(modelConfig=swarmParams["modelConfig"])
    model.enableLearning()
    model.enableInference(swarmParams["inferenceArgs"])

    return model


  @classmethod
  def _readInputMessages(cls):
    """Create a generator that waits for and yields input messages from
    stdin

    yields two-tuple (<timestamp>, <scalar-value>), where <timestamp> is the
    `datetime.datetime` timestamp of the metric data sample and <scalar-value>
    is the floating point value of the metric data sample.
    """
    while True:
      message = sys.stdin.readline()

      if message:
        timestamp, scalarValue = json.loads(message)
        yield (datetime.utcfromtimestamp(timestamp), scalarValue)
      else:
        # Front End closed the pipe (or died)
        break


  @classmethod
  def _emitOutputMessage(cls, rowIndex, anomalyProbability):
    """Emit output message to stdout

    :param int rowIndex: 0-based index of corresponding input sample
    :param float anomalyProbability: computed anomaly probability value
    """
    message = "%s\n" % (json.dumps([rowIndex, anomalyProbability]),)

    sys.stdout.write(message)
    sys.stdout.flush()


  def _computeAnomalyProbability(self, inputRow):
    """ Compute anomaly log likelihood score

    :param tuple inputRow: Two-tuple input metric data row
      (<datetime-timestamp>, <float-scalar>)

    :returns: Log-scaled anomaly probability
    :rtype: float
    """
    # Generate raw anomaly score
    inputRecord = self._modelRecordEncoder.encode(inputRow)
    rawAnomalyScore = self._model.run(inputRecord).inferences["anomalyScore"]

    # Generate anomaly likelihood score
    anomalyProbability = self._anomalyLikelihood.anomalyProbability(
      value=inputRow[1],
      anomalyScore=rawAnomalyScore,
      timestamp=inputRow[0])

    return self._anomalyLikelihood.computeLogLikelihood(anomalyProbability)


  def run(self):
    """ Run the model: ingest and process the input metric data and emit output
    messages containing anomaly scores
    """
    g_log.info("Processing model=%s", self._modelId)

    for rowIndex, inputRow in enumerate(self._readInputMessages()):
      anomalyProbability = self._computeAnomalyProbability(inputRow)

      self._emitOutputMessage(rowIndex=rowIndex,
                              anomalyProbability=anomalyProbability)
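
A sketch of the line protocol around this runner (input lines per _readInputMessages, output lines per _emitOutputMessage; the timestamps and values are invented):

import json

# Input: one JSON array per line on stdin, [unix-timestamp, scalar-value]
print(json.dumps([1400000000, 10.5]))
print(json.dumps([1400000300, 11.0]))

# Output: one JSON array per line on stdout, [row-index, log-scaled probability]
rowIndex, anomalyProbability = json.loads('[0, 0.03]')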
Example No. 43
  @classmethod
  def read(cls, proto):
    anomalyLikelihoodRegion = object.__new__(cls)
    anomalyLikelihoodRegion.anomalyLikelihood = AnomalyLikelihood.read(proto)

    return anomalyLikelihoodRegion
def runAnomaly(options):
    """
    Create and run a CLA Model on the given dataset (based on the hotgym anomaly
    client in NuPIC).
    """
    # Load the model params JSON
    with open("model_params.json") as fp:
        modelParams = json.load(fp)

    # Update the resolution value for the encoder
    sensorParams = modelParams['modelParams']['sensorParams']
    numBuckets = modelParams['modelParams']['sensorParams']['encoders'][
        'value'].pop('numBuckets')
    resolution = options.resolution
    if resolution is None:
        resolution = max(0.001, (options.max - options.min) / numBuckets)
    print "Using resolution value: {0}".format(resolution)
    sensorParams['encoders']['value']['resolution'] = resolution

    model = ModelFactory.create(modelParams)
    model.enableInference({'predictedField': 'value'})
    with open(options.inputFile) as fin:

        # Open file and setup headers
        # Here we write the log likelihood value as the 'anomaly score'
        # The actual CLA outputs are labeled 'raw anomaly score'
        reader = csv.reader(fin)
        csvWriter = csv.writer(open(options.outputFile, "wb"))
        csvWriter.writerow([
            "timestamp", "value", "_raw_score", "likelihood_score",
            "log_likelihood_score"
        ])
        headers = reader.next()

        # The anomaly likelihood object
        anomalyLikelihood = AnomalyLikelihood()

        # Iterate through each record in the CSV file
        print "Starting processing at", datetime.datetime.now()
        for i, record in enumerate(reader, start=1):

            # Convert input data to a dict so we can pass it into the model
            inputData = dict(zip(headers, record))
            inputData["value"] = float(inputData["value"])
            inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])
            #inputData["dttm"] = datetime.datetime.now()

            # Send it to the CLA and get back the raw anomaly score
            result = model.run(inputData)
            anomalyScore = result.inferences['anomalyScore']

            # Compute the Anomaly Likelihood
            likelihood = anomalyLikelihood.anomalyProbability(
                inputData["value"], anomalyScore, inputData["dttm"])
            logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
            if likelihood > 0.9999:
                print "Anomaly detected:", inputData['dttm'], inputData[
                    'value'], likelihood

            # Write results to the output CSV file
            csvWriter.writerow([
                inputData["dttm"], inputData["value"], anomalyScore,
                likelihood, logLikelihood
            ])

            # Progress report
            if (i % 1000) == 0: print i, "records processed"

    print "Completed processing", i, "records at", datetime.datetime.now()
    print "Anomaly scores for", options.inputFile,
    print "have been written to", options.outputFile
Example No. 45
class Anomaly(object):
  """Utility class for generating anomaly scores in different ways.

  Supported modes:
    MODE_PURE - the raw anomaly score as computed by computeRawAnomalyScore
    MODE_LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw
        anomaly scores
    MODE_WEIGHTED - multiplies the likelihood result with the raw anomaly score
        that was used to generate the likelihood
  """


  # anomaly modes supported
  MODE_PURE = "pure"
  MODE_LIKELIHOOD = "likelihood"
  MODE_WEIGHTED = "weighted"
  _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)


  def __init__(self, slidingWindowSize = None, mode=MODE_PURE):
    """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; int >= 0
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, how much anomal the value is;
              float 0..1 where 1=totally unexpected
          - "likelihood" - uses the anomaly_likelihood code;
              models probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    """
    self._mode = mode
    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    if self._mode in (Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED):
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly (also needed for MODE_WEIGHTED)
    if self._mode not in Anomaly._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)


  def compute(self, activeColumns, predictedColumns,
              inputValue=None, timestamp=None):
    """Compute the anomaly score as the percent of active columns not predicted.

    @param activeColumns: array of active column indices
    @param predictedColumns: array of columns indices predicted in this step
                             (used for anomaly in step T+1)
    @param inputValue: (optional) value of current input to encoders
                              (eg "cat" for category encoder)
                              (used in anomaly-likelihood)
    @param timestamp: (optional) date timestamp when the sample occurred
                              (used in anomaly-likelihood)
    @return the computed anomaly score; float 0..1
    """
    # Start by computing the raw anomaly score.
    anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

    # Compute final anomaly based on selected mode.
    if self._mode == Anomaly.MODE_PURE:
      score = anomalyScore
    elif self._mode == Anomaly.MODE_LIKELIHOOD:
      if inputValue is None:
        raise ValueError("Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' "
                 "requires 'inputValue' as parameter to compute() method. ")

      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      # low likelihood -> hi anomaly
      score = 1 - probability
    elif self._mode == Anomaly.MODE_WEIGHTED:
      probability = self._likelihood.anomalyProbability(
          inputValue, anomalyScore, timestamp)
      score = anomalyScore * (1 - probability)

    # Last, do moving-average if windowSize was specified.
    if self._movingAverage is not None:
      score = self._movingAverage.next(score)

    return score
Example No. 46
class Anomaly(object):
  """Utility class for generating anomaly scores in different ways.

  Supported modes:
    MODE_PURE - the raw anomaly score as computed by computeRawAnomalyScore
    MODE_LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw
        anomaly scores
    MODE_WEIGHTED - multiplies the likelihood result with the raw anomaly score
        that was used to generate the likelihood
  """


  # anomaly modes supported
  MODE_PURE = "pure"
  MODE_LIKELIHOOD = "likelihood"
  MODE_WEIGHTED = "weighted"
  _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)


  def __init__(self, slidingWindowSize = None, mode=MODE_PURE):
    """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; int >= 0
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, how much anomal the value is;
              float 0..1 where 1=totally unexpected
          - "likelihood" - uses the anomaly_likelihood code;
              models probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    """
    self._mode = mode
    self._useMovingAverage = slidingWindowSize > 0
    self._buf = None
    self._i = None

    # Using cumulative anomaly, sliding window
    if self._useMovingAverage:
      self._windowSize = slidingWindowSize
      # Sliding window buffer
      self._buf = numpy.array([0] * self._windowSize, dtype=numpy.float)
      self._i = 0 # index pointer to actual position
    elif slidingWindowSize is not None:
      raise TypeError(
          "Anomaly: if you define slidingWindowSize, it has to be an "
          "integer > 0;  slidingWindowSize=%r" % slidingWindowSize)

    if self._mode in (Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED):
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly (also needed for MODE_WEIGHTED)
    if self._mode not in Anomaly._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)


  def computeAnomalyScore(self, activeColumns, predictedColumns, value=None,
                          timestamp=None):
    """Compute the anomaly score as the percent of active columns not predicted.

    @param activeColumns: array of active column indices
    @param predictedColumns: array of columns indices predicted in this step
                             (used for anomaly in step T+1)
    @param value: (optional) metric value of current input
                              (used in anomaly-likelihood)
    @param timestamp: (optional) date timestamp when the sample occured
                              (used in anomaly-likelihood)
    @return the computed anomaly score; float 0..1
    """
    # Start by computing the raw anomaly score.
    anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

    # Compute final anomaly based on selected mode.
    if self._mode == Anomaly.MODE_PURE:
      score = anomalyScore
    elif self._mode == Anomaly.MODE_LIKELIHOOD:
      # TODO add tests for likelihood modes
      probability = self._likelihood.anomalyProbability(
          value, anomalyScore, timestamp)
      score = probability
    elif self._mode == Anomaly.MODE_WEIGHTED:
      probability = self._likelihood.anomalyProbability(
          value, anomalyScore, timestamp)
      score = anomalyScore * probability

    # Last, do moving-average if windowSize was specified.
    if self._useMovingAverage:
      score = self._movingAverage(score)

    return score


  def _movingAverage(self, newElement=None):
    """Moving average.

    @param newElement (optional) add a new element before computing the avg
    @return moving average of the last self._windowSize elements
    """
    if newElement is not None:
      self._buf[self._i] = newElement
      self._i = (self._i + 1) % self._windowSize
    return self._buf.sum() / float(self._windowSize)  # normalize to 0..1
Example No. 47
    result = model.run(record)
    #print "prediction: ", result.inferences["multiStepBestPredictions"][1]
    anom = result.inferences['anomalyScore']
    #print "anomaly score: ", anom
    record_anomalies.append(anom)
    mean_anomalies = np.mean(record_anomalies)
    std_anomalies = np.std(record_anomalies)
    var_anomalies = np.var(record_anomalies)
    mean_anomalies_short_window = np.mean(record_anomalies[-int(0.05 * i):])
    #likelihood = 1-((norm.cdf(anom, mean_anomalies_short_window-mean_anomalies, std_anomalies))-(norm.cdf(0, mean_anomalies_short_window-mean_anomalies, std_anomalies)))
    likelihood = 1 - ((norm.cdf(
        anom, mean_anomalies_short_window - mean_anomalies, std_anomalies)) -
                      (norm.cdf(0, mean_anomalies_short_window -
                                mean_anomalies, std_anomalies)))
    #print "likelihood score: ", likelihood
    anomalyLikelihood = AnomalyLikelihood()
    anomalyProbability = anomalyLikelihood.anomalyProbability(
        record['level'], anom, record['timestamp'])
    #print "anomalyProbability: ", anomalyProbability
    if likelihood >= 1:
        cpt = cpt + 1
        print i
        print "Anomaly detected!"

print "Total nb of anomalies", cpt
"""
data = getData()
for _ in xrange(5):
    print data.next()

with open('export_dataframe_df2.csv') as inputFile:
Example No. 48

import TESTmodel_params as model_params

if LOAD: model = ModelFactory.loadFromCheckpoint(MODELSTATE)  
else: model = ModelFactory.create(model_params.MODEL_PARAMS)

if VISUALIZE: Patcher().patchCLAModel(model)

model.enableInference({"predictedField": "event"}) # Predict not only event but also scalar! TODO
# model.enableInference({"predictedField": "scalar"})
# model.enableInference({"predictedField": "timestamp"})
print "Model created!\n"

# Get the Model-Classes:
anomalyLikelihood = AnomalyLikelihood()

if WINDOWSIZE is not None: AnomalyScores = deque(numpy.ones(WINDOWSIZE), maxlen=WINDOWSIZE)
else: AnomalyScores = deque()
LikelihoodScores = deque()

if REDIS:
  r = redis.Redis(host=os.environ.get("REDIS_HOST", "127.0.0.1"),
                port=int(os.environ.get("REDIS_PORT", 6379)),
                db=int(os.environ.get("REDIS_DB", 0)))


print "Start Data-Feed...\n"
for n, event in enumerate(events):
  # Cluster or not? // Convert all Coords. to non-neg Ints (better handled by the model):
  if CLUSTERING:
Example No. 49
class Anomaly(object):
    """Utility class for generating anomaly scores in different ways.

    Supported modes:
      MODE_PURE - the raw anomaly score as computed by computeRawAnomalyScore
      MODE_LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw
          anomaly scores
      MODE_WEIGHTED - multiplies the likelihood result with the raw anomaly score
          that was used to generate the likelihood
    """

    # anomaly modes supported
    MODE_PURE = "pure"
    MODE_LIKELIHOOD = "likelihood"
    MODE_WEIGHTED = "weighted"
    _supportedModes = (MODE_PURE, MODE_LIKELIHOOD, MODE_WEIGHTED)

    def __init__(self,
                 slidingWindowSize=None,
                 mode=MODE_PURE,
                 binaryAnomalyThreshold=None):
        """
    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; int >= 0
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, how much anomal the value is;
              float 0..1 where 1=totally unexpected
          - "likelihood" - uses the anomaly_likelihood code;
              models probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    @param binaryAnomalyThreshold (optional) - if set [0,1] anomaly score
         will be discretized to 1/0 (1 if >= binaryAnomalyThreshold)
         The transformation is applied after moving average is computed and updated.
    """
        self._mode = mode
        if slidingWindowSize is not None:
            self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
        else:
            self._movingAverage = None

        if self._mode == Anomaly.MODE_LIKELIHOOD or self._mode == Anomaly.MODE_WEIGHTED:
            self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
        if self._mode not in Anomaly._supportedModes:
            raise ValueError("Invalid anomaly mode; only supported modes are: "
                             "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                             "Anomaly.MODE_WEIGHTED; you used: %r" %
                             self._mode)
        self._binaryThreshold = binaryAnomalyThreshold
        if binaryAnomalyThreshold is not None and (
                not isinstance(binaryAnomalyThreshold, float)
                or binaryAnomalyThreshold >= 1.0
                or binaryAnomalyThreshold <= 0.0):
            raise ValueError(
                "Anomaly: binaryAnomalyThreshold must be from (0,1) "
                "or None if disabled.")

    def compute(self,
                activeColumns,
                predictedColumns,
                inputValue=None,
                timestamp=None):
        """Compute the anomaly score as the percent of active columns not predicted.

    @param activeColumns: array of active column indices
    @param predictedColumns: array of columns indices predicted in this step
                             (used for anomaly in step T+1)
    @param inputValue: (optional) value of current input to encoders 
				(eg "cat" for category encoder)
                              	(used in anomaly-likelihood)
    @param timestamp: (optional) date timestamp when the sample occured
                              	(used in anomaly-likelihood)
    @return the computed anomaly score; float 0..1
    """
        # Start by computing the raw anomaly score.
        anomalyScore = computeRawAnomalyScore(activeColumns, predictedColumns)

        # Compute final anomaly based on selected mode.
        if self._mode == Anomaly.MODE_PURE:
            score = anomalyScore
        elif self._mode == Anomaly.MODE_LIKELIHOOD:
            if inputValue is None:
                raise ValueError(
                    "Selected anomaly mode 'Anomaly.MODE_LIKELIHOOD' "
                    "requires 'inputValue' as parameter to compute() method. ")

            probability = self._likelihood.anomalyProbability(
                inputValue, anomalyScore, timestamp)
            # low likelihood -> hi anomaly
            score = 1 - probability
        elif self._mode == Anomaly.MODE_WEIGHTED:
            probability = self._likelihood.anomalyProbability(
                inputValue, anomalyScore, timestamp)
            score = anomalyScore * (1 - probability)

        # Last, do moving-average if windowSize was specified.
        if self._movingAverage is not None:
            score = self._movingAverage.next(score)

        # apply binary discretization if required
        if self._binaryThreshold is not None:
            if score >= self._binaryThreshold:
                score = 1.0
            else:
                score = 0.0

        return score

    def __str__(self):
        windowSize = 0
        if self._movingAverage is not None:
            windowSize = self._movingAverage.windowSize
        return "Anomaly:\tmode=%s\twindowSize=%r" % (self._mode, windowSize)

    def __setstate__(self, state):
        """deserialization"""
        self.__dict__.update(state)

        if not hasattr(self, '_mode'):
            self._mode = Anomaly.MODE_PURE
        if not hasattr(self, '_movingAverage'):
            self._movingAverage = None
        if not hasattr(self, '_binaryThreshold'):
            self._binaryThreshold = None
Example No. 50
if LOAD: model = ModelFactory.loadFromCheckpoint(MODELSTATE)  
else: model = ModelFactory.create(model_params.MODEL_PARAMS)

if VISUALIZE: Patcher().patchCLAModel(model)

model.enableInference({"predictedField": "event"}) # Predict not only event but also scalar! TODO
# model.enableInference({"predictedField": "scalar"})
# model.enableInference({"predictedField": "timestamp"})
print "Model created!\n"

# Get the Model-Classes:
if PREDICT: 
  from nupic.data.inference_shifter import InferenceShifter
  shifter = InferenceShifter()

anomalyLikelihood = AnomalyLikelihood()

if WINDOWSIZE is not None: AnomalyScores = deque(numpy.ones(WINDOWSIZE), maxlen=WINDOWSIZE)
else: AnomalyScores = deque()
LikelihoodScores = deque()

if REDIS:
  r = redis.Redis(host=os.environ.get("REDIS_HOST", "127.0.0.1"),
                port=int(os.environ.get("REDIS_PORT", 6379)),
                db=int(os.environ.get("REDIS_DB", 0)))


print "Start Data-Feed...\n"
for n, event in enumerate(events):
  # Cluster or not? // Convert all Coords. to non-neg Ints (better handled by the model):
  if CLUSTERING:
Example No. 51
class AnomalyLikelihoodRegion(PyRegion):
    """Region for computing the anomaly likelihoods."""
    @classmethod
    def getSpec(cls):
        return {
            "description": ("Region that computes anomaly likelihoods for \
                       temporal memory."),
            "singleNodeOnly":
            True,
            "inputs": {
                "rawAnomalyScore": {
                    "description": "The anomaly score whose \
                          likelihood is to be computed",
                    "dataType": "Real32",
                    "count": 1,
                    "required": True,
                    "isDefaultInput": False
                },
                "metricValue": {
                    "description": "The input metric value",
                    "dataType": "Real32",
                    "count": 1,
                    "required": True,
                    "isDefaultInput": False
                },
            },
            "outputs": {
                "anomalyLikelihood": {
                    "description": "The resultant anomaly likelihood",
                    "dataType": "Real32",
                    "count": 1,
                    "isDefaultOutput": True,
                },
            },
            "parameters": {
                "learningPeriod": {
                    "description": "The number of iterations required for the\
                          algorithm to learn the basic patterns in the dataset\
                          and for the anomaly score to 'settle down'.",
                    "dataType": "UInt32",
                    "count": 1,
                    "constraints": "",
                    "defaultValue": 288,
                    "accessMode": "ReadWrite"
                },
                "estimationSamples": {
                    "description": "The number of reasonable anomaly scores\
                           required for the initial estimate of the\
                           Gaussian.",
                    "dataType": "UInt32",
                    "count": 1,
                    "constraints": "",
                    "defaultValue": 100,
                    "accessMode": "ReadWrite"
                },
                "historicWindowSize": {
                    "description": "Size of sliding window of historical data\
                          points to maintain for periodic reestimation\
                          of the Gaussian.",
                    "dataType": "UInt32",
                    "count": 1,
                    "constraints": "",
                    "defaultValue": 8640,
                    "accessMode": "ReadWrite"
                },
                "reestimationPeriod": {
                    "description": "How often we re-estimate the Gaussian\
                          distribution.",
                    "dataType": "UInt32",
                    "count": 1,
                    "constraints": "",
                    "defaultValue": 100,
                    "accessMode": "ReadWrite"
                },
            },
            "commands": {},
        }

    def __init__(self,
                 learningPeriod=288,
                 estimationSamples=100,
                 historicWindowSize=8640,
                 reestimationPeriod=100):
        self.anomalyLikelihood = AnomalyLikelihood(
            learningPeriod=learningPeriod,
            estimationSamples=estimationSamples,
            historicWindowSize=historicWindowSize,
            reestimationPeriod=reestimationPeriod)

    def __eq__(self, other):
        return self.anomalyLikelihood == other.anomalyLikelihood

    def __ne__(self, other):
        return not self == other

    @classmethod
    def read(cls, proto):
        anomalyLikelihoodRegion = object.__new__(cls)
        anomalyLikelihoodRegion.anomalyLikelihood = AnomalyLikelihood.read(
            proto)

        return anomalyLikelihoodRegion

    def write(self, proto):
        self.anomalyLikelihood.write(proto)

    def initialize(self):
        pass

    def compute(self, inputs, outputs):
        anomalyScore = inputs["rawAnomalyScore"][0]
        value = inputs["metricValue"][0]
        anomalyProbability = self.anomalyLikelihood.anomalyProbability(
            value, anomalyScore)
        outputs["anomalyLikelihood"][0] = anomalyProbability
Example No. 52
    @classmethod
    def read(cls, proto):
        anomalyLikelihoodRegion = object.__new__(cls)
        anomalyLikelihoodRegion.anomalyLikelihood = AnomalyLikelihood.read(
            proto)

        return anomalyLikelihoodRegion
Example No. 53
    @classmethod
    def getSchema(cls):
        return AnomalyLikelihood.getSchema()