Example #1
def _addMetric(engine, metricName):
  """Add the new metric to the database."""
  if metricName in gCustomMetrics:
    try:
      # Attempt to reload the metric
      metricId = gCustomMetrics[metricName][0].uid
      with engine.connect() as conn:
        gCustomMetrics[metricName][0] = repository.getMetric(conn, metricId)
      return
    except htmengine.exceptions.ObjectNotFoundError:
      # Do nothing, we will create new metric and update cache below
      pass

  # Use the adapter to create the metric
  try:
    metricId = createCustomDatasourceAdapter().createMetric(metricName)
  except htmengine.exceptions.MetricAlreadyExists as e:
    metricId = e.uid

  with engine.connect() as conn:
    metric = repository.getMetric(conn, metricId)

  # Add it to our cache
  gCustomMetrics[metricName] = [metric, datetime.datetime.utcnow()]

  _trimMetricCache()
Example #2
def _addMetric(engine, metricName):
    """Add the new metric to the database."""
    if metricName in gCustomMetrics:
        try:
            # Attempt to reload the metric
            metricId = gCustomMetrics[metricName][0].uid
            with engine.connect() as conn:
                gCustomMetrics[metricName][0] = repository.getMetric(
                    conn, metricId)
            return
        except htmengine.exceptions.ObjectNotFoundError:
            # Do nothing, we will create new metric and update cache below
            pass

    # Use the adapter to create the metric
    try:
        metricId = createCustomDatasourceAdapter().createMetric(metricName)
    except htmengine.exceptions.MetricAlreadyExists as e:
        metricId = e.uid

    with engine.connect() as conn:
        metric = repository.getMetric(conn, metricId)

    # Add it to our cache
    gCustomMetrics[metricName] = [metric, datetime.datetime.utcnow()]

    _trimMetricCache()
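
The two variants above cache each metric under its name as a [metric row, cached-at timestamp] pair in the module-level gCustomMetrics dict. A minimal, self-contained sketch of that layout; the helper names and the age check are illustrative only, since _trimMetricCache itself is not shown in these examples:

import datetime

# Illustrative stand-in for the cache used in Examples #1-2:
# metric name -> [metric row, UTC timestamp at which it was cached]
gCustomMetrics = {}

def cachePut(metricName, metricRow):
    gCustomMetrics[metricName] = [metricRow, datetime.datetime.utcnow()]

def cacheAgeSeconds(metricName):
    # Purely illustrative age check; the real trimming policy lives in
    # _trimMetricCache, which these examples do not show.
    _, cachedAt = gCustomMetrics[metricName]
    return (datetime.datetime.utcnow() - cachedAt).total_seconds()

cachePut("my.metric", object())
print(cacheAgeSeconds("my.metric"))  # ~0.0 immediately after insertion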
Example #3
  def testActivateModelClassifierEnabled(self):
    """ Test activateModel with classifier enabled in model spec. """
    metricName = "test-" + uuid.uuid1().hex

    adapter = datasource_adapter_factory.createCustomDatasourceAdapter()

    g_log.info("Creating htmengine custom metric; name=%s", metricName)
    metricId = adapter.createMetric(metricName)
    self.addCleanup(adapter.deleteMetricByName, metricName)

    # Turn on monitoring
    modelSpec = {
      "datasource": "custom",
      "metricSpec": {
        "metric": metricName
      },
      "modelParams": {
        "enableClassifier": True
      }
    }

    adapter.monitorMetric(modelSpec)

    with self.engine.connect() as conn:
      metricObj = repository.getMetric(conn,
                                       metricId,
                                       fields=[schema.metric.c.status])
    self.assertEqual(metricObj.status, MetricStatus.PENDING_DATA)

    # Add some data
    data = [
      (0, datetime.datetime.utcnow() - datetime.timedelta(minutes=5)),
      (100, datetime.datetime.utcnow())
    ]
    with self.engine.connect() as conn:
      repository.addMetricData(conn, metricId, data)

    # Activate model
    adapter.activateModel(metricId)

    with self.engine.connect() as conn:
      metricObj = repository.getMetric(conn,
                                       metricId,
                                       fields=[schema.metric.c.status,
                                               schema.metric.c.model_params])
    self.assertIn(metricObj.status, (MetricStatus.CREATE_PENDING,
                                     MetricStatus.ACTIVE))

    self._assertClassifierStatusInModelParams(metricObj.model_params,
                                              classifierEnabled=True)

    g_log.info("Waiting for model to become active")
    self.checkModelIsActive(metricId)

    g_log.info("Waiting at least one model result")
    self.checkModelResultsSize(metricId, 1, atLeast=True)
Example #4
    def testActivateModelClassifierEnabled(self):
        """ Test activateModel with classifier enabled in model spec. """
        metricName = "test-" + uuid.uuid1().hex

        adapter = datasource_adapter_factory.createCustomDatasourceAdapter()

        g_log.info("Creating htmengine custom metric; name=%s", metricName)
        metricId = adapter.createMetric(metricName)
        self.addCleanup(adapter.deleteMetricByName, metricName)

        # Turn on monitoring
        modelSpec = {
            "datasource": "custom",
            "metricSpec": {
                "metric": metricName
            },
            "modelParams": {
                "enableClassifier": True
            }
        }

        adapter.monitorMetric(modelSpec)

        with self.engine.connect() as conn:
            metricObj = repository.getMetric(conn,
                                             metricId,
                                             fields=[schema.metric.c.status])
        self.assertEqual(metricObj.status, MetricStatus.PENDING_DATA)

        # Add some data
        data = [(0,
                 datetime.datetime.utcnow() - datetime.timedelta(minutes=5)),
                (100, datetime.datetime.utcnow())]
        with self.engine.connect() as conn:
            repository.addMetricData(conn, metricId, data)

        # Activate model
        adapter.activateModel(metricId)

        with self.engine.connect() as conn:
            metricObj = repository.getMetric(
                conn,
                metricId,
                fields=[schema.metric.c.status, schema.metric.c.model_params])
        self.assertIn(metricObj.status,
                      (MetricStatus.CREATE_PENDING, MetricStatus.ACTIVE))

        self._assertClassifierStatusInModelParams(metricObj.model_params,
                                                  classifierEnabled=True)

        g_log.info("Waiting for model to become active")
        self.checkModelIsActive(metricId)

        g_log.info("Waiting at least one model result")
        self.checkModelResultsSize(metricId, 1, atLeast=True)
Example #5
  def testMonitorMetricWithResource(self):
    """Test monitorMetric that includes an explicit resource string."""
    metricName = "test-" + uuid.uuid1().hex
    resource = "Test Resource"

    adapter = datasource_adapter_factory.createCustomDatasourceAdapter()

    g_log.info("Creating htmengine custom metric; name=%s", metricName)
    metricId = adapter.createMetric(metricName)
    self.addCleanup(adapter.deleteMetricByName, metricName)

    # Turn on monitoring
    modelSpec = {
      "datasource": "custom",

      "metricSpec": {
        "metric": metricName,
        "resource": resource,
      }
    }

    adapter.monitorMetric(modelSpec)

    with self.engine.connect() as conn:
      metricObj = repository.getMetric(conn,
                                       metricId,
                                       fields=[schema.metric.c.parameters,
                                               schema.metric.c.status,
                                               schema.metric.c.server])

    self.assertEqual(metricObj.status, MetricStatus.PENDING_DATA)
    self.assertEqual(json.loads(metricObj.parameters), modelSpec)
    self.assertEqual(metricObj.server, resource)

    self._validateModelSpec(json.loads(metricObj.parameters))
Example #6
  def checkEncoderResolution(self, uid, minVal, maxVal, minResolution=None):
    """Check that encoder resolution is computed correctly."""
    engine = repository.engineFactory(config=self.__config)
    with engine.begin() as conn:
      metricObj = repository.getMetric(conn,
                                       uid,
                                       fields=[schema.metric.c.name,
                                               schema.metric.c.model_params])

    modelParams = json.loads(metricObj.model_params)
    self.assertNotEqual(modelParams, None,
                        "No model exists for metric %s" % metricObj.name)
    sensorParams = modelParams["modelConfig"]["modelParams"]["sensorParams"]
    encoderParams = sensorParams["encoders"]["c1"]
    # Estimate and check the bounds for the resolution based on min and max
    lower = (maxVal - minVal) / 300.0
    upper = (maxVal - minVal) / 80.0

    if minResolution is not None:
      lower = max(minResolution, lower)
      upper = float("inf")

    resolution = encoderParams["resolution"]

    self.assertGreaterEqual(resolution, lower)
    self.assertLessEqual(resolution, upper)
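
For reference, a worked instance of the bounds asserted above, using plain arithmetic and illustrative values only:

# Worked example of the encoder-resolution bounds checked in Example #6.
minVal, maxVal, minResolution = 0.0, 100.0, 0.5

lower = (maxVal - minVal) / 300.0   # ~0.333
upper = (maxVal - minVal) / 80.0    # 1.25

if minResolution is not None:
    # A caller-supplied floor raises the lower bound and removes the upper one.
    lower = max(minResolution, lower)   # 0.5
    upper = float("inf")

print(lower, upper)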
Example #7
  def testUnmonitorMetricPendingData(self):
    """ Test unmonitorMetric on metric in PENDING_DATA state """
    metricName = "test-" + uuid.uuid1().hex

    adapter = datasource_adapter_factory.createCustomDatasourceAdapter()

    g_log.info("Creating htmengine custom metric; name=%s", metricName)
    metricId = adapter.createMetric(metricName)
    self.addCleanup(adapter.deleteMetricByName, metricName)

    # Turn on monitoring
    modelSpec = {
      "datasource": "custom",

      "metricSpec": {
        "metric": metricName
      }
    }

    adapter.monitorMetric(modelSpec)

    with self.engine.connect() as conn:
      metricObj = repository.getMetric(conn,
                                       metricId,
                                       fields=[schema.metric.c.parameters,
                                               schema.metric.c.status])
    self.assertEqual(metricObj.status, MetricStatus.PENDING_DATA)
    self.assertEqual(json.loads(metricObj.parameters), modelSpec)

    self._validateModelSpec(json.loads(metricObj.parameters))

    # Turn off monitoring
    adapter.unmonitorMetric(metricId)

    self.checkMetricUnmonitoredById(metricId)
Example #8
  def activateModel(self, metricId):
    """ Start a model that is PENDING_DATA, creating the OPF/CLA model

    NOTE: used by MetricStreamer when model is in PENDING_DATA state and
      sufficient data samples are available to get statistics and complete model
      creation.

    :param metricId: unique identifier of the metric row

    :raises htmengine.exceptions.ObjectNotFoundError: if metric with the
      referenced metric uid doesn't exist

    :raises htmengine.exceptions.MetricStatisticsNotReadyError:
    """
    # Load the existing metric
    with self.connectionFactory() as conn:
      metricObj = repository.getMetric(conn,
                                       metricId,
                                       fields=[schema.metric.c.datasource])

    if metricObj.datasource != self._DATASOURCE:
      raise TypeError(
        "activateModel: not an HTM metric=%s; datasource=%s" %
        (metricId, metricObj.datasource))

    stats = self._getMetricStatistics(metricId)

    swarmParams = scalar_metric_utils.generateSwarmParams(stats)

    scalar_metric_utils.startModel(metricId,
                                   swarmParams=swarmParams,
                                   logger=self._log)
Example #9
    def activateModel(self, metricId):
        """ Start a model that is PENDING_DATA, creating the OPF/CLA model

    NOTE: used by MetricStreamer when model is in PENDING_DATA state and
      sufficient data samples are available to get statistics and complete model
      creation.

    :param metricId: unique identifier of the metric row

    :raises htmengine.exceptions.ObjectNotFoundError: if metric with the
      referenced metric uid doesn't exist

    :raises htmengine.exceptions.MetricStatisticsNotReadyError:
    """
        # Load the existing metric
        with self.connectionFactory() as conn:
            metricObj = repository.getMetric(
                conn, metricId, fields=[schema.metric.c.datasource])

        if metricObj.datasource != self._DATASOURCE:
            raise TypeError(
                "activateModel: not an HTM metric=%s; datasource=%s" %
                (metricId, metricObj.datasource))

        stats = self._getMetricStatistics(metricId)

        swarmParams = scalar_metric_utils.generateSwarmParams(stats)

        scalar_metric_utils.startModel(metricId,
                                       swarmParams=swarmParams,
                                       logger=self._log)
Example #10
    def checkEncoderResolution(self, uid, minVal, maxVal, minResolution=None):
        """Check that encoder resolution is computed correctly."""
        engine = repository.engineFactory(config=self.__config)
        with engine.begin() as conn:
            metricObj = repository.getMetric(
                conn,
                uid,
                fields=[schema.metric.c.name, schema.metric.c.model_params])

        modelParams = json.loads(metricObj.model_params)
        self.assertNotEqual(modelParams, None,
                            "No model exists for metric %s" % metricObj.name)
        sensorParams = modelParams["modelConfig"]["modelParams"][
            "sensorParams"]
        encoderParams = sensorParams["encoders"]["c1"]
        # Estimate and check the bounds for the resolution based on min and max
        lower = (maxVal - minVal) / 300.0
        upper = (maxVal - minVal) / 80.0

        if minResolution is not None:
            lower = max(minResolution, lower)
            upper = float("inf")

        resolution = encoderParams["resolution"]

        self.assertGreaterEqual(resolution, lower)
        self.assertLessEqual(resolution, upper)
Example #11
    def testUnmonitorMetricPendingData(self):
        """ Test unmonitorMetric on metric in PENDING_DATA state """
        metricName = "test-" + uuid.uuid1().hex

        adapter = datasource_adapter_factory.createCustomDatasourceAdapter()

        g_log.info("Creating htmengine custom metric; name=%s", metricName)
        metricId = adapter.createMetric(metricName)
        self.addCleanup(adapter.deleteMetricByName, metricName)

        # Turn on monitoring
        modelSpec = {
            "datasource": "custom",
            "metricSpec": {
                "metric": metricName
            }
        }

        adapter.monitorMetric(modelSpec)

        with self.engine.connect() as conn:
            metricObj = repository.getMetric(
                conn,
                metricId,
                fields=[schema.metric.c.parameters, schema.metric.c.status])
        self.assertEqual(metricObj.status, MetricStatus.PENDING_DATA)
        self.assertEqual(json.loads(metricObj.parameters), modelSpec)

        self._validateModelSpec(json.loads(metricObj.parameters))

        # Turn off monitoring
        adapter.unmonitorMetric(metricId)

        self.checkMetricUnmonitoredById(metricId)
Example #12
    def test_MinMaxDelayedCreationNoMetricIntegrityErrorMER2190(self):
        """Tests that delayed creation doesn't cause integrity error in
    custom-metric model. It sends more than MODEL_CREATION_RECORD_THRESHOLD
    rows """
        metricName = (
            "testMinMaxDelayedCreationNoMetricIntegrityErrorMER2190.%i" %
            int(time.time()))
        LOGGER.info("Running test with metric name: %s", metricName)

        totalRowsToSend = MODEL_CREATION_RECORD_THRESHOLD + 700

        self.addCleanup(self._deleteMetric, metricName)

        def timeGenerator():
            """Generator for unix timestamps."""
            backoff = datetime.timedelta(minutes=5 * (totalRowsToSend + 1))
            dt = datetime.datetime.utcnow() - backoff
            td = datetime.timedelta(minutes=5)
            while True:
                dt += td
                yield int(calendar.timegm(dt.utctimetuple()))

        nextTime = timeGenerator()

        # Add custom metric data
        sock = socket.socket()
        sock.connect(("localhost", self.plaintextPort))
        sock.sendall("%s 0.0 %i\n" % (metricName, nextTime.next()))
        self.gracefullyCloseSocket(sock)

        uid = self.checkMetricCreated(metricName)
        LOGGER.info("Metric %s has uid: %s", metricName, uid)

        # Send model creation request
        nativeMetric = {"datasource": "custom", "metricSpec": {"uid": uid}}

        model = self._createModel(nativeMetric)
        self.assertEqual(model.status, MetricStatus.PENDING_DATA)

        # Add more data
        sock = socket.socket()
        sock.connect(("localhost", self.plaintextPort))
        for _ in xrange(totalRowsToSend - 1):
            sock.sendall("%s 7000.0 %i\n" % (metricName, nextTime.next()))
        self.gracefullyCloseSocket(sock)

        for _ in xrange(60):
            with self.engine.begin() as conn:
                metric = repository.getMetric(conn, uid)

            if metric.status == MetricStatus.ACTIVE:
                break
            LOGGER.info("Model=%s not ready.  Sleeping 5 seconds...")
            time.sleep(5)
        else:
            self.fail("Model results not available within 5 minutes")

        # Check that the data all got processed
        self.checkModelResultsSize(uid, totalRowsToSend)
Example #13
    def testExportImport(self):
        metricName = "test-" + uuid.uuid1().hex

        adapter = datasource_adapter_factory.createCustomDatasourceAdapter()

        g_log.info("Creating htmengine custom metric; name=%s", metricName)
        metricId = adapter.createMetric(metricName)
        self.addCleanup(adapter.deleteMetricByName, metricName)

        # Add some data
        # NOTE: we discard the fractional part because it gets eliminated
        # in the database, and we will want to compare against retrieved
        # items later.
        now = datetime.datetime.utcnow().replace(microsecond=0)
        data = [(0, now - datetime.timedelta(minutes=5)), (100, now)]

        with self.engine.connect() as conn:
            repository.addMetricData(conn, metricId, data)

        # Turn on monitoring
        modelSpec = {
            "datasource": "custom",
            "metricSpec": {
                "metric": metricName
            },
        }

        adapter.monitorMetric(modelSpec)

        def checkExportSpec(exportSpec):
            self.assertEqual(exportSpec["datasource"], modelSpec["datasource"])
            self.assertEqual(exportSpec["metricSpec"], modelSpec["metricSpec"])
            self.assertSequenceEqual(exportSpec["data"], data)

        # Export
        exportSpec = adapter.exportModel(metricId)
        checkExportSpec(exportSpec)

        # Delete metric
        adapter.deleteMetricByName(metricName)
        self.checkModelDeleted(metricId)

        # Import
        metricId = adapter.importModel(
            htmengine.utils.jsonDecode(htmengine.utils.jsonEncode(exportSpec)))

        with self.engine.connect() as conn:
            metricObj = repository.getMetric(
                conn,
                metricId,
                fields=[schema.metric.c.parameters, schema.metric.c.status])
        self.assertEqual(metricObj.status, MetricStatus.PENDING_DATA)
        self.assertEqual(json.loads(metricObj.parameters), modelSpec)

        self._validateModelSpec(json.loads(metricObj.parameters))

        # Export again
        exportSpec = adapter.exportModel(metricId)
        checkExportSpec(exportSpec)
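
Based on the assertions in checkExportSpec above, the exported spec carries the monitored datasource and metricSpec plus the raw samples. A hedged illustration of its shape; all values below are placeholders:

import datetime

# Shape implied by checkExportSpec in Example #13: "datasource" and
# "metricSpec" mirror the modelSpec that was monitored, and "data" is the
# sequence of (value, timestamp) samples that were added.
now = datetime.datetime.utcnow().replace(microsecond=0)
exampleExportSpec = {
    "datasource": "custom",
    "metricSpec": {"metric": "test-example"},
    "data": [(0, now - datetime.timedelta(minutes=5)), (100, now)],
}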
Example #14
  def checkModelIsActive(self, uid):
    engine = repository.engineFactory(config=self.__config)
    with engine.begin() as conn:
      metricObj = repository.getMetric(conn,
                                       uid,
                                       fields=[schema.metric.c.status])

    self.assertEqual(metricObj.status, MetricStatus.ACTIVE)
Example #15
    def checkModelIsActive(self, uid):
        engine = repository.engineFactory(config=self.__config)
        with engine.begin() as conn:
            metricObj = repository.getMetric(conn,
                                             uid,
                                             fields=[schema.metric.c.status])

        self.assertEqual(metricObj.status, MetricStatus.ACTIVE)
Example #16
  def test_MinMaxDelayedCreation(self):
    """Tests that the min and max are set correctly when not specified."""

    metricName = "testMinMaxDelayedCreation.%i" % int(time.time())
    LOGGER.info("Running test with metric name: %s", metricName)

    self.addCleanup(self._deleteMetric, metricName)

    def timeGenerator():
      """Generator for unix timestamps."""
      dt = datetime.datetime.utcnow() - datetime.timedelta(hours=25)
      td = datetime.timedelta(minutes=5)
      while True:
        dt += td
        yield int(calendar.timegm(dt.utctimetuple()))
    nextTime = timeGenerator()

    # Add custom metric data
    sock = socket.socket()
    sock.connect(("localhost", self.plaintextPort))
    sock.sendall("%s 0.0 %i\n" % (metricName, nextTime.next()))
    sock.sendall("%s 100.0 %i\n" % (metricName, nextTime.next()))
    self.gracefullyCloseSocket(sock)

    uid = self.checkMetricCreated(metricName)

    # Save the uid for later
    LOGGER.info("Metric %s has uid: %s", metricName, uid)

    # Send model creation request
    nativeMetric = {"datasource": "custom",
                    "metricSpec": {"uid": uid}}
    model = self._createModel(nativeMetric)
    self.assertEqual(model.status, MetricStatus.PENDING_DATA)

    # Add more data
    sock = socket.socket()
    sock.connect(("localhost", self.plaintextPort))
    for _ in xrange(MODEL_CREATION_RECORD_THRESHOLD - 2):
      sock.sendall("%s 7000.0 %i\n" % (metricName, nextTime.next()))
    self.gracefullyCloseSocket(sock)

    for _ in xrange(60):
      with self.engine.begin() as conn:
        metric = repository.getMetric(conn, uid)

      if metric.status == MetricStatus.ACTIVE:
        break
      LOGGER.info("Model=%s not ready.  Sleeping 5 seconds...")
      time.sleep(5)
    else:
      self.fail("Model results not available within 5 minutes")

    # Check the min and max for the model
    self.checkEncoderResolution(uid, 0.0, 7000.0)

    # Check that the data all got processed
    self.checkModelResultsSize(uid, MODEL_CREATION_RECORD_THRESHOLD)
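
The timeGenerator helper in the test above yields UTC unix timestamps spaced five minutes apart, starting roughly 25 hours in the past. A small standalone check of that spacing (same pattern, no test fixtures required):

import calendar
import datetime

def timeGenerator():
    # Same pattern as above: start ~25 hours back, step by 5 minutes.
    dt = datetime.datetime.utcnow() - datetime.timedelta(hours=25)
    td = datetime.timedelta(minutes=5)
    while True:
        dt += td
        yield int(calendar.timegm(dt.utctimetuple()))

gen = timeGenerator()
first, second = next(gen), next(gen)
print(second - first)  # 300 seconds between consecutive samples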
Example #17
  def test_MinMaxDelayedCreationNoMetricIntegrityErrorMER2190(self):
    """Tests that delayed creation doesn't cause integrity error in
    custom-metric model. It sends more than MODEL_CREATION_RECORD_THRESHOLD
    rows """
    metricName = ("testMinMaxDelayedCreationNoMetricIntegrityErrorMER2190.%i"
                  % int(time.time()))
    LOGGER.info("Running test with metric name: %s", metricName)

    totalRowsToSend = MODEL_CREATION_RECORD_THRESHOLD + 700

    self.addCleanup(self._deleteMetric, metricName)

    def timeGenerator():
      """Generator for unix timestamps."""
      backoff = datetime.timedelta(minutes=5 * (totalRowsToSend + 1))
      dt = datetime.datetime.utcnow() - backoff
      td = datetime.timedelta(minutes=5)
      while True:
        dt += td
        yield int(calendar.timegm(dt.utctimetuple()))
    nextTime = timeGenerator()

    # Add custom metric data
    sock = socket.socket()
    sock.connect(("localhost", self.plaintextPort))
    sock.sendall("%s 0.0 %i\n" % (metricName, nextTime.next()))
    self.gracefullyCloseSocket(sock)

    uid = self.checkMetricCreated(metricName)
    LOGGER.info("Metric %s has uid: %s", metricName, uid)

    # Send model creation request
    nativeMetric = {"datasource": "custom",
                    "metricSpec": {"uid": uid}}

    model = self._createModel(nativeMetric)
    self.assertEqual(model.status, MetricStatus.PENDING_DATA)

    # Add more data
    sock = socket.socket()
    sock.connect(("localhost", self.plaintextPort))
    for _ in xrange(totalRowsToSend - 1):
      sock.sendall("%s 7000.0 %i\n" % (metricName, nextTime.next()))
    self.gracefullyCloseSocket(sock)

    for _ in xrange(60):
      with self.engine.begin() as conn:
        metric = repository.getMetric(conn, uid)

      if metric.status == MetricStatus.ACTIVE:
        break
      LOGGER.info("Model=%s not ready.  Sleeping 5 seconds...")
      time.sleep(5)
    else:
      self.fail("Model results not available within 5 minutes")

    # Check that the data all got processed
    self.checkModelResultsSize(uid, totalRowsToSend)
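
The delayed-creation tests above feed samples to the plaintext listener one line at a time; the line built for sendall is simply metric name, value, and unix timestamp. A minimal illustration of that wire format with placeholder values:

# Line format sent to the plaintext listener in the tests above:
# "<metric name> <value> <unix timestamp>\n", one sample per line.
line = "%s %r %i\n" % ("my.metric", 7000.0, 1400000000)
print(repr(line))  # 'my.metric 7000.0 1400000000\n'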
Example #18
    def test_MinMaxDelayedCreation(self):
        """Tests that the min and max are set correctly when not specified."""

        metricName = "testMinMaxDelayedCreation.%i" % int(time.time())
        LOGGER.info("Running test with metric name: %s", metricName)

        self.addCleanup(self._deleteMetric, metricName)

        def timeGenerator():
            """Generator for unix timestamps."""
            dt = datetime.datetime.utcnow() - datetime.timedelta(hours=25)
            td = datetime.timedelta(minutes=5)
            while True:
                dt += td
                yield int(calendar.timegm(dt.utctimetuple()))

        nextTime = timeGenerator()

        # Add custom metric data
        sock = socket.socket()
        sock.connect(("localhost", self.plaintextPort))
        sock.sendall("%s 0.0 %i\n" % (metricName, nextTime.next()))
        sock.sendall("%s 100.0 %i\n" % (metricName, nextTime.next()))
        self.gracefullyCloseSocket(sock)

        uid = self.checkMetricCreated(metricName)

        # Save the uid for later
        LOGGER.info("Metric %s has uid: %s", metricName, uid)

        # Send model creation request
        nativeMetric = {"datasource": "custom", "metricSpec": {"uid": uid}}
        model = self._createModel(nativeMetric)
        self.assertEqual(model.status, MetricStatus.PENDING_DATA)

        # Add more data
        sock = socket.socket()
        sock.connect(("localhost", self.plaintextPort))
        for _ in xrange(MODEL_CREATION_RECORD_THRESHOLD - 2):
            sock.sendall("%s 7000.0 %i\n" % (metricName, nextTime.next()))
        self.gracefullyCloseSocket(sock)

        for _ in xrange(60):
            with self.engine.begin() as conn:
                metric = repository.getMetric(conn, uid)

            if metric.status == MetricStatus.ACTIVE:
                break
            LOGGER.info("Model=%s not ready.  Sleeping 5 seconds...")
            time.sleep(5)
        else:
            self.fail("Model results not available within 5 minutes")

        # Check the min and max for the model
        self.checkEncoderResolution(uid, 0.0, 7000.0)

        # Check that the data all got processed
        self.checkModelResultsSize(uid, MODEL_CREATION_RECORD_THRESHOLD)
Example #19
    def _createModel(self, nativeMetric):
        adapter = createDatasourceAdapter("custom")
        try:
            metricId = adapter.monitorMetric(nativeMetric)
        except MetricAlreadyMonitored as e:
            metricId = e.uid

        engine = repository.engineFactory(config=self.config)

        with engine.begin() as conn:
            return repository.getMetric(conn, metricId)
Example #20
  def _composeModelCommandResultMessage(cls, modelID, cmdResult):
    """ Compose message corresponding to the completion of a model command
    for publishing to downstream services.

    :param modelID: model identifier
    :param model_swapper_interface.ModelCommandResult cmdResult: model command
      result
    :returns: JSON-ifiable message contents object per
      model_command_result_amqp_message.json
    :rtype: dict
    :raises ObjectNotFoundError: when attempted to request additional info about
      a model that is not in the repository
    :raises MetricNotMonitoredError: when required info about a model is not
      available, because it's no longer monitored
    """
    commandResultMessage = dict(
      method=cmdResult.method,
      modelId=modelID,
      commandId=cmdResult.commandID,
      status=cmdResult.status,
      errorMessage=cmdResult.errorMessage,
    )

    if (cmdResult.method == "defineModel" and
        cmdResult.status == htmengineerrno.SUCCESS):
      # Add modelInfo for successfully-completed "defineModel" commands
      engine = repository.engineFactory(config)
      fields = [
        schema.metric.c.name,
        schema.metric.c.server,
        schema.metric.c.parameters
      ]
      try:
        with engine.connect() as conn:
          metricObj = repository.getMetric(
            conn,
            modelID,
            fields=fields)
      except ObjectNotFoundError:
        g_log.warning("_composeModelCommandResultMessage: method=%s; "
                      "model=%s not found", cmdResult.method, modelID)
        raise

      if not metricObj.parameters:
        g_log.warning("_composeModelCommandResultMessage: method=%s; "
                      "model=%s not monitored", cmdResult.method, modelID)
        raise MetricNotMonitoredError

      commandResultMessage["modelInfo"] = dict(
        metricName=metricObj.name,
        resource=metricObj.server,
        modelSpec=json.loads(metricObj.parameters))

    return commandResultMessage
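
Putting the branches above together, the returned message carries the fields shown below. The field names come from the code; every value is a placeholder for illustration only:

# Shape of the dict built by _composeModelCommandResultMessage in Example #20.
exampleCommandResultMessage = {
    "method": "defineModel",
    "modelId": "0123456789abcdef",   # placeholder metric/model uid
    "commandId": "cmd-42",           # placeholder command id
    "status": 0,                     # htmengineerrno.SUCCESS
    "errorMessage": None,
    # Added only for successfully-completed "defineModel" commands:
    "modelInfo": {
        "metricName": "my.metric",
        "resource": "my-server",
        "modelSpec": {"datasource": "custom",
                      "metricSpec": {"metric": "my.metric"}},
    },
}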
Example #21
    def _composeModelCommandResultMessage(cls, modelID, cmdResult):
        """ Compose message corresponding to the completion of a model command
    for publishing to downstream services.

    :param modelID: model identifier
    :param model_swapper_interface.ModelCommandResult cmdResult: model command
      result
    :returns: JSON-ifiable message contents object per
      model_command_result_amqp_message.json
    :rtype: dict
    :raises ObjectNotFoundError: when attempted to request additional info about
      a model that is not in the repository
    :raises MetricNotMonitoredError: when required info about a model is not
      available, because it's no longer monitored
    """
        commandResultMessage = dict(
            method=cmdResult.method,
            modelId=modelID,
            commandId=cmdResult.commandID,
            status=cmdResult.status,
            errorMessage=cmdResult.errorMessage,
        )

        if (cmdResult.method == "defineModel"
                and cmdResult.status == htmengineerrno.SUCCESS):
            # Add modelInfo for successfully-completed "defineModel" commands
            engine = repository.engineFactory(config)
            fields = [
                schema.metric.c.name, schema.metric.c.server,
                schema.metric.c.parameters
            ]
            try:
                with engine.connect() as conn:
                    metricObj = repository.getMetric(conn,
                                                     modelID,
                                                     fields=fields)
            except ObjectNotFoundError:
                g_log.warning(
                    "_composeModelCommandResultMessage: method=%s; "
                    "model=%s not found", cmdResult.method, modelID)
                raise

            if not metricObj.parameters:
                g_log.warning(
                    "_composeModelCommandResultMessage: method=%s; "
                    "model=%s not monitored", cmdResult.method, modelID)
                raise MetricNotMonitoredError

            commandResultMessage["modelInfo"] = dict(metricName=metricObj.name,
                                                     resource=metricObj.server,
                                                     modelSpec=json.loads(
                                                         metricObj.parameters))

        return commandResultMessage
Example #22
  def _createModel(self, nativeMetric):
    adapter = createDatasourceAdapter("custom")
    try:
      metricId = adapter.monitorMetric(nativeMetric)
    except MetricAlreadyMonitored as e:
      metricId = e.uid

    engine = repository.engineFactory(config=self.config)

    with engine.begin() as conn:
      return repository.getMetric(conn, metricId)
Example #23
    def start():
        with repository.engineFactory(config).begin() as conn:
            metricObj = repository.getMetric(conn, metricId)
            modelStarted = (_startModelHelper(conn=conn,
                                              metricObj=metricObj,
                                              swarmParams=swarmParams,
                                              logger=logger))
            if modelStarted:
                sendBacklogDataToModel(conn=conn,
                                       metricId=metricId,
                                       logger=logger)

            return modelStarted
Example #24
  def checkMetricUnmonitoredById(self, uid):
    engine = repository.engineFactory(config=self.__config)
    with engine.begin() as conn:
      metricObj = repository.getMetric(conn,
                                       uid,
                                       fields=[schema.metric.c.status,
                                               schema.metric.c.parameters])

    self.assertEqual(metricObj.status, MetricStatus.UNMONITORED)
    self.assertIsNone(metricObj.parameters)

    with self.assertRaises(model_checkpoint_mgr.ModelNotFound):
      model_checkpoint_mgr.ModelCheckpointMgr().loadModelDefinition(uid)
Example #25
    def checkMetricUnmonitoredById(self, uid):
        engine = repository.engineFactory(config=self.__config)
        with engine.begin() as conn:
            metricObj = repository.getMetric(
                conn,
                uid,
                fields=[schema.metric.c.status, schema.metric.c.parameters])

        self.assertEqual(metricObj.status, MetricStatus.UNMONITORED)
        self.assertIsNone(metricObj.parameters)

        with self.assertRaises(model_checkpoint_mgr.ModelNotFound):
            model_checkpoint_mgr.ModelCheckpointMgr().loadModelDefinition(uid)
Example #26
  def start():
    with repository.engineFactory(config).begin() as conn:
      metricObj = repository.getMetric(conn, metricId)
      modelStarted = (
        _startModelHelper(conn=conn,
                          metricObj=metricObj,
                          swarmParams=swarmParams,
                          logger=logger))
      if modelStarted:
        sendBacklogDataToModel(conn=conn,
                               metricId=metricId,
                               logger=logger)

      return modelStarted
Example #27
    def testMonitorMetricWithCompleteModelParams(self):
        """ Test monitorMetric with complete set of user-provided model parameters
    that activates a model """
        metricName = "test-" + uuid.uuid1().hex

        adapter = datasource_adapter_factory.createCustomDatasourceAdapter()

        g_log.info("Creating htmengine custom metric; name=%s", metricName)
        metricId = adapter.createMetric(metricName)
        self.addCleanup(adapter.deleteMetricByName, metricName)

        fileName = "custom_datasource_adapter_test_model_config.json"
        with self._openTestDataFile(fileName) as modelConfigFile:
            modelConfig = json.load(modelConfigFile)

        # Turn on monitoring
        modelSpec = {
            "datasource": "custom",
            "metricSpec": {
                "metric": metricName
            },
            "completeModelParams": {
                "modelConfig": modelConfig,
                "inferenceArgs": {
                    "predictionSteps": [1],
                    "predictedField": "bar",
                    "inputPredictedField": "auto"
                },
                "timestampFieldName": "foo",
                "valueFieldName": "bar"
            }
        }

        adapter.monitorMetric(modelSpec)

        with self.engine.connect() as conn:
            metricObj = repository.getMetric(
                conn,
                metricId,
                fields=[schema.metric.c.status, schema.metric.c.parameters])

        self._validateModelSpec(json.loads(metricObj.parameters))

        self.assertIn(metricObj.status,
                      (MetricStatus.CREATE_PENDING, MetricStatus.ACTIVE))
        self.assertEqual(json.loads(metricObj.parameters), modelSpec)

        g_log.info("Waiting for model to become active")
        self.checkModelIsActive(metricId)
Example #28
    def checkMetricDeleted(self, uid):

        engine = repository.engineFactory(config=self.__config)

        with engine.begin() as conn:
            with self.assertRaises(Exception) as e:
                metric = repository.getMetric(conn, uid)

            models = repository.getAllModels(conn)
            for model in models:
                self.assertNotEqual(model.uid, uid,
                                    "Model showing up after deletion.")

        with self.assertRaises(model_checkpoint_mgr.ModelNotFound):
            model_checkpoint_mgr.ModelCheckpointMgr().loadModelDefinition(uid)
Example #29
  def checkMetricDeleted(self, uid):

    engine = repository.engineFactory(config=self.__config)

    with engine.begin() as conn:
      with self.assertRaises(Exception) as e:
        metric = repository.getMetric(conn, uid)

      models = repository.getAllModels(conn)
      for model in models:
        self.assertNotEqual(model.uid, uid,
                            "Model showing up after deletion.")

    with self.assertRaises(model_checkpoint_mgr.ModelNotFound):
      model_checkpoint_mgr.ModelCheckpointMgr().loadModelDefinition(uid)
Example #30
  def testMonitorMetricWithEnoughDataForStats(self):
    """ monitorMetric should create a model when there is enough data rows """
    metricName = "test-" + uuid.uuid1().hex

    adapter = datasource_adapter_factory.createCustomDatasourceAdapter()

    g_log.info("Creating htmengine custom metric; name=%s", metricName)
    metricId = adapter.createMetric(metricName)
    self.addCleanup(adapter.deleteMetricByName, metricName)

    # Add enough data to force activation of model
    data = [
      (offset, datetime.datetime.utcnow() + datetime.timedelta(minutes=offset))
      for offset in xrange(
        0,
        scalar_metric_utils.MODEL_CREATION_RECORD_THRESHOLD * 5,
        5)
    ]
    self.assertEqual(len(data),
                     scalar_metric_utils.MODEL_CREATION_RECORD_THRESHOLD)

    with self.engine.connect() as conn:
      repository.addMetricData(conn, metricId, data)

    # Turn on monitoring
    modelSpec = {
      "datasource": "custom",

      "metricSpec": {
        "metric": metricName
      },
    }

    adapter.monitorMetric(modelSpec)

    with self.engine.connect() as conn:
      metricObj = repository.getMetric(conn,
                                       metricId,
                                       fields=[schema.metric.c.status])

    self.assertIn(metricObj.status, (MetricStatus.CREATE_PENDING,
                                     MetricStatus.ACTIVE))

    g_log.info("Waiting for model to become active")
    self.checkModelIsActive(metricId)

    g_log.info("Waiting at least one model result")
    self.checkModelResultsSize(metricId, 1, atLeast=True)
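
The data built above with xrange(0, N*5, 5) contains exactly N rows, which is what the assertEqual against MODEL_CREATION_RECORD_THRESHOLD relies on. A quick standalone check; the threshold value here is a placeholder, since the real constant lives in htmengine's scalar_metric_utils:

# range(0, N*5, 5) yields exactly N offsets.
MODEL_CREATION_RECORD_THRESHOLD = 1000  # placeholder value for illustration
offsets = list(range(0, MODEL_CREATION_RECORD_THRESHOLD * 5, 5))
print(len(offsets) == MODEL_CREATION_RECORD_THRESHOLD)  # True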
Example #31
  def testMonitorMetricWithMinResolution(self):
    """
    Test monitorMetric with user-provided min/max and minResolution
    that activates a model.
    Make sure resolution doesn't drop below minResolution.
    """
    metricName = "test-" + uuid.uuid1().hex

    adapter = datasource_adapter_factory.createCustomDatasourceAdapter()

    g_log.info("Creating htmengine custom metric; name=%s", metricName)
    metricId = adapter.createMetric(metricName)
    self.addCleanup(adapter.deleteMetricByName, metricName)

    # Turn on monitoring
    modelSpec = {
      "datasource": "custom",

      "metricSpec": {
        "metric": metricName
      },

      "modelParams": {
        "min": 0,  # optional
        "max": 1,  # optional
        "minResolution": 0.5 # optional
      }
    }

    adapter.monitorMetric(modelSpec)

    with self.engine.connect() as conn:
      metricObj = repository.getMetric(conn,
                                       metricId,
                                       fields=[schema.metric.c.status,
                                               schema.metric.c.parameters])
    self.assertIn(metricObj.status, (MetricStatus.CREATE_PENDING,
                                     MetricStatus.ACTIVE))
    self.assertEqual(json.loads(metricObj.parameters), modelSpec)
    #print metricObj.parameters

    self._validateModelSpec(json.loads(metricObj.parameters))

    g_log.info("Waiting for model to become active")
    self.checkModelIsActive(metricId)
    self.checkEncoderResolution(metricId, 0, 1, minResolution=0.5)
Example #32
  def testMonitorMetricWithCompleteModelParams(self):
    """ Test monitorMetric with complete set of user-provided model parameters
    that activates a model """
    metricName = "test-" + uuid.uuid1().hex

    adapter = datasource_adapter_factory.createCustomDatasourceAdapter()

    g_log.info("Creating htmengine custom metric; name=%s", metricName)
    metricId = adapter.createMetric(metricName)
    self.addCleanup(adapter.deleteMetricByName, metricName)

    fileName = "custom_datasource_adapter_test_model_config.json"
    with self._openTestDataFile(fileName) as modelConfigFile:
      modelConfig = json.load(modelConfigFile)

    # Turn on monitoring
    modelSpec = {
      "datasource": "custom",
      "metricSpec": {
        "metric": metricName
      },
      "completeModelParams": {
        "modelConfig": modelConfig,
        "inferenceArgs": {"predictionSteps": [1], "predictedField": "bar",
                          "inputPredictedField": "auto"},
        "timestampFieldName": "foo",
        "valueFieldName": "bar"
      }
    }

    adapter.monitorMetric(modelSpec)

    with self.engine.connect() as conn:
      metricObj = repository.getMetric(conn,
                                       metricId,
                                       fields=[schema.metric.c.status,
                                               schema.metric.c.parameters])

    self._validateModelSpec(json.loads(metricObj.parameters))

    self.assertIn(metricObj.status, (MetricStatus.CREATE_PENDING,
                                     MetricStatus.ACTIVE))
    self.assertEqual(json.loads(metricObj.parameters), modelSpec)

    g_log.info("Waiting for model to become active")
    self.checkModelIsActive(metricId)
Example #33
    def testMonitorMetricWithEnoughDataForStats(self):
        """ monitorMetric should create a model when there is enough data rows """
        metricName = "test-" + uuid.uuid1().hex

        adapter = datasource_adapter_factory.createCustomDatasourceAdapter()

        g_log.info("Creating htmengine custom metric; name=%s", metricName)
        metricId = adapter.createMetric(metricName)
        self.addCleanup(adapter.deleteMetricByName, metricName)

        # Add enough data to force activation of model
        data = [
            (offset,
             datetime.datetime.utcnow() + datetime.timedelta(minutes=offset))
            for offset in xrange(
                0, scalar_metric_utils.MODEL_CREATION_RECORD_THRESHOLD * 5, 5)
        ]
        self.assertEqual(len(data),
                         scalar_metric_utils.MODEL_CREATION_RECORD_THRESHOLD)

        with self.engine.connect() as conn:
            repository.addMetricData(conn, metricId, data)

        # Turn on monitoring
        modelSpec = {
            "datasource": "custom",
            "metricSpec": {
                "metric": metricName
            },
        }

        adapter.monitorMetric(modelSpec)

        with self.engine.connect() as conn:
            metricObj = repository.getMetric(conn,
                                             metricId,
                                             fields=[schema.metric.c.status])

        self.assertIn(metricObj.status,
                      (MetricStatus.CREATE_PENDING, MetricStatus.ACTIVE))

        g_log.info("Waiting for model to become active")
        self.checkModelIsActive(metricId)

        g_log.info("Waiting at least one model result")
        self.checkModelResultsSize(metricId, 1, atLeast=True)
Example #34
    def testMonitorMetricClassifierEnabled(self):
        """ Test monitorMetric with request for enabled classifier in model
    params """
        metricName = "test-" + uuid.uuid1().hex

        adapter = datasource_adapter_factory.createCustomDatasourceAdapter()

        g_log.info("Creating htmengine custom metric; name=%s", metricName)
        metricId = adapter.createMetric(metricName)
        self.addCleanup(adapter.deleteMetricByName, metricName)

        # Turn on monitoring
        modelSpec = {
            "datasource": "custom",
            "metricSpec": {
                "metric": metricName
            },
            "modelParams": {
                "min": 0,  # optional
                "max": 100,  # optional
                "enableClassifier": True
            }
        }

        adapter.monitorMetric(modelSpec)

        with self.engine.connect() as conn:
            metricObj = repository.getMetric(conn,
                                             metricId,
                                             fields=[
                                                 schema.metric.c.status,
                                                 schema.metric.c.parameters,
                                                 schema.metric.c.model_params
                                             ])
        self.assertEqual(metricObj.status, MetricStatus.CREATE_PENDING)
        self.assertEqual(json.loads(metricObj.parameters), modelSpec)

        self._assertClassifierStatusInModelParams(metricObj.model_params,
                                                  classifierEnabled=True)

        self._validateModelSpec(json.loads(metricObj.parameters))

        g_log.info("Waiting for model to become active")
        self.checkModelIsActive(metricId)
        self.checkEncoderResolution(metricId, 0, 100)
Example #35
  def testMonitorMetricClassifierEnabled(self):
    """ Test monitorMetric with request for enabled classifier in model
    params """
    metricName = "test-" + uuid.uuid1().hex

    adapter = datasource_adapter_factory.createCustomDatasourceAdapter()

    g_log.info("Creating htmengine custom metric; name=%s", metricName)
    metricId = adapter.createMetric(metricName)
    self.addCleanup(adapter.deleteMetricByName, metricName)

    # Turn on monitoring
    modelSpec = {
      "datasource": "custom",

      "metricSpec": {
        "metric": metricName
      },

      "modelParams": {
        "min": 0,  # optional
        "max": 100,  # optional
        "enableClassifier": True
      }
    }

    adapter.monitorMetric(modelSpec)

    with self.engine.connect() as conn:
      metricObj = repository.getMetric(conn,
                                       metricId,
                                       fields=[schema.metric.c.status,
                                               schema.metric.c.parameters,
                                               schema.metric.c.model_params])
    self.assertEqual(metricObj.status, MetricStatus.CREATE_PENDING)
    self.assertEqual(json.loads(metricObj.parameters), modelSpec)

    self._assertClassifierStatusInModelParams(metricObj.model_params,
                                              classifierEnabled=True)

    self._validateModelSpec(json.loads(metricObj.parameters))

    g_log.info("Waiting for model to become active")
    self.checkModelIsActive(metricId)
    self.checkEncoderResolution(metricId, 0, 100)
Example #36
    def testMonitorMetricWithMinResolution(self):
        """
    Test monitorMetric with user-provided min/max and minResolution
    that activates a model.
    Make sure resolution doesn't drop below minResolution.
    """
        metricName = "test-" + uuid.uuid1().hex

        adapter = datasource_adapter_factory.createCustomDatasourceAdapter()

        g_log.info("Creating htmengine custom metric; name=%s", metricName)
        metricId = adapter.createMetric(metricName)
        self.addCleanup(adapter.deleteMetricByName, metricName)

        # Turn on monitoring
        modelSpec = {
            "datasource": "custom",
            "metricSpec": {
                "metric": metricName
            },
            "modelParams": {
                "min": 0,  # optional
                "max": 1,  # optional
                "minResolution": 0.5  # optional
            }
        }

        adapter.monitorMetric(modelSpec)

        with self.engine.connect() as conn:
            metricObj = repository.getMetric(
                conn,
                metricId,
                fields=[schema.metric.c.status, schema.metric.c.parameters])
        self.assertIn(metricObj.status,
                      (MetricStatus.CREATE_PENDING, MetricStatus.ACTIVE))
        self.assertEqual(json.loads(metricObj.parameters), modelSpec)
        #print metricObj.parameters

        self._validateModelSpec(json.loads(metricObj.parameters))

        g_log.info("Waiting for model to become active")
        self.checkModelIsActive(metricId)
        self.checkEncoderResolution(metricId, 0, 1, minResolution=0.5)
Example #37
  def checkModelDeleted(self, uid):
    """Check that the model has been deleted"""

    engine = repository.engineFactory(config=self.__config)

    with engine.begin() as conn:
      try:
        metric = repository.getMetric(conn, uid)
        raise Exception("Metric not deleted as expected")
      except app_exceptions.ObjectNotFoundError:
        pass

      models = repository.getAllModels(conn)
      for model in models:
        self.assertNotEqual(model.uid, uid,
                            "Model showing up after deletion.")

    with self.assertRaises(model_checkpoint_mgr.ModelNotFound):
      model_checkpoint_mgr.ModelCheckpointMgr().loadModelDefinition(uid)
Example #38
    def checkModelDeleted(self, uid):
        """Check that the model has been deleted"""

        engine = repository.engineFactory(config=self.__config)

        with engine.begin() as conn:
            try:
                metric = repository.getMetric(conn, uid)
                raise Exception("Metric not deleted as expected")
            except app_exceptions.ObjectNotFoundError:
                pass

            models = repository.getAllModels(conn)
            for model in models:
                self.assertNotEqual(model.uid, uid,
                                    "Model showing up after deletion.")

        with self.assertRaises(model_checkpoint_mgr.ModelNotFound):
            model_checkpoint_mgr.ModelCheckpointMgr().loadModelDefinition(uid)
Example #39
  def testCreateMetric(self):
    """ Test creation of custom metric """
    metricName = "test-" + uuid.uuid1().hex

    adapter = datasource_adapter_factory.createCustomDatasourceAdapter()

    g_log.info("Creating htmengine custom metric; name=%s", metricName)
    metricId = adapter.createMetric(metricName)
    self.addCleanup(adapter.deleteMetricByName, metricName)

    with self.engine.connect() as conn:
      metricObj = repository.getMetric(conn,
                                       metricId,
                                       fields=[schema.metric.c.name,
                                               schema.metric.c.datasource,
                                               schema.metric.c.status])

    self.assertEqual(metricObj.name, metricName)
    self.assertEqual(metricObj.datasource, "custom")
    self.assertEqual(metricObj.status, MetricStatus.UNMONITORED)
Example #40
    def testCreateMetric(self):
        """ Test creation of custom metric """
        metricName = "test-" + uuid.uuid1().hex

        adapter = datasource_adapter_factory.createCustomDatasourceAdapter()

        g_log.info("Creating htmengine custom metric; name=%s", metricName)
        metricId = adapter.createMetric(metricName)
        self.addCleanup(adapter.deleteMetricByName, metricName)

        with self.engine.connect() as conn:
            metricObj = repository.getMetric(conn,
                                             metricId,
                                             fields=[
                                                 schema.metric.c.name,
                                                 schema.metric.c.datasource,
                                                 schema.metric.c.status
                                             ])

        self.assertEqual(metricObj.name, metricName)
        self.assertEqual(metricObj.datasource, "custom")
        self.assertEqual(metricObj.status, MetricStatus.UNMONITORED)
Example #41
    def testMonitorMetricWithUserInfo(self):
        """Test monitorMetric that includes an explicit userInfo property in
    metricSpec.
    """
        metricName = "test-" + uuid.uuid1().hex
        userInfo = {"symbol": "test-user-info"}

        adapter = datasource_adapter_factory.createCustomDatasourceAdapter()

        g_log.info("Creating htmengine custom metric; name=%s", metricName)
        metricId = adapter.createMetric(metricName)
        self.addCleanup(adapter.deleteMetricByName, metricName)

        # Turn on monitoring
        modelSpec = {
            "datasource": "custom",
            "metricSpec": {
                "metric": metricName,
                "userInfo": userInfo
            }
        }

        adapter.monitorMetric(modelSpec)

        with self.engine.connect() as conn:
            metricObj = repository.getMetric(conn,
                                             metricId,
                                             fields=[
                                                 schema.metric.c.parameters,
                                                 schema.metric.c.status,
                                                 schema.metric.c.server
                                             ])

        self.assertEqual(metricObj.status, MetricStatus.PENDING_DATA)
        self.assertEqual(json.loads(metricObj.parameters), modelSpec)

        self._validateModelSpec(json.loads(metricObj.parameters))
Example #42
    def testExportImportCompleteModelParams(self):
        metricName = "test-" + uuid.uuid1().hex

        adapter = datasource_adapter_factory.createCustomDatasourceAdapter()

        g_log.info("Creating htmengine custom metric; name=%s", metricName)
        metricId = adapter.createMetric(metricName)
        self.addCleanup(adapter.deleteMetricByName, metricName)

        # Add some data
        # NOTE: we discard the fractional part because it gets eliminated
        # in the database, and we will want to compare against retrieved
        # items later.
        now = datetime.datetime.utcnow().replace(microsecond=0)
        data = [(0, now - datetime.timedelta(minutes=5)), (100, now)]

        with self.engine.connect() as conn:
            repository.addMetricData(conn, metricId, data)

        fileName = "custom_datasource_adapter_test_model_config.json"
        with self._openTestDataFile(fileName) as modelConfigFile:
            modelConfig = json.load(modelConfigFile)

        # Turn on monitoring
        modelSpec = {
            "datasource": "custom",
            "metricSpec": {
                "metric": metricName
            },
            "completeModelParams": {
                "modelConfig": modelConfig,
                "inferenceArgs": {
                    "predictionSteps": [1],
                    "predictedField": "bar",
                    "inputPredictedField": "auto"
                },
                "timestampFieldName": "foo",
                "valueFieldName": "bar"
            }
        }

        adapter.monitorMetric(modelSpec)

        def checkExportSpec(exportSpec):
            self.assertEqual(exportSpec["datasource"], modelSpec["datasource"])
            self.assertEqual(exportSpec["metricSpec"], modelSpec["metricSpec"])
            self.assertSequenceEqual(exportSpec["data"], data)

        # Export
        exportSpec = adapter.exportModel(metricId)
        checkExportSpec(exportSpec)

        # Delete metric
        adapter.deleteMetricByName(metricName)
        self.checkModelDeleted(metricId)

        # Import
        metricId = adapter.importModel(
            htmengine.utils.jsonDecode(htmengine.utils.jsonEncode(exportSpec)))

        with self.engine.connect() as conn:
            metricObj = repository.getMetric(
                conn,
                metricId,
                fields=[schema.metric.c.parameters, schema.metric.c.status])
        self.assertIn(metricObj.status,
                      (MetricStatus.CREATE_PENDING, MetricStatus.ACTIVE))
        self.assertEqual(json.loads(metricObj.parameters), modelSpec)

        self._validateModelSpec(json.loads(metricObj.parameters))

        # Export again
        exportSpec = adapter.exportModel(metricId)
        checkExportSpec(exportSpec)
Example #43
    def testPathwayToDynamoDB(self):
        """ Test metric data pathway to dynamodb
    """

        metricName = "TEST." + "".join(random.sample(string.ascii_letters, 16))

        nativeMetric = {
            "modelParams": {
                "minResolution": 0.2,
                "min": 0.0,
                "max": 10000.0,
            },
            "datasource": "custom",
            "metricSpec": {
                "metric": metricName,
                "resource": "Test",
                "userInfo": {
                    "symbol": "TEST",
                    "metricType": "TwitterVolume",
                    "metricTypeName": "Twitter Volume",
                }
            }
        }
        metricName = nativeMetric["metricSpec"]["metric"]
        instanceName = nativeMetric["metricSpec"]["resource"]
        userInfo = nativeMetric["metricSpec"]["userInfo"]

        now = datetime.datetime.utcnow().replace(minute=0,
                                                 second=0,
                                                 microsecond=0)

        data = [
            (5000.0, now - datetime.timedelta(minutes=10)),
            (6000.0, now - datetime.timedelta(minutes=5)),
            (7000.0, now),
        ]

        # We'll be explicitly deleting the metric below, but we need to add a
        # cleanup step that runs in case there is some other failure that prevents
        # that part of the test from being reached.

        def gracefulDelete():
            try:
                self._deleteMetric(metricName)
            except ObjectNotFoundError:
                pass

        self.addCleanup(gracefulDelete)

        # Add custom metric data
        sock = socket.socket()
        sock.connect(("localhost", self.plaintextPort))
        for metricValue, ts in data:
            sock.sendall(
                "%s %r %s\n" %
                (metricName, metricValue, epochFromNaiveUTCDatetime(ts)))

        self.gracefullyCloseSocket(sock)

        uid = self.checkMetricCreated(metricName)

        # Save the uid for later
        LOGGER.info("Metric %s has uid: %s", metricName, uid)

        # Send model creation request
        model = self._createModel(nativeMetric)
        parameters = json.loads(model.parameters)
        self.assertEqual(parameters["metricSpec"]["userInfo"], userInfo)

        for _ in xrange(60):
            with self.engine.begin() as conn:
                metric = repository.getMetric(conn, uid)

            if metric.status == MetricStatus.ACTIVE:
                break
            LOGGER.info("Model=%s not ready. Sleeping 1 second...", uid)
            time.sleep(1)
        else:
            self.fail("Model results not available within 5 minutes")

        # Check that the data all got processed
        self.checkModelResultsSize(uid, 3)

        # Now check that the data was published to dynamodb...
        dynamodb = DynamoDBService.connectDynamoDB()

        metricTable = Table(MetricDynamoDBDefinition().tableName,
                            connection=dynamodb)
        metricItem = metricTable.lookup(uid)
        self.assertEqual(metricItem["uid"], uid)
        self.assertEqual(metricItem["name"], metricName)
        self.assertEqual(metricItem["metricType"], "TwitterVolume")
        self.assertEqual(metricItem["metricTypeName"], "Twitter Volume")
        self.assertEqual(metricItem["symbol"], "TEST")

        metricDataTable = Table(MetricDataDynamoDBDefinition().tableName,
                                connection=dynamodb)
        instanceDataAnomalyScores = {}
        for metricValue, ts in data:
            metricDataItem = _RETRY_ON_ITEM_NOT_FOUND_DYNAMODB_ERROR(
                metricDataTable.lookup)(uid, ts.isoformat())
            # There is no server-side cleanup for metric data, so remove it here for
            # now to avoid accumulating test data
            self.addCleanup(metricDataItem.delete)
            self.assertEqual(metricValue, metricDataItem["metric_value"])
            dt = datetime.datetime.strptime(metricDataItem["timestamp"],
                                            "%Y-%m-%dT%H:%M:%S")
            self.assertEqual(ts, dt)
            ts = ts.replace(minute=0, second=0, microsecond=0)
            date = ts.strftime("%Y-%m-%d")
            hour = ts.strftime("%H")
            key = (date, hour)
            maxVal = instanceDataAnomalyScores.get(key, 0.0)
            instanceDataAnomalyScores[key] = max(
                maxVal, metricDataItem["anomaly_score"])

        # And check that the aggregated instance data is updated
        instanceDataHourlyTable = Table(
            InstanceDataHourlyDynamoDBDefinition().tableName,
            connection=dynamodb)
        for key, anomalyScore in instanceDataAnomalyScores.iteritems():
            date, hour = key
            instanceDataHourlyItem = _RETRY_ON_ITEM_NOT_FOUND_DYNAMODB_ERROR(
                instanceDataHourlyTable.lookup)(instanceName,
                                                "%sT%s" % (date, hour))
            self.addCleanup(instanceDataHourlyItem.delete)
            self.assertAlmostEqual(
                anomalyScore,
                float(
                    instanceDataHourlyItem["anomaly_score"]["TwitterVolume"]))
            self.assertEqual(date, instanceDataHourlyItem["date"])
            self.assertEqual(hour, instanceDataHourlyItem["hour"])

        # Now send some twitter data and validate that it made it to dynamodb

        twitterData = [{
            "metric_name": metricName,
            "tweet_uid": uid,
            "created_at": "2015-02-19T19:43:24.870109",
            "agg_ts": "2015-02-19T19:43:24.870118",
            "text": "Tweet text",
            "userid": "10",
            "username": "******",
            "retweet_count": "0"
        }]

        with MessageBusConnector() as messageBus:
            messageBus.publishExg(
                exchange=self.config.get("non_metric_data", "exchange_name"),
                routingKey=(
                    self.config.get("non_metric_data", "exchange_name") +
                    ".twitter"),
                body=json.dumps(twitterData))

        metricTweetsTable = Table(MetricTweetsDynamoDBDefinition().tableName,
                                  connection=dynamodb)
        metricTweetItem = metricTweetsTable.lookup(
            "-".join((metricName, uid)), "2015-02-19T19:43:24.870118")
        # There is no server-side cleanup for tweet data, so remove it here for
        # now to avoid accumulating test data
        self.addCleanup(metricTweetItem.delete)
        self.assertEqual(metricTweetItem["username"],
                         twitterData[0]["username"])
        self.assertEqual(metricTweetItem["tweet_uid"],
                         twitterData[0]["tweet_uid"])
        self.assertEqual(metricTweetItem["created_at"],
                         twitterData[0]["created_at"])
        self.assertEqual(metricTweetItem["agg_ts"], twitterData[0]["agg_ts"])
        self.assertEqual(metricTweetItem["text"], twitterData[0]["text"])
        self.assertEqual(metricTweetItem["userid"], twitterData[0]["userid"])
        self.assertEqual(metricTweetItem["username"],
                         twitterData[0]["username"])
        self.assertEqual(metricTweetItem["retweet_count"],
                         twitterData[0]["retweet_count"])

        queryResult = metricTweetsTable.query_2(
            metric_name__eq=metricName,
            agg_ts__eq=twitterData[0]["agg_ts"],
            index="taurus.metric_data-metric_name_index")
        queriedMetricTweetItem = next(queryResult)

        self.assertEqual(queriedMetricTweetItem["username"],
                         twitterData[0]["username"])
        self.assertEqual(queriedMetricTweetItem["tweet_uid"],
                         twitterData[0]["tweet_uid"])
        self.assertEqual(queriedMetricTweetItem["created_at"],
                         twitterData[0]["created_at"])
        self.assertEqual(queriedMetricTweetItem["agg_ts"],
                         twitterData[0]["agg_ts"])
        self.assertEqual(queriedMetricTweetItem["text"],
                         twitterData[0]["text"])
        self.assertEqual(queriedMetricTweetItem["userid"],
                         twitterData[0]["userid"])
        self.assertEqual(queriedMetricTweetItem["username"],
                         twitterData[0]["username"])
        self.assertEqual(queriedMetricTweetItem["retweet_count"],
                         twitterData[0]["retweet_count"])

        # Delete metric and ensure metric is deleted from dynamodb, too
        self._deleteMetric(metricName)

        for _ in xrange(60):
            time.sleep(1)
            try:
                metricTable.lookup(uid)
            except ItemNotFound:
                break
        else:
            self.fail("Metric not deleted from dynamodb")
  def testExportImportCompleteModelParams(self):
    metricName = "test-" + uuid.uuid1().hex

    adapter = datasource_adapter_factory.createCustomDatasourceAdapter()

    g_log.info("Creating htmengine custom metric; name=%s", metricName)
    metricId = adapter.createMetric(metricName)
    self.addCleanup(adapter.deleteMetricByName, metricName)

    # Add some data
    # NOTE: we discard the fractional part because it gets eliminated
    # in the database, and we will want to compare against retrieved
    # items later.
    now = datetime.datetime.utcnow().replace(microsecond=0)
    data = [
      (0, now - datetime.timedelta(minutes=5)),
      (100, now)
    ]

    with self.engine.connect() as conn:
      repository.addMetricData(conn, metricId, data)

    fileName = "custom_datasource_adapter_test_model_config.json"
    with self._openTestDataFile(fileName) as modelConfigFile:
      modelConfig = json.load(modelConfigFile)

    # Turn on monitoring
    modelSpec = {
      "datasource": "custom",
      "metricSpec": {
        "metric": metricName
      },
      "completeModelParams": {
        "modelConfig": modelConfig,
        "inferenceArgs": {"predictionSteps": [1], "predictedField": "bar",
                          "inputPredictedField": "auto"},
        "timestampFieldName": "foo",
        "valueFieldName": "bar"
      }
    }

    adapter.monitorMetric(modelSpec)

    def checkExportSpec(exportSpec):
      self.assertEqual(exportSpec["datasource"], modelSpec["datasource"])
      self.assertEqual(exportSpec["metricSpec"], modelSpec["metricSpec"])
      self.assertSequenceEqual(exportSpec["data"], data)

    # Export
    exportSpec = adapter.exportModel(metricId)
    checkExportSpec(exportSpec)

    # Delete metric
    adapter.deleteMetricByName(metricName)
    self.checkModelDeleted(metricId)

    # Import
    metricId = adapter.importModel(
      htmengine.utils.jsonDecode(htmengine.utils.jsonEncode(exportSpec)))

    with self.engine.connect() as conn:
      metricObj = repository.getMetric(conn,
                                       metricId,
                                       fields=[schema.metric.c.parameters,
                                               schema.metric.c.status])
    self.assertIn(metricObj.status, (MetricStatus.CREATE_PENDING,
                                     MetricStatus.ACTIVE))
    self.assertEqual(json.loads(metricObj.parameters), modelSpec)

    self._validateModelSpec(json.loads(metricObj.parameters))

    # Export again
    exportSpec = adapter.exportModel(metricId)
    checkExportSpec(exportSpec)
def _startModelHelper(conn, metricObj, swarmParams, logger):
  """ Start the model

  :param conn: SQLAlchemy Connection object for executing SQL
  :type conn: sqlalchemy.engine.Connection

  :param metricObj: metric, freshly-loaded
  :type metricObj: sqlalchemy.engine.RowProxy (see repository.getMetric())

  :param swarmParams: non-None swarmParams generated via
    scalar_metric_utils.generateSwarmParams().

  :param logger: logger object

  :returns: True if model was started; False if not

  :raises htmengine.exceptions.ObjectNotFoundError: if the metric doesn't exist;
      this may happen if it got deleted by another process in the meantime.

  :raises htmengine.exceptions.MetricStatusChangedError: if Metric status was
      changed by someone else (most likely another process) before this
      operation could complete
  """
  if swarmParams is None:
    raise ValueError(
      "startModel: 'swarmParams' must be non-None: metric=%s"
      % (metricObj.uid,))

  if metricObj.status not in (MetricStatus.UNMONITORED,
                              MetricStatus.PENDING_DATA):
    if metricObj.status in (MetricStatus.CREATE_PENDING, MetricStatus.ACTIVE):
      return False

    logger.error("Unexpected metric status; metric=%r", metricObj)
    raise ValueError("startModel: unexpected metric status; metric=%r"
                     % (metricObj,))

  startTime = time.time()

  # Save swarm parameters and update metric status
  refStatus = metricObj.status
  repository.updateMetricColumnsForRefStatus(
    conn,
    metricObj.uid,
    refStatus,
    {"status": MetricStatus.CREATE_PENDING,
     "model_params": htmengine.utils.jsonEncode(swarmParams)})

  metricObj = repository.getMetric(conn,
                                   metricObj.uid,
                                   fields=[schema.metric.c.uid,
                                           schema.metric.c.status]) # refresh

  if metricObj.status != MetricStatus.CREATE_PENDING:
    raise app_exceptions.MetricStatusChangedError(
      "startModel: unable to start model=%s; "
      "metric status morphed from %s to %s"
      % (metricObj.uid, refStatus, metricObj.status,))

  # Request to create the CLA model
  try:
    model_swapper_utils.createHTMModel(metricObj.uid, swarmParams)
  except Exception:
    logger.exception("startModel: createHTMModel failed.")
    repository.setMetricStatus(conn,
                               metricObj.uid,
                               status=MetricStatus.ERROR,
                               message=repr(sys.exc_info()[1]))
    raise

  logger.info("startModel: started model=%r; duration=%.4fs",
              metricObj, time.time() - startTime)

  return True
def startMonitoring(conn, metricId, swarmParams, logger):
  """ Start monitoring an UNMONITORED metric.

  NOTE: typically called either inside a transaction and/or with locked tables

  Starts the CLA model if provided non-None swarmParams; otherwise defers model
  creation to a later time and places the metric in MetricStatus.PENDING_DATA
  state.

  :param conn: SQLAlchemy Connection object for executing SQL
  :type conn: sqlalchemy.engine.Connection

  :param metricId: unique identifier of the metric row

  :param swarmParams: swarmParams generated via
    scalar_metric_utils.generateSwarmParams() or None.

  :param logger: logger object

  :returns: True if model was started; False if not

  :raises htmengine.exceptions.ObjectNotFoundError: if metric with the
    referenced metric uid doesn't exist

  :raises htmengine.exceptions.MetricStatusChangedError: if Metric status was
      changed by someone else (most likely another process) before this
      operation could complete
  """
  modelStarted = False

  startTime = time.time()

  metricObj = repository.getMetric(conn, metricId)

  assert metricObj.status == MetricStatus.UNMONITORED, (
    "startMonitoring: metric=%s is already monitored; status=%s" % (
      metricId, metricObj.status,))

  if swarmParams is not None:
    # We have swarmParams, so start the model
    modelStarted = _startModelHelper(conn=conn,
                                     metricObj=metricObj,
                                     swarmParams=swarmParams,
                                     logger=logger)
  else:
    # Put the metric into the PENDING_DATA state until enough data arrives for
    # stats
    refStatus = metricObj.status

    repository.setMetricStatus(conn,
                               metricId,
                               MetricStatus.PENDING_DATA,
                               refStatus=refStatus)
    # refresh
    metricStatus = repository.getMetric(conn,
                                        metricId,
                                        fields=[schema.metric.c.status]).status

    if metricStatus == MetricStatus.PENDING_DATA:
      logger.info("startMonitoring: promoted metric to model in PENDING_DATA; "
                  "metric=%s; duration=%.4fs",
                  metricId, time.time() - startTime)
    else:
      raise app_exceptions.MetricStatusChangedError(
        "startMonitoring: unable to promote metric=%s to model as "
        "PENDING_DATA; metric status morphed from %s to %s"
        % (metricId, refStatus, metricStatus,))

  return modelStarted
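Per its docstring, startMonitoring is normally called inside a transaction with swarmParams produced by scalar_metric_utils.generateSwarmParams(), or with None to defer model creation until data statistics are available. The sketch below is a hypothetical call site under those assumptions; the argument passed to generateSwarmParams and the surrounding variable names (engine, metricId, stats, logger) are placeholders, not part of the documented interface.

# Hypothetical call site for startMonitoring (placeholder names; see note above).
swarmParams = scalar_metric_utils.generateSwarmParams(stats)  # or None to defer model creation
with engine.begin() as conn:
    modelStarted = startMonitoring(conn=conn,
                                   metricId=metricId,
                                   swarmParams=swarmParams,
                                   logger=logger)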
Example #47
0
def _startModelHelper(conn, metricObj, swarmParams, logger):
    """ Start the model

  :param conn: SQLAlchemy Connection object for executing SQL
  :type conn: sqlalchemy.engine.Connection

  :param metricObj: metric, freshly-loaded
  :type metricObj: sqlalchemy.engine.RowProxy (see repository.getMetric())

  :param swarmParams: non-None swarmParams generated via
    scalar_metric_utils.generateSwarmParams().

  :param logger: logger object

  :returns: True if model was started; False if not

  :raises htmengine.exceptions.ObjectNotFoundError: if the metric doesn't exist;
      this may happen if it got deleted by another process in the meantime.

  :raises htmengine.exceptions.MetricStatusChangedError: if Metric status was
      changed by someone else (most likely another process) before this
      operation could complete
  """
    if swarmParams is None:
        raise ValueError(
            "startModel: 'swarmParams' must be non-None: metric=%s" %
            (metricObj.uid, ))

    if metricObj.status not in (MetricStatus.UNMONITORED,
                                MetricStatus.PENDING_DATA):
        if metricObj.status in (MetricStatus.CREATE_PENDING,
                                MetricStatus.ACTIVE):
            return False

        logger.error("Unexpected metric status; metric=%r", metricObj)
        raise ValueError("startModel: unexpected metric status; metric=%r" %
                         (metricObj, ))

    startTime = time.time()

    # Save swarm parameters and update metric status
    refStatus = metricObj.status
    repository.updateMetricColumnsForRefStatus(
        conn, metricObj.uid, refStatus, {
            "status": MetricStatus.CREATE_PENDING,
            "model_params": htmengine.utils.jsonEncode(swarmParams)
        })

    metricObj = repository.getMetric(
        conn,
        metricObj.uid,
        fields=[schema.metric.c.uid, schema.metric.c.status])  # refresh

    if metricObj.status != MetricStatus.CREATE_PENDING:
        raise app_exceptions.MetricStatusChangedError(
            "startModel: unable to start model=%s; "
            "metric status morphed from %s to %s" % (
                metricObj.uid,
                refStatus,
                metricObj.status,
            ))

    # Request to create the CLA model
    try:
        model_swapper_utils.createHTMModel(metricObj.uid, swarmParams)
    except Exception:
        logger.exception("startModel: createHTMModel failed.")
        repository.setMetricStatus(conn,
                                   metricObj.uid,
                                   status=MetricStatus.ERROR,
                                   message=repr(sys.exc_info()[1]))
        raise

    logger.info("startModel: started model=%r; duration=%.4fs", metricObj,
                time.time() - startTime)

    return True
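The refStatus handshake above is an optimistic concurrency check: the column update is applied only while the row still carries the status the caller last observed, and the follow-up getMetric confirms the transition, raising MetricStatusChangedError otherwise. The stripped-down sketch below shows the same compare-and-swap idea in plain SQLAlchemy; the casSetMetricStatus helper is hypothetical, though the table and columns mirror the schema used above.

from sqlalchemy import update

def casSetMetricStatus(conn, uid, expectedStatus, newStatus):
    # Hypothetical helper: update the status only if it still equals expectedStatus.
    result = conn.execute(
        update(schema.metric)
        .where(schema.metric.c.uid == uid)
        .where(schema.metric.c.status == expectedStatus)
        .values(status=newStatus))
    # rowcount == 0 means another process changed the status first
    return result.rowcount == 1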
Example #48
0
  def testPathwayToDynamoDB(self):
    """ Test metric data pathway to dynamodb
    """

    metricName = "TEST." + "".join(random.sample(string.ascii_letters, 16))

    nativeMetric = {
      "modelParams": {
        "minResolution": 0.2,
        "min": 0.0,
        "max": 10000.0,
      },
      "datasource": "custom",
      "metricSpec": {
        "metric": metricName,
        "resource": "Test",
        "userInfo": {
          "symbol": "TEST",
          "metricType": "TwitterVolume",
          "metricTypeName": "Twitter Volume",
        }
      }
    }
    metricName = nativeMetric["metricSpec"]["metric"]
    instanceName = nativeMetric["metricSpec"]["resource"]
    userInfo = nativeMetric["metricSpec"]["userInfo"]

    now = datetime.datetime.utcnow().replace(minute=0, second=0, microsecond=0)

    data = [
      (5000.0, now - datetime.timedelta(minutes=10)),
      (6000.0, now - datetime.timedelta(minutes=5)),
      (7000.0, now),
    ]

    # We'll be explicitly deleting the metric below, but we need to add a
    # cleanup step that runs in case there is some other failure that prevents
    # that part of the test from being reached.

    def gracefulDelete():
      try:
        self._deleteMetric(metricName)
      except ObjectNotFoundError:
        pass

    self.addCleanup(gracefulDelete)

    # Add custom metric data
    sock = socket.socket()
    sock.connect(("localhost", self.plaintextPort))
    for metricValue, ts in data:
      sock.sendall("%s %r %s\n" % (metricName,
                                   metricValue,
                                   epochFromNaiveUTCDatetime(ts)))

    self.gracefullyCloseSocket(sock)

    uid = self.checkMetricCreated(metricName)

    # Save the uid for later
    LOGGER.info("Metric %s has uid: %s", metricName, uid)

    # Send model creation request
    model = self._createModel(nativeMetric)
    parameters = json.loads(model.parameters)
    self.assertEqual(parameters["metricSpec"]["userInfo"], userInfo)

    for _ in xrange(60):
      with self.engine.begin() as conn:
        metric = repository.getMetric(conn, uid)

      if metric.status == MetricStatus.ACTIVE:
        break
      LOGGER.info("Model=%s not ready. Sleeping 1 second...", uid)
      time.sleep(1)
    else:
      self.fail("Model results not available within 5 minutes")

    # Check that the data all got processed
    self.checkModelResultsSize(uid, 3)

    # Now check that the data was published to dynamodb...
    dynamodb = DynamoDBService.connectDynamoDB()

    metricTable = Table(MetricDynamoDBDefinition().tableName,
                        connection=dynamodb)
    metricItem = metricTable.lookup(uid)
    self.assertEqual(metricItem["uid"], uid)
    self.assertEqual(metricItem["name"], metricName)
    self.assertEqual(metricItem["metricType"], "TwitterVolume")
    self.assertEqual(metricItem["metricTypeName"], "Twitter Volume")
    self.assertEqual(metricItem["symbol"], "TEST")

    metricDataTable = Table(MetricDataDynamoDBDefinition().tableName,
                            connection=dynamodb)
    instanceDataAnomalyScores = {}
    for metricValue, ts in data:
      metricDataItem = _RETRY_ON_ITEM_NOT_FOUND_DYNAMODB_ERROR(
        metricDataTable.lookup
      )(uid, ts.isoformat())
      # There is no server-side cleanup for metric data, so remove it here for
      # now to avoid accumulating test data
      self.addCleanup(metricDataItem.delete)
      self.assertEqual(metricValue, metricDataItem["metric_value"])
      dt = datetime.datetime.strptime(metricDataItem["timestamp"],
                                      "%Y-%m-%dT%H:%M:%S")
      self.assertEqual(ts, dt)
      ts = ts.replace(minute=0, second=0, microsecond=0)
      date = ts.strftime("%Y-%m-%d")
      hour = ts.strftime("%H")
      key = (date, hour)
      maxVal = instanceDataAnomalyScores.get(key, 0.0)
      instanceDataAnomalyScores[key] = max(
          maxVal, metricDataItem["anomaly_score"])

    # And check that the aggregated instance data is updated
    instanceDataHourlyTable = Table(
        InstanceDataHourlyDynamoDBDefinition().tableName, connection=dynamodb)
    for key, anomalyScore in instanceDataAnomalyScores.iteritems():
      date, hour = key
      instanceDataHourlyItem = _RETRY_ON_ITEM_NOT_FOUND_DYNAMODB_ERROR(
        instanceDataHourlyTable.lookup
      )(instanceName, "%sT%s" % (date, hour))
      self.addCleanup(instanceDataHourlyItem.delete)
      self.assertAlmostEqual(
          anomalyScore,
          float(instanceDataHourlyItem["anomaly_score"]["TwitterVolume"]))
      self.assertEqual(date, instanceDataHourlyItem["date"])
      self.assertEqual(hour, instanceDataHourlyItem["hour"])

    # Now send some twitter data and validate that it made it to dynamodb

    twitterData = [
      {
        "metric_name": metricName,
        "tweet_uid": uid,
        "created_at": "2015-02-19T19:43:24.870109",
        "agg_ts": "2015-02-19T19:43:24.870118",
        "text": "Tweet text",
        "userid": "10",
        "username": "******",
        "retweet_count": "0"
      }
    ]

    with MessageBusConnector() as messageBus:
      messageBus.publishExg(
        exchange=self.config.get("non_metric_data", "exchange_name"),
        routingKey=(
          self.config.get("non_metric_data", "exchange_name") + ".twitter"),
        body=json.dumps(twitterData)
      )


    metricTweetsTable = Table(MetricTweetsDynamoDBDefinition().tableName,
                              connection=dynamodb)
    for _ in range(30):
      try:
        metricTweetItem = metricTweetsTable.lookup(
          twitterData[0]["text"],
          twitterData[0]["agg_ts"]
        )
        break
      except ItemNotFound:
        # DynamoDB reads are eventually consistent; the item may not be
        # visible yet, so wait and retry
        time.sleep(1)
        continue
    else:
      self.fail("Tweet item not found in dynamodb within ~30 seconds")
    # There is no server-side cleanup for tweet data, so remove it here for
    # now to avoid accumulating test data
    self.addCleanup(metricTweetItem.delete)
    self.assertEqual(metricTweetItem["username"], twitterData[0]["username"])
    self.assertEqual(metricTweetItem["tweet_uid"], twitterData[0]["tweet_uid"])
    self.assertEqual(metricTweetItem["created_at"], twitterData[0]["created_at"])
    self.assertEqual(metricTweetItem["agg_ts"], twitterData[0]["agg_ts"])
    self.assertEqual(metricTweetItem["text"], twitterData[0]["text"])
    self.assertEqual(metricTweetItem["userid"], twitterData[0]["userid"])
    self.assertEqual(metricTweetItem["username"], twitterData[0]["username"])
    self.assertEqual(metricTweetItem["retweet_count"], twitterData[0]["retweet_count"])
    self.assertEqual(metricTweetItem["copy_count"], 0)

    sort_key = twitterData[0]["agg_ts"]

    ts = (epochFromNaiveUTCDatetime(
      datetime.datetime.strptime(twitterData[0]["agg_ts"].partition(".")[0],
                                 "%Y-%m-%dT%H:%M:%S")) * 1e5)
    queryResult = metricTweetsTable.query_2(
      metric_name__eq=metricName,
      sort_key__gte=ts,
      index="taurus.metric_data-metric_name_index")
    queriedMetricTweetItem = next(queryResult)

    self.assertEqual(queriedMetricTweetItem["username"], twitterData[0]["username"])
    self.assertEqual(queriedMetricTweetItem["tweet_uid"], twitterData[0]["tweet_uid"])
    self.assertEqual(queriedMetricTweetItem["created_at"], twitterData[0]["created_at"])
    self.assertEqual(queriedMetricTweetItem["agg_ts"], twitterData[0]["agg_ts"])
    self.assertEqual(queriedMetricTweetItem["text"], twitterData[0]["text"])
    self.assertEqual(queriedMetricTweetItem["userid"], twitterData[0]["userid"])
    self.assertEqual(queriedMetricTweetItem["username"], twitterData[0]["username"])
    self.assertEqual(queriedMetricTweetItem["retweet_count"], twitterData[0]["retweet_count"])
    self.assertEqual(queriedMetricTweetItem["copy_count"], 0)
    self.assertEqual(queriedMetricTweetItem["sort_key"], ts)

    duplicatedTwitterData = [
      {
        "metric_name": "copy of " + metricName,
        "tweet_uid": "copy of " + uid,
        "created_at": "2015-02-19T19:45:24.870109",
        "agg_ts": "2015-02-19T19:43:24.870118", # Same agg_ts!
        "text": "Tweet text", # Same text!
        "userid": "20",
        "username": "******",
        "retweet_count": "0"
      }
    ]

    with MessageBusConnector() as messageBus:
      messageBus.publishExg(
        exchange=self.config.get("non_metric_data", "exchange_name"),
        routingKey=(
          self.config.get("non_metric_data", "exchange_name") + ".twitter"),
        body=json.dumps(duplicatedTwitterData)
      )

    for _ in range(30):
      metricTweetItem = metricTweetsTable.lookup(
        twitterData[0]["text"],
        twitterData[0]["agg_ts"]
      )

      if metricTweetItem["copy_count"] != 1:
        time.sleep(1)
        continue

      # Assert same as original, except for copy_count, which should be 1

      self.assertEqual(metricTweetItem["username"], twitterData[0]["username"])
      self.assertEqual(metricTweetItem["tweet_uid"], twitterData[0]["tweet_uid"])
      self.assertEqual(metricTweetItem["created_at"], twitterData[0]["created_at"])
      self.assertEqual(metricTweetItem["agg_ts"], twitterData[0]["agg_ts"])
      self.assertEqual(metricTweetItem["text"], twitterData[0]["text"])
      self.assertEqual(metricTweetItem["userid"], twitterData[0]["userid"])
      self.assertEqual(metricTweetItem["username"], twitterData[0]["username"])
      self.assertEqual(metricTweetItem["retweet_count"], twitterData[0]["retweet_count"])
      self.assertEqual(metricTweetItem["sort_key"], ts + 1)

      break
    else:
      self.fail("copy_count of original tweet not updated within reasonable"
                " amount of time (~30s) for duplicated tweet.")

    # Delete metric and ensure metric is deleted from dynamodb, too
    self._deleteMetric(metricName)

    for _ in xrange(60):
      time.sleep(1)
      try:
        metricTable.lookup(uid)
      except ItemNotFound:
        break
    else:
      self.fail("Metric not deleted from dynamodb")
Example #49
0
  def _processModelInferenceResults(self, inferenceResults, metricID):
    """
    Process a batch of model inference results

    Store the updated MetricData and anomaly likelihood parameters in the
    database.

    A row's anomaly_score value will be set to and remain at 0 in the
    first self._statisticsMinSampleSize rows; once we get enough inference
    results to create an anomaly likelihood model, anomaly_score will be
    computed on the subsequent rows.

    :param inferenceResults: a sequence of ModelInferenceResult instances in the
      processed order (ascending by timestamp)

    :param metricID: metric/model ID of the model that emitted the results

    :returns: None if the batch was rejected; otherwise a pair:
      (metric, metricDataRows)
        metric: Metric RowProxy instance corresponding to the given metricID
        metricDataRows: a sequence of MutableMetricDataRow instances
          corresponding to the updated metric_data rows.
      TODO: unit-test return value
    :rtype: None or tuple

    *NOTE:*
      the processing must be idempotent due to the "at least once" delivery
      semantics of the message bus

    *NOTE:*
      the performance goal is to minimize costly database access and avoid
      falling behind while processing model results, especially during the
      model's initial "catch-up" phase when large inference result batches are
      prevalent.
    """
    engine = repository.engineFactory(config)

    # Validate model ID
    try:
      with engine.connect() as conn:
        metricObj = repository.getMetric(conn, metricID)
    except ObjectNotFoundError:
      # Ignore inferences for unknown models. Typically, this is the result
      # of a deleted model. Another scenario where this might occur is when a
      # developer resets the db while there are result messages still on the
      # message bus. It would be an error if this were to occur in a
      # production environment.
      self._log.warning("Received inference results for unknown model=%s; "
                        "(model deleted?)", metricID, exc_info=True)
      return None

    # Reject the results if model is in non-ACTIVE state (e.g., if HTM Metric
    # was unmonitored after the results were generated)
    if metricObj.status != MetricStatus.ACTIVE:
      self._log.warning("Received inference results for a non-ACTIVE "
                        "model=%s; metric=<%s>; (metric unmonitored?)",
                        metricID, getMetricLogPrefix(metricObj))
      return None

    # Load the MetricData instances corresponding to the results
    with engine.connect() as conn:
      metricDataRows = repository.getMetricData(conn,
                                                metricID,
                                                start=inferenceResults[0].rowID,
                                                stop=inferenceResults[-1].rowID)

    # metricDataRows must be mutable, as the data is massaged in
    # _scrubInferenceResultsAndInitMetricData()
    metricDataRows = list(metricDataRows)

    if not metricDataRows:
      self._log.error("Rejected inference result batch=[%s..%s] of model=%s "
                      "due to no matching metric_data rows",
                      inferenceResults[0].rowID, inferenceResults[-1].rowID,
                      metricID)
      return None

    try:
      self._scrubInferenceResultsAndInitMetricData(
        engine=engine,
        inferenceResults=inferenceResults,
        metricDataRows=metricDataRows,
        metricObj=metricObj)
    except RejectedInferenceResultBatch as e:
      # TODO: unit-test
      self._log.error(
        "Rejected inference result batch=[%s..%s] corresponding to "
        "rows=[%s..%s] of model=%s due to error=%r",
        inferenceResults[0].rowID, inferenceResults[-1].rowID,
        metricDataRows[0].rowid, metricDataRows[-1].rowid, metricID, e)
      return None

    # Update anomaly scores based on the new results
    anomalyLikelihoodParams = (
      self.likelihoodHelper.updateModelAnomalyScores(
        engine=engine,
        metricObj=metricObj,
        metricDataRows=metricDataRows))

    # Update metric data rows with rescaled display values
    # NOTE: doing this outside the updateColumns loop to avoid holding row locks
    #  any longer than necessary
    for metricData in metricDataRows:
      metricData.display_value = rescaleForDisplay(
        metricData.anomaly_score,
        active=(metricObj.status == MetricStatus.ACTIVE))

    # Update database once via transaction!
    startTime = time.time()
    try:
      @retryOnTransientErrors
      def runSQL(engine):
        with engine.begin() as conn:
          for metricData in metricDataRows:
            fields = {"raw_anomaly_score": metricData.raw_anomaly_score,
                      "anomaly_score": metricData.anomaly_score,
                      "display_value": metricData.display_value,
                      "multi_step_best_predictions":
                        json.dumps(metricData.multi_step_best_predictions)}
            repository.updateMetricDataColumns(conn, metricData, fields)

          self._updateAnomalyLikelihoodParams(
            conn,
            metricObj.uid,
            metricObj.model_params,
            anomalyLikelihoodParams)

      runSQL(engine)
    except (ObjectNotFoundError, MetricNotActiveError):
      self._log.warning("Rejected inference result batch=[%s..%s] of model=%s",
                        inferenceResults[0].rowID, inferenceResults[-1].rowID,
                        metricID, exc_info=True)
      return None

    self._log.debug("Updated HTM metric_data rows=[%s..%s] "
                    "of model=%s: duration=%ss",
                    metricDataRows[0].rowid, metricDataRows[-1].rowid,
                    metricID, time.time() - startTime)

    return (metricObj, metricDataRows,)
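The write path above is the part worth copying: every metric_data column update plus the anomaly-likelihood parameter update is grouped into a single engine.begin() transaction and wrapped with retryOnTransientErrors, so a transient database failure retries the whole batch and a redelivered result message stays harmless because absolute values are rewritten rather than incremented. A stripped-down sketch of that pattern follows; the helper name and field selection are illustrative.

# Stripped-down sketch of the single-transaction batch update used above.
@retryOnTransientErrors
def writeResultBatch(engine, metricDataRows):
  with engine.begin() as conn:
    for metricData in metricDataRows:
      # Absolute values (not increments) keep reprocessing idempotent
      repository.updateMetricDataColumns(
        conn,
        metricData,
        {"raw_anomaly_score": metricData.raw_anomaly_score,
         "anomaly_score": metricData.anomaly_score,
         "display_value": metricData.display_value})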
Example #50
0
    def _processModelInferenceResults(self, inferenceResults, metricID):
        """
    Process a batch of model inference results

    Store the updated MetricData and anomaly likelihood parameters in the
    database.

    A row's anomaly_score value will be set to and remain at 0 in the
    first self._statisticsMinSampleSize rows; once we get enough inference
    results to create an anomaly likelihood model, anomaly_score will be
    computed on the subsequent rows.

    :param inferenceResults: a sequence of ModelInferenceResult instances in the
      processed order (ascending by timestamp)

    :param metricID: metric/model ID of the model that emitted the results

    :returns: None if the batch was rejected; otherwise a pair:
      (metric, metricDataRows)
        metric: Metric RowProxy instance corresponding to the given metricID
        metricDataRows: a sequence of MutableMetricDataRow instances
          corresponding to the updated metric_data rows.
      TODO: unit-test return value
    :rtype: None or tuple

    *NOTE:*
      the processing must be idempotent due to the "at least once" delivery
      semantics of the message bus

    *NOTE:*
      the performance goal is to minimize costly database access and avoid
      falling behind while processing model results, especially during the
      model's initial "catch-up" phase when large inference result batches are
      prevalent.
    """
        engine = repository.engineFactory(config)

        # Validate model ID
        try:
            with engine.connect() as conn:
                metricObj = repository.getMetric(conn, metricID)
        except ObjectNotFoundError:
            # Ignore inferences for unknown models. Typically, this is the result
            # of a deleted model. Another scenario where this might occur is when a
            # developer resets the db while there are result messages still on the
            # message bus. It would be an error if this were to occur in a
            # production environment.
            self._log.warning(
                "Received inference results for unknown model=%s; "
                "(model deleted?)",
                metricID,
                exc_info=True)
            return None

        # Reject the results if model is in non-ACTIVE state (e.g., if HTM Metric
        # was unmonitored after the results were generated)
        if metricObj.status != MetricStatus.ACTIVE:
            self._log.warning(
                "Received inference results for a non-ACTIVE "
                "model=%s; metric=<%s>; (metric unmonitored?)", metricID,
                getMetricLogPrefix(metricObj))
            return None

        # Load the MetricData instances corresponding to the results
        with engine.connect() as conn:
            metricDataRows = repository.getMetricData(
                conn,
                metricID,
                start=inferenceResults[0].rowID,
                stop=inferenceResults[-1].rowID)

        # metricDataRows must be mutable, as the data is massaged in
        # _scrubInferenceResultsAndInitMetricData()
        metricDataRows = list(metricDataRows)

        if not metricDataRows:
            self._log.error(
                "Rejected inference result batch=[%s..%s] of model=%s "
                "due to no matching metric_data rows",
                inferenceResults[0].rowID, inferenceResults[-1].rowID,
                metricID)
            return None

        try:
            self._scrubInferenceResultsAndInitMetricData(
                engine=engine,
                inferenceResults=inferenceResults,
                metricDataRows=metricDataRows,
                metricObj=metricObj)
        except RejectedInferenceResultBatch as e:
            # TODO: unit-test
            self._log.error(
                "Rejected inference result batch=[%s..%s] corresponding to "
                "rows=[%s..%s] of model=%s due to error=%r",
                inferenceResults[0].rowID, inferenceResults[-1].rowID,
                metricDataRows[0].rowid, metricDataRows[-1].rowid, metricID, e)
            return None

        # Update anomaly scores based on the new results
        anomalyLikelihoodParams = (
            self.likelihoodHelper.updateModelAnomalyScores(
                engine=engine,
                metricObj=metricObj,
                metricDataRows=metricDataRows))

        # Update metric data rows with rescaled display values
        # NOTE: doing this outside the updateColumns loop to avoid holding row locks
        #  any longer than necessary
        for metricData in metricDataRows:
            metricData.display_value = rescaleForDisplay(
                metricData.anomaly_score,
                active=(metricObj.status == MetricStatus.ACTIVE))

        # Update database once via transaction!
        startTime = time.time()
        try:

            @retryOnTransientErrors
            def runSQL(engine):
                with engine.begin() as conn:
                    for metricData in metricDataRows:
                        fields = {
                            "raw_anomaly_score": metricData.raw_anomaly_score,
                            "anomaly_score": metricData.anomaly_score,
                            "display_value": metricData.display_value
                        }
                        repository.updateMetricDataColumns(
                            conn, metricData, fields)

                    self._updateAnomalyLikelihoodParams(
                        conn, metricObj.uid, metricObj.model_params,
                        anomalyLikelihoodParams)

            runSQL(engine)
        except (ObjectNotFoundError, MetricNotActiveError):
            self._log.warning(
                "Rejected inference result batch=[%s..%s] of model=%s",
                inferenceResults[0].rowID,
                inferenceResults[-1].rowID,
                metricID,
                exc_info=True)
            return None

        self._log.debug(
            "Updated HTM metric_data rows=[%s..%s] "
            "of model=%s: duration=%ss", metricDataRows[0].rowid,
            metricDataRows[-1].rowid, metricID,
            time.time() - startTime)

        return (
            metricObj,
            metricDataRows,
        )
Example #51
0
def startMonitoring(conn, metricId, swarmParams, logger):
    """ Start monitoring an UNMONITORED metric.

  NOTE: typically called either inside a transaction and/or with locked tables

  Starts the CLA model if provided non-None swarmParams; otherwise defers model
  creation to a later time and places the metric in MetricStatus.PENDING_DATA
  state.

  :param conn: SQLAlchemy Connection object for executing SQL
  :type conn: sqlalchemy.engine.Connection

  :param metricId: unique identifier of the metric row

  :param swarmParams: swarmParams generated via
    scalar_metric_utils.generateSwarmParams() or None.

  :param logger: logger object

  :returns: True if model was started; False if not

  :raises htmengine.exceptions.ObjectNotFoundError: if metric with the
    referenced metric uid doesn't exist

  :raises htmengine.exceptions.MetricStatusChangedError: if Metric status was
      changed by someone else (most likely another process) before this
      operation could complete
  """
    modelStarted = False

    startTime = time.time()

    metricObj = repository.getMetric(conn, metricId)

    assert metricObj.status == MetricStatus.UNMONITORED, (
        "startMonitoring: metric=%s is already monitored; status=%s" % (
            metricId,
            metricObj.status,
        ))

    if swarmParams is not None:
        # We have swarmParams, so start the model
        modelStarted = _startModelHelper(conn=conn,
                                         metricObj=metricObj,
                                         swarmParams=swarmParams,
                                         logger=logger)
    else:
        # Put the metric into the PENDING_DATA state until enough data arrives for
        # stats
        refStatus = metricObj.status

        repository.setMetricStatus(conn,
                                   metricId,
                                   MetricStatus.PENDING_DATA,
                                   refStatus=refStatus)
        # refresh
        metricStatus = repository.getMetric(conn,
                                            metricId,
                                            fields=[schema.metric.c.status
                                                    ]).status

        if metricStatus == MetricStatus.PENDING_DATA:
            logger.info(
                "startMonitoring: promoted metric to model in PENDING_DATA; "
                "metric=%s; duration=%.4fs", metricId,
                time.time() - startTime)
        else:
            raise app_exceptions.MetricStatusChangedError(
                "startMonitoring: unable to promote metric=%s to model as "
                "PENDING_DATA; metric status morphed from %s to %s" % (
                    metricId,
                    refStatus,
                    metricStatus,
                ))

    return modelStarted
  def testExportImport(self):
    metricName = "test-" + uuid.uuid1().hex

    adapter = datasource_adapter_factory.createCustomDatasourceAdapter()

    g_log.info("Creating htmengine custom metric; name=%s", metricName)
    metricId = adapter.createMetric(metricName)
    self.addCleanup(adapter.deleteMetricByName, metricName)

    # Add some data
    # NOTE: we discard the fractional part because it gets eliminated
    # in the database, and we will want to compare against retrieved
    # items later.
    now = datetime.datetime.utcnow().replace(microsecond=0)
    data = [
      (0, now - datetime.timedelta(minutes=5)),
      (100, now)
    ]

    with self.engine.connect() as conn:
      repository.addMetricData(conn, metricId, data)

    # Turn on monitoring
    modelSpec = {
      "datasource": "custom",

      "metricSpec": {
        "metric": metricName
      },
    }

    adapter.monitorMetric(modelSpec)

    def checkExportSpec(exportSpec):
      self.assertEqual(exportSpec["datasource"], modelSpec["datasource"])
      self.assertEqual(exportSpec["metricSpec"], modelSpec["metricSpec"])
      self.assertSequenceEqual(exportSpec["data"], data)

    # Export
    exportSpec = adapter.exportModel(metricId)
    checkExportSpec(exportSpec)

    # Delete metric
    adapter.deleteMetricByName(metricName)
    self.checkModelDeleted(metricId)

    # Import
    metricId = adapter.importModel(
      htmengine.utils.jsonDecode(htmengine.utils.jsonEncode(exportSpec)))

    with self.engine.connect() as conn:
      metricObj = repository.getMetric(conn,
                                       metricId,
                                       fields=[schema.metric.c.parameters,
                                               schema.metric.c.status])
    self.assertEqual(metricObj.status, MetricStatus.PENDING_DATA)
    self.assertEqual(json.loads(metricObj.parameters), modelSpec)

    self._validateModelSpec(json.loads(metricObj.parameters))

    # Export again
    exportSpec = adapter.exportModel(metricId)
    checkExportSpec(exportSpec)
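For contrast with the completeModelParams test earlier, the minimal modelSpec used here names only the datasource and the metric, so the imported metric sits in PENDING_DATA until enough samples arrive for statistics. The side-by-side sketch below summarizes the two shapes exercised by these tests; the completeModelParams fields mirror the earlier test and are not an exhaustive schema.

# Side-by-side sketch of the two modelSpec shapes used in these tests.
minimalModelSpec = {
    "datasource": "custom",
    "metricSpec": {"metric": "my.metric"},  # model params are derived once data arrives
}

completeModelSpec = {
    "datasource": "custom",
    "metricSpec": {"metric": "my.metric"},
    "completeModelParams": {
        "modelConfig": {},  # placeholder; the tests load this from a JSON file
        "inferenceArgs": {"predictionSteps": [1],
                          "predictedField": "bar",
                          "inputPredictedField": "auto"},
        "timestampFieldName": "foo",
        "valueFieldName": "bar",
    },
}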