コード例 #1
0
  def start(self):
    """Apply the config patch and create the temporary repository database.

    If any step fails, ``self.stop()`` is called to clean up and the original
    exception is re-raised.
    """
    # Discard a possibly left-over cached engine (needed when a non-patched
    # engine was in use before this patch is started)
    repository.engineFactory(reset=True)

    try:
      # Override the Repository database name
      self._configPatch.start()
      self._configPatchApplied = True

      # Verify that the database doesn't exist yet
      namesBeforeCreate = getAllDatabaseNames()
      assert self.tempDatabaseName not in namesBeforeCreate, (
        "Temp repo db=%s already existed" % (self.tempDatabaseName,))

      # Record the attempt before acting on it (presumably consulted by
      # stop() during cleanup -- TODO confirm), then create the database
      self._attemptedToCreateRepository = True
      repository.reset()

      # Verify that the temporary repository database got created
      namesAfterCreate = getAllDatabaseNames()
      assert self.tempDatabaseName in namesAfterCreate, (
        "Temp repo db=%s not found" % (self.tempDatabaseName,))
    except:
      # Attempt to clean up, then propagate the original error
      self.stop()
      raise
コード例 #2
0
    def testMetricDataForRandomRowID(uid):
      '''
        Cross-checks the rows returned by the GET call :
          _models/<uid>/data
        against the rows stored in the database for the same uid.
        Five randomly chosen row ids are compared per call.

        Algorithm :
        - Issue the GET call for the uid and check that it succeeded
        - Look up the last row id of the metric
        - Five times: pick a random row id, fetch that row from the
          database, find the row with the same id in the response and
          compare anomaly score and timestamp
      '''
      timestampFormat = '%Y-%m-%d %H:%M:%S'

      response = self.app.get("/%s/data" %uid, headers=self.headers)
      assertions.assertSuccess(self, response)
      decodedResponse = utils.jsonDecode(response.body)

      with repository.engineFactory().connect() as conn:
        metricRow = repository.getMetric(conn, uid)
        lastRowID = metricRow.last_rowid

      for _ in range(5):
        sampledRowID = randrange(1, lastRowID)
        with repository.engineFactory().connect() as conn:
          matchingRows = repository.getMetricData(conn, uid, rowid=sampledRowID)
          dbRow = matchingRows.first()
        apiRow = getMetricDataWithRowID(decodedResponse['data'], sampledRowID)
        self.assertEqual(apiRow[2], dbRow.anomaly_score)
        apiTimestamp = datetime.strptime(apiRow[0], timestampFormat)
        self.assertEqual(apiTimestamp, dbRow.timestamp)
コード例 #3
0
ファイル: models_api.py プロジェクト: bopopescu/what
  def GET(self):
    """
    Report the estimated processing time for the unprocessed model data

    ::

        GET /_models/data/stats

    Returns:

    ::

        {
            "processing_time_remaining": 37
        }
    """
    engine = repository.engineFactory()
    with engine.connect() as conn:
      pendingRecordCount = repository.getUnprocessedModelDataCount(conn)

    # Scale the backlog by the per-record cost and round up to a whole number
    estimate = math.ceil(pendingRecordCount * _PROCESSING_TIME_PER_RECORD)
    payload = {"processing_time_remaining": int(estimate)}

    self.addStandardHeaders()
    return utils.jsonEncode(payload)
コード例 #4
0
ファイル: autostacks_api.py プロジェクト: bopopescu/what
  def GET(self, autostackId, *args): # pylint: disable=C0103,W0613
    """
      Return the metrics associated with an autostack as a JSON list

      ::

          GET /_autostacks/{autostackId}/metrics

      NOTE: args is ignored.  Function signature for all method handlers must
      be compatible with the regexp pattern that matches.  POST optionally
      takes a second argument, DELETE requires it.

      An ObjectNotFoundError is reported as "not found"; any other
      non-HTTP exception is reported as an internal error.
    """
    try:
      self.addStandardHeaders()
      engine = repository.engineFactory()
      displayFields = getMetricDisplayFields(engine)
      metricRows = repository.getAutostackMetrics(engine,
                                                  autostackId,
                                                  displayFields)
      metricsList = []
      for row in metricRows:
        metricsList.append(convertMetricRowToMetricDict(row))

      return utils.jsonEncode(metricsList)

    except ObjectNotFoundError:
      raise web.notfound("Autostack not found: Autostack ID: %s" % autostackId)
    except web.HTTPError as ex:
      # Log 400-599 status codes as errors, ignoring 200-399
      isErrorStatus = re.match(r"([45][0-9][0-9])\s?", web.ctx.status)
      if isErrorStatus is not None:
        log.error(str(ex) or repr(ex))
      raise
    except Exception as ex:
      raise web.internalerror(str(ex) or repr(ex))
コード例 #5
0
    def testMetricDataTimeStampQueryParams(uid):
      '''
        Exercises the MetricDataHandler GET call with from and to params :
          _models/<uid>/data?from=<>&to=<>
        using the earliest and latest stored timestamps of the uid and
        checks that every returned row lies within that range.
      '''
      def fetchBoundaryRows(conn, direction):
        # Single row for the uid at one end of the timestamp ordering
        query = (sql.select([schema.metric_data])
                 .where(schema.metric_data.c.uid == uid)
                 .order_by(direction(schema.metric_data.c.timestamp))
                 .limit(1))
        return conn.execute(query).fetchall()

      with repository.engineFactory().connect() as conn:
        earliestRows = fetchBoundaryRows(conn, sql.expression.asc)
        latestRows = fetchBoundaryRows(conn, sql.expression.desc)

      firstTimeStamp = earliestRows[0].timestamp
      lastTimeStamp = latestRows[0].timestamp

      url = "/%s/data?from=%s&to=%s" % (uid, firstTimeStamp, lastTimeStamp)
      response = self.app.get(url, headers=self.headers)
      assertions.assertSuccess(self, response)

      decodedResponse = utils.jsonDecode(response.body)
      for apiRow in decodedResponse['data']:
        rowTimeStamp = datetime.strptime(apiRow[0], '%Y-%m-%d %H:%M:%S')
        self.assertGreaterEqual(rowTimeStamp, firstTimeStamp)
        self.assertLessEqual(rowTimeStamp, lastTimeStamp)
コード例 #6
0
 def testMetricDataQueryParams(uid):
   '''
     Exercises the MetricDataHandler GET call with several params at once :
       _models/<uid>/data?from=<>&to=<>&anomaly=<>
     The bounds are taken from the earliest and latest rows of the uid whose
     anomaly score differs from zero by more than 1e-5.
   '''
   earliestQuery = (
     "SELECT * FROM `metric_data` WHERE `uid`='%s' "
     "and abs(`anomaly_score` - 0) > 1e-5 "
     "ORDER BY `timestamp` ASC LIMIT 1") % uid
   latestQuery = (
     "SELECT * FROM `metric_data` WHERE `uid`='%s' "
     "and abs(`anomaly_score` - 0) > 1e-5 "
     "ORDER BY `timestamp` DESC LIMIT 1") % uid

   with repository.engineFactory().connect() as conn:
     earliestRows = conn.execute(earliestQuery).fetchall()
     latestRows = conn.execute(latestQuery).fetchall()

   earliestRow = earliestRows[0]
   firstTimeStamp = earliestRow.timestamp
   lastTimeStamp = latestRows[0].timestamp
   anomalyScore = earliestRow.anomaly_score

   url = ("/%s/data?from=%s&to=%s&anomaly=%s"
          % (uid, firstTimeStamp, lastTimeStamp, anomalyScore))
   response = self.app.get(url, headers=self.headers)
   assertions.assertSuccess(self, response)

   decodedResponse = utils.jsonDecode(response.body)
   for apiRow in decodedResponse['data']:
     self.assertGreaterEqual(apiRow[2], anomalyScore)
     rowTimeStamp = datetime.strptime(apiRow[0], '%Y-%m-%d %H:%M:%S')
     self.assertGreaterEqual(rowTimeStamp, firstTimeStamp)
     self.assertLessEqual(rowTimeStamp, lastTimeStamp)
コード例 #7
0
    def checkModelIsActive(self, uid):
        """Assert that the metric identified by ``uid`` has status ACTIVE."""
        # Only the status column is needed for this check
        wantedFields = [schema.metric.c.status]
        with repository.engineFactory().begin() as conn:
            metricRow = repository.getMetric(conn, uid, fields=wantedFields)

        self.assertEqual(metricRow.status, MetricStatus.ACTIVE)
コード例 #8
0
ファイル: test_case_base.py プロジェクト: darian19/what
  def checkModelIsActive(self, uid):
    """Assert that the metric identified by ``uid`` has status ACTIVE."""
    # Only the status column is needed for this check
    wantedFields = [schema.metric.c.status]
    with repository.engineFactory().begin() as conn:
      metricRow = repository.getMetric(conn, uid, fields=wantedFields)

    self.assertEqual(metricRow.status, MetricStatus.ACTIVE)
コード例 #9
0
ファイル: models_api_test.py プロジェクト: bopopescu/what
  def tearDownClass(cls):
    """Delete the metric ``cls.uid`` and log whether it is really gone.

    The clean-up counts as successful when ObjectNotFoundError is raised
    while deleting or while looking the metric up afterwards.
    """
    metricIsGone = False
    try:
      engine = repository.engineFactory()
      with engine.connect() as conn:
        repository.deleteMetric(conn, cls.uid)

      # Looking the metric up again is expected to raise ObjectNotFoundError
      with engine.connect() as conn:
        repository.getMetric(conn, cls.uid)
    except ObjectNotFoundError:
      metricIsGone = True

    if metricIsGone:
      g_logger.info("Successful clean-up")
    else:
      g_logger.error("Test failed to delete metric=%s", cls.uid)
コード例 #10
0
ファイル: models_api_test.py プロジェクト: darian19/what
    def tearDownClass(cls):
        """Delete the metric ``cls.uid`` and log whether it is really gone.

        The clean-up counts as successful when ObjectNotFoundError is raised
        while deleting or while looking the metric up afterwards.
        """
        metricIsGone = False
        try:
            engine = repository.engineFactory()
            with engine.connect() as conn:
                repository.deleteMetric(conn, cls.uid)

            # Looking the metric up again is expected to raise
            # ObjectNotFoundError
            with engine.connect() as conn:
                repository.getMetric(conn, cls.uid)
        except ObjectNotFoundError:
            metricIsGone = True

        if metricIsGone:
            g_logger.info("Successful clean-up")
        else:
            g_logger.error("Test failed to delete metric=%s", cls.uid)
コード例 #11
0
    def checkStats(self, metricName, mn, mx):
        """Assert that the stored stats of a custom metric match ``mn``/``mx``.

        The metric is looked up by ``metricName``; its stats must consist of
        exactly the keys "min" and "max", approximately equal to ``mn`` and
        ``mx`` respectively.
        """
        wantedFields = [schema.metric.c.uid, schema.metric.c.parameters]
        with repository.engineFactory().begin() as conn:
            metricRow = repository.getCustomMetricByName(
                conn, metricName, fields=wantedFields)
            stats = repository.getMetricStats(conn, metricRow.uid)

        self.assertSetEqual(set(stats.keys()), set(["min", "max"]))
        for key, expected in (("min", mn), ("max", mx)):
            self.assertAlmostEqual(stats[key], expected)
コード例 #12
0
    def checkMetricUnmonitoredById(self, uid):
        """Assert that the metric ``uid`` is unmonitored and has no model.

        Checks that the status is UNMONITORED, that no parameters are stored
        and that loading the model definition raises ModelNotFound.
        """
        wantedFields = [schema.metric.c.status, schema.metric.c.parameters]
        with repository.engineFactory().begin() as conn:
            metricRow = repository.getMetric(conn, uid, fields=wantedFields)

        self.assertEqual(metricRow.status, MetricStatus.UNMONITORED)
        self.assertIsNone(metricRow.parameters)

        checkpointMgr = model_checkpoint_mgr.ModelCheckpointMgr
        with self.assertRaises(model_checkpoint_mgr.ModelNotFound):
            checkpointMgr().loadModelDefinition(uid)
コード例 #13
0
ファイル: test_case_base.py プロジェクト: darian19/what
  def checkMetricUnmonitoredById(self, uid):
    """Assert that the metric ``uid`` is unmonitored and has no model.

    Checks that the status is UNMONITORED, that no parameters are stored and
    that loading the model definition raises ModelNotFound.
    """
    wantedFields = [schema.metric.c.status, schema.metric.c.parameters]
    with repository.engineFactory().begin() as conn:
      metricRow = repository.getMetric(conn, uid, fields=wantedFields)

    self.assertEqual(metricRow.status, MetricStatus.UNMONITORED)
    self.assertIsNone(metricRow.parameters)

    checkpointMgr = model_checkpoint_mgr.ModelCheckpointMgr
    with self.assertRaises(model_checkpoint_mgr.ModelNotFound):
      checkpointMgr().loadModelDefinition(uid)
コード例 #14
0
ファイル: test_case_base.py プロジェクト: darian19/what
  def checkStats(self, metricName, mn, mx):
    """Assert that the stored stats of a custom metric match ``mn``/``mx``.

    The metric is looked up by ``metricName``; its stats must consist of
    exactly the keys "min" and "max", approximately equal to ``mn`` and
    ``mx`` respectively.
    """
    wantedFields = [schema.metric.c.uid, schema.metric.c.parameters]
    with repository.engineFactory().begin() as conn:
      metricRow = repository.getCustomMetricByName(conn,
                                                   metricName,
                                                   fields=wantedFields)
      stats = repository.getMetricStats(conn, metricRow.uid)

    self.assertSetEqual(set(stats.keys()), set(["min", "max"]))
    for key, expected in (("min", mn), ("max", mx)):
      self.assertAlmostEqual(stats[key], expected)
コード例 #15
0
 def testMetricDataAnomalyAsQueryParams(uid):
   '''
     Exercises the MetricDataHandler GET call with the anomaly param :
       _models/<uid>/data?anomaly=<>
     The threshold is the anomaly score of one stored row of the uid whose
     score differs from zero by more than 1e-5; every returned row must
     have a score of at least that threshold.
   '''
   sampleQuery = ("SELECT * FROM metric_data WHERE uid='%s' "
                  "   and abs(anomaly_score - 0) > 1e-5 LIMIT 1") % uid
   with repository.engineFactory().connect() as conn:
     sampleRow = conn.execute(sampleQuery).first()
   anomalyScore = sampleRow.anomaly_score

   url = "/%s/data?anomaly=%s" % (uid, anomalyScore)
   response = self.app.get(url, headers=self.headers)
   assertions.assertSuccess(self, response)

   decodedResponse = utils.jsonDecode(response.body)
   for apiRow in decodedResponse['data']:
     self.assertGreaterEqual(apiRow[2], anomalyScore)
コード例 #16
0
ファイル: test_mysql.py プロジェクト: darian19/what
  def testTablesCreatedWithInnoDBEngine(self):
    """
    Checks that every table in the YOMP table_schema was created with the
    InnoDB engine, which is needed to preserve referential integrity.

    All tables in the DB are checked for now; if referential integrity stops
    being a requirement for some tables, those could be explicitly
    whitelisted to use `MyISAM` or another engine.
    """
    tableEnginesQuery = ("SELECT table_name, engine "
                         "FROM information_schema.tables "
                         "WHERE table_schema = 'YOMP'")
    tableRows = repository.engineFactory().execute(tableEnginesQuery)

    for tableRow in tableRows:
      actualEngine = tableRow.engine
      failureMessage = ("Table %s was created with the wrong engine type"
                        % tableRow["table_name"])
      self.assertEqual(actualEngine, "InnoDB", failureMessage)
コード例 #17
0
    def _runBasicChecksOnModel(self, modelId, _adapter, modelSpec):
        """Run the common sanity checks on a freshly created model.

        Verifies the stored status and parameters of the metric ``modelId``
        against ``modelSpec``, then checks that the model is active and has
        at least one result. ``_adapter`` is not used.
        """
        engine = repository.engineFactory()
        with engine.connect() as conn:
            metricRow = repository.getMetric(conn, modelId)

        _LOG.info(
            "Making sure metric is CREATE_PENDING or ACTIVE or PENDING_DATA")
        acceptableStatuses = [
            MetricStatus.CREATE_PENDING,
            MetricStatus.ACTIVE,
            MetricStatus.PENDING_DATA,
        ]
        self.assertIn(metricRow.status, acceptableStatuses)

        _LOG.info("Checking modelSpec")
        storedSpec = jsonDecode(metricRow.parameters)
        self.assertEqual(storedSpec, modelSpec)

        _LOG.info("Waiting for model to become active")
        self.checkModelIsActive(modelId)

        _LOG.info("Waiting at least one model result")
        self.checkModelResultsSize(modelId, 1, atLeast=True)
コード例 #18
0
    def testTablesCreatedWithInnoDBEngine(self):
        """
        Checks that every table in the YOMP table_schema was created with the
        InnoDB engine, which is needed to preserve referential integrity.

        All tables in the DB are checked for now; if referential integrity
        stops being a requirement for some tables, those could be explicitly
        whitelisted to use `MyISAM` or another engine.
        """
        tableEnginesQuery = ("SELECT table_name, engine "
                             "FROM information_schema.tables "
                             "WHERE table_schema = 'YOMP'")
        tableRows = repository.engineFactory().execute(tableEnginesQuery)

        for tableRow in tableRows:
            actualEngine = tableRow.engine
            failureMessage = (
                "Table %s was created with the wrong engine type" %
                tableRow["table_name"])
            self.assertEqual(actualEngine, "InnoDB", failureMessage)
コード例 #19
0
  def _runBasicChecksOnModel(self, modelId, _adapter, modelSpec):
    """Run the common sanity checks on a freshly created model.

    Verifies the stored status and parameters of the metric ``modelId``
    against ``modelSpec``, then checks that the model is active and has at
    least one result. ``_adapter`` is not used.
    """
    engine = repository.engineFactory()
    with engine.connect() as conn:
      metricRow = repository.getMetric(conn, modelId)

    _LOG.info("Making sure metric is CREATE_PENDING or ACTIVE or PENDING_DATA")
    acceptableStatuses = [
      MetricStatus.CREATE_PENDING,
      MetricStatus.ACTIVE,
      MetricStatus.PENDING_DATA,
    ]
    self.assertIn(metricRow.status, acceptableStatuses)

    _LOG.info("Checking modelSpec")
    storedSpec = jsonDecode(metricRow.parameters)
    self.assertEqual(storedSpec, modelSpec)

    _LOG.info("Waiting for model to become active")
    self.checkModelIsActive(modelId)

    _LOG.info("Waiting at least one model result")
    self.checkModelResultsSize(modelId, 1, atLeast=True)
コード例 #20
0
ファイル: test_case_base.py プロジェクト: darian19/what
  def checkEncoderResolution(self, uid, minVal, maxVal):
    """Check that encoder resolution is computed correctly.

    The resolution of encoder "c1" in the stored model params of the metric
    must lie strictly between ``(maxVal - minVal) / 300.0`` and
    ``(maxVal - minVal) / 80.0``.

    :param uid: id of the metric whose model params are inspected
    :param minVal: minimum value used to estimate the resolution bounds
    :param maxVal: maximum value used to estimate the resolution bounds
    """
    engine = repository.engineFactory()
    with engine.begin() as conn:
      metricObj = repository.getMetric(conn,
                                       uid,
                                       fields=[schema.metric.c.name,
                                               schema.metric.c.model_params])

    noModelMessage = "No model exists for metric %s" % metricObj.name

    # Check for missing model params *before* decoding: json.loads(None)
    # raises TypeError, which would hide the descriptive failure message
    self.assertIsNotNone(metricObj.model_params, noModelMessage)

    modelParams = json.loads(metricObj.model_params)
    # The stored value may still be the JSON literal "null"
    self.assertNotEqual(modelParams, None, noModelMessage)

    sensorParams = modelParams["modelConfig"]["modelParams"]["sensorParams"]
    encoderParams = sensorParams["encoders"]["c1"]
    # Estimate and check the bounds for the resolution based on min and max
    valueRange = maxVal - minVal
    lower = valueRange / 300.0
    upper = valueRange / 80.0
    self.assertGreater(encoderParams["resolution"], lower)
    self.assertLess(encoderParams["resolution"], upper)
コード例 #21
0
  def setUpClass(cls):
    """Load the EC2 test resource and prepare the class-level fixtures.

    Reads ``tests/py/integration/app/test_resources.yaml`` below
    ``YOMP.app.YOMP_HOME`` and uses the first EC2 instance entry to set
    ``cls._testRegion``, ``cls._testId`` and ``cls._modelSpecNoMinMax``.
    Also sets ``cls.apiKey`` from the application config and ``cls.engine``.
    """
    resourcesPath = os.path.join(
      YOMP.app.YOMP_HOME,
      "tests/py/integration/app/test_resources.yaml")
    with open(resourcesPath) as fin:
      # safe_load: the resource file only needs plain YAML data, and
      # yaml.load() without an explicit Loader can construct arbitrary
      # objects and is rejected by current PyYAML releases
      resources = yaml.safe_load(fin)
    testCase = resources[aws_base.ResourceTypeNames.EC2_INSTANCE][0]

    cls._testRegion = testCase["region"]
    cls._testId = testCase["dimensions"]["InstanceId"]
    # Load YOMP API Key as required by TestCaseBase
    cls.apiKey = YOMP.app.config.get("security", "apikey")

    cls._modelSpecNoMinMax = {"datasource":testCase["datasource"],
                              "metricSpec":{
                                "region":testCase["region"],
                                "namespace":testCase["namespace"],
                                "metric":testCase["metric"],
                                "dimensions":testCase["dimensions"]}}

    cls.engine = repository.engineFactory()
コード例 #22
0
    def checkEncoderResolution(self, uid, minVal, maxVal):
        """Check that encoder resolution is computed correctly.

        The resolution of encoder "c1" in the stored model params of the
        metric must lie strictly between ``(maxVal - minVal) / 300.0`` and
        ``(maxVal - minVal) / 80.0``.

        :param uid: id of the metric whose model params are inspected
        :param minVal: minimum value used to estimate the resolution bounds
        :param maxVal: maximum value used to estimate the resolution bounds
        """
        engine = repository.engineFactory()
        with engine.begin() as conn:
            metricObj = repository.getMetric(
                conn,
                uid,
                fields=[schema.metric.c.name, schema.metric.c.model_params])

        noModelMessage = "No model exists for metric %s" % metricObj.name

        # Check for missing model params *before* decoding: json.loads(None)
        # raises TypeError, which would hide the descriptive failure message
        self.assertIsNotNone(metricObj.model_params, noModelMessage)

        modelParams = json.loads(metricObj.model_params)
        # The stored value may still be the JSON literal "null"
        self.assertNotEqual(modelParams, None, noModelMessage)

        sensorParams = modelParams["modelConfig"]["modelParams"][
            "sensorParams"]
        encoderParams = sensorParams["encoders"]["c1"]
        # Estimate and check the bounds for the resolution based on min and max
        valueRange = maxVal - minVal
        lower = valueRange / 300.0
        upper = valueRange / 80.0
        self.assertGreater(encoderParams["resolution"], lower)
        self.assertLess(encoderParams["resolution"], upper)
コード例 #23
0
ファイル: aggregator_service.py プロジェクト: darian19/what
    def run(self):
        """Collect and stream data for pending autostack metrics, forever.

        Each iteration queries the autostack metrics that are pending data
        collection. When there are none, it sleeps for
        ``_NOTHING_READY_SLEEP_TIME_SEC``; otherwise it builds one request
        per metric and hands them to ``_processAutostackMetricRequests``.
        """
        with ModelSwapperInterface() as modelSwapper:
            engine = repository.engineFactory()
            while True:
                with engine.connect() as conn:
                    fetchPendingStacks = repository.retryOnTransientErrors(
                        repository.getAutostackMetricsPendingDataCollection)
                    pendingStacks = fetchPendingStacks(conn)

                if pendingStacks:
                    # Build a sequence of autostack metric requests; refIDs
                    # are the positions of the requests in the sequence
                    requests = []
                    for autostack, metrics in pendingStacks:
                        firstRefID = len(requests)
                        for offset, metric in enumerate(metrics):
                            requests.append(
                                AutostackMetricRequest(
                                    refID=firstRefID + offset,
                                    autostack=autostack,
                                    metric=metric))

                    # Collect, aggregate, and stream metric data
                    self._processAutostackMetricRequests(
                        engine, requests, modelSwapper)
                else:
                    time.sleep(self._NOTHING_READY_SLEEP_TIME_SEC)
コード例 #24
0
ファイル: aggregator_service.py プロジェクト: bopopescu/what
  def run(self):
    """Collect and stream data for pending autostack metrics, forever.

    Each iteration queries the autostack metrics that are pending data
    collection. When there are none, it sleeps for
    ``_NOTHING_READY_SLEEP_TIME_SEC``; otherwise it builds one request per
    metric and hands them to ``_processAutostackMetricRequests``.
    """
    with ModelSwapperInterface() as modelSwapper:
      engine = repository.engineFactory()
      while True:
        with engine.connect() as conn:
          fetchPendingStacks = repository.retryOnTransientErrors(
            repository.getAutostackMetricsPendingDataCollection)
          pendingStacks = fetchPendingStacks(conn)

        if pendingStacks:
          # Build a sequence of autostack metric requests; refIDs are the
          # positions of the requests in the sequence
          requests = []
          for autostack, metrics in pendingStacks:
            firstRefID = len(requests)
            for offset, metric in enumerate(metrics):
              requests.append(
                AutostackMetricRequest(refID=firstRefID + offset,
                                       autostack=autostack,
                                       metric=metric))

          # Collect, aggregate, and stream metric data
          self._processAutostackMetricRequests(engine, requests, modelSwapper)
        else:
          time.sleep(self._NOTHING_READY_SLEEP_TIME_SEC)
コード例 #25
0
ファイル: models_api.py プロジェクト: bopopescu/what
def formatMetricRowProxy(metricObj):
  """Convert a metric row into a dict restricted to the display fields.

  :param metricObj: metric row; must provide ``tag_name``, ``server`` and
    ``keys()``; ``parameters`` is optional
  :returns: dict with the row's columns that are among the metric display
    fields, plus "display_name" (``"<tag_name> (<server>)"`` when a tag name
    is set, else ``server``) and "parameters" (JSON-decoded when stored as a
    string; None when the row has no such attribute)
  :rtype: dict
  """
  if metricObj.tag_name:
    displayName = "%s (%s)" % (metricObj.tag_name, metricObj.server)
  else:
    displayName = metricObj.server

  # Tolerate rows without a "parameters" attribute; previously the fallback
  # branch accessed the attribute unconditionally and raised AttributeError
  parameters = getattr(metricObj, "parameters", None)
  if isinstance(parameters, basestring):
    parameters = json.loads(parameters)

  engine = repository.engineFactory()

  allowedKeys = set(col.name for col in getMetricDisplayFields(engine))

  metricDict = {col: getattr(metricObj, col)
                for col in metricObj.keys()
                if col in allowedKeys}

  metricDict["display_name"] = displayName
  metricDict["parameters"] = parameters

  return metricDict
コード例 #26
0
    def setUpClass(cls):
        """Load the EC2 test resource and prepare the class-level fixtures.

        Reads ``tests/py/integration/app/test_resources.yaml`` below
        ``YOMP.app.YOMP_HOME`` and uses the first EC2 instance entry to set
        ``cls._testRegion``, ``cls._testId`` and ``cls._modelSpecNoMinMax``.
        Also sets ``cls.apiKey`` from the application config and
        ``cls.engine``.
        """
        resourcesPath = os.path.join(
            YOMP.app.YOMP_HOME,
            "tests/py/integration/app/test_resources.yaml")
        with open(resourcesPath) as fin:
            # safe_load: the resource file only needs plain YAML data, and
            # yaml.load() without an explicit Loader can construct arbitrary
            # objects and is rejected by current PyYAML releases
            resources = yaml.safe_load(fin)
        testCase = resources[aws_base.ResourceTypeNames.EC2_INSTANCE][0]

        cls._testRegion = testCase["region"]
        cls._testId = testCase["dimensions"]["InstanceId"]
        # Load YOMP API Key as required by TestCaseBase
        cls.apiKey = YOMP.app.config.get("security", "apikey")

        cls._modelSpecNoMinMax = {
            "datasource": testCase["datasource"],
            "metricSpec": {
                "region": testCase["region"],
                "namespace": testCase["namespace"],
                "metric": testCase["metric"],
                "dimensions": testCase["dimensions"]
            }
        }

        cls.engine = repository.engineFactory()
コード例 #27
0
ファイル: anomalies_api.py プロジェクト: darian19/what
  def GET(self):
    """
    Get metrics, sorted by AWS name tag / instance ID

    Metrics with a name tag come first, ordered by tag; metrics with equal
    tags are ordered by instance ID, those with an instance ID first.

    :returns: List of metrics
    :rtype: list

    :raises: web.HTTPError subclasses are logged and re-raised unchanged;
      any other exception is logged and converted to web.internalerror

    Example request::

      GET /_anomalies/name

    Example response::

      [
        {
          "status": 1,
          "last_rowid": 4033,
          "display_name": "jenkins-master (us-west-2/AWS/EC2/i-12345678)",
          "description": "NetworkIn on EC2 instance i-12345678 in us-west-2",
          "name": "AWS/EC2/NetworkIn",
          "last_timestamp": "2014-04-14 20:29:00",
          "poll_interval": 300,
          "server": "us-west-2/AWS/EC2/i-12345678",
          "tag_name": "jenkins-master",
          "datasource": "cloudwatch",
          "location": "us-west-2",
          "message": null,
          "parameters": {
            "InstanceId": "i-12345678",
            "region": "us-west-2"
          },
          "uid": "0b6b97022fdb4134936aae92aa67393b"
        },
        ...
      ]

    """

    try:
      self.addStandardHeaders()

      engine = repository.engineFactory()

      with engine.connect() as conn:
        modelIterator = repository.getAllMetrics(
          conn, fields=getMetricDisplayFields(conn))
        modelsList = [convertMetricRowToMetricDict(model)
                      for model in modelIterator]

      def instanceIdOf(model):
        # The payload documented above uses the key "InstanceId"; the former
        # lookup of "InstanceID" never matched it, so the secondary sort was
        # never applied. The old spelling is kept as a fallback.
        params = model["parameters"]
        return params.get("InstanceId", params.get("InstanceID"))

      # Sort by tag_name, and then parameters=>InstanceId
      def cmpFn(model1, model2):
        name1 = model1["tag_name"]
        name2 = model2["tag_name"]
        id1 = instanceIdOf(model1)
        id2 = instanceIdOf(model2)

        if name1 and not name2:
          return -1
        elif name2 and not name1:
          return 1
        elif name1 != name2:
          return cmp(name1, name2)
        elif id1 and not id2:
          return -1
        elif id2 and not id1:
          return 1
        elif id1 != id2:
          return cmp(id1, id2)
        return 0

      modelsList.sort(cmpFn)

      return utils.jsonEncode(modelsList)

    except (web.HTTPError) as ex:
      log.info(str(ex) or repr(ex))
      # Bare raise keeps the original traceback
      raise

    except Exception as ex:
      log.exception("GET Failed")
      raise web.internalerror(str(ex) or repr(ex))
コード例 #28
0
  def testModelInferencesLifeCycle(self):
    startTime = time()
    for model in sorted(self.data):
      #create a model; post is forwarded to put
      print "Creating metric for %s : " % model
      response = self.app.put("/", json.dumps(model),
          headers=self.headers)
      assertions.assertSuccess(self, response, code=201)

    response = self.app.get("/", headers=self.headers)
    assertions.assertSuccess(self, response)
    getAllModelsResult = utils.jsonDecode(response.body)
    totalMetricCount = len(getAllModelsResult)
    self.assertEqual(totalMetricCount, len(self.data))

    #Get the uids of all the metrics created.
    uids = [metric['uid'] for metric in getAllModelsResult]

    while True:
      with repository.engineFactory().connect() as conn:
        initialModelCount = conn.execute(
          sql.select([sql.func.count()], from_obj=schema.metric_data)
          .where(schema.metric_data.c.rowid == 1)).scalar()
      if initialModelCount == totalMetricCount:
        print "Done creating all the initial models."
        break

      # Exit the test with some non-zero status if the test has run for more
      # than 20 minutes to just create the initial models.
      # Should not take more than that.

      currentElapsedTime = (time() - startTime) / 60
      print "Current elapsed time %s" % currentElapsedTime
      if currentElapsedTime > 20:
        print "More than 20 minutes has elapsed. Timing out."
        sys.exit(42)
      print "%s initial models created." % initialModelCount
      print "Creating initial models for rest of the %s metrics" \
        "..." % (totalMetricCount - initialModelCount)
      sleep(60)


    #Sleep for a long time.
    minutes = 15
    print "Sleeping for %s minutes to let things settled down." % minutes
    while minutes > 0:
      print "Resume in %s minutes." % minutes
      minutes -= 1
      sleep(60)

    modelCreationDuration = (time() - startTime) / 60

    with repository.engineFactory().connect() as conn:
      lastRowIds = {uid: repository.getMetric(conn, uid).last_rowid
                    for uid in uids}
    modelInferenceWithNonNullAnomalyScore = []
    modelIds = lastRowIds.keys()
    while True:
      print set(modelInferenceWithNonNullAnomalyScore)
      if len(modelIds) == len(set(modelInferenceWithNonNullAnomalyScore)):
        print "Model inferences created for last_rowids for all the models."
        break
      for uid in modelIds:
        with repository.engineFactory().connect() as conn:
          anomalyNullCount = conn.execute(
            sql.select([sql.func.count()], from_obj=schema.metric_data)
            .where(schema.metric_data.c.rowid == lastRowIds[uid])
            .where(schema.metric_data.c.uid == uid)
            .where(schema.metric_data.c.anomaly_score == None)).scalar()
        print "Model (%s) - Last Row ID (%s) : %s" \
          % (uid, lastRowIds[uid], anomalyNullCount)
        if anomalyNullCount == 0:
          modelInferenceWithNonNullAnomalyScore.append(uid)

      # Exit the test with some non-zero status if the test has run for more
      # than 2 hours

      currentElapsedTime = (time() - startTime) / 60
      print "Current elapsed time %s" % currentElapsedTime
      if currentElapsedTime > 120:
        print "More than 2 hours has elapsed. Timing out."
        sys.exit(42)
      print "Going back to sleep for 60s..."
      sleep(60)

    self.assertEqual(anomalyNullCount, 0)
    timeToCalculateAllInferences = time()


    def getMetricDataWithRowID(metricDataList, rowid):
      '''
        Helper method to get the metric data of the nth row for a certain uid
      '''
      for metricData in metricDataList:
        if metricData[3] == rowid:
          return metricData


    def testMetricDataForRandomRowID(uid):
      '''
        This tests if the metric data returned by the GET call :
          _models/<uid>/data
        has anomaly_score consistent with what is there in the actual
        database by asserting it against a dao.MetricData.get() call
        It repeats the process for 5 random sample rows for each uid
        in the database.

        Algorithm :
        - Query the MetricDataHandler GET call for a certain uid
        - Check if response is OK
        - Find the last row id for the uid
        - Select a random row between 1 and last row id
        - Find the anomaly score for that row id
        - Assert on the anomaly score
      '''
      response = self.app.get("/%s/data" %uid, headers=self.headers)
      assertions.assertSuccess(self, response)
      getAllModelsResult = utils.jsonDecode(response.body)
      with repository.engineFactory().connect() as conn:
        lastRowID = repository.getMetric(conn, uid).last_rowid
      for _ in range(5):
        randomRowID = randrange(1, lastRowID)
        with repository.engineFactory().connect() as conn:
          singleMetricData = repository.getMetricData(
            conn,
            uid,
            rowid=randomRowID).first()
        metricData = getMetricDataWithRowID(getAllModelsResult['data'],
          randomRowID)
        self.assertEqual(metricData[2], singleMetricData.anomaly_score)
        self.assertEqual(datetime.strptime(metricData[0],
          '%Y-%m-%d %H:%M:%S'), singleMetricData.timestamp)

    map(testMetricDataForRandomRowID, uids)


    def testMetricDataAnomalyAsQueryParams(uid):
      '''
        This test makes MetricDataHandler GET calls with anomaly param :
          _models/<uid>/data?anomaly=<>
      '''
      queryString = ("SELECT * FROM metric_data WHERE uid='%s' "
                     "   and abs(anomaly_score - 0) > 1e-5 LIMIT 1") % uid
      with repository.engineFactory().connect() as conn:
        sampleMetricData = conn.execute(queryString).first()
      anomalyScore = sampleMetricData.anomaly_score
      response = self.app.get("/%s/data?anomaly=%s"
        % (uid, anomalyScore), headers=self.headers)
      assertions.assertSuccess(self, response)
      getAllModelsResult = utils.jsonDecode(response.body)
      for metricData in getAllModelsResult['data']:
        self.assertGreaterEqual(metricData[2], anomalyScore)

    map(testMetricDataAnomalyAsQueryParams, uids)


    def testMetricDataTimeStampQueryParams(uid):
      '''
        This test makes MetricDataHandler GET calls with from and to params :
          _models/<uid>/data?from=<>&to=<>
      '''
      with repository.engineFactory().connect() as conn:
        firstMetricData = conn.execute(
          sql.select([schema.metric_data])
          .where(schema.metric_data.c.uid == uid)
          .order_by(sql.expression.asc(schema.metric_data.c.timestamp))
          .limit(1)).fetchall()

        lastMetricData = conn.execute(
          sql.select([schema.metric_data])
          .where(schema.metric_data.c.uid == uid)
          .order_by(sql.expression.desc(schema.metric_data.c.timestamp))
          .limit(1)).fetchall()
      firstTimeStamp = firstMetricData[0].timestamp
      lastTimeStamp = lastMetricData[0].timestamp
      response = self.app.get("/%s/data?from=%s&to=%s"
        % (uid, firstTimeStamp, lastTimeStamp), headers=self.headers)
      assertions.assertSuccess(self, response)
      getAllModelsResult = utils.jsonDecode(response.body)
      for metricData in getAllModelsResult['data']:
        self.assertGreaterEqual(datetime.strptime(metricData[0],
          '%Y-%m-%d %H:%M:%S'), firstTimeStamp)
        self.assertLessEqual(datetime.strptime(metricData[0],
          '%Y-%m-%d %H:%M:%S'), lastTimeStamp)

    map(testMetricDataTimeStampQueryParams, uids)


    def testMetricDataQueryParams(uid):
      '''
        This test makes MetricDataHandler GET calls with various params :
          _models/<uid>/data?from=<>&to=<>&anomaly=<>
      '''
      with repository.engineFactory().connect() as conn:
        firstMetricData = conn.execute(
          "SELECT * FROM `metric_data` WHERE `uid`='%s' "
          "and abs(`anomaly_score` - 0) > 1e-5 "
          "ORDER BY `timestamp` ASC LIMIT 1" % uid).fetchall()
        lastMetricData = conn.execute(
          "SELECT * FROM `metric_data` WHERE `uid`='%s' "
          "and abs(`anomaly_score` - 0) > 1e-5 "
          "ORDER BY `timestamp` DESC LIMIT 1" % uid).fetchall()
      firstTimeStamp = firstMetricData[0].timestamp
      lastTimeStamp = lastMetricData[0].timestamp
      anomalyScore = firstMetricData[0].anomaly_score
      response = self.app.get("/%s/data?from=%s&to=%s&anomaly=%s"
        % (uid, firstTimeStamp, lastTimeStamp, anomalyScore),
        headers=self.headers)
      assertions.assertSuccess(self, response)
      getAllModelsResult = utils.jsonDecode(response.body)
      for metricData in getAllModelsResult['data']:
        self.assertGreaterEqual(metricData[2], anomalyScore)
        self.assertGreaterEqual(datetime.strptime(metricData[0],
          '%Y-%m-%d %H:%M:%S'), firstTimeStamp)
        self.assertLessEqual(datetime.strptime(metricData[0],
          '%Y-%m-%d %H:%M:%S'), lastTimeStamp)

    map(testMetricDataQueryParams, uids)


    endTime = (time() - startTime) / 60

    print "Test started at        : %s" % \
          strftime('%Y-%m-%d %H:%M:%S', localtime(startTime))
    print "Test finished at       : %s" % \
          strftime('%Y-%m-%d %H:%M:%S', localtime(endTime))
    print "Total metric count     : %s" % totalMetricCount
    print "Initial models created : %s" % initialModelCount
    print "Approximate time taken to create inital models : %s minutes" \
      % modelCreationDuration
    print "Approximate time taken to calculate all inferences : %s minutes" \
      % ((timeToCalculateAllInferences - startTime) / 60)
    print "Approximate time taken for all the tests to finish : %s minutes" \
      % ((time() - startTime) / 60)
コード例 #29
0
ファイル: aggregation.py プロジェクト: darian19/what
def getStatistics(metric):
    """Get aggregate min/max statistics for an Autostack metric.

    Per-instance statistics are collected through
    ``EC2InstanceMetricGetter.collectMetricStatistics`` and the "min" and "max"
    values are averaged over the instances that reported usable numbers. When
    no instance reports usable numbers, the values returned by
    ``repository.getMetricStats`` are used instead. The resulting range is then
    widened by 20% of its width on each side.

    :param metric: the Autostack metric to get statistics for; its
        ``datasource`` and ``uid`` attributes are read

    :returns: metric statistics
    :rtype: dict {"min": minVal, "max": maxVal}

    :raises: ValueError if the metric does not belong to an Autostack

    :raises: YOMP.app.exceptions.ObjectNotFoundError if the metric or the
        corresponding autostack doesn't exist; this may happen if it got
        deleted by another process in the meantime.

    :raises: YOMP.app.exceptions.MetricStatisticsNotReadyError if there are no
        or insufficient samples at this time; this may also happen if the
        metric and its data were deleted by another process in the meantime
    """
    engine = repository.engineFactory()

    if metric.datasource != "autostack":
        raise ValueError(
            "Metric must belong to an Autostack but has datasource=%r" % metric.datasource
        )

    def _isUsable(value):
        # A statistic counts only when it is a real number, i.e. not NaN
        return isinstance(value, numbers.Number) and not math.isnan(value)

    statsGetter = EC2InstanceMetricGetter()
    try:
        with engine.connect() as conn:
            autostack = repository.getAutostackFromMetric(conn, metric.uid)
        perInstanceMetrics = statsGetter.collectMetricStatistics(autostack, metric)
    finally:
        # Release the getter whether or not collection succeeded
        statsGetter.close()

    usableCount = 0
    minTotal = 0.0
    maxTotal = 0.0
    for instanceMetric in perInstanceMetrics:
        assert len(instanceMetric.records) == 1
        stats = instanceMetric.records[0].value

        if not (_isUsable(stats["min"]) and _isUsable(stats["max"])):
            # Cloudwatch gave us bogus data for this instance; leave it out
            continue

        minTotal += stats["min"]
        maxTotal += stats["max"]
        usableCount += 1

    if usableCount:
        minVal = minTotal / usableCount
        maxVal = maxTotal / usableCount
    else:
        # Nothing usable from AWS: fall back to metric_data. This may raise a
        # MetricStatisticsNotReadyError if there is no or not enough data.
        with engine.connect() as conn:
            dbStats = repository.getMetricStats(conn, metric.uid)
        minVal = dbStats["min"]
        maxVal = dbStats["max"]

    # Pad the range by 20% of its width on both ends
    margin = (maxVal - minVal) * 0.2
    return {"min": minVal - margin, "max": maxVal + margin}
コード例 #30
0
 def setUpClass(cls):
     """Store the API key and the repository engine factory result on the
     test class so that every test in the class can use them."""
     # Load YOMP API Key as required by TestCaseBase
     cls.apiKey = YOMP.app.config.get("security", "apikey")
     # Whatever repository.engineFactory() returns is kept as cls.engine
     cls.engine = repository.engineFactory()
コード例 #31
0
ファイル: metric_collector.py プロジェクト: darian19/what
  def run(self):
    """ Collect metric data and status for active metrics

    Loops until an exception escapes. Each iteration:

      1. garbage-collects the info cache once the current time has passed
         ``_nextCacheGarbageCollectionTime``;
      2. asks ``_getCandidateMetrics`` which metrics are due for an update and,
         if there are none, sleeps ``_NO_PENDING_METRICS_SLEEP_SEC`` seconds
         and starts over;
      3. passes the metrics, the worker process pool and the results queue to
         ``_collectDataForMetrics``;
      4. starts a thread running ``_processAndDispatchThreadTarget`` on the
         results queue, then waits for the pool tasks and for the queue to be
         drained;
      5. logs the counts and timings of the iteration.

    The process pool is terminated and joined when the loop exits, whatever the
    reason.
    """
    # NOTE: the process pool must be created BEFORE this main (parent) process
    # creates any global or class-level shared resources (e.g., boto
    # connection) that would have undesirable consequences when
    # replicated into and used by forked child processes (e.g., the same MySQL
    # connection socket file descriptor used by multiple processes). And we
    # can't take advantage of the process Pool's maxtasksperchild feature
    # either (for the same reason)
    self._log.info("Starting YOMP Metric Collector")
    resultsQueue = multiprocessing.Manager().JoinableQueue()

    # One-way pipe (duplex=False): recvPipe can only receive, sendPipe can only
    # send. sendPipe is handed to the dispatch thread below.
    recvPipe, sendPipe = multiprocessing.Pipe(False)

    processPool = multiprocessing.Pool(
      processes=self._WORKER_PROCESS_POOL_SIZE,
      maxtasksperchild=None)

    try:
      with ModelSwapperInterface() as modelSwapper:
        engine = repository.engineFactory()
        while True:
          startTime = time.time()

          if startTime > self._nextCacheGarbageCollectionTime:
            # TODO: unit-test
            self._garbageCollectInfoCache()

          # Determine which metrics are due for an update
          metricsToUpdate = self._getCandidateMetrics(engine)

          filterDuration = time.time() - startTime

          if not metricsToUpdate:
            # Nothing is due: back off before polling again
            time.sleep(self._NO_PENDING_METRICS_SLEEP_SEC)
            continue

          # Collect metric data
          collectionStartTime = time.time()

          poolResults = self._collectDataForMetrics(metricsToUpdate,
                                                    processPool,
                                                    resultsQueue)

          # Process/dispatch results in parallel in another thread as results
          # become available in resultsQueue
          dispatchThread = (
            threading.Thread(target=self._processAndDispatchThreadTarget,
                             args=(engine,
                                   metricsToUpdate,
                                   resultsQueue,
                                   modelSwapper,
                                   sendPipe)))
          dispatchStartTime = time.time()
          dispatchThread.start()

          # Synchronize with processPool
          poolResults.wait() # Wait for collection tasks to complete

          metricPollDuration = time.time() - collectionStartTime

          # Blocks until every item put on the queue so far has been marked
          # done (presumably by the dispatch thread -- confirm in
          # _processAndDispatchThreadTarget)
          resultsQueue.join() # Block until all tasks completed...

          # Synchronize with dispatchThread
          resultsQueue.put(self._SENTINEL) # Signal to dispatchThread that
                                           # there are no more results to
                                           # process.
          resultsQueue.join()
          # The thread is never join()ed here; this blocking recv() is what
          # waits for its final report.
          numEmpty, numErrors = recvPipe.recv() # Get dispatchThread stats

          dispatchDuration = time.time() - dispatchStartTime

          self._log.info(
            "Processed numMetrics=%d; numEmpty=%d; numErrors=%d; "
            "duration=%.4fs (filter=%.4fs; query=%.4fs; dispatch=%.4fs)",
            len(metricsToUpdate), numEmpty, numErrors,
            time.time() - startTime, filterDuration,
            metricPollDuration, dispatchDuration)
    finally:
      # Always tear the worker processes down, even on an exception
      self._log.info("Exiting Metric Collector run-loop")
      processPool.terminate()
      processPool.join()
コード例 #32
0
  def testCollectMetricStatistics(self):

    expectedStatisticNames = ["min", "max"]

    def validateStats(stats):
      self.assertIsInstance(stats, (list, tuple))

      timestamps = []
      for instanceMetrics in stats:
        self.assertEqual(len(instanceMetrics.records), 1)
        record = instanceMetrics.records[0]
        self.assertIsInstance(record.value, dict)
        self.assertGreater(len(record.value), 0)
        self.assertTrue(
          set(record.value.iterkeys()).issubset(expectedStatisticNames),
          msg=record.value)

        for metricValue in record.value.itervalues():
          self.assertIsInstance(metricValue, float, msg=instanceMetrics)
          self.assertFalse(math.isnan(metricValue))

        timestamps.append(record.timestamp)


      # Verify that all the stats timestamps are the same
      if timestamps:
        self.assertSequenceEqual(timestamps, [timestamps[0]] * len(timestamps))



    # Collection data for both autostack/metric combinations
    collector = EC2InstanceMetricGetter()
    self.addCleanup(collector.close)

    def _createAutostackMetric(conn, name, region, filters):
      autostackDict = repository.addAutostack(conn,
                                              name=name,
                                              region=region,
                                              filters=json.dumps(filters))

      modelSpec = {"modelParams": {},
                   "datasource": "autostack",
                   "metricSpec": {"slaveDatasource": "cloudwatch",
                                  "slaveMetric": {"metric": "CPUUtilization",
                                                  "namespace": "AWS/EC2"},
                                  "autostackId": autostackDict["uid"]}}

      metricDict = repository.addMetric(
          conn,
          datasource="autostack",
          name="CPUUtilization",
          description=("CPUUtilization on YOMP Autostack {0} in us-west-2 "
                       "region").format(name),
          server="Autostacks/{0}".format(autostackDict["uid"]),
          location=region,
          tag_name=name,
          parameters=htmengine.utils.jsonEncode(modelSpec),
          poll_interval=300,
          status=MetricStatus.UNMONITORED)

      repository.addMetricToAutostack(conn,
                                      autostackDict["uid"],
                                      metricDict["uid"])

      autostackObj = type("MutableAutostack", (object,), autostackDict)()
      autostackObj.filters = json.loads(autostackObj.filters)

      metricObj = type("MutableMetric", (object,), metricDict)()

      return autostackObj, metricObj

    # All instances in us-east-1
    engine = repository.engineFactory()
    with engine.begin() as conn:
      autostack1, m1 = (
        _createAutostackMetric(conn,
                               name="testCollectMetricStats1",
                               region="us-east-1",
                               filters={"tag:Name": ["*"]}))

      stats1 = collector.collectMetricStatistics(
        autostack=autostack1,
        metric=m1)
      print "STATS1:", stats1

      validateStats(stats1)
      self.assertGreaterEqual(len(stats1), 1)


      # All instances in us-west-2
      autostack2, m2 = _createAutostackMetric(conn,
                                              name="testCollectMetricStats2",
                                              region="us-west-2",
                                              filters={"tag:Name": ["*"]})

      stats2 = collector.collectMetricStatistics(
        autostack=autostack2,
        metric=m2)
      print "STATS2:", stats2
      validateStats(stats2)
      self.assertGreater(len(stats2), 1)


      # No matching instances in us-west-2
      autostack3, m3 = (
        _createAutostackMetric(
          conn,
          name="testCollectMetricStatistics3",
          region="us-west-2",
          filters={"tag:Name": ["NothingShouldMatchThis"]}))

      stats3 = collector.collectMetricStatistics(
        autostack=autostack3,
        metric=m3)
      print "STATS3:", stats3
      validateStats(stats3)
      self.assertEqual(len(stats3), 0)
コード例 #33
0
ファイル: logging_api.py プロジェクト: darian19/what
    def POST(cls):
        """Upload the metric info and metric data as a compressed tarfile to S3.

        The request body is JSON. It must include the uid of the metric and may
        include other keys as well; for instance, a request from the mobile
        application is likely to describe the current view and the data being
        displayed when the feedback request is sent. Every field other than
        uid is stored in the feedback archive that gets uploaded.

        The archive holds two files: ``metric.json`` (the extra request fields,
        a ``server_id`` and the JSON-encoded metric record) and
        ``metric_data.csv`` (one row per metric data record, preceded by a
        header row when there is any data).

        :returns: whatever ``cls._uploadTarfile`` returns
        """
        requestFields = json.loads(web.data())

        # Pull the metric uid out; the remaining keys travel with the archive
        uid = requestFields["uid"]
        del requestFields["uid"]
        requestFields["server_id"] = _MACHINE_ID

        # Everything is staged in a temporary directory before uploading
        stagingDir = tempfile.mkdtemp()

        try:
            # Columns of the metric table record to include in the archive
            metricColumns = [
                schema.metric.c.uid,
                schema.metric.c.datasource,
                schema.metric.c.name,
                schema.metric.c.description,
                schema.metric.c.server,
                schema.metric.c.location,
                schema.metric.c.parameters,
                schema.metric.c.status,
                schema.metric.c.message,
                schema.metric.c.last_timestamp,
                schema.metric.c.poll_interval,
                schema.metric.c.tag_name,
                schema.metric.c.last_rowid,
            ]

            with repository.engineFactory().connect() as conn:
                metricRow = repository.getMetric(conn, uid, metricColumns)

            metric = {}
            for col in metricColumns:
                fieldValue = getattr(metricRow, col.name)
                if col.name == "parameters":
                    # Stored as JSON text; decode it for the archive
                    fieldValue = utils.jsonDecode(fieldValue)
                metric[col.name] = fieldValue

            if metric["tag_name"]:
                displayName = "%s (%s)" % (metric["tag_name"], metric["server"])
            else:
                displayName = metric["server"]
            metric["display_name"] = displayName

            requestFields["metric"] = utils.jsonEncode(metric)

            metricPath = os.path.join(stagingDir, "metric.json")
            with open(metricPath, "w") as jsonFile:
                json.dump(requestFields, jsonFile)

            # Retrieve the metric data
            with repository.engineFactory().connect() as conn:
                metricDataRows = repository.getMetricData(conn, uid)

            dataRecords = []
            for dataRow in metricDataRows:
                dataRecords.append(
                    dict((col.name, getattr(dataRow, col.name))
                         for col in schema.metric_data.columns))

            metricDataPath = os.path.join(stagingDir, "metric_data.csv")
            with open(metricDataPath, "w") as csvFile:
                writer = csv.writer(csvFile)
                if dataRecords:
                    # Field names first, taken from the first record
                    header = dataRecords[0].keys()
                    writer.writerow(header)
                    # Then one line per record, in header order
                    for dataRecord in dataRecords:
                        writer.writerow([dataRecord[field] for field in header])

            # Bundle both files into a timestamped tarfile
            ts = datetime.datetime.utcnow().strftime("%Y%m%d-%H%M%S")
            filename = "metric_dump_%s_%s.tar.gz" % (uid, ts)
            tfPath = os.path.join(stagingDir, filename)
            with tarfile.open(tfPath, "w:gz") as tf:
                for memberPath in (metricPath, metricDataPath):
                    tf.add(memberPath, arcname=os.path.basename(memberPath))

            # Upload the tarfile
            return cls._uploadTarfile(filename, tfPath)

        finally:
            # Remove the staging directory whether or not the upload succeeded
            shutil.rmtree(stagingDir)
コード例 #34
0
    def testCollectMetricData(self):
        """Check EC2InstanceMetricGetter.collectMetricData() for four requests.

        Three autostacks are added: a match-all filter in us-east-1 (two
        metrics, requests 1 and 2), a ``*?*`` filter in us-west-2 (request 3)
        and a filter that should match nothing in us-west-2 (request 4).

        For requests 1-3 every slice must be sorted by timestamp with unique
        datetime timestamps; requests 1 and 2 must yield at least one record,
        request 3 must yield at least one timestamp shared by several records,
        and request 4 must yield no slices at all.
        """
        self.engine = repository.engineFactory(reset=True)

        with self.engine.connect() as conn:
            autostack1 = self._addAutostack(name="testCollectMetricData1",
                                            region="us-east-1",
                                            filters='{"tag:Name": ["*"]}')

            m1a = self._addAutostackMetric(conn, autostack1)
            m1b = self._addAutostackMetric(conn,
                                           autostack1,
                                           name="Autostacks/InstanceCount")

            autostack2 = self._addAutostack(name="testCollectMetricData2",
                                            region="us-west-2",
                                            filters='{"tag:Name": ["*?*"]}')

            m2 = self._addAutostackMetric(conn, autostack2)

            autostack3 = self._addAutostack(
                name="testCollectMetricData3",
                region="us-west-2",
                filters='{"tag:Name": ["NothingShouldMatchThis"]}')

            m3 = self._addAutostackMetric(conn, autostack3)

        # Collection data for both autostack/metric combinations
        collector = EC2InstanceMetricGetter()
        self.addCleanup(collector.close)

        requests = [
            AutostackMetricRequest(refID=1, autostack=autostack1, metric=m1a),
            AutostackMetricRequest(refID=2, autostack=autostack1, metric=m1b),
            AutostackMetricRequest(refID=3, autostack=autostack2, metric=m2),
            AutostackMetricRequest(refID=4, autostack=autostack3, metric=m3)
        ]

        # Index the returned collections by the refID of their request
        metricCollections = dict(
            (collection.refID, collection)
            for collection in collector.collectMetricData(requests=requests))

        self.assertEqual(len(metricCollections), len(requests))

        def checkSliceSorted(records):
            """Assert that ``records`` are ordered by timestamp."""
            sortedRecords = sorted(records,
                                   key=lambda record: record.timestamp)
            self.assertSequenceEqual(records, sortedRecords)

        def checkSliceUniqueTimestamps(records):
            """Assert that timestamps are datetimes with no duplicates."""
            timestamps = tuple(record.timestamp for record in records)
            for timestamp in timestamps:
                self.assertIsInstance(timestamp, datetime)
            self.assertItemsEqual(set(timestamps), timestamps)

        def validateAndGroupByTimestamp(collection):
            """Validate ``collection`` and each of its slices, then return a
            dict mapping every record timestamp to the list of
            (instanceID, value) pairs recorded at that timestamp."""
            self.assertEqual(collection.nextMetricTime,
                             collection.timeRange.end)
            groups = defaultdict(list)
            for metricSlice in collection.slices:
                checkSliceSorted(metricSlice.records)
                checkSliceUniqueTimestamps(metricSlice.records)
                for record in metricSlice.records:
                    groups[record.timestamp].append(
                        (metricSlice.instanceID, record.value))
            return groups

        collection1 = metricCollections[1]
        collection2 = metricCollections[2]
        collection3 = metricCollections[3]
        collection4 = metricCollections[4]

        # COLLECTION-1 and COLLECTION-2: at least one record must have been
        # collected. (A ``len(values) >= 0`` test would be true for any list,
        # so the check is spelled as ``> 0``.)
        for collection in (collection1, collection2):
            metricGroups = validateAndGroupByTimestamp(collection)
            foundValues = any(len(values) > 0
                              for values in metricGroups.values())
            self.assertTrue(foundValues)

        # COLLECTION-3: at least one timestamp must be shared by several
        # records
        metricGroups = validateAndGroupByTimestamp(collection3)
        foundAlignedItems = any(len(values) > 1
                                for values in metricGroups.values())
        self.assertTrue(foundAlignedItems)

        # Make sure there were no duplicate timestamps in any one slice: no
        # instance may show up twice under the same timestamp
        for values in metricGroups.values():
            instances = [instanceID for instanceID, _value in values]
            self.assertItemsEqual(instances, set(instances))

        # COLLECTION-4 (there should be no matching instances for it):
        self.assertEqual(len(collection4.slices), 0)
        self.assertEqual(collection4.nextMetricTime, collection4.timeRange.end)
コード例 #35
0
  def testCollectAndPublishMetrics(self):
    """Run the model scheduler and the metric collector as subprocesses,
    monitor CPUUtilization on some EC2 instances, and wait until one result
    batch has been consumed for every monitored metric."""
    # Start Metric Collector, create a set of Metrics, wait for it to collect
    # some metrics and to publish them to the metric_exchange, then validate
    # attributes of the published metrics.
    #
    # TODO Add more metric types
    # TODO Deeper validation of the published metrics

    # Start our own instance of metric collector and wait for data points
    with self._startModelSchedulerSubprocess() as modelSchedulerSubprocess, \
        self._startMetricCollectorSubprocess() as metricCollectorSubprocess:
      # Create some models for metric collector to harvest
      region = "us-west-2"
      namespace = "AWS/EC2"
      resourceType = ResourceTypeNames.EC2_INSTANCE

      engine = repository.engineFactory()
      adapter = createCloudwatchDatasourceAdapter()


      ec2Instances = adapter.describeResources(region=region,
                                               resourceType=resourceType)

      self.assertGreater(len(ec2Instances), 0)

      maxModels = 10

      # Keep at most maxModels instances, or fewer if the instance quota is
      # lower
      ec2Instances = ec2Instances[:min(maxModels, Quota.getInstanceQuota())]

      metricInstances = []

      _LOGGER.info("Starting %d models", len(ec2Instances))
      # Checked again because the truncation above may have left nothing
      self.assertGreater(len(ec2Instances), 0)
      for ec2Instance in ec2Instances:

        metricSpec = {"region": region,
                      "namespace": namespace,
                      "metric": "CPUUtilization",
                      "dimensions": {"InstanceId": ec2Instance["resID"]}}

        modelSpec = {"datasource": "cloudwatch",
                     "metricSpec": metricSpec}

        metricId = adapter.monitorMetric(modelSpec)

        # Mark the metric ACTIVE directly in the repository
        with engine.connect() as conn:
          repository.setMetricStatus(conn, metricId, MetricStatus.ACTIVE)

        metricInstances.append(metricId)

      _LOGGER.info("Waiting for results from models...")

      seenMetricIDs = set()
      allMetricIDs = set(metricInstances)

      # Register a timeout so we won't deadlock the test
      def onTimeout(resultsQueueName):
        # Invoked by the threading.Timer below, i.e. on the timer's thread
        _LOGGER.error(
          "Timed out waiting to get results from models; numResults=%d; "
          "expected=%d", len(seenMetricIDs), len(allMetricIDs))

        # HACK delete model swapper results queue to abort the consumer
        try:
          with MessageBusConnector() as bus:
            bus.deleteMessageQueue(resultsQueueName)
        except Exception:
          _LOGGER.exception("Failed to delete results mq=%s", resultsQueueName)
          raise

      with ModelSwapperInterface() as modelSwapper:
        with modelSwapper.consumeResults() as consumer:
          # Fires onTimeout after 120 seconds unless cancelled in the finally
          # clause below. NOTE: reads a private attribute of the model swapper.
          timer = threading.Timer(120, onTimeout,
                                  args=[modelSwapper._resultsQueueName])
          timer.start()
          try:
            for batch in consumer:
              seenMetricIDs.add(batch.modelID)
              batch.ack()
              # Done as soon as every monitored metric has produced a batch
              if seenMetricIDs == allMetricIDs:
                break
            else:
              # Reached only when the consumer ran out without the break above
              self.fail(
                "Expected %d results, but got only %d: %s"
                % (len(allMetricIDs), len(seenMetricIDs), seenMetricIDs,))
            _LOGGER.info("Got %d results from models", len(seenMetricIDs))
          finally:
            timer.cancel()

      # Terminate metric_collector subprocess gracefully to avoid too much
      # error logging junk on the terminal
      metricCollectorSubprocess.send_signal(signal.SIGINT)

      # Terminate model scheduler subprocess gracefully as well, for the same
      # reason
      modelSchedulerSubprocess.send_signal(signal.SIGINT)
コード例 #36
0
  def testCollectMetricData(self):
    """Check EC2InstanceMetricGetter.collectMetricData() for four requests.

    Three autostacks are added: a match-all filter in us-east-1 (two metrics,
    requests 1 and 2), a ``*?*`` filter in us-west-2 (request 3) and a filter
    that should match nothing in us-west-2 (request 4).

    For requests 1-3 every slice must be sorted by timestamp with unique
    datetime timestamps; requests 1 and 2 must yield at least one record,
    request 3 must yield at least one timestamp shared by several records, and
    request 4 must yield no slices at all.
    """
    self.engine = repository.engineFactory(reset=True)

    with self.engine.connect() as conn:
      autostack1 = self._addAutostack(name="testCollectMetricData1",
                                      region="us-east-1",
                                      filters='{"tag:Name": ["*"]}')

      m1a = self._addAutostackMetric(conn, autostack1)
      m1b = self._addAutostackMetric(conn,
                                     autostack1,
                                     name="Autostacks/InstanceCount")

      autostack2 = self._addAutostack(name="testCollectMetricData2",
                                      region="us-west-2",
                                      filters='{"tag:Name": ["*?*"]}')

      m2 = self._addAutostackMetric(conn, autostack2)

      autostack3 = self._addAutostack(
        name="testCollectMetricData3",
        region="us-west-2",
        filters='{"tag:Name": ["NothingShouldMatchThis"]}')

      m3 = self._addAutostackMetric(conn, autostack3)

    # Collection data for both autostack/metric combinations
    collector = EC2InstanceMetricGetter()
    self.addCleanup(collector.close)

    requests = [
      AutostackMetricRequest(refID=1, autostack=autostack1, metric=m1a),
      AutostackMetricRequest(refID=2, autostack=autostack1, metric=m1b),
      AutostackMetricRequest(refID=3, autostack=autostack2, metric=m2),
      AutostackMetricRequest(refID=4, autostack=autostack3, metric=m3)
    ]

    # Index the returned collections by the refID of their request
    metricCollections = dict(
      (collection.refID, collection)
      for collection in collector.collectMetricData(requests=requests))

    self.assertEqual(len(metricCollections), len(requests))

    def checkSliceSorted(records):
      """Assert that ``records`` are ordered by timestamp."""
      sortedRecords = sorted(records, key=lambda record: record.timestamp)
      self.assertSequenceEqual(records, sortedRecords)

    def checkSliceUniqueTimestamps(records):
      """Assert that timestamps are datetimes with no duplicates."""
      timestamps = tuple(record.timestamp for record in records)
      for timestamp in timestamps:
        self.assertIsInstance(timestamp, datetime)
      self.assertItemsEqual(set(timestamps), timestamps)

    def validateAndGroupByTimestamp(collection):
      """Validate ``collection`` and each of its slices, then return a dict
      mapping every record timestamp to the list of (instanceID, value) pairs
      recorded at that timestamp."""
      self.assertEqual(collection.nextMetricTime, collection.timeRange.end)
      groups = defaultdict(list)
      for metricSlice in collection.slices:
        checkSliceSorted(metricSlice.records)
        checkSliceUniqueTimestamps(metricSlice.records)
        for record in metricSlice.records:
          groups[record.timestamp].append(
            (metricSlice.instanceID, record.value))
      return groups

    collection1 = metricCollections[1]
    collection2 = metricCollections[2]
    collection3 = metricCollections[3]
    collection4 = metricCollections[4]

    # COLLECTION-1 and COLLECTION-2: at least one record must have been
    # collected. (A ``len(values) >= 0`` test would be true for any list, so
    # the check is spelled as ``> 0``.)
    for collection in (collection1, collection2):
      metricGroups = validateAndGroupByTimestamp(collection)
      foundValues = any(len(values) > 0 for values in metricGroups.values())
      self.assertTrue(foundValues)

    # COLLECTION-3: at least one timestamp must be shared by several records
    metricGroups = validateAndGroupByTimestamp(collection3)
    foundAlignedItems = any(len(values) > 1
                            for values in metricGroups.values())
    self.assertTrue(foundAlignedItems)

    # Make sure there were no duplicate timestamps in any one slice: no
    # instance may show up twice under the same timestamp
    for values in metricGroups.values():
      instances = [instanceID for instanceID, _value in values]
      self.assertItemsEqual(instances, set(instances))

    # COLLECTION-4 (there should be no matching instances for it):
    self.assertEqual(len(collection4.slices), 0)
    self.assertEqual(collection4.nextMetricTime, collection4.timeRange.end)
コード例 #37
0
    def GET(self, period):
        """
        Get metrics, sorted by anomalies over specified period (hours)

        Servers are ranked by the largest display value found among their
        metrics, highest first; ties are broken by server name, and metrics
        that belong to the same server are ordered by metric name.

        :param period: Period (hours) over which to consider anomalies for
          sort order
        :type period: int
        :returns: List of metrics, encoded with ``utils.jsonEncode``

        :raises: web.HTTPError is logged and re-raised as is; any other
          exception is logged and converted with ``web.internalerror``

        Example request::

          GET /_anomalies/period/{period}

        Example response::

          [
            {
              "status": 1,
              "last_rowid": 4033,
              "display_name": "jenkins-master (us-west-2/AWS/EC2/i-12345678)",
              "description": "NetworkIn on EC2 instance i-12345678 in us-west-2",
              "name": "AWS/EC2/NetworkIn",
              "last_timestamp": "2014-04-14 20:29:00",
              "poll_interval": 300,
              "server": "us-west-2/AWS/EC2/i-12345678",
              "tag_name": "jenkins-master",
              "datasource": "cloudwatch",
              "location": "us-west-2",
              "message": null,
              "parameters": {
                "InstanceId": "i-12345678",
                "region": "us-west-2"
              },
              "uid": "0b6b97022fdb4134936aae92aa67393b"
            },
            ...
          ]

        """
        try:
            self.addStandardHeaders()

            with repository.engineFactory().connect() as conn:
                metricRows = repository.getAllMetrics(
                    conn, fields=getMetricDisplayFields(conn))
                displayValueByUid = repository.getMetricIdsSortedByDisplayValue(
                    conn, period)

            # Largest display value seen per server; servers without any
            # value default to 0.0
            peakValueByServer = defaultdict(float)
            metricDicts = []

            for row in metricRows:
                displayValue = displayValueByUid.get(row.uid)
                if displayValue is not None:
                    peakValueByServer[row.server] = max(
                        float(displayValue), peakValueByServer[row.server])
                metricDicts.append(convertMetricRowToMetricDict(row))

            # Highest-ranked server first, then by server and metric name
            metricDicts.sort(
                key=lambda metricDict: (
                    -peakValueByServer[metricDict["server"]],
                    metricDict["server"],
                    metricDict["name"]))

            return utils.jsonEncode(metricDicts)

        except web.HTTPError as ex:
            log.info(str(ex) or repr(ex))
            raise ex

        except Exception as ex:
            log.exception("GET Failed")
            raise web.internalerror(str(ex) or repr(ex))
コード例 #38
0
 def setUpClass(cls):
   """Store the API key and the repository engine factory result on the test
   class so that every test in the class can use them."""
   # Load YOMP API Key as required by TestCaseBase
   cls.apiKey = YOMP.app.config.get("security", "apikey")
   # Whatever repository.engineFactory() returns is kept as cls.engine
   cls.engine = repository.engineFactory()
コード例 #39
0
ファイル: notification_service.py プロジェクト: darian19/what
    def messageHandler(self, message):
        """ Inspect all inbound model results in a batch for anomaly thresholds and
        trigger notifications where applicable.

        Messages whose headers carry a ``dataType`` entry are not model inference
        results and are ignored. For every other message the batch is
        deserialized, each sufficiently anomalous row is matched against the
        stored notification settings, and the message is acknowledged whether or
        not processing succeeded. Old notifications are purged after a
        successfully processed batch.

        :param amqp.messages.ConsumerMessage message: ``message.body`` is a
          serialized batch of model inference results generated in
          ``AnomalyService`` and must be deserialized using
          ``AnomalyService.deserializeModelResult()``. The message conforms to
          htmengine/runtime/json_schema/model_inference_results_msg_schema.json

        :raises: any error hit while deserializing or processing the batch is
          propagated after the message has been acknowledged
        """
        if message.properties.headers and "dataType" in message.properties.headers:
            # Not a model inference result
            # NOTE(review): this path returns without calling message.ack();
            # confirm that this is intended.
            return

        YOMP.app.config.loadConfig()  # reload config on every batch
        engine = repository.engineFactory()

        try:
            try:
                batch = AnomalyService.deserializeModelResult(message.body)
            except Exception:
                self._log.exception("Error deserializing model result")
                raise

            # Load all settings for all users (once per incoming batch)
            with engine.connect() as conn:
                settings = repository.retryOnTransientErrors(repository.getAllNotificationSettings)(conn)

            self._log.debug("settings: %r", settings)

            # Cache minimum threshold to trigger any notification to avoid
            # permuting settings x metricDataRows
            if settings:
                minThreshold = min(setting.sensitivity for setting in settings)
            else:
                minThreshold = 0.99999

            metricInfo = batch["metric"]
            metricId = metricInfo["uid"]
            resource = metricInfo["resource"]

            for row in batch["results"]:

                if row["anomaly"] >= minThreshold:
                    # Computed once per row, ahead of the per-device loop, so that
                    # it is also defined for the "no devices configured" log
                    # message below (the loop body never runs in that case).
                    rowDatetime = datetime.utcfromtimestamp(row["ts"])

                    for settingObj in settings:
                        if row["rowid"] <= 1000:
                            continue  # Not enough data

                        if rowDatetime < datetime.utcnow() - timedelta(seconds=3600):
                            continue  # Skip old

                        if row["anomaly"] >= settingObj.sensitivity:
                            # First let's clear any old users out of the database.
                            # NOTE(review): this purge runs for every matching
                            # row/setting pair; confirm whether once per batch
                            # would be sufficient.
                            with engine.connect() as conn:
                                repository.retryOnTransientErrors(repository.deleteStaleNotificationDevices)(
                                    conn, _NOTIFICATION_DEVICE_STALE_DAYS
                                )

                            # If anomaly_score meets or exceeds any of the device
                            # notification sensitivity settings, trigger notification.
                            # repository.addNotification() will handle throttling.
                            notificationId = str(uuid.uuid4())

                            with engine.connect() as conn:
                                result = repository.retryOnTransientErrors(repository.addNotification)(
                                    conn,
                                    uid=notificationId,
                                    server=resource,
                                    metric=metricId,
                                    rowid=row["rowid"],
                                    device=settingObj.uid,
                                    windowsize=(settingObj.windowsize),
                                    timestamp=rowDatetime,
                                    acknowledged=0,
                                    seen=0,
                                )

                            self._log.info(
                                "NOTIFICATION=%s SERVER=%s METRICID=%s DEVICE=%s "
                                "Notification generated. ",
                                notificationId, resource, metricId, settingObj.uid
                            )

                            if result is not None and result.rowcount > 0 and settingObj.email_addr:
                                # Notification was generated.  Attempt to send email
                                with engine.connect() as conn:
                                    notificationObj = repository.getNotification(conn, notificationId)

                                self.sendNotificationEmail(engine, settingObj, notificationObj)

                    if not settings:
                        # There are no device notification settings stored on this
                        # server, no notifications will be generated.  However, log
                        # that an anomaly was detected and a notification would be
                        # sent if there were any configured devices
                        self._log.info(
                            "<%r>{TAG:APP.NOTIFICATION} Anomaly detected at %s, "
                            "but no devices are configured.",
                            metricInfo, rowDatetime
                        )

        finally:
            # Acknowledge the message regardless of the processing outcome
            message.ack()

        # Do cleanup: purge old notifications
        with engine.connect() as conn:
            repository.clearOldNotifications(conn)
コード例 #40
0
ファイル: models_api_test.py プロジェクト: darian19/what
    def setUpClass(cls):
        """
        Setup steps for all test cases.
        Focus for these is to cover all API checks for ModelDataHandler.
        Hence, this does all setup creating metric, waiting for
        metricData across all testcases, all API call for querying metricData
        will be against single metric created in setup
        Setup Process
        1) Update conf with aws credentials, ManagedTempRepository will not
           work in this test
        2) Select test instance such that it has been running for a long time,
           We are using instance older than 15 days
        3) Create Metric, wait for min metricData rows to become available
           Set to 100, configurable
        4) Pick testRowId, set it to a low value; this will make sure to have
           Non NULL value for anomaly_score field for given row while invoking
           GET with conditions, set to 5
        5) Decide queryParams for anomalyScore, to and from timestamp

        Both wait loops give up after ``maxWaitTime`` seconds and let setup
        continue with whatever data is available at that point.
        """
        cls.headers = getDefaultHTTPHeaders(YOMP.app.config)

        # All other services need AWS credentials to work
        # Set AWS credentials
        YOMP.app.config.loadConfig()

        # Select test instance such that it has been running for a long time
        g_logger.info("Getting long-running EC2 Instances")
        instances = aws_utils.getLongRunningEC2Instances(
            "us-west-2",
            YOMP.app.config.get("aws", "aws_access_key_id"),
            YOMP.app.config.get("aws", "aws_secret_access_key"),
            15,
        )
        # randrange(len(...)) can select every index, including 0, and also works
        # when exactly one instance is returned; randrange(1, len(...)) could do
        # neither.
        testInstance = instances[randrange(len(instances))]

        createModelData = {
            "region": "us-west-2",
            "namespace": "AWS/EC2",
            "datasource": "cloudwatch",
            "metric": "CPUUtilization",
            "dimensions": {"InstanceId": testInstance.id},
        }

        # Number of minimum rows
        cls.minDataRows = 100

        cls.app = TestApp(models_api.app.wsgifunc())

        # create test metric
        g_logger.info("Creating test metric; modelSpec=%s", createModelData)
        response = cls.app.put("/", utils.jsonEncode(createModelData), headers=cls.headers)
        postResult = utils.jsonDecode(response.body)
        maxWaitTime = 600
        waitTimeMetricData = 0
        waitAnomalyScore = 0

        # Wait for enough metric data to be available
        cls.uid = postResult[0]["uid"]
        engine = repository.engineFactory()
        with engine.connect() as conn:
            cls.metricData = list(repository.getMetricData(conn, cls.uid))
        with engine.connect() as conn:
            cls.testMetric = repository.getMetric(conn, cls.uid)

        # Confirm that we have enough metricData, polling every 5 seconds
        g_logger.info("Waiting for metric data")
        while len(cls.metricData) < cls.minDataRows and waitTimeMetricData < maxWaitTime:
            g_logger.info("not ready, waiting for metric data: got %d of %d ...", len(cls.metricData), cls.minDataRows)
            time.sleep(5)
            waitTimeMetricData += 5
            with engine.connect() as conn:
                cls.metricData = list(repository.getMetricData(conn, cls.uid))

        # taking lower value for testRowId, this will make sure to have
        # Non NULL value for anomaly_score field for given row
        cls.testRowId = 5

        with engine.connect() as conn:
            cls.testMetricRow = repository.getMetricData(conn, cls.uid, rowid=cls.testRowId).fetchone()

        # Make sure we did not receive None etc for anomaly score
        g_logger.info("cls.testMetricRow.anomaly_score=%r", cls.testMetricRow.anomaly_score)
        g_logger.info("waitAnomalyScore=%r", waitAnomalyScore)
        while cls.testMetricRow.anomaly_score is None and waitAnomalyScore < maxWaitTime:
            g_logger.info("anomaly_score not ready, sleeping...")
            time.sleep(5)
            waitAnomalyScore += 5
            with engine.connect() as conn:
                cls.testMetricRow = repository.getMetricData(conn, cls.uid, rowid=cls.testRowId).fetchone()

        # Decide queryParams for anomalyScore, to and from timestamp
        cls.testAnomalyScore = cls.testMetricRow.anomaly_score
        cls.testTimeStamp = cls.testMetricRow.timestamp
コード例 #41
0
    def testCollectAndPublishMetrics(self):
        """Check that monitored metrics end up producing model results.

        Starts private model scheduler and metric collector subprocesses,
        monitors CPUUtilization on a handful of EC2 instances, and then waits
        until a result batch has been seen for every monitored metric. A timer
        aborts the wait after 120 seconds by deleting the results queue.
        """
        # TODO Add more metric types
        # TODO Deeper validation of the published metrics

        # Start our own scheduler and collector instances for the duration of
        # the test
        with self._startModelSchedulerSubprocess() as schedulerProc, \
            self._startMetricCollectorSubprocess() as collectorProc:
            # Describe the models that the metric collector will harvest
            awsRegion = "us-west-2"
            awsNamespace = "AWS/EC2"
            instanceType = ResourceTypeNames.EC2_INSTANCE

            engine = repository.engineFactory()
            adapter = createCloudwatchDatasourceAdapter()

            available = adapter.describeResources(region=awsRegion,
                                                  resourceType=instanceType)
            self.assertGreater(len(available), 0)

            # Use at most 10 instances, fewer if the instance quota is smaller
            modelLimit = min(10, Quota.getInstanceQuota())
            chosen = available[:modelLimit]

            _LOGGER.info("Starting %d models", len(chosen))
            self.assertGreater(len(chosen), 0)

            monitoredIds = []
            for instance in chosen:
                metricId = adapter.monitorMetric({
                    "datasource": "cloudwatch",
                    "metricSpec": {
                        "region": awsRegion,
                        "namespace": awsNamespace,
                        "metric": "CPUUtilization",
                        "dimensions": {"InstanceId": instance["resID"]},
                    },
                })

                with engine.connect() as conn:
                    repository.setMetricStatus(conn, metricId,
                                               MetricStatus.ACTIVE)

                monitoredIds.append(metricId)

            _LOGGER.info("Waiting for results from models...")

            received = set()
            expected = set(monitoredIds)

            def onTimeout(resultsQueueName):
                """Timer callback that keeps the test from blocking forever."""
                _LOGGER.error(
                    "Timed out waiting to get results from models; numResults=%d; "
                    "expected=%d", len(received), len(expected))

                # HACK delete model swapper results queue to abort the consumer
                try:
                    with MessageBusConnector() as bus:
                        bus.deleteMessageQueue(resultsQueueName)
                except Exception:
                    _LOGGER.exception("Failed to delete results mq=%s",
                                      resultsQueueName)
                    raise

            with ModelSwapperInterface() as modelSwapper:
                with modelSwapper.consumeResults() as consumer:
                    watchdog = threading.Timer(
                        120, onTimeout, args=[modelSwapper._resultsQueueName])
                    watchdog.start()
                    try:
                        for batch in consumer:
                            received.add(batch.modelID)
                            batch.ack()
                            if received == expected:
                                break
                        else:
                            # The consumer ran dry before every metric reported
                            self.fail(
                                "Expected %d results, but got only %d: %s" % (
                                    len(expected),
                                    len(received),
                                    received,
                                ))
                        _LOGGER.info("Got %d results from models",
                                     len(received))
                    finally:
                        watchdog.cancel()

            # Terminate metric_collector subprocess gracefully to avoid too much
            # error logging junk on the terminal
            collectorProc.send_signal(signal.SIGINT)

            # Likewise terminate the model scheduler subprocess gracefully
            schedulerProc.send_signal(signal.SIGINT)
コード例 #42
0
    def GET(self):
        """
        Get metrics, sorted by AWS name tag / instance ID

        Metrics with a non-empty ``tag_name`` come first, ordered by that name;
        ties are broken the same way on the instance ID taken from
        ``parameters``.

        :returns: List of metrics
        :rtype: list

        :raises: ``web.HTTPError`` raised while handling is logged and
          re-raised unchanged; any other error is logged and converted to
          ``web.internalerror``

        Example request::

          GET /_anomalies/name

        Example response::

          [
            {
              "status": 1,
              "last_rowid": 4033,
              "display_name": "jenkins-master (us-west-2/AWS/EC2/i-12345678)",
              "description": "NetworkIn on EC2 instance i-12345678 in us-west-2",
              "name": "AWS/EC2/NetworkIn",
              "last_timestamp": "2014-04-14 20:29:00",
              "poll_interval": 300,
              "server": "us-west-2/AWS/EC2/i-12345678",
              "tag_name": "jenkins-master",
              "datasource": "cloudwatch",
              "location": "us-west-2",
              "message": null,
              "parameters": {
                "InstanceId": "i-12345678",
                "region": "us-west-2"
              },
              "uid": "0b6b97022fdb4134936aae92aa67393b"
            },
            ...
          ]

        """

        try:
            self.addStandardHeaders()

            engine = repository.engineFactory()

            with engine.connect() as conn:
                modelIterator = repository.getAllMetrics(
                    conn, fields=getMetricDisplayFields(conn))
                modelsList = [
                    convertMetricRowToMetricDict(model)
                    for model in modelIterator
                ]

            # Sort by tag_name, and then parameters=>InstanceID
            def nameSortKey(model):
                tagName = model["tag_name"]
                # NOTE(review): the example response above spells this key
                # "InstanceId"; confirm which spelling the stored parameters
                # use, otherwise this lookup always yields None.
                instanceId = model["parameters"].get("InstanceID")
                # The leading booleans place models that have a value ahead of
                # those that do not; the values themselves then decide the order.
                return (not tagName, tagName, not instanceId, instanceId)

            modelsList.sort(key=nameSortKey)

            return utils.jsonEncode(modelsList)

        except web.HTTPError as ex:
            log.info(str(ex) or repr(ex))
            # Bare raise keeps the original traceback
            raise

        except Exception as ex:
            log.exception("GET Failed")
            raise web.internalerror(str(ex) or repr(ex))
コード例 #43
0
ファイル: anomalies_api.py プロジェクト: darian19/what
  def GET(self, period):
    """
    Get metrics, sorted by anomalies over specified period (hours)

    Servers are ordered by their largest display value over the period
    (highest first), then by server name; metrics on the same server are
    ordered by metric name.

    :param period: Period (hours) over which to consider anomalies for sort
      order
    :type period: int
    :returns: List of metrics
    :rtype: list

    :raises: ``web.HTTPError`` raised while handling is logged and re-raised
      unchanged; any other error is logged and converted to
      ``web.internalerror``

    Example request::

      GET /_anomalies/period/{period}

    Example response::

      [
        {
          "status": 1,
          "last_rowid": 4033,
          "display_name": "jenkins-master (us-west-2/AWS/EC2/i-12345678)",
          "description": "NetworkIn on EC2 instance i-12345678 in us-west-2",
          "name": "AWS/EC2/NetworkIn",
          "last_timestamp": "2014-04-14 20:29:00",
          "poll_interval": 300,
          "server": "us-west-2/AWS/EC2/i-12345678",
          "tag_name": "jenkins-master",
          "datasource": "cloudwatch",
          "location": "us-west-2",
          "message": null,
          "parameters": {
            "InstanceId": "i-12345678",
            "region": "us-west-2"
          },
          "uid": "0b6b97022fdb4134936aae92aa67393b"
        },
        ...
      ]

    """

    try:
      self.addStandardHeaders()

      engine = repository.engineFactory()

      # Keep track of the largest model display value for each server
      serverValues = defaultdict(float)

      modelsList = []

      with engine.connect() as conn:
        modelIterator = repository.getAllMetrics(conn, fields=getMetricDisplayFields(conn))
        displayValuesMap = repository.getMetricIdsSortedByDisplayValue(conn, period)

        # Consume the metric rows while the connection that produced them is
        # still checked out
        for model in modelIterator:
          val = displayValuesMap.get(model.uid)
          if val is not None:
            serverValues[model.server] = max(float(val),
                                             serverValues[model.server])
          modelsList.append(convertMetricRowToMetricDict(model))

      # Rank servers by descending display value, then by server name and
      # metric name. Servers without any display value rank with 0.0.
      def getModelRankByServer(model):
        return (-serverValues[model["server"]], model["server"], model["name"])
      modelsList.sort(key=getModelRankByServer)

      return utils.jsonEncode(modelsList)

    except web.HTTPError as ex:
      log.info(str(ex) or repr(ex))
      # Bare raise keeps the original traceback
      raise

    except Exception as ex:
      log.exception("GET Failed")
      raise web.internalerror(str(ex) or repr(ex))
コード例 #44
0
ファイル: __init__.py プロジェクト: bopopescu/what
 def _connect():
   """ Expose the sqlalchemy engine's ``connect`` method to web handlers as
   ``web.ctx.connFactory``; no connection is opened here.
   """
   engine = repository.engineFactory()
   web.ctx.connFactory = engine.connect
コード例 #45
0
    def run(self):
        """ Collect metric data and status for active metrics

        Runs an endless loop that selects the metrics due for an update, fans
        the collection out to a pool of worker processes, and hands the results
        to a dispatch thread started for that cycle. The loop has no normal
        exit; it ends only when an exception propagates, at which point the
        worker pool is terminated and joined.
        """
        # NOTE: the process pool must be created BEFORE this main (parent) process
        # creates any global or class-level shared resources (e.g., boto
        # connection) that would have undesirable consequences when
        # replicated into and used by forked child processes (e.g., the same MySQL
        # connection socket file descriptor used by multiple processes). And we
        # can't take advantage of the process Pool's maxtasksperchild feature
        # either (for the same reason)
        self._log.info("Starting YOMP Metric Collector")
        # Queue through which collected results reach the dispatch thread
        resultsQueue = multiprocessing.Manager().JoinableQueue()

        # One-way pipe; the dispatch thread is given the sending end and this
        # loop reads its per-cycle stats from the receiving end
        recvPipe, sendPipe = multiprocessing.Pipe(False)

        processPool = multiprocessing.Pool(
            processes=self._WORKER_PROCESS_POOL_SIZE, maxtasksperchild=None)

        try:
            with ModelSwapperInterface() as modelSwapper:
                engine = repository.engineFactory()
                while True:
                    startTime = time.time()

                    if startTime > self._nextCacheGarbageCollectionTime:
                        # TODO: unit-test
                        self._garbageCollectInfoCache()

                    # Determine which metrics are due for an update
                    metricsToUpdate = self._getCandidateMetrics(engine)

                    filterDuration = time.time() - startTime

                    if not metricsToUpdate:
                        # Nothing is due; back off before checking again
                        time.sleep(self._NO_PENDING_METRICS_SLEEP_SEC)
                        continue

                    # Collect metric data
                    collectionStartTime = time.time()

                    poolResults = self._collectDataForMetrics(
                        metricsToUpdate, processPool, resultsQueue)

                    # Process/dispatch results in parallel in another thread as results
                    # become available in resultsQueue
                    dispatchThread = (threading.Thread(
                        target=self._processAndDispatchThreadTarget,
                        args=(engine, metricsToUpdate, resultsQueue,
                              modelSwapper, sendPipe)))
                    dispatchStartTime = time.time()
                    dispatchThread.start()

                    # Synchronize with processPool
                    poolResults.wait()  # Wait for collection tasks to complete

                    metricPollDuration = time.time() - collectionStartTime

                    resultsQueue.join()  # Block until all tasks completed...

                    # Synchronize with dispatchThread: the sentinel signals that
                    # there are no more results to process. The second join()
                    # presumably returns once the dispatch thread has marked the
                    # sentinel as done -- confirm in
                    # _processAndDispatchThreadTarget.
                    resultsQueue.put(
                        self._SENTINEL)
                    resultsQueue.join()
                    # Get dispatchThread stats
                    numEmpty, numErrors = recvPipe.recv(
                    )

                    dispatchDuration = time.time() - dispatchStartTime

                    self._log.info(
                        "Processed numMetrics=%d; numEmpty=%d; numErrors=%d; "
                        "duration=%.4fs (filter=%.4fs; query=%.4fs; dispatch=%.4fs)",
                        len(metricsToUpdate), numEmpty, numErrors,
                        time.time() - startTime, filterDuration,
                        metricPollDuration, dispatchDuration)
        finally:
            # Always tear down the worker processes, whatever ended the loop
            self._log.info("Exiting Metric Collector run-loop")
            processPool.terminate()
            processPool.join()
コード例 #46
0
    def testCollectMetricStatistics(self):

        expectedStatisticNames = ["min", "max"]

        def validateStats(stats):
            self.assertIsInstance(stats, (list, tuple))

            timestamps = []
            for instanceMetrics in stats:
                self.assertEqual(len(instanceMetrics.records), 1)
                record = instanceMetrics.records[0]
                self.assertIsInstance(record.value, dict)
                self.assertGreater(len(record.value), 0)
                self.assertTrue(set(
                    record.value.iterkeys()).issubset(expectedStatisticNames),
                                msg=record.value)

                for metricValue in record.value.itervalues():
                    self.assertIsInstance(metricValue,
                                          float,
                                          msg=instanceMetrics)
                    self.assertFalse(math.isnan(metricValue))

                timestamps.append(record.timestamp)

            # Verify that all the stats timestamps are the same
            if timestamps:
                self.assertSequenceEqual(timestamps,
                                         [timestamps[0]] * len(timestamps))

        # Collection data for both autostack/metric combinations
        collector = EC2InstanceMetricGetter()
        self.addCleanup(collector.close)

        def _createAutostackMetric(conn, name, region, filters):
            autostackDict = repository.addAutostack(
                conn, name=name, region=region, filters=json.dumps(filters))

            modelSpec = {
                "modelParams": {},
                "datasource": "autostack",
                "metricSpec": {
                    "slaveDatasource": "cloudwatch",
                    "slaveMetric": {
                        "metric": "CPUUtilization",
                        "namespace": "AWS/EC2"
                    },
                    "autostackId": autostackDict["uid"]
                }
            }

            metricDict = repository.addMetric(
                conn,
                datasource="autostack",
                name="CPUUtilization",
                description=(
                    "CPUUtilization on YOMP Autostack {0} in us-west-2 "
                    "region").format(name),
                server="Autostacks/{0}".format(autostackDict["uid"]),
                location=region,
                tag_name=name,
                parameters=htmengine.utils.jsonEncode(modelSpec),
                poll_interval=300,
                status=MetricStatus.UNMONITORED)

            repository.addMetricToAutostack(conn, autostackDict["uid"],
                                            metricDict["uid"])

            autostackObj = type("MutableAutostack", (object, ),
                                autostackDict)()
            autostackObj.filters = json.loads(autostackObj.filters)

            metricObj = type("MutableMetric", (object, ), metricDict)()

            return autostackObj, metricObj

        # All instances in us-east-1
        engine = repository.engineFactory()
        with engine.begin() as conn:
            autostack1, m1 = (_createAutostackMetric(
                conn,
                name="testCollectMetricStats1",
                region="us-east-1",
                filters={"tag:Name": ["*"]}))

            stats1 = collector.collectMetricStatistics(autostack=autostack1,
                                                       metric=m1)
            print "STATS1:", stats1

            validateStats(stats1)
            self.assertGreaterEqual(len(stats1), 1)

            # All instances in us-west-2
            autostack2, m2 = _createAutostackMetric(
                conn,
                name="testCollectMetricStats2",
                region="us-west-2",
                filters={"tag:Name": ["*"]})

            stats2 = collector.collectMetricStatistics(autostack=autostack2,
                                                       metric=m2)
            print "STATS2:", stats2
            validateStats(stats2)
            self.assertGreater(len(stats2), 1)

            # No matching instances in us-west-2
            autostack3, m3 = (_createAutostackMetric(
                conn,
                name="testCollectMetricStatistics3",
                region="us-west-2",
                filters={"tag:Name": ["NothingShouldMatchThis"]}))

            stats3 = collector.collectMetricStatistics(autostack=autostack3,
                                                       metric=m3)
            print "STATS3:", stats3
            validateStats(stats3)
            self.assertEqual(len(stats3), 0)
コード例 #47
0
ファイル: models_api_test.py プロジェクト: bopopescu/what
  def setUpClass(cls):
    """
    Setup steps for all test cases.
    Focus for these is to cover all API checks for ModelDataHandler.
    Hence, this does all setup creating metric, waiting for
    metricData across all testcases, all API call for querying metricData
    will be against single metric created in setup
    Setup Process
    1) Update conf with aws credentials, ManagedTempRepository will not
       work in this test
    2) Select test instance such that it has been running for a long time,
       We are using instance older than 15 days
    3) Create Metric, wait for min metricData rows to become available
       Set to 100, configurable
    4) Pick testRowId, set it to a low value; this will make sure to have
       Non NULL value for anomaly_score field for given row while invoking
       GET with conditions, set to 5
    5) Decide queryParams for anomalyScore, to and from timestamp

    Both wait loops give up after ``maxWaitTime`` seconds and let setup
    continue with whatever data is available at that point.
    """
    cls.headers = getDefaultHTTPHeaders(YOMP.app.config)

    # All other services need AWS credentials to work
    # Set AWS credentials
    YOMP.app.config.loadConfig()

    # Select test instance such that it has been running for a long time
    g_logger.info("Getting long-running EC2 Instances")
    instances = aws_utils.getLongRunningEC2Instances("us-west-2",
      YOMP.app.config.get("aws", "aws_access_key_id"),
      YOMP.app.config.get("aws", "aws_secret_access_key"), 15)
    # randrange(len(...)) can select every index, including 0, and also works
    # when exactly one instance is returned; randrange(1, len(...)) could do
    # neither.
    testInstance = instances[randrange(len(instances))]

    createModelData = {
      "region": "us-west-2",
      "namespace": "AWS/EC2",
      "datasource": "cloudwatch",
      "metric": "CPUUtilization",
      "dimensions": {
        "InstanceId": testInstance.id
      }
    }

    # Number of minimum rows
    cls.minDataRows = 100

    cls.app = TestApp(models_api.app.wsgifunc())

    # create test metric
    g_logger.info("Creating test metric; modelSpec=%s", createModelData)
    response = cls.app.put("/", utils.jsonEncode(createModelData),
     headers=cls.headers)
    postResult = utils.jsonDecode(response.body)
    maxWaitTime = 600
    waitTimeMetricData = 0
    waitAnomalyScore = 0


    # Wait for enough metric data to be available
    cls.uid = postResult[0]["uid"]
    engine = repository.engineFactory()
    with engine.connect() as conn:
      cls.metricData = list(repository.getMetricData(conn, cls.uid))
    with engine.connect() as conn:
      cls.testMetric = repository.getMetric(conn, cls.uid)

    # Confirm that we have enough metricData, polling every 5 seconds
    g_logger.info("Waiting for metric data")
    while (len(cls.metricData) < cls.minDataRows and
           waitTimeMetricData < maxWaitTime):
      g_logger.info("not ready, waiting for metric data: got %d of %d ...",
                    len(cls.metricData), cls.minDataRows)
      time.sleep(5)
      waitTimeMetricData += 5
      with engine.connect() as conn:
        cls.metricData = list(repository.getMetricData(conn, cls.uid))

    # taking lower value for testRowId, this will make sure to have
    # Non NULL value for anomaly_score field for given row
    cls.testRowId = 5

    with engine.connect() as conn:
      cls.testMetricRow = (repository.getMetricData(conn,
                                                     cls.uid,
                                                     rowid=cls.testRowId)
                          .fetchone())

    # Make sure we did not receive None etc for anomaly score
    g_logger.info("cls.testMetricRow.anomaly_score=%r",
                  cls.testMetricRow.anomaly_score)
    g_logger.info("waitAnomalyScore=%r", waitAnomalyScore)
    while (cls.testMetricRow.anomaly_score is None and
           waitAnomalyScore < maxWaitTime):
      g_logger.info("anomaly_score not ready, sleeping...")
      time.sleep(5)
      waitAnomalyScore += 5
      with engine.connect() as conn:
        cls.testMetricRow = (repository.getMetricData(conn,
                                                      cls.uid,
                                                      rowid=cls.testRowId)
                            .fetchone())

    # Decide queryParams for anomalyScore, to and from timestamp
    cls.testAnomalyScore = cls.testMetricRow.anomaly_score
    cls.testTimeStamp = cls.testMetricRow.timestamp
コード例 #48
0
def getStatistics(metric):
    """Compute the buffered min/max value range for an Autostack metric.

    Per-instance statistics are collected through EC2InstanceMetricGetter and
    the usable "min" and "max" values are averaged across instances. When no
    instance yields usable values, the range is taken from
    repository.getMetricStats instead. In both cases the range is then widened
    on each side by 20% of its width.

    :param metric: the Autostack metric to get statistics for; its
        ``datasource`` and ``uid`` attributes are read

    :returns: metric statistics
    :rtype: dict {"min": minVal, "max": maxVal}

    :raises: ValueError if the metric does not belong to an Autostack
        (``metric.datasource`` is not "autostack")

    :raises: YOMP.app.exceptions.ObjectNotFoundError if the metric or the
        corresponding autostack doesn't exist; this may happen if it got
        deleted by another process in the meantime.

    :raises: YOMP.app.exceptions.MetricStatisticsNotReadyError if there are no
        or insufficient samples at this time; this may also happen if the
        metric and its data were deleted by another process in the meantime
    """

    def _isUsable(value):
        # Cloudwatch may hand back non-numeric or NaN statistics; such values
        # must not contribute to the averages
        return isinstance(value, numbers.Number) and not math.isnan(value)

    engine = repository.engineFactory()

    if metric.datasource != "autostack":
        raise ValueError(
            "Metric must belong to an Autostack but has datasource=%r" %
            metric.datasource)

    getter = EC2InstanceMetricGetter()
    try:
        with engine.connect() as conn:
            autostack = repository.getAutostackFromMetric(conn, metric.uid)
        perInstanceStats = getter.collectMetricStatistics(autostack, metric)
    finally:
        # Release the getter regardless of whether collection succeeded
        getter.close()

    usableCount = 0
    minTotal = 0.0
    maxTotal = 0.0
    for instanceMetric in perInstanceStats:
        assert len(instanceMetric.records) == 1
        stats = instanceMetric.records[0].value

        if _isUsable(stats["min"]) and _isUsable(stats["max"]):
            minTotal += stats["min"]
            maxTotal += stats["max"]
            usableCount += 1

    if usableCount:
        lower = minTotal / usableCount
        upper = maxTotal / usableCount
    else:
        # Nothing usable from AWS: fall back to metric_data. This may raise
        # a MetricStatisticsNotReadyError if there is no or not enough data.
        with engine.connect() as conn:
            dbStats = repository.getMetricStats(conn, metric.uid)
        lower = dbStats["min"]
        upper = dbStats["max"]

    # Pad both ends of the range by 20% of its width
    margin = (upper - lower) * 0.2
    return {"min": lower - margin, "max": upper + margin}
コード例 #49
0
    def POST(cls):
        """Upload the metric info and metric data as a compressed tarfile to S3.

        The request body is JSON. It must include the uid of the metric and
        may include other keys as well. For instance, it is likely that a
        request from the mobile application will include information about the
        current view and data being displayed when the feedback request is
        sent. Any fields in addition to uid will be stored with the feedback
        archive file that is uploaded to S3.

        The archive holds two members:
          - metric.json: the extra request fields, "server_id", and the
            JSON-encoded metric record under the "metric" key
          - metric_data.csv: a header row followed by one row per metric_data
            record, with columns in schema order (empty file when the metric
            has no data)

        :returns: whatever cls._uploadTarfile returns for the generated archive

        :raises: KeyError if the request body has no "uid" key
        """
        inputData = json.loads(web.data())
        # Get the metric uid; every remaining key is archived as-is
        uid = inputData["uid"]
        del inputData["uid"]

        inputData["server_id"] = _MACHINE_ID

        # Data is written to a temporary directory before uploading
        path = tempfile.mkdtemp()

        try:
            # Retrieve the metric table record and add it to the other input
            # parameters
            metricFields = [
                schema.metric.c.uid, schema.metric.c.datasource,
                schema.metric.c.name, schema.metric.c.description,
                schema.metric.c.server, schema.metric.c.location,
                schema.metric.c.parameters, schema.metric.c.status,
                schema.metric.c.message, schema.metric.c.last_timestamp,
                schema.metric.c.poll_interval, schema.metric.c.tag_name,
                schema.metric.c.last_rowid
            ]

            with repository.engineFactory().connect() as conn:
                metricRow = repository.getMetric(conn, uid, metricFields)
            metric = {col.name: getattr(metricRow, col.name)
                      for col in metricFields}
            # The parameters column is stored as JSON text; decode it so that
            # it is embedded as a structure rather than as a string
            metric["parameters"] = utils.jsonDecode(metric["parameters"])

            if metric["tag_name"]:
                metric["display_name"] = "%s (%s)" % (metric["tag_name"],
                                                      metric["server"])
            else:
                metric["display_name"] = metric["server"]

            inputData["metric"] = utils.jsonEncode(metric)

            metricPath = os.path.join(path, "metric.json")
            with open(metricPath, "w") as f:
                json.dump(inputData, f)

            # Retrieve the metric data. Column order is taken from the schema
            # so that the CSV layout is deterministic.
            dataColumns = [col.name for col in schema.metric_data.columns]
            with repository.engineFactory().connect() as conn:
                # Consume the result while the connection is still open;
                # reading rows from it after the connection has been closed
                # is not reliable.
                metricData = [
                    [getattr(dataRow, name) for name in dataColumns]
                    for dataRow in repository.getMetricData(conn, uid)
                ]

            metricDataPath = os.path.join(path, "metric_data.csv")
            with open(metricDataPath, "w") as f:
                # With no data rows the file is still created, but left empty
                if metricData:
                    writer = csv.writer(f)
                    # Write the field names first, then the data rows
                    writer.writerow(dataColumns)
                    writer.writerows(metricData)

            # Create a tarfile to upload
            ts = datetime.datetime.utcnow().strftime("%Y%m%d-%H%M%S")
            filename = "metric_dump_%s_%s.tar.gz" % (uid, ts)
            tfPath = os.path.join(path, filename)
            with tarfile.open(tfPath, "w:gz") as tf:
                tf.add(metricPath, arcname=os.path.basename(metricPath))
                tf.add(metricDataPath,
                       arcname=os.path.basename(metricDataPath))

            # Upload the tarfile
            return cls._uploadTarfile(filename, tfPath)

        finally:
            # Always remove the temporary directory, even if the upload failed
            shutil.rmtree(path)