def testMetricDataForRandomRowID(self, uid):
      '''
        Tests that the metric data returned by the GET call:
          _models/<uid>/data
        has anomaly scores consistent with what is stored in the
        database, by asserting against a repository.getMetricData() call.
        The check is repeated for 5 random sample rows for the given uid.

        Algorithm:
        - Query the MetricDataHandler GET call for a certain uid
        - Check that the response is OK
        - Find the last row id for the uid
        - Select a random row between 1 and the last row id
        - Find the anomaly score for that row id
        - Assert on the anomaly score and timestamp
      '''
      response = self.app.get("/%s/data" %uid, headers=self.headers)
      assertions.assertSuccess(self, response)
      getAllModelsResult = utils.jsonDecode(response.body)
      with repository.engineFactory().connect() as conn:
        lastRowID = repository.getMetric(conn, uid).last_rowid
      for _ in range(5):
        randomRowID = randrange(1, lastRowID)
        with repository.engineFactory().connect() as conn:
          singleMetricData = repository.getMetricData(
            conn,
            uid,
            rowid=randomRowID).first()
        metricData = getMetricDataWithRowID(getAllModelsResult['data'],
          randomRowID)
        self.assertEqual(metricData[2], singleMetricData.anomaly_score)
        self.assertEqual(datetime.strptime(metricData[0],
          '%Y-%m-%d %H:%M:%S'), singleMetricData.timestamp)
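The getMetricDataWithRowID helper used above is not shown in this example; a minimal sketch, assuming each row in the decoded payload is a [timestamp, value, anomaly_score, rowid] list as returned by the ModelDataHandler examples below:

def getMetricDataWithRowID(metricDataList, rowid):
  """ Find a single metric data row by its rowid in the decoded GET payload.

  :param metricDataList: list of [timestamp, value, anomaly_score, rowid] rows
  :param rowid: row id to look for
  :returns: the matching row, or None if the rowid is not present
  """
  for row in metricDataList:
    if row[3] == rowid:
      return row
  return None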
Example #2
  def setUpClass(cls):
    """
    Setup steps for all test cases.
    Focus for these is to cover all API checks for ModelDataHandler.
    Hence, this does all setup creating metric, waiting for
    metricData across all testcases, all API call for querying metricData
    will be against single metric created in setup
    Setup Process
    1) Update conf with aws credentials, ManagedTempRepository will not
       work in this test
    2) Select test instance such that its running from longer time,
       We are using instance older than 15 days
    3) Create Metric, wait for min metricData rows to become available
       Set to 100, configurable
    4) Pick testRowId, set it lower value this will make sure to have
       Non NULL value for anomaly_score field for given row while invoking
       GET with consitions, set to 5
    5) Decide queryParams for anomalyScore, to and from timestamp
    """
    cls.headers = getDefaultHTTPHeaders(htm.it.app.config)

    # All other services need AWS credentials to work
    # Set AWS credentials
    htm.it.app.config.loadConfig()

    # Select a test instance that has been running for a long time
    g_logger.info("Getting long-running EC2 Instances")
    instances = aws_utils.getLongRunningEC2Instances("us-west-2",
      htm.it.app.config.get("aws", "aws_access_key_id"),
      htm.it.app.config.get("aws", "aws_secret_access_key"), 15)
    testInstance = instances[randrange(len(instances))]

    createModelData = {
      "region": "us-west-2",
      "namespace": "AWS/EC2",
      "datasource": "cloudwatch",
      "metric": "CPUUtilization",
      "dimensions": {
        "InstanceId": testInstance.id
      }
    }

    # Number of minimum rows
    cls.minDataRows = 100

    cls.app = TestApp(models_api.app.wsgifunc())

    # create test metric
    g_logger.info("Creating test metric; modelSpec=%s", createModelData)
    response = cls.app.put("/", utils.jsonEncode(createModelData),
     headers=cls.headers)
    postResult = utils.jsonDecode(response.body)
    maxWaitTime = 600
    waitTimeMetricData = 0
    waitAnomalyScore = 0


    # Wait for enough metric data to be available
    cls.uid = postResult[0]["uid"]
    engine = repository.engineFactory()
    with engine.connect() as conn:
      cls.metricData = [row for row
                         in repository.getMetricData(conn, cls.uid)]
    with engine.connect() as conn:
      cls.testMetric = repository.getMetric(conn, cls.uid)

    # Confirm that we have enough metricData
    g_logger.info("Waiting for metric data")
    while (len(cls.metricData) < cls.minDataRows and
           waitTimeMetricData < maxWaitTime):
      g_logger.info("not ready, waiting for metric data: got %d of %d ...",
                    len(cls.metricData), cls.minDataRows)
      time.sleep(5)
      waitTimeMetricData += 5
      with engine.connect() as conn:
        cls.metricData = [row for row
                           in repository.getMetricData(conn, cls.uid)]

    # Use a low value for testRowId; this makes sure the anomaly_score
    # field is non-NULL for the selected row
    cls.testRowId = 5

    with engine.connect() as conn:
      cls.testMetricRow = (repository.getMetricData(conn,
                                                     cls.uid,
                                                     rowid=cls.testRowId)
                          .fetchone())

    # Make sure we did not receive None etc for anomaly score
    g_logger.info("cls.testMetricRow.anomaly_score=%r",
                  cls.testMetricRow.anomaly_score)
    g_logger.info("waitAnomalyScore=%r", waitAnomalyScore)
    while (cls.testMetricRow.anomaly_score is None and
           waitAnomalyScore < maxWaitTime):
      g_logger.info("anomaly_score not ready, sleeping...")
      time.sleep(5)
      waitAnomalyScore += 5
      with engine.connect() as conn:
        cls.testMetricRow = (repository.getMetricData(conn,
                                                      cls.uid,
                                                      rowid=cls.testRowId)
                            .fetchone())

    # Decide queryParams for anomalyScore, to and from timestamp
    cls.testAnomalyScore = cls.testMetricRow.anomaly_score
    cls.testTimeStamp = cls.testMetricRow.timestamp
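Both wait loops above follow the same poll-with-timeout pattern; a generic helper along these lines could factor it out (a hypothetical refactoring, not part of the original test class):

  @staticmethod
  def _waitFor(predicate, maxWaitTime=600, interval=5):
    """ Poll until predicate() returns a truthy value or the timeout expires.

    :param predicate: zero-argument callable evaluated on every iteration
    :param maxWaitTime: total number of seconds to wait before giving up
    :param interval: number of seconds to sleep between polls
    :returns: the last value returned by predicate()
    """
    waited = 0
    result = predicate()
    while not result and waited < maxWaitTime:
      time.sleep(interval)
      waited += interval
      result = predicate()
    return result

For example, the metric-data wait could then be written as cls._waitFor(lambda: len(cls.metricData) >= cls.minDataRows), with the list refreshed inside the lambda.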
Example #3
  def GET(self, metricId=None):
    """
    Get Model Data

    ::

        GET /_models/{model-id}/data?from={fromTimestamp}&to={toTimestamp}&anomaly={anomalyScore}&limit={numOfRows}

    Parameters:

      :param limit: (optional) max number of records to return
      :type limit: int
      :param from: (optional) return records from this timestamp
      :type from: timestamp
      :param to: (optional) return records up to this timestamp
      :type to: timestamp
      :param anomaly: (optional) anomaly score to filter by
      :type anomaly: float

    Returns:

    ::

        {
            "data": [
                ["2013-08-15 21:34:00", 222, 0.025, 125],
                ["2013-08-15 21:32:00", 202, 0, 124],
                ["2013-08-15 21:30:00", 202, 0, 123],
                ...
            ],
            "names": [
                "timestamp",
                "value",
                "anomaly_score",
                "rowid
            ]
        }
    """
    queryParams = dict(urlparse.parse_qsl(web.ctx.env['QUERY_STRING']))
    fromTimestamp = queryParams.get("from")
    toTimestamp = queryParams.get("to")
    anomaly = float(queryParams.get("anomaly") or 0.0)
    limit = int(queryParams.get("limit") or 0)

    with web.ctx.connFactory() as conn:
      fields = (schema.metric_data.c.uid,
                schema.metric_data.c.timestamp,
                schema.metric_data.c.metric_value,
                schema.metric_data.c.anomaly_score,
                schema.metric_data.c.rowid)
      names = ("names",) + tuple(["value" if col.name == "metric_value"
                                  else col.name
                                  for col in fields])
      if fromTimestamp:
        sort = schema.metric_data.c.timestamp.asc()
      else:
        sort = schema.metric_data.c.timestamp.desc()

      result = repository.getMetricData(conn,
                                        metricId=metricId,
                                        fields=fields,
                                        fromTimestamp=fromTimestamp,
                                        toTimestamp=toTimestamp,
                                        score=anomaly,
                                        sort=sort)

    if "application/octet-stream" in web.ctx.env.get('HTTP_ACCEPT', ""):
      results_per_uid = defaultdict(int)
      packer = msgpack.Packer()
      self.addStandardHeaders(content_type='application/octet-stream')
      web.header('X-Accel-Buffering', 'no')

      yield packer.pack(names)
      for row in result:
        if not limit or results_per_uid[row.uid] < limit:
          resultTuple = (
              row.uid,
              calendar.timegm(row.timestamp.timetuple()),
              row.metric_value,
              row.anomaly_score,
              row.rowid,
            )
          yield packer.pack(resultTuple)
          results_per_uid[row.uid] += 1
    else:

      if metricId is None:
        output = {}
        for row in result:
          uid = row.uid
          default = {"uid": uid, "data": []}
          recordTuple = (
            row.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
            row.metric_value,
            row.anomaly_score,
            row.rowid
          )
          metricDataRecord = output.setdefault(uid, default)
          if not limit or len(metricDataRecord["data"]) < limit:
            metricDataRecord["data"].append(recordTuple)

        results = {
          "metrics":  output.values(),
          "names": names[2:]
        }

      else:
        if limit:
          results = {"names": names[2:],
                     "data": [(row.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                               row.metric_value,
                               row.anomaly_score,
                               row.rowid)
                              for row in itertools.islice(result, 0, limit)]}
        else:
          results = {"names": names[2:],
                     "data": [(row.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                               row.metric_value,
                               row.anomaly_score,
                               row.rowid) for row in result]}
      self.addStandardHeaders()
      yield utils.jsonEncode(results)
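A quick sketch of exercising this endpoint from a test, covering both the JSON and the msgpack (application/octet-stream) responses. The method name and query parameter values are illustrative; it assumes the app, headers and uid created in the setUpClass example above:

  def testGetModelDataSketch(self):
    # JSON response, limited to 10 rows filtered by the anomaly query parameter
    response = self.app.get("/%s/data?limit=10&anomaly=0.5" % self.uid,
                            headers=self.headers)
    result = utils.jsonDecode(response.body)
    self.assertEqual(result["names"],
                     ["timestamp", "value", "anomaly_score", "rowid"])

    # Binary response: the names header and each row arrive as msgpack items
    headers = dict(self.headers)
    headers["Accept"] = "application/octet-stream"
    response = self.app.get("/%s/data" % self.uid, headers=headers)
    unpacker = msgpack.Unpacker()
    unpacker.feed(response.body)
    names = next(unpacker)  # ["names", "uid", "timestamp", "value", "anomaly_score", "rowid"]
    rows = list(unpacker)   # each row is [uid, epoch seconds, value, anomaly_score, rowid]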
Example #4
    def POST(cls):
        """Upload the metric info and metric data as a compressed tarfile to S3.

    The request must include the uid of the metric and may include other JSON
    keys as well. For instance, it is likely that a request from the mobile
    application will include information about the current view and data
    being displayed when the feedback request is sent. Any fields in addition
    to uid will be stored with the feedback archive file that is uploaded to
    S3.
    """
        inputData = json.loads(web.data())
        # Get the metric uid
        uid = inputData["uid"]
        del inputData["uid"]

        inputData["server_id"] = _MACHINE_ID

        # Data is written to a temporary directory before uploading
        path = tempfile.mkdtemp()

        try:
            # Retrieve the metric table record and add it to the other input
            # parameters
            metricFields = [
                schema.metric.c.uid, schema.metric.c.datasource,
                schema.metric.c.name, schema.metric.c.description,
                schema.metric.c.server, schema.metric.c.location,
                schema.metric.c.parameters, schema.metric.c.status,
                schema.metric.c.message, schema.metric.c.last_timestamp,
                schema.metric.c.poll_interval, schema.metric.c.tag_name,
                schema.metric.c.last_rowid
            ]

            with repository.engineFactory().connect() as conn:
                metricRow = repository.getMetric(conn, uid, metricFields)
            metric = dict([
                (col.name, utils.jsonDecode(getattr(metricRow, col.name))
                 if col.name == "parameters" else getattr(metricRow, col.name))
                for col in metricFields
            ])
            if metric["tag_name"]:
                metric["display_name"] = "%s (%s)" % (metric["tag_name"],
                                                      metric["server"])
            else:
                metric["display_name"] = metric["server"]

            inputData["metric"] = utils.jsonEncode(metric)

            metricPath = os.path.join(path, "metric.json")
            with open(metricPath, "w") as f:
                json.dump(inputData, f)

            # Retrieve the metric data
            with repository.engineFactory().connect() as conn:
                metricDataRows = repository.getMetricData(conn, uid)
            metricData = [
                dict([(col.name, getattr(metricData, col.name))
                      for col in schema.metric_data.columns])
                for metricData in metricDataRows
            ]

            metricDataPath = os.path.join(path, "metric_data.csv")
            with open(metricDataPath, "w") as f:
                writer = csv.writer(f)
                if len(metricData) > 0:
                    header = metricData[0].keys()
                    # Write the field names first
                    writer.writerow(header)
                    # Then write out the data for each row
                    for dataDict in metricData:
                        row = [dataDict[h] for h in header]
                        writer.writerow(row)

            # Create a tarfile to upload
            ts = datetime.datetime.utcnow().strftime("%Y%m%d-%H%M%S")
            filename = "metric_dump_%s_%s.tar.gz" % (uid, ts)
            tfPath = os.path.join(path, filename)
            with tarfile.open(tfPath, "w:gz") as tf:
                tf.add(metricPath, arcname=os.path.basename(metricPath))
                tf.add(metricDataPath,
                       arcname=os.path.basename(metricDataPath))

            # Upload the tarfile
            return cls._uploadTarfile(filename, tfPath)

        finally:
            shutil.rmtree(path)
    def GET(self, metricId=None):
        """
    Get Model Data

    ::

        GET /_models/{model-id}/data?from={fromTimestamp}&to={toTimestamp}&anomaly={anomalyScore}&limit={numOfRows}

    Parameters:

      :param limit: (optional) max number of records to return
      :type limit: int
      :param from: (optional) return records from this timestamp
      :type from: timestamp
      :param to: (optional) return records up to this timestamp
      :type to: timestamp
      :param anomaly: (optional) anomaly score to filter by
      :type anomaly: float

    Returns:

    ::

        {
            "data": [
                ["2013-08-15 21:34:00", 222, 0.025, 125],
                ["2013-08-15 21:32:00", 202, 0, 124],
                ["2013-08-15 21:30:00", 202, 0, 123],
                ...
            ],
            "names": [
                "timestamp",
                "value",
                "anomaly_score",
                "rowid
            ]
        }
    """
        queryParams = dict(urlparse.parse_qsl(web.ctx.env['QUERY_STRING']))
        fromTimestamp = queryParams.get("from")
        toTimestamp = queryParams.get("to")
        anomaly = float(queryParams.get("anomaly") or 0.0)
        limit = int(queryParams.get("limit") or 0)

        with web.ctx.connFactory() as conn:
            fields = (schema.metric_data.c.uid, schema.metric_data.c.timestamp,
                      schema.metric_data.c.metric_value,
                      schema.metric_data.c.anomaly_score,
                      schema.metric_data.c.rowid)
            names = ("names", ) + tuple([
                "value" if col.name == "metric_value" else col.name
                for col in fields
            ])
            if fromTimestamp:
                sort = schema.metric_data.c.timestamp.asc()
            else:
                sort = schema.metric_data.c.timestamp.desc()

            result = repository.getMetricData(conn,
                                              metricId=metricId,
                                              fields=fields,
                                              fromTimestamp=fromTimestamp,
                                              toTimestamp=toTimestamp,
                                              score=anomaly,
                                              sort=sort)

        if "application/octet-stream" in web.ctx.env.get('HTTP_ACCEPT', ""):
            results_per_uid = defaultdict(int)
            packer = msgpack.Packer()
            self.addStandardHeaders(content_type='application/octet-stream')
            web.header('X-Accel-Buffering', 'no')

            yield packer.pack(names)
            for row in result:
                if not limit or results_per_uid[row.uid] < limit:
                    resultTuple = (
                        row.uid,
                        calendar.timegm(row.timestamp.timetuple()),
                        row.metric_value,
                        row.anomaly_score,
                        row.rowid,
                    )
                    yield packer.pack(resultTuple)
                    results_per_uid[row.uid] += 1
        else:

            if metricId is None:
                output = {}
                for row in result:
                    uid = row.uid
                    default = {"uid": uid, "data": []}
                    recordTuple = (row.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                                   row.metric_value, row.anomaly_score,
                                   row.rowid)
                    metricDataRecord = output.setdefault(uid, default)
                    if not limit or len(metricDataRecord["data"]) < limit:
                        metricDataRecord["data"].append(recordTuple)

                results = {"metrics": output.values(), "names": names[2:]}

            else:
                if limit:
                    results = {
                        "names":
                        names[2:],
                        "data":
                        [(row.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                          row.metric_value, row.anomaly_score, row.rowid)
                         for row in itertools.islice(result, 0, limit)]
                    }
                else:
                    results = {
                        "names":
                        names[2:],
                        "data":
                        [(row.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                          row.metric_value, row.anomaly_score, row.rowid)
                         for row in result]
                    }
            self.addStandardHeaders()
            yield utils.jsonEncode(results)
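The tarball assembled by the POST example above can be inspected locally before (or instead of) uploading; a short sketch using the standard-library tarfile module, with the member names as produced in that example:

def inspectFeedbackArchive(tfPath):
  """ List the members of a feedback archive and load the metric info.

  :param tfPath: path to a metric_dump_<uid>_<timestamp>.tar.gz file
  :returns: (memberNames, metricInfo) where metricInfo is the decoded
      contents of metric.json
  """
  with tarfile.open(tfPath, "r:gz") as tf:
    memberNames = tf.getnames()  # e.g. ["metric.json", "metric_data.csv"]
    metricInfo = json.load(tf.extractfile("metric.json"))
  return memberNames, metricInfo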
Example #6
  def POST(cls):
    """Upload the metric info and metric data as a compressed tarfile to S3.

    The request must include the uid of the metric and may include other JSON
    keys as well. For instance, it is likely that a request from the mobile
    application will include information about the current view and data
    being displayed when the feedback request is sent. Any fields in addition
    to uid will be stored with the feedback archive file that is uploaded to
    S3.
    """
    inputData = json.loads(web.data())
    # Get the metric uid
    uid = inputData["uid"]
    del inputData["uid"]

    inputData["server_id"] = _MACHINE_ID

    # Data is written to a temporary directory before uploading
    path = tempfile.mkdtemp()

    try:
      # Retrieve the metric table record and add it to the other input
      # parameters
      metricFields = [schema.metric.c.uid,
                      schema.metric.c.datasource,
                      schema.metric.c.name,
                      schema.metric.c.description,
                      schema.metric.c.server,
                      schema.metric.c.location,
                      schema.metric.c.parameters,
                      schema.metric.c.status,
                      schema.metric.c.message,
                      schema.metric.c.last_timestamp,
                      schema.metric.c.poll_interval,
                      schema.metric.c.tag_name,
                      schema.metric.c.last_rowid]

      with repository.engineFactory().connect() as conn:
        metricRow = repository.getMetric(conn,
                                         uid,
                                         metricFields)
      metric = dict([(col.name, utils.jsonDecode(getattr(metricRow, col.name))
                      if col.name == "parameters"
                      else getattr(metricRow, col.name))
                      for col in metricFields])
      if metric["tag_name"]:
        metric["display_name"] = "%s (%s)" % (metric["tag_name"],
                                               metric["server"])
      else:
        metric["display_name"] = metric["server"]

      inputData["metric"] = utils.jsonEncode(metric)

      metricPath = os.path.join(path, "metric.json")
      with open(metricPath, "w") as f:
        json.dump(inputData, f)

      # Retrieve the metric data
      with repository.engineFactory().connect() as conn:
        metricDataRows = repository.getMetricData(conn, uid)
      metricData = [dict([(col.name, getattr(metricData, col.name))
                          for col in schema.metric_data.columns])
                    for metricData in metricDataRows]

      metricDataPath = os.path.join(path, "metric_data.csv")
      with open(metricDataPath, "w") as f:
        writer = csv.writer(f)
        if len(metricData) > 0:
          header = metricData[0].keys()
          # Write the field names first
          writer.writerow(header)
          # Then write out the data for each row
          for dataDict in metricData:
            row = [dataDict[h] for h in header]
            writer.writerow(row)

      # Create a tarfile to upload
      ts = datetime.datetime.utcnow().strftime("%Y%m%d-%H%M%S")
      filename = "metric_dump_%s_%s.tar.gz" % (uid, ts)
      tfPath = os.path.join(path, filename)
      with tarfile.open(tfPath, "w:gz") as tf:
        tf.add(metricPath, arcname=os.path.basename(metricPath))
        tf.add(metricDataPath, arcname=os.path.basename(metricDataPath))

      # Upload the tarfile
      return cls._uploadTarfile(filename, tfPath)

    finally:
      shutil.rmtree(path)
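The _uploadTarfile helper called at the end of both POST examples is not shown here; a minimal sketch of what it could look like, assuming boto3 and a hypothetical feedback bucket name (the real implementation and its configuration source may differ):

  @classmethod
  def _uploadTarfile(cls, filename, tfPath):
    """ Upload the feedback tarball to S3 and return the object key.

    FEEDBACK_BUCKET is a placeholder; the original code may read the bucket
    name and credentials from htm.it.app.config instead.
    """
    import boto3  # assumed dependency for this sketch

    s3 = boto3.client("s3")
    key = "feedback/%s" % filename
    s3.upload_file(tfPath, "FEEDBACK_BUCKET", key)
    return key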