コード例 #1
0
    def testPublishMetricDataWithDuplicateKeys(self, connectDynamoDB,
                                               _gracefulCreateTable):
        """Rows sharing the same DynamoDB key must be collapsed so that
        _publishMetricData issues exactly one put_item call.
        """
        metricId = "3b035a5916994f2bb950f5717138f94b"

        # Two rows identical except for rowid; they map to the same item key.
        baseRow = {
            "rowid": 99,
            "ts": epochFromNaiveUTCDatetime(datetime(2015, 3, 20, 0, 46, 28)),
            "value": 10305.0,
            "rawAnomaly": 0.275,
            "anomaly": 0.999840891,
        }
        firstRow = dict(baseRow)
        secondRow = dict(baseRow)
        secondRow["rowid"] = firstRow["rowid"] + 1

        service = DynamoDBService()
        service._publishMetricData(metricId, [firstRow, secondRow])

        # Only the first of the duplicate rows should have been written.
        expectedItem = dynamodb_service.convertInferenceResultRowToMetricDataItem(
            metricId, firstRow)
        putItemMock = (service._metric_data.batch_write.return_value
                       .__enter__.return_value.put_item)
        putItemMock.assert_called_once_with(data=expectedItem._asdict(),
                                            overwrite=True)
コード例 #2
0
  def testPublishMetricDataWithDuplicateKeys(self, connectDynamoDB,
                                             _gracefulCreateTable):
    """Duplicate-key rows are eliminated: a single put_item per item key."""
    metricId = "3b035a5916994f2bb950f5717138f94b"

    # Template for two rows that differ only in rowid (same item key).
    sharedFields = dict(
      rowid=99,
      ts=epochFromNaiveUTCDatetime(datetime(2015, 3, 20, 0, 46, 28)),
      value=10305.0,
      rawAnomaly=0.275,
      anomaly=0.999840891)

    rowA = dict(sharedFields)
    rowB = dict(sharedFields)
    rowB["rowid"] += 1

    service = DynamoDBService()
    service._publishMetricData(metricId, [rowA, rowB])

    # Only the first row's item should reach DynamoDB.
    expected = dynamodb_service.convertInferenceResultRowToMetricDataItem(
      metricId, rowA)
    putItem = (service._metric_data.batch_write.return_value
               .__enter__.return_value.put_item)
    putItem.assert_called_once_with(data=expected._asdict(), overwrite=True)
コード例 #3
0
    def testModelResultHandlerSkipsStaleBatch(self, _amqpUtilsMock,
                                              deserializeModelResult,
                                              connectDynamoDB,
                                              _gracefulCreateTable):
        """A batch older than the freshness threshold must not be saved to
        DynamoDB.
        """
        # Mock an inbound AMQP message; messageHandler() is normally invoked
        # by the amqp library. We then verify what would have been published.
        message = amqp.messages.ConsumerMessage(
            body=Mock(),
            properties=Mock(headers=dict()),
            methodInfo=amqp.messages.MessageDeliveryInfo(consumerTag=Mock(),
                                                         deliveryTag=Mock(),
                                                         redelivered=False,
                                                         exchange=Mock(),
                                                         routingKey=""),
            ackImpl=Mock(),
            nackImpl=Mock())

        # Bypass serialize/deserialize (avoids sqlalchemy rowproxy dependency)
        # by mocking AnomalyService.deserializeModelResult() directly.
        staleDays = DynamoDBService._FRESH_DATA_THRESHOLD_DAYS + 1
        staleTimestamp = epochFromNaiveUTCDatetime(
            datetime.utcnow().replace(microsecond=0) -
            timedelta(days=staleDays))

        inferenceRow = dict(rowid=4790,
                            ts=staleTimestamp,
                            value=9305.0,
                            rawAnomaly=0.775,
                            anomaly=0.999840891)

        metricId = "3b035a5916994f2bb950f5717138f94b"

        deserializeModelResult.return_value = dict(
            metric=dict(
                uid=metricId,
                name="XIGNITE.AGN.VOLUME",
                description="XIGNITE.AGN.VOLUME",
                resource="Resource-of-XIGNITE.AGN.VOLUME",
                location="",
                datasource="custom",
                spec=dict(userInfo=dict(symbol="AGN",
                                        metricType="StockVolume",
                                        metricTypeName="Stock Volume"))),
            results=[inferenceRow])

        service = DynamoDBService()
        metricDataPatch = patch.object(service, "_publishMetricData",
                                       spec_set=service._publishMetricData)
        instancePatch = patch.object(
            service, "_publishInstanceDataHourly",
            spec_set=service._publishInstanceDataHourly)
        with metricDataPatch as metricDataMock, instancePatch as instanceMock:
            service.messageHandler(message)

            deserializeModelResult.assert_called_once_with(message.body)
            # Stale data: neither publisher may be invoked.
            self.assertEqual(metricDataMock.call_count, 0)
            self.assertEqual(instanceMock.call_count, 0)
コード例 #4
0
    def testMessageHandlerRoutesTweetDataToDynamoDB(self, _amqpUtilsMock, connectDynamoDB, _gracefulCreateTable):
        """ Simple test for twitter interface.

        A message with routing key "taurus.data.non-metric.twitter" must be
        routed to the metric_tweets table, with the composite
        "metric_name_tweet_uid" attribute prepended to the item.

        Fix: the input "username" value had been scrubbed to "******" while
        the expected put_item payload asserted "Tweet username", so the
        assertion could never pass; the input now matches the expectation.
        Dead commented-out mock setup was also removed.
        """

        tweetData = [
            {
                "metric_name": "Metric Name",
                "tweet_uid": "3b035a5916994f2bb950f5717138f94b",
                "created_at": "2015-02-19T19:43:24.870109",
                "agg_ts": "2015-02-19T19:43:24.870118",
                "text": "Tweet text",
                "userid": "10",
                "username": "Tweet username",
                "retweet_count": "0",
            }
        ]

        message = amqp.messages.ConsumerMessage(
            body=json.dumps(tweetData),
            properties=Mock(),
            methodInfo=amqp.messages.MessageDeliveryInfo(
                consumerTag=Mock(),
                deliveryTag=Mock(),
                redelivered=False,
                exchange=Mock(),
                routingKey="taurus.data.non-metric.twitter",
            ),
            ackImpl=Mock(),
            nackImpl=Mock(),
        )

        service = DynamoDBService()
        service.messageHandler(message)

        # The composite key prefixes the tweet uid with the metric name.
        (
            service._metric_tweets.batch_write.return_value.__enter__.return_value.put_item.assert_called_once_with(
                data=OrderedDict(
                    [
                        ("metric_name_tweet_uid", "Metric Name-3b035a5916994f2bb950f5717138f94b"),
                        ("metric_name", "Metric Name"),
                        ("tweet_uid", "3b035a5916994f2bb950f5717138f94b"),
                        ("created_at", "2015-02-19T19:43:24.870109"),
                        ("agg_ts", "2015-02-19T19:43:24.870118"),
                        ("text", "Tweet text"),
                        ("userid", "10"),
                        ("username", "Tweet username"),
                        ("retweet_count", "0"),
                    ]
                ),
                overwrite=True,
            )
        )
コード例 #5
0
    def testMessageHandlerRoutesTweetDataToDynamoDB(self, _amqpUtilsMock,
                                                    connectDynamoDB,
                                                    _gracefulCreateTable):
        """ Simple test for twitter interface.

        Fix: the input "username" value had been scrubbed to "******" while
        the expected put_item payload asserted "Tweet username", so the
        assertion could never pass; the input now matches the expectation.
        Dead commented-out mock setup was also removed.
        """

        tweetData = [{
            "metric_name": "Metric Name",
            "tweet_uid": "3b035a5916994f2bb950f5717138f94b",
            "created_at": "2015-02-19T19:43:24.870109",
            "agg_ts": "2015-02-19T19:43:24.870118",
            "text": "Tweet text",
            "userid": "10",
            "username": "Tweet username",
            "retweet_count": "0"
        }]

        message = amqp.messages.ConsumerMessage(
            body=json.dumps(tweetData),
            properties=Mock(),
            methodInfo=amqp.messages.MessageDeliveryInfo(
                consumerTag=Mock(),
                deliveryTag=Mock(),
                redelivered=False,
                exchange=Mock(),
                routingKey="taurus.data.non-metric.twitter"),
            ackImpl=Mock(),
            nackImpl=Mock())

        service = DynamoDBService()
        service.messageHandler(message)

        # The service prepends the composite "metric_name_tweet_uid" key and
        # writes the item with overwrite=True.
        (service._metric_tweets.batch_write.return_value.__enter__.
         return_value.put_item.assert_called_once_with(data=OrderedDict([
             ("metric_name_tweet_uid",
              "Metric Name-3b035a5916994f2bb950f5717138f94b"),
             ("metric_name", "Metric Name"),
             ("tweet_uid", "3b035a5916994f2bb950f5717138f94b"),
             ("created_at", "2015-02-19T19:43:24.870109"),
             ("agg_ts", "2015-02-19T19:43:24.870118"), ("text", "Tweet text"),
             ("userid", "10"), ("username", "Tweet username"),
             ("retweet_count", "0")
         ]),
                                                       overwrite=True))
コード例 #6
0
    def testDynamoDBServiceRun(self, amqpClientClassMock, connectDynamoDB,
                               _gracefulCreateTable):
        """ Very basic test to validate that the service follows AMQP protocol.

    Upon `run()`, it should:

    1. Connect to RabbitMQ
    2. Open a channel
    3. Declare two exchanges; one for model results, and one for non-metric
      data
    4. Declare a durable "dynamodb" queue
    5. Bind the "dynamodb" queue to the two exchanges
    6. Start consuming.
    """
        clientMock = MagicMock(spec_set=(
            dynamodb_service.amqp.synchronous_amqp_client.SynchronousAmqpClient
        ))
        clientMock.__enter__.return_value = clientMock
        amqpClientClassMock.return_value = clientMock

        DynamoDBService().run()

        self.assertTrue(amqpClientClassMock.called,
                        "Service did not connect to rabbitmq")
        self.assertTrue(clientMock.declareExchange.called)

        # Exchange names come from the taurus engine configuration.
        resultsExchange = taurus.engine.config.get("metric_streamer",
                                                   "results_exchange_name")
        nonMetricExchange = taurus.engine.config.get("non_metric_data",
                                                     "exchange_name")

        clientMock.declareExchange.assert_any_call(durable=True,
                                                   exchangeType="fanout",
                                                   exchange=resultsExchange)
        clientMock.declareExchange.assert_any_call(durable=True,
                                                   exchangeType="topic",
                                                   exchange=nonMetricExchange)

        clientMock.declareQueue.assert_called_once_with(ANY, durable=True)

        # The declared queue must be bound to both exchanges.
        queueName = clientMock.declareQueue.return_value.queue
        clientMock.bindQueue.assert_any_call(queue=queueName,
                                             exchange=resultsExchange,
                                             routingKey="")
        clientMock.bindQueue.assert_any_call(exchange=nonMetricExchange,
                                             queue=queueName,
                                             routingKey="#")

        self.assertTrue(clientMock.readEvents.called)
コード例 #7
0
 def testDynamoDBServiceInit(self, connectDynamoDB, _gracefulCreateTable):
     """Constructing the service must authenticate and create its tables."""
     service = DynamoDBService()
     self.assertTrue(hasattr(service, "run"))

     self.assertTrue(connectDynamoDB.called,
                     "Service did not attempt to authenticate with DynamoDB "
                     "API during initialization")
     self.assertTrue(_gracefulCreateTable.called,
                     "Service did not attempt to create any dynamodb tables")

     # Every table-creation call must receive a DynamoDBDefinition instance.
     for callArgs, _ in _gracefulCreateTable.call_args_list:
         self.assertIsInstance(
             callArgs[0], DynamoDBDefinition,
             "Service attempted to create a table using something that isn't "
             "a subclass of DynamoDBDefinition")
コード例 #8
0
  def testPublishInstanceDataHourly(self, connectDynamoDB,
                                    _gracefulCreateTable):
    """ _publishInstanceDataHourly aggregates rows into hourly items.

    The four rows below span two hours (00 and 01 on 2015-02-20).  The mocked
    connection's update_item always raises ResourceNotFoundException, so the
    service must fall back to a conditional put_item for each of the two
    hours, carrying the max anomaly score observed in that hour.

    Fix: removed the unused local `tableName`.
    """
    connectionMock = Mock(spec_set=DynamoDBConnection)
    # Force the update path to fail so the put_item fallback is exercised.
    connectionMock.update_item.side_effect = ResourceNotFoundException(
        400, "item not found")
    connectDynamoDB.return_value = connectionMock
    instanceName = "testName"
    # put_item must be conditioned on the item not already existing.
    condition = "attribute_not_exists(instance_id)"
    rows = [
        dict(
            rowid=99,
            ts=epochFromNaiveUTCDatetime(datetime(2015, 2, 20, 0, 46, 28)),
            value=10305.0,
            rawAnomaly=0.275,
            anomaly=0.999840891
        ),
        dict(
            rowid=100,
            ts=epochFromNaiveUTCDatetime(datetime(2015, 2, 20, 0, 51, 28)),
            value=9305.0,
            rawAnomaly=0.975,
            anomaly=0.999990891
        ),
        dict(
            rowid=101,
            ts=epochFromNaiveUTCDatetime(datetime(2015, 2, 20, 0, 56, 20)),
            value=6111.0,
            rawAnomaly=0.775,
            anomaly=0.999940891
        ),
        dict(
            rowid=102,
            ts=epochFromNaiveUTCDatetime(datetime(2015, 2, 20, 1, 1, 38)),
            value=7092.0,
            rawAnomaly=0.775,
            anomaly=0.999640891
        )
    ]

    service = DynamoDBService()

    # Run the function under test
    service._publishInstanceDataHourly(instanceName, "TwitterVolume", rows)

    # Validate results: one update attempt plus one put fallback per hour.
    self.assertEqual(connectionMock.update_item.call_count, 2)
    self.assertEqual(connectionMock.put_item.call_count, 2)
    calls = connectionMock.put_item.call_args_list

    # Hour 00: max anomaly score of the three rows is 0.999990891 -> "0.99999"
    kwargs0 = calls[0][1]
    item0 = kwargs0["item"]
    self.assertDictEqual(item0["instance_id"], {"S": instanceName})
    self.assertEqual(item0["date_hour"], {"S": "2015-02-20T00"})
    self.assertEqual(item0["date"], {"S": "2015-02-20"})
    self.assertEqual(item0["hour"], {"S": "00"})
    self.assertDictEqual(item0["anomaly_score"]["M"]["TwitterVolume"],
                         {"N": "0.99999"})
    self.assertEqual(kwargs0["condition_expression"], condition)

    # Hour 01: single row with anomaly 0.999640891 -> "0.99964"
    kwargs1 = calls[1][1]
    item1 = kwargs1["item"]
    self.assertEqual(item1["instance_id"], {"S": instanceName})
    self.assertEqual(item1["date_hour"], {"S": "2015-02-20T01"})
    self.assertEqual(item1["date"], {"S": "2015-02-20"})
    self.assertEqual(item1["hour"], {"S": "01"})
    self.assertDictEqual(item1["anomaly_score"]["M"]["TwitterVolume"],
                         {"N": "0.99964"})
    self.assertEqual(kwargs1["condition_expression"], condition)
コード例 #9
0
  def testMessageHandlerRoutesMetricDataToDynamoDB(
      self, _amqpUtilsMock,
      deserializeModelResult, connectDynamoDB, _gracefulCreateTable):
    """ Fresh inference results must be written to the metric-data table.

    Both rabbitmq and dynamodb are mocked out; messageHandler() is driven
    directly and the resulting put_item() calls are verified.
    """
    # Mock an inbound AMQP message; messageHandler() is normally invoked by
    # the amqp library.
    message = amqp.messages.ConsumerMessage(
        body=Mock(),
        properties=Mock(headers=dict()),
        methodInfo=amqp.messages.MessageDeliveryInfo(
            consumerTag=Mock(),
            deliveryTag=Mock(),
            redelivered=False,
            exchange=Mock(),
            routingKey=""),
        ackImpl=Mock(),
        nackImpl=Mock())

    # Bypass serialize/deserialize (avoids sqlalchemy rowproxy dependency) by
    # mocking AnomalyService.deserializeModelResult() directly.
    now = int(time.time())

    resultRow = dict(rowid=4790,
                     ts=now,
                     value=9305.0,
                     rawAnomaly=0.775,
                     anomaly=0.999840891)

    metricId = "3b035a5916994f2bb950f5717138f94b"

    deserializeModelResult.return_value = dict(
        metric=dict(
            uid=metricId,
            name="XIGNITE.AGN.VOLUME",
            description="XIGNITE.AGN.VOLUME",
            resource="Resource-of-XIGNITE.AGN.VOLUME",
            location="",
            datasource="custom",
            spec=dict(userInfo=dict(symbol="AGN",
                                    metricType="StockVolume",
                                    metricTypeName="Stock Volume"))),
        results=[resultRow])

    service = DynamoDBService()
    service.messageHandler(message)

    deserializeModelResult.assert_called_once_with(message.body)

    # The converted item must be written exactly once, with overwrite.
    putItemMock = (service._metric_data.batch_write.return_value
                   .__enter__.return_value.put_item)
    expected = dynamodb_service.convertInferenceResultRowToMetricDataItem(
        metricId, resultRow)
    putItemMock.assert_called_once_with(data=expected._asdict(),
                                        overwrite=True)

    # Metric data must not touch the tweets table.
    self.assertFalse(service._metric_tweets.batch_write.called)

    # Make sure that a model command result doesn't get mistaken for an
    # inference result batch.
    deserializeModelResult.return_value = Mock()
    message.properties = Mock(headers=dict(dataType="model-cmd-result"))
    message.body = Mock()
    service = DynamoDBService()
    with patch.object(service, "_handleModelCommandResult",
                      spec_set=service._handleModelCommandResult):
      service.messageHandler(message)
      service._handleModelCommandResult.assert_called_once_with(message.body)
コード例 #10
0
  def testPathwayToDynamoDB(self):
    """ Test metric data pathway to dynamodb

    End-to-end check: metric samples are sent over the plaintext socket, a
    model is created, and the test verifies that metric, metric-data, hourly
    instance-data and tweet records all appear in DynamoDB; finally the
    metric is deleted and its DynamoDB item must disappear too.
    """

    # Random suffix keeps repeated runs from colliding on the metric name.
    metricName = "TEST." + "".join(random.sample(string.ascii_letters, 16))

    nativeMetric = {
      "modelParams": {
        "minResolution": 0.2,
        "min": 0.0,
        "max": 10000.0,
      },
      "datasource": "custom",
      "metricSpec": {
        "metric": metricName,
        "resource": "Test",
        "userInfo": {
          "symbol": "TEST",
          "metricType": "TwitterVolume",
          "metricTypeName": "Twitter Volume",
        }
      }
    }
    metricName = nativeMetric["metricSpec"]["metric"]
    instanceName = nativeMetric["metricSpec"]["resource"]
    userInfo = nativeMetric["metricSpec"]["userInfo"]

    # Truncate to the top of the hour so all samples fall in known hour slots.
    now = datetime.datetime.utcnow().replace(minute=0, second=0, microsecond=0)

    data = [
      (5000.0, now - datetime.timedelta(minutes=10)),
      (6000.0, now - datetime.timedelta(minutes=5)),
      (7000.0, now),
    ]

    # We'll be explicitly deleting the metric below, but we need to add a
    # cleanup step that runs in case there is some other failure that prevents
    # that part of the test from being reached.

    def gracefulDelete():
      try:
        self._deleteMetric(metricName)
      except ObjectNotFoundError:
        pass

    self.addCleanup(gracefulDelete)

    # Add custom metric data via the graphite-style plaintext listener.
    sock = socket.socket()
    sock.connect(("localhost", self.plaintextPort))
    for metricValue, ts in data:
      sock.sendall("%s %r %s\n" % (metricName,
                                   metricValue,
                                   epochFromNaiveUTCDatetime(ts)))

    self.gracefullyCloseSocket(sock)

    uid = self.checkMetricCreated(metricName)

    # Save the uid for later
    LOGGER.info("Metric %s has uid: %s", metricName, uid)

    # Send model creation request
    model = self._createModel(nativeMetric)
    parameters = json.loads(model.parameters)
    self.assertEqual(parameters["metricSpec"]["userInfo"], userInfo)

    # Poll until the model becomes ACTIVE.
    for _ in xrange(60):
      with self.engine.begin() as conn:
        metric = repository.getMetric(conn, uid)

      if metric.status == MetricStatus.ACTIVE:
        break
      LOGGER.info("Model=%s not ready. Sleeping 1 second...", uid)
      time.sleep(1)
    else:
      # NOTE(review): the message says "5 minutes" but the loop above only
      # waits ~60 seconds (60 iterations x 1s sleep) — confirm intent.
      self.fail("Model results not available within 5 minutes")

    # Check that the data all got processed
    self.checkModelResultsSize(uid, 3)

    # Now check that the data was published to dynamodb...
    dynamodb = DynamoDBService.connectDynamoDB()

    metricTable = Table(MetricDynamoDBDefinition().tableName,
                        connection=dynamodb)
    metricItem = metricTable.lookup(uid)
    self.assertEqual(metricItem["uid"], uid)
    self.assertEqual(metricItem["name"], metricName)
    self.assertEqual(metricItem["metricType"], "TwitterVolume")
    self.assertEqual(metricItem["metricTypeName"], "Twitter Volume")
    self.assertEqual(metricItem["symbol"], "TEST")

    metricDataTable = Table(MetricDataDynamoDBDefinition().tableName,
                            connection=dynamodb)
    instanceDataAnomalyScores = {}
    for metricValue, ts in data:
      metricDataItem = _RETRY_ON_ITEM_NOT_FOUND_DYNAMODB_ERROR(
        metricDataTable.lookup
      )(uid, ts.isoformat())
      # There is no server-side cleanup for metric data, so remove it here for
      # now to avoid accumulating test data
      self.addCleanup(metricDataItem.delete)
      self.assertEqual(metricValue, metricDataItem["metric_value"])
      dt = datetime.datetime.strptime(metricDataItem["timestamp"],
                                      "%Y-%m-%dT%H:%M:%S")
      self.assertEqual(ts, dt)
      # Track the max anomaly score per (date, hour) pair; compared against
      # the hourly aggregate table below.
      ts = ts.replace(minute=0, second=0, microsecond=0)
      date = ts.strftime("%Y-%m-%d")
      hour = ts.strftime("%H")
      key = (date, hour)
      maxVal = instanceDataAnomalyScores.get(key, 0.0)
      instanceDataAnomalyScores[key] = max(
          maxVal, metricDataItem["anomaly_score"])

    # And check that the aggregated instance data is updated
    instanceDataHourlyTable = Table(
        InstanceDataHourlyDynamoDBDefinition().tableName, connection=dynamodb)
    for key, anomalyScore in instanceDataAnomalyScores.iteritems():
      date, hour = key
      instanceDataHourlyItem = _RETRY_ON_ITEM_NOT_FOUND_DYNAMODB_ERROR(
        instanceDataHourlyTable.lookup
      )(instanceName, "%sT%s" % (date, hour))
      self.addCleanup(instanceDataHourlyItem.delete)
      self.assertAlmostEqual(
          anomalyScore,
          float(instanceDataHourlyItem["anomaly_score"]["TwitterVolume"]))
      self.assertEqual(date, instanceDataHourlyItem["date"])
      self.assertEqual(hour, instanceDataHourlyItem["hour"])

    # Now send some twitter data and validate that it made it to dynamodb

    twitterData = [
      {
        "metric_name": metricName,
        "tweet_uid": uid,
        "created_at": "2015-02-19T19:43:24.870109",
        "agg_ts": "2015-02-19T19:43:24.870118",
        "text": "Tweet text",
        "userid": "10",
        "username": "******",
        "retweet_count": "0"
      }
    ]

    with MessageBusConnector() as messageBus:
      messageBus.publishExg(
        exchange=self.config.get("non_metric_data", "exchange_name"),
        routingKey=(
          self.config.get("non_metric_data", "exchange_name") + ".twitter"),
        body=json.dumps(twitterData)
      )


    metricTweetsTable = Table(MetricTweetsDynamoDBDefinition().tableName,
                              connection=dynamodb)
    # Poll for the tweet item; DynamoDB reads are eventually consistent.
    for _ in range(30):
      try:
        metricTweetItem =  metricTweetsTable.lookup(
          twitterData[0]["text"],
          twitterData[0]["agg_ts"]
        )
        break
      except ItemNotFound:
        # LOL eventual consistency
        time.sleep(1)
        continue
    # NOTE(review): if all 30 lookups raise ItemNotFound, metricTweetItem is
    # never bound and the next line raises NameError instead of a clean
    # test failure — consider a for/else with self.fail().
    # There is no server-side cleanup for tweet data, so remove it here for
    # now to avoid accumulating test data
    self.addCleanup(metricTweetItem.delete)
    self.assertEqual(metricTweetItem["username"], twitterData[0]["username"])
    self.assertEqual(metricTweetItem["tweet_uid"], twitterData[0]["tweet_uid"])
    self.assertEqual(metricTweetItem["created_at"], twitterData[0]["created_at"])
    self.assertEqual(metricTweetItem["agg_ts"], twitterData[0]["agg_ts"])
    self.assertEqual(metricTweetItem["text"], twitterData[0]["text"])
    self.assertEqual(metricTweetItem["userid"], twitterData[0]["userid"])
    self.assertEqual(metricTweetItem["username"], twitterData[0]["username"])
    self.assertEqual(metricTweetItem["retweet_count"], twitterData[0]["retweet_count"])
    self.assertEqual(metricTweetItem["copy_count"], 0)

    # NOTE(review): sort_key is assigned here but never used; the ts computed
    # just below is what the query and assertions actually use.
    sort_key = twitterData[0]["agg_ts"]

    ts = (epochFromNaiveUTCDatetime(
      datetime.datetime.strptime(twitterData[0]["agg_ts"].partition(".")[0],
                                 "%Y-%m-%dT%H:%M:%S")) * 1e5)
    queryResult = metricTweetsTable.query_2(
      metric_name__eq=metricName,
      sort_key__gte=ts,
      index="taurus.metric_data-metric_name_index")
    queriedMetricTweetItem = next(queryResult)

    self.assertEqual(queriedMetricTweetItem["username"], twitterData[0]["username"])
    self.assertEqual(queriedMetricTweetItem["tweet_uid"], twitterData[0]["tweet_uid"])
    self.assertEqual(queriedMetricTweetItem["created_at"], twitterData[0]["created_at"])
    self.assertEqual(queriedMetricTweetItem["agg_ts"], twitterData[0]["agg_ts"])
    self.assertEqual(queriedMetricTweetItem["text"], twitterData[0]["text"])
    self.assertEqual(queriedMetricTweetItem["userid"], twitterData[0]["userid"])
    self.assertEqual(queriedMetricTweetItem["username"], twitterData[0]["username"])
    self.assertEqual(queriedMetricTweetItem["retweet_count"], twitterData[0]["retweet_count"])
    self.assertEqual(queriedMetricTweetItem["copy_count"], 0)
    self.assertEqual(queriedMetricTweetItem["sort_key"], ts)

    # A second tweet with the same text and agg_ts but different metadata
    # should be treated as a duplicate of the original.
    duplicatedTwitterData = [
      {
        "metric_name": "copy of " + metricName,
        "tweet_uid": "copy of " + uid,
        "created_at": "2015-02-19T19:45:24.870109",
        "agg_ts": "2015-02-19T19:43:24.870118", # Same agg_ts!
        "text": "Tweet text", # Same text!
        "userid": "20",
        "username": "******",
        "retweet_count": "0"
      }
    ]

    with MessageBusConnector() as messageBus:
      messageBus.publishExg(
        exchange=self.config.get("non_metric_data", "exchange_name"),
        routingKey=(
          self.config.get("non_metric_data", "exchange_name") + ".twitter"),
        body=json.dumps(duplicatedTwitterData)
      )

    # Poll until the duplicate bumps the original item's copy_count to 1.
    for _ in range(30):
      metricTweetItem =  metricTweetsTable.lookup(
        twitterData[0]["text"],
        twitterData[0]["agg_ts"]
      )

      if metricTweetItem["copy_count"] != 1:
        time.sleep(1)
        continue

      # Assert same as original, except for copy_count, which should be 1

      self.assertEqual(metricTweetItem["username"], twitterData[0]["username"])
      self.assertEqual(metricTweetItem["tweet_uid"], twitterData[0]["tweet_uid"])
      self.assertEqual(metricTweetItem["created_at"], twitterData[0]["created_at"])
      self.assertEqual(metricTweetItem["agg_ts"], twitterData[0]["agg_ts"])
      self.assertEqual(metricTweetItem["text"], twitterData[0]["text"])
      self.assertEqual(metricTweetItem["userid"], twitterData[0]["userid"])
      self.assertEqual(metricTweetItem["username"], twitterData[0]["username"])
      self.assertEqual(metricTweetItem["retweet_count"], twitterData[0]["retweet_count"])
      self.assertEqual(metricTweetItem["sort_key"], ts + 1)

      break
    else:
      self.fail("copy_count of original tweet not updated within reasonable"
                " amount of time (~30s) for duplicated tweet.")

    # Delete metric and ensure metric is deleted from dynamodb, too
    self._deleteMetric(metricName)

    for _ in xrange(60):
      time.sleep(1)
      try:
        metricItem = metricTable.lookup(uid)
      except ItemNotFound as err:
        break
    else:
      self.fail("Metric not deleted from dynamodb")
コード例 #11
0
    def testPublishInstanceDataHourly(self, connectDynamoDB,
                                      _gracefulCreateTable):
        """ _publishInstanceDataHourly aggregates rows into hourly items.

        The four rows below span two hours (00 and 01 on 2015-02-20).  The
        mocked connection's update_item always raises
        ResourceNotFoundException, so the service must fall back to a
        conditional put_item for each of the two hours, carrying the max
        anomaly score observed in that hour.

        Fix: removed the unused local `tableName`.
        """
        connectionMock = Mock(spec_set=DynamoDBConnection)
        # Force the update path to fail so the put_item fallback is exercised.
        connectionMock.update_item.side_effect = ResourceNotFoundException(
            400, "item not found")
        connectDynamoDB.return_value = connectionMock
        instanceName = "testName"
        # put_item must be conditioned on the item not already existing.
        condition = "attribute_not_exists(instance_id)"
        rows = [
            dict(rowid=99,
                 ts=epochFromNaiveUTCDatetime(datetime(2015, 2, 20, 0, 46,
                                                       28)),
                 value=10305.0,
                 rawAnomaly=0.275,
                 anomaly=0.999840891),
            dict(rowid=100,
                 ts=epochFromNaiveUTCDatetime(datetime(2015, 2, 20, 0, 51,
                                                       28)),
                 value=9305.0,
                 rawAnomaly=0.975,
                 anomaly=0.999990891),
            dict(rowid=101,
                 ts=epochFromNaiveUTCDatetime(datetime(2015, 2, 20, 0, 56,
                                                       20)),
                 value=6111.0,
                 rawAnomaly=0.775,
                 anomaly=0.999940891),
            dict(rowid=102,
                 ts=epochFromNaiveUTCDatetime(datetime(2015, 2, 20, 1, 1, 38)),
                 value=7092.0,
                 rawAnomaly=0.775,
                 anomaly=0.999640891)
        ]

        service = DynamoDBService()

        # Run the function under test
        service._publishInstanceDataHourly(instanceName, "TwitterVolume", rows)

        # Validate results: one update attempt plus one put fallback per hour.
        self.assertEqual(connectionMock.update_item.call_count, 2)
        self.assertEqual(connectionMock.put_item.call_count, 2)
        calls = connectionMock.put_item.call_args_list

        # Hour 00: max anomaly of the three rows is 0.999990891 -> "0.99999"
        kwargs0 = calls[0][1]
        item0 = kwargs0["item"]
        self.assertDictEqual(item0["instance_id"], {"S": instanceName})
        self.assertEqual(item0["date_hour"], {"S": "2015-02-20T00"})
        self.assertEqual(item0["date"], {"S": "2015-02-20"})
        self.assertEqual(item0["hour"], {"S": "00"})
        self.assertDictEqual(item0["anomaly_score"]["M"]["TwitterVolume"],
                             {"N": "0.99999"})
        self.assertEqual(kwargs0["condition_expression"], condition)

        # Hour 01: single row with anomaly 0.999640891 -> "0.99964"
        kwargs1 = calls[1][1]
        item1 = kwargs1["item"]
        self.assertEqual(item1["instance_id"], {"S": instanceName})
        self.assertEqual(item1["date_hour"], {"S": "2015-02-20T01"})
        self.assertEqual(item1["date"], {"S": "2015-02-20"})
        self.assertEqual(item1["hour"], {"S": "01"})
        self.assertDictEqual(item1["anomaly_score"]["M"]["TwitterVolume"],
                             {"N": "0.99964"})
        self.assertEqual(kwargs1["condition_expression"], condition)
コード例 #12
0
    def testModelResultHandlerSkipsStaleBatch(self, _amqpUtilsMock,
                                              deserializeModelResult,
                                              connectDynamoDB,
                                              _gracefulCreateTable):
        """ Verify that a batch of model inference results older than the
    freshness threshold is dropped instead of being saved to DynamoDB
    """

        # messageHandler() is normally driven by the amqp library, so fabricate
        # an inbound ConsumerMessage out of mocks and feed it in directly; then
        # assert that no publish calls are made at the other end.
        deliveryInfo = amqp.messages.MessageDeliveryInfo(consumerTag=Mock(),
                                                         deliveryTag=Mock(),
                                                         redelivered=False,
                                                         exchange=Mock(),
                                                         routingKey="")
        inboundMessage = amqp.messages.ConsumerMessage(
            body=Mock(),
            properties=Mock(headers=dict()),
            methodInfo=deliveryInfo,
            ackImpl=Mock(),
            nackImpl=Mock())

        # Bypass the normal serialize/deserialize phases (which would pull in a
        # sqlalchemy rowproxy dependency) by mocking out
        # AnomalyService.deserializeModelResult() to return an object shaped
        # like a real inference result batch, with a timestamp just beyond the
        # freshness threshold.
        staleAge = timedelta(days=DynamoDBService._FRESH_DATA_THRESHOLD_DAYS + 1)
        staleTimestamp = epochFromNaiveUTCDatetime(
            datetime.utcnow().replace(microsecond=0) - staleAge)

        inferenceRow = {
            "rowid": 4790,
            "ts": staleTimestamp,
            "value": 9305.0,
            "rawAnomaly": 0.775,
            "anomaly": 0.999840891
        }

        metricUid = "3b035a5916994f2bb950f5717138f94b"

        deserializeModelResult.return_value = {
            "metric": {
                "uid": metricUid,
                "name": "XIGNITE.AGN.VOLUME",
                "description": "XIGNITE.AGN.VOLUME",
                "resource": "Resource-of-XIGNITE.AGN.VOLUME",
                "location": "",
                "datasource": "custom",
                "spec": {
                    "userInfo": {
                        "symbol": "AGN",
                        "metricType": "StockVolume",
                        "metricTypeName": "Stock Volume"
                    }
                }
            },
            "results": [inferenceRow]
        }

        service = DynamoDBService()
        metricDataPatch = patch.object(service,
                                       "_publishMetricData",
                                       spec_set=service._publishMetricData)
        instanceDataPatch = patch.object(
            service,
            "_publishInstanceDataHourly",
            spec_set=service._publishInstanceDataHourly)
        with metricDataPatch as metricDataMock, \
            instanceDataPatch as instanceDataMock:
            service.messageHandler(inboundMessage)

            # The stale batch must be deserialized and then discarded with no
            # publish activity.
            deserializeModelResult.assert_called_once_with(inboundMessage.body)
            self.assertEqual(metricDataMock.call_count, 0)
            self.assertEqual(instanceDataMock.call_count, 0)
コード例 #13
0
    def testMessageHandlerRoutesMetricDataToDynamoDB(self, _amqpUtilsMock,
                                                     deserializeModelResult,
                                                     connectDynamoDB,
                                                     _gracefulCreateTable):
        """ Given a batch of model inference results, send the appropriate data
    to DynamoDB tables according to design in an environment where both
    rabbitmq and dynamodb are mocked out
    """

        # messageHandler() is normally invoked by the amqp library; fabricate
        # the inbound message from mocks, then assert that the expected
        # put_item() calls are made at the other end.
        inboundMessage = amqp.messages.ConsumerMessage(
            body=Mock(),
            properties=Mock(headers=dict()),
            methodInfo=amqp.messages.MessageDeliveryInfo(consumerTag=Mock(),
                                                         deliveryTag=Mock(),
                                                         redelivered=False,
                                                         exchange=Mock(),
                                                         routingKey=""),
            ackImpl=Mock(),
            nackImpl=Mock())

        # Bypass the normal serialize/deserialize phases (which would pull in a
        # sqlalchemy rowproxy dependency) by mocking out
        # AnomalyService.deserializeModelResult(), returning a dict that
        # approximates a real batch of model inference results with a current
        # timestamp so it passes the freshness check.
        currentEpoch = int(time.time())

        inferenceRow = {
            "rowid": 4790,
            "ts": currentEpoch,
            "value": 9305.0,
            "rawAnomaly": 0.775,
            "anomaly": 0.999840891
        }

        metricUid = "3b035a5916994f2bb950f5717138f94b"

        deserializeModelResult.return_value = {
            "metric": {
                "uid": metricUid,
                "name": "XIGNITE.AGN.VOLUME",
                "description": "XIGNITE.AGN.VOLUME",
                "resource": "Resource-of-XIGNITE.AGN.VOLUME",
                "location": "",
                "datasource": "custom",
                "spec": {
                    "userInfo": {
                        "symbol": "AGN",
                        "metricType": "StockVolume",
                        "metricTypeName": "Stock Volume"
                    }
                }
            },
            "results": [inferenceRow]
        }

        service = DynamoDBService()
        service.messageHandler(inboundMessage)

        deserializeModelResult.assert_called_once_with(inboundMessage.body)

        # The single row should be converted and written exactly once to the
        # metric data table via the batch_write context manager.
        expectedItem = dynamodb_service.convertInferenceResultRowToMetricDataItem(
            metricUid, inferenceRow)
        putItemMock = (service._metric_data.batch_write.return_value.
                       __enter__.return_value.put_item)
        putItemMock.assert_called_once_with(data=expectedItem._asdict(),
                                            overwrite=True)

        # The batch carried no tweet data, so the tweets table is untouched.
        self.assertFalse(service._metric_tweets.batch_write.called)

        # Make sure that a model command result doesn't get mistaken for an
        # inference result batch
        deserializeModelResult.return_value = Mock()
        inboundMessage.properties = Mock(
            headers=dict(dataType="model-cmd-result"))
        inboundMessage.body = Mock()
        service = DynamoDBService()
        with patch.object(service,
                          "_handleModelCommandResult",
                          spec_set=service._handleModelCommandResult):
            service.messageHandler(inboundMessage)
            service._handleModelCommandResult.assert_called_once_with(
                inboundMessage.body)
コード例 #14
0
    def testPathwayToDynamoDB(self):
        """ Test metric data pathway to dynamodb.

    End-to-end: feed metric data in via the plaintext socket, create a model,
    and verify that the metric, its data points, and the hourly-aggregated
    instance data all arrive in DynamoDB.  Then publish twitter data and check
    it lands in the metric tweets table (including via the secondary index),
    and finally delete the metric and confirm it disappears from DynamoDB.
    """

        # Randomized metric name so repeated/concurrent runs don't collide
        metricName = "TEST." + "".join(random.sample(string.ascii_letters, 16))

        nativeMetric = {
            "modelParams": {
                "minResolution": 0.2,
                "min": 0.0,
                "max": 10000.0,
            },
            "datasource": "custom",
            "metricSpec": {
                "metric": metricName,
                "resource": "Test",
                "userInfo": {
                    "symbol": "TEST",
                    "metricType": "TwitterVolume",
                    "metricTypeName": "Twitter Volume",
                }
            }
        }
        metricName = nativeMetric["metricSpec"]["metric"]
        instanceName = nativeMetric["metricSpec"]["resource"]
        userInfo = nativeMetric["metricSpec"]["userInfo"]

        # Align to the top of the hour so all three samples fall into known
        # hourly aggregation buckets
        now = datetime.datetime.utcnow().replace(minute=0,
                                                 second=0,
                                                 microsecond=0)

        data = [
            (5000.0, now - datetime.timedelta(minutes=10)),
            (6000.0, now - datetime.timedelta(minutes=5)),
            (7000.0, now),
        ]

        # We'll be explicitly deleting the metric below, but we need to add a
        # cleanup step that runs in case there is some other failure that
        # prevents that part of the test from being reached.

        def gracefulDelete():
            try:
                self._deleteMetric(metricName)
            except ObjectNotFoundError:
                pass

        self.addCleanup(gracefulDelete)

        # Add custom metric data via the plaintext (graphite-style) socket
        sock = socket.socket()
        sock.connect(("localhost", self.plaintextPort))
        for metricValue, ts in data:
            sock.sendall(
                "%s %r %s\n" %
                (metricName, metricValue, epochFromNaiveUTCDatetime(ts)))

        self.gracefullyCloseSocket(sock)

        uid = self.checkMetricCreated(metricName)

        # Save the uid for later
        LOGGER.info("Metric %s has uid: %s", metricName, uid)

        # Send model creation request and confirm the userInfo round-tripped
        model = self._createModel(nativeMetric)
        parameters = json.loads(model.parameters)
        self.assertEqual(parameters["metricSpec"]["userInfo"], userInfo)

        # Poll (up to 60 x 1s) for the model to become ACTIVE
        for _ in xrange(60):
            with self.engine.begin() as conn:
                metric = repository.getMetric(conn, uid)

            if metric.status == MetricStatus.ACTIVE:
                break
            LOGGER.info("Model=%s not ready. Sleeping 1 second...", uid)
            time.sleep(1)
        else:
            # NOTE: message previously claimed "within 5 minutes", but the
            # loop above only waits 60 seconds
            self.fail("Model not ready (ACTIVE) within 60 seconds")

        # Check that the data all got processed
        self.checkModelResultsSize(uid, 3)

        # Now check that the data was published to dynamodb...
        dynamodb = DynamoDBService.connectDynamoDB()

        metricTable = Table(MetricDynamoDBDefinition().tableName,
                            connection=dynamodb)
        metricItem = metricTable.lookup(uid)
        self.assertEqual(metricItem["uid"], uid)
        self.assertEqual(metricItem["name"], metricName)
        self.assertEqual(metricItem["metricType"], "TwitterVolume")
        self.assertEqual(metricItem["metricTypeName"], "Twitter Volume")
        self.assertEqual(metricItem["symbol"], "TEST")

        metricDataTable = Table(MetricDataDynamoDBDefinition().tableName,
                                connection=dynamodb)
        # Track the max anomaly score per (date, hour) bucket to validate the
        # hourly instance-data aggregation below
        instanceDataAnomalyScores = {}
        for metricValue, ts in data:
            metricDataItem = _RETRY_ON_ITEM_NOT_FOUND_DYNAMODB_ERROR(
                metricDataTable.lookup)(uid, ts.isoformat())
            # There is no server-side cleanup for metric data, so remove it here
            # for now to avoid accumulating test data
            self.addCleanup(metricDataItem.delete)
            self.assertEqual(metricValue, metricDataItem["metric_value"])
            dt = datetime.datetime.strptime(metricDataItem["timestamp"],
                                            "%Y-%m-%dT%H:%M:%S")
            self.assertEqual(ts, dt)
            ts = ts.replace(minute=0, second=0, microsecond=0)
            date = ts.strftime("%Y-%m-%d")
            hour = ts.strftime("%H")
            key = (date, hour)
            maxVal = instanceDataAnomalyScores.get(key, 0.0)
            instanceDataAnomalyScores[key] = max(
                maxVal, metricDataItem["anomaly_score"])

        # And check that the aggregated instance data is updated
        instanceDataHourlyTable = Table(
            InstanceDataHourlyDynamoDBDefinition().tableName,
            connection=dynamodb)
        for key, anomalyScore in instanceDataAnomalyScores.iteritems():
            date, hour = key
            instanceDataHourlyItem = _RETRY_ON_ITEM_NOT_FOUND_DYNAMODB_ERROR(
                instanceDataHourlyTable.lookup)(instanceName,
                                                "%sT%s" % (date, hour))
            self.addCleanup(instanceDataHourlyItem.delete)
            self.assertAlmostEqual(
                anomalyScore,
                float(
                    instanceDataHourlyItem["anomaly_score"]["TwitterVolume"]))
            self.assertEqual(date, instanceDataHourlyItem["date"])
            self.assertEqual(hour, instanceDataHourlyItem["hour"])

        # Now send some twitter data and validate that it made it to dynamodb

        twitterData = [{
            "metric_name": metricName,
            "tweet_uid": uid,
            "created_at": "2015-02-19T19:43:24.870109",
            "agg_ts": "2015-02-19T19:43:24.870118",
            "text": "Tweet text",
            "userid": "10",
            "username": "******",
            "retweet_count": "0"
        }]

        with MessageBusConnector() as messageBus:
            messageBus.publishExg(
                exchange=self.config.get("non_metric_data", "exchange_name"),
                routingKey=(
                    self.config.get("non_metric_data", "exchange_name") +
                    ".twitter"),
                body=json.dumps(twitterData))

        metricTweetsTable = Table(MetricTweetsDynamoDBDefinition().tableName,
                                  connection=dynamodb)
        metricTweetItem = metricTweetsTable.lookup(
            "-".join((metricName, uid)), "2015-02-19T19:43:24.870118")
        # There is no server-side cleanup for tweet data, so remove it here for
        # now to avoid accumulating test data
        self.addCleanup(metricTweetItem.delete)
        self.assertEqual(metricTweetItem["username"],
                         twitterData[0]["username"])
        self.assertEqual(metricTweetItem["tweet_uid"],
                         twitterData[0]["tweet_uid"])
        self.assertEqual(metricTweetItem["created_at"],
                         twitterData[0]["created_at"])
        self.assertEqual(metricTweetItem["agg_ts"], twitterData[0]["agg_ts"])
        self.assertEqual(metricTweetItem["text"], twitterData[0]["text"])
        self.assertEqual(metricTweetItem["userid"], twitterData[0]["userid"])
        self.assertEqual(metricTweetItem["username"],
                         twitterData[0]["username"])
        self.assertEqual(metricTweetItem["retweet_count"],
                         twitterData[0]["retweet_count"])

        # The same item must also be reachable through the metric_name
        # secondary index
        queryResult = metricTweetsTable.query_2(
            metric_name__eq=metricName,
            agg_ts__eq=twitterData[0]["agg_ts"],
            index="taurus.metric_data-metric_name_index")
        queriedMetricTweetItem = next(queryResult)

        self.assertEqual(queriedMetricTweetItem["username"],
                         twitterData[0]["username"])
        self.assertEqual(queriedMetricTweetItem["tweet_uid"],
                         twitterData[0]["tweet_uid"])
        self.assertEqual(queriedMetricTweetItem["created_at"],
                         twitterData[0]["created_at"])
        self.assertEqual(queriedMetricTweetItem["agg_ts"],
                         twitterData[0]["agg_ts"])
        self.assertEqual(queriedMetricTweetItem["text"],
                         twitterData[0]["text"])
        self.assertEqual(queriedMetricTweetItem["userid"],
                         twitterData[0]["userid"])
        self.assertEqual(queriedMetricTweetItem["username"],
                         twitterData[0]["username"])
        self.assertEqual(queriedMetricTweetItem["retweet_count"],
                         twitterData[0]["retweet_count"])

        # Delete metric and ensure metric is deleted from dynamodb, too
        self._deleteMetric(metricName)

        for _ in xrange(60):
            time.sleep(1)
            try:
                # Lookup result is unused; ItemNotFound is the success signal
                metricTable.lookup(uid)
            except ItemNotFound:
                break
        else:
            self.fail("Metric not deleted from dynamodb")