Example #1
    def testMetricDataBatchWrite(self, messageBusConnectorClassMock):

        samples = [
            ("FOO.BAR.%d" % i, i * 3.789, i * 300)
            for i in xrange((metric_utils._METRIC_DATA_BATCH_WRITE_SIZE * 3) /
                            2)
        ]

        messageBusConnectorClass = (
            metric_utils.message_bus_connector.MessageBusConnector)
        messageBusMock = MagicMock(
            spec_set=messageBusConnectorClass,
            publish=Mock(spec_set=messageBusConnectorClass.publish))
        messageBusMock.__enter__.return_value = messageBusMock

        messageBusConnectorClassMock.return_value = messageBusMock

        loggerMock = Mock(spec_set=logging.Logger)
        with metric_utils.metricDataBatchWrite(loggerMock) as putSample:
            # put enough for the first batch
            for sample in samples[:metric_utils._METRIC_DATA_BATCH_WRITE_SIZE]:
                putSample(*sample)

            # The first publish call should be for a full batch
            self.assertEqual(messageBusMock.publish.call_count, 1)
            call0 = mock.call(
                mqName="taurus.metric.custom.data",
                persistent=True,
                body=json.dumps(
                    dict(protocol="plain",
                         data=[
                             "%s %r %d" % (m, v, t) for m, v, t in
                             samples[:metric_utils.
                                     _METRIC_DATA_BATCH_WRITE_SIZE]
                         ])))
            self.assertEqual(messageBusMock.publish.call_args_list[0], call0)

            # put the remaining samples
            for sample in samples[metric_utils._METRIC_DATA_BATCH_WRITE_SIZE:]:
                putSample(*sample)

            # the remaining incomplete batch will be sent upon exit from the context,
            # but not yet
            self.assertEqual(messageBusMock.publish.call_count, 1)

        # Now, the remainder should be sent, too
        self.assertEqual(messageBusMock.publish.call_count, 2)
        call1 = mock.call(
            mqName="taurus.metric.custom.data",
            persistent=True,
            body=json.dumps(
                dict(protocol="plain",
                     data=[
                         "%s %r %d" % (m, v, t) for m, v, t in
                         samples[metric_utils._METRIC_DATA_BATCH_WRITE_SIZE:]
                     ])))
        self.assertEqual(messageBusMock.publish.call_args_list[1], call1)
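The assertions above pin down the contract of metric_utils.metricDataBatchWrite: a full batch of _METRIC_DATA_BATCH_WRITE_SIZE samples is published to the message bus as soon as it accumulates, and any partial remainder is published when the context exits. Below is a minimal sketch of a context manager with that contract; it assumes only a publish(body) callable and a batch-size constant, and it is an illustration rather than the actual metric_utils implementation.

import json
from contextlib import contextmanager

_BATCH_SIZE = 100  # stand-in for metric_utils._METRIC_DATA_BATCH_WRITE_SIZE


@contextmanager
def metricDataBatchWriteSketch(publish, batchSize=_BATCH_SIZE):
    """Yield putSample(metricName, value, epochTimestamp); full batches are
    published immediately and the partial remainder is flushed on exit."""
    batch = []

    def flush():
        if batch:
            publish(json.dumps(dict(protocol="plain", data=list(batch))))
            del batch[:]

    def putSample(metricName, value, epochTimestamp):
        batch.append("%s %r %d" % (metricName, value, epochTimestamp))
        if len(batch) >= batchSize:
            flush()

    try:
        yield putSample
    finally:
        flush()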
def _flushTaurusEngineMetricDataPath(engineServer, engineApiKey):
    """Flush Taurus Engine's metric data path.

  There is no formal mechanism for this in htmengine, so we flush the data
  path by sending a metric data item for a dummy metric and waiting for the
  dummy metric to be created (and then deleting the dummy metric). It's a
  hack, but it's pretty much all we have right now.

  :param str engineServer: DNS name or IP address of the Taurus API server

  :param str engineApiKey: API Key of Taurus HTM Engine
  """
    g_log.info("Flushing Taurus Engine metric data path, please wait...")

    flusherMetricName = _DATA_PATH_FLUSHER_METRIC_PREFIX + uuid.uuid1().hex

    with metric_utils.metricDataBatchWrite(g_log) as putSample:
        putSample(flusherMetricName, 99999, int(time.time()))

    _waitForFlusherAndGarbageCollect(engineServer=engineServer,
                                     engineApiKey=engineApiKey,
                                     flusherMetricName=flusherMetricName)
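_waitForFlusherAndGarbageCollect is referenced here but not shown. A rough sketch of what such a helper might look like follows, polling the custom-metrics REST endpoint (the same endpoints exercised in the integration test under Example #10 below) until the flusher metric appears and then deleting it; the "Sketch" name, the timeout, and the one-second polling interval are illustrative assumptions rather than details taken from the real helper.

import time

import requests


def _waitForFlusherAndGarbageCollectSketch(engineServer, engineApiKey,
                                           flusherMetricName, timeoutSec=300):
    """Poll until flusherMetricName appears in the custom metrics list, then
    delete it; raise if it does not appear within timeoutSec."""
    deadline = time.time() + timeoutSec
    while time.time() < deadline:
        response = requests.get("https://%s/_metrics/custom" % engineServer,
                                auth=(engineApiKey, ""),
                                verify=False)
        response.raise_for_status()
        if any(metric["name"] == flusherMetricName
               for metric in response.json()):
            # Data path is flushed; garbage-collect the dummy metric
            requests.delete(
                "https://%s/_metrics/custom/%s" % (engineServer,
                                                   flusherMetricName),
                auth=(engineApiKey, ""),
                verify=False)
            return
        time.sleep(1)

    raise RuntimeError("Flusher metric %s did not appear within %s seconds" %
                       (flusherMetricName, timeoutSec))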
Example #6
def _forwardNewsVolumeMetrics(metricSpecs, lastEmittedAggTime, stopDatetime,
                              periodSec, metricDestAddr):
    """ Query news volume metrics since the given last emitted timestamp through
  stopDatetime and forward them to htmengine's Metric Listener. Update the
  datetime of the last successfully-emitted news volume metric batch in the
  database.

  NOTE: forwarding will be aborted upon failure to connect to Metric Listener.
    In
    this case, an error will be logged, and the function will return the UTC
    timestamp of the last successfully-emitted sample aggregation interval. Once
    Metric Listener comes online, a subsequent call to this function will catch
    up by forwarding the stored samples since last successful emission.

  :param metricSpecs: a sequence of NewsVolumeMetricSpec objects corresponding
    to the metrics to be emitted
  :param lastEmittedAggTime: UTC datetime of last successfully-emitted sample
    batch
  :param stopDatetime: non-inclusive upper bound UTC datetime for forwarding
  :param periodSec: aggregation period in seconds
  :param metricDestAddr: two-tuple (metricDestHost, metricDestPort)
  :returns: UTC timestamp of the last successfully-emitted sample batch.
  :rtype: datetime.datetime
  """
    periodTimedelta = timedelta(seconds=periodSec)
    aggStartDatetime = lastEmittedAggTime + periodTimedelta
    while aggStartDatetime < stopDatetime:
        # Get News Volume metrics for one aggregation interval
        aggStopDatetime = aggStartDatetime + periodTimedelta
        symbolToNewsVolumeMap = defaultdict(
            int, _queryNewsVolumes(aggStartDatetime, aggStopDatetime))

        # Generate metric samples
        epochTimestamp = date_time_utils.epochFromNaiveUTCDatetime(
            aggStartDatetime)
        samples = tuple(
            dict(metricName=spec.metric,
                 value=symbolToNewsVolumeMap[spec.symbol],
                 epochTimestamp=epochTimestamp) for spec in metricSpecs)

        # Emit samples to Metric Listener
        try:
            with metric_utils.metricDataBatchWrite(log=g_log) as putSample:
                for sample in samples:
                    putSample(**sample)
        except Exception:
            g_log.exception(
                "Failure while emitting metric data for agg=%s "
                "containing numSamples=%d", aggStartDatetime, len(samples))
            return lastEmittedAggTime
        else:
            g_log.info("Forwarded numSamples=%d for agg=%s", len(samples),
                       aggStartDatetime)

        # Update db with last successfully-emitted datetime
        metric_utils.updateLastEmittedSampleDatetime(
            key=_EMITTED_NEWS_VOLUME_SAMPLE_TRACKER_KEY,
            sampleDatetime=aggStartDatetime)

        # Set up for next iteration
        lastEmittedAggTime = aggStartDatetime
        aggStartDatetime = aggStopDatetime

    return lastEmittedAggTime
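The return value and catch-up behavior described in the docstring suggest a simple driver loop: persist the returned timestamp and call again; if an emission attempt fails, the next call resumes from the same aggregation interval. A sketch of such a loop follows. The surrounding agent code is not shown in this listing, and loading the initial lastEmittedAggTime from the sample tracker is an assumption for illustration rather than something taken from this file.

import time
from datetime import datetime


def runNewsVolumeForwarder(metricSpecs, periodSec, metricDestAddr,
                           lastEmittedAggTime):
    """Illustrative driver loop for _forwardNewsVolumeMetrics.

    lastEmittedAggTime would normally be loaded from the sample tracker keyed
    by _EMITTED_NEWS_VOLUME_SAMPLE_TRACKER_KEY (assumption; not shown here).
    """
    while True:
        lastEmittedAggTime = _forwardNewsVolumeMetrics(
            metricSpecs=metricSpecs,
            lastEmittedAggTime=lastEmittedAggTime,
            stopDatetime=datetime.utcnow(),
            periodSec=periodSec,
            metricDestAddr=metricDestAddr)
        # On failure to emit, lastEmittedAggTime is returned unchanged and the
        # next iteration retries the same aggregation intervals.
        time.sleep(periodSec)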
Example #9
def transmitMetricData(metricSpecs, symbol, engine):
    """ Send unsent metric data samples for the given symbol to Taurus

  NOTE: this is also used externally by friends of the agent; e.g.,
  `resymbol_metrics.py`.

  :param metricSpecs: Sequence of one or more StockMetricSpec objects associated
    with the same stock symbol for which polling was conducted
  :param symbol: stock symbol
  :param sqlalchemy.engine.Engine engine:
  """
    try:

        @collectorsdb.retryOnTransientErrors
        def _fetchUnsentSamples():
            # Select only records that haven't been sent to BOTH
            fields = [
                xigniteSecurityBars.c.StartDate,
                xigniteSecurityBars.c.StartTime, xigniteSecurityBars.c.EndDate,
                xigniteSecurityBars.c.EndTime, xigniteSecurityBars.c.UTCOffset,
                xigniteSecurityBars.c.Volume, xigniteSecurityBars.c.Close,
                emittedStockPrice.c.sent.label("Close_sent"),
                emittedStockVolume.c.sent.label("Volume_sent")
            ]

            sel = (select(fields)
                   .select_from(xigniteSecurityBars
                                .outerjoin(emittedStockPrice)
                                .outerjoin(emittedStockVolume))
                   .where(xigniteSecurityBars.c.symbol == symbol)
                   .where((emittedStockPrice.c.sent == None) |
                          (emittedStockVolume.c.sent == None))
                   .order_by(xigniteSecurityBars.c.EndDate.asc(),
                             xigniteSecurityBars.c.EndTime.asc()))

            return engine.execute(sel)

        # Process samples in chunks to facilitate more efficient error recovery
        # during backlog processing
        samplesIter = iter(_fetchUnsentSamples())
        while True:
            specSymbolSampleList = []
            sample = None
            for sample in itertools.islice(samplesIter, 0, 1000):
                for spec in metricSpecs:
                    if not sample[spec.sampleKey + "_sent"]:
                        specSymbolSampleList.append((spec, symbol, sample))

            if sample is None:
                # No more unsent samples
                break

            # Send samples to Taurus
            with metricDataBatchWrite(log=_LOG) as putSample:
                for spec, symbol, sample in specSymbolSampleList:
                    if spec.sampleKey in sample:
                        epochTs = epochFromLocalizedDatetime(
                            _EASTERN_TZ.localize(
                                datetime.datetime.combine(
                                    sample.StartDate, sample.StartTime)))
                        value = sample[spec.sampleKey]

                        _LOG.info("Sending: %s %r %d", spec.metricName, value,
                                  epochTs)
                        putSample(metricName=spec.metricName,
                                  value=value,
                                  epochTimestamp=epochTs)

            # Update history of emitted samples
            #
            # NOTE: If this fails once in a while and we end up resending the samples,
            # htmengine's Metric Storer will discard duplicate-timestamp and
            # out-of-order samples
            for spec, symbol, sample in specSymbolSampleList:
                _updateMetricDataHistory(spec=spec,
                                         symbol=symbol,
                                         sample=sample,
                                         engine=engine)
    except Exception:
        _LOG.exception("Unexpected error while attempting to send metric "
                       "data sample(s) to remote Taurus instance.")
Example #10
    def testMetricDataBatchWrite(self):

        # Note: This test assumes that there is a running Taurus instance ready to
        # receive and process inbound custom metric data.  In the deployed
        # environment $TAURUS_HTM_SERVER and $TAURUS_APIKEY must be set.  Otherwise
        # default values will be assumed.

        host = os.environ.get("TAURUS_HTM_SERVER", "127.0.0.1")
        apikey = os.environ.get("TAURUS_APIKEY", "taurus")

        metricName = "bogus-test-metric"

        _LOG = ExtendedLogger.getExtendedLogger(__name__)

        UTC_LOCALIZED_EPOCH = (pytz.timezone("UTC").localize(
            datetime.utcfromtimestamp(0)))

        now = datetime.now(pytz.timezone("UTC"))

        # Send metric data in batches; for test purposes, exceed the max batch
        # size to force the batch to be chunked

        with metric_utils.metricDataBatchWrite(log=_LOG) as putSample:
            for x in xrange(metric_utils._METRIC_DATA_BATCH_WRITE_SIZE + 1):
                ts = ((now - UTC_LOCALIZED_EPOCH).total_seconds() -
                      metric_utils._METRIC_DATA_BATCH_WRITE_SIZE + 1 + x)
                putSample(metricName=metricName, value=x, epochTimestamp=ts)

        self.addCleanup(requests.delete,
                        "https://%s/_metrics/custom/%s" % (host, metricName),
                        auth=(apikey, ""),
                        verify=False)

        attempt = 0
        found = False
        while not found:
            result = requests.get("https://%s/_metrics/custom" % host,
                                  auth=(apikey, ""),
                                  verify=False)

            models = result.json()

            for model in models:
                if model["name"] == metricName:
                    # Quick check to make sure the data made its way through
                    result = requests.get("https://%s/_models/%s" %
                                          (host, model["uid"]),
                                          auth=(apikey, ""),
                                          verify=False)

                    if (result.json()[0]["last_rowid"] ==
                            metric_utils._METRIC_DATA_BATCH_WRITE_SIZE + 1):
                        found = True
                        break

            else:
                if attempt == 30:
                    self.fail(
                        "Not all metric data samples made it through after 30 seconds"
                    )
                else:
                    time.sleep(1)
                    attempt += 1
                    continue
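The timestamp arithmetic in the test above produces _METRIC_DATA_BATCH_WRITE_SIZE + 1 consecutive one-second timestamps ending at the current time, using an aware datetime minus a localized epoch. As an aside, the same epoch seconds can be computed without pytz; a small sketch, with the batch size hard-coded purely for illustration:

import time

batchSize = 100  # stand-in for metric_utils._METRIC_DATA_BATCH_WRITE_SIZE

# time.time() equals (now - UTC_LOCALIZED_EPOCH).total_seconds() for an
# aware UTC "now", so this yields the same one-second-apart timestamps.
nowEpoch = time.time()
timestamps = [nowEpoch - batchSize + 1 + x for x in range(batchSize + 1)]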