コード例 #1
0
  def testEmittedSampleDatetime(self):
    """Exercise the establish / update / query cycle of the last-emitted
    sample datetime helpers in ``metric_utils`` for a throwaway tracker key.

    Checks that the established value is a ``datetime``, that an updated value
    is read back unchanged, and that the updated value is later than the
    initially established one.
    """
    trackerKey = "bogus-test-key"

    # Create the initial tracker entry; the meaning of 300 is not visible here
    # (presumably an aggregation period in seconds -- TODO confirm)
    establishedDatetime = metric_utils.establishLastEmittedSampleDatetime(
      trackerKey, 300)

    # Schedule removal of the tracker row for this key once the test finishes
    executeSql = collectorsdb.engineFactory().execute
    deleteTrackerRow = schema.emittedSampleTracker.delete().where(
      (schema.emittedSampleTracker.c.key == trackerKey))
    self.addCleanup(executeSql, deleteTrackerRow)

    self.assertIsInstance(establishedDatetime, datetime)

    # Move the tracker forward to the current UTC time, dropping microseconds
    # (presumably because the stored value has one-second resolution -- TODO
    # confirm)
    currentDatetime = datetime.utcnow().replace(microsecond=0)
    metric_utils.updateLastEmittedSampleDatetime(trackerKey, currentDatetime)

    # Read the value back and compare against both reference points
    storedDatetime = metric_utils.queryLastEmittedSampleDatetime(trackerKey)

    self.assertEqual(currentDatetime, storedDatetime)
    self.assertLess(establishedDatetime, storedDatetime)
コード例 #2
0
    def testEmittedSampleDatetime(self):
        """Exercise the establish / update / query cycle of the last-emitted
        sample datetime helpers in ``metric_utils`` for a throwaway tracker
        key.

        Checks that the established value is a ``datetime``, that an updated
        value is read back unchanged, and that the updated value is later than
        the initially established one.
        """
        trackerKey = "bogus-test-key"

        # Create the initial tracker entry; the meaning of 300 is not visible
        # here (presumably an aggregation period in seconds -- TODO confirm)
        establishedDatetime = metric_utils.establishLastEmittedSampleDatetime(
            trackerKey, 300)

        # Schedule removal of the tracker row for this key once the test
        # finishes
        executeSql = collectorsdb.engineFactory().execute
        deleteTrackerRow = schema.emittedSampleTracker.delete().where(
            (schema.emittedSampleTracker.c.key == trackerKey))
        self.addCleanup(executeSql, deleteTrackerRow)

        self.assertIsInstance(establishedDatetime, datetime)

        # Move the tracker forward to the current UTC time, dropping
        # microseconds (presumably because the stored value has one-second
        # resolution -- TODO confirm)
        currentDatetime = datetime.utcnow().replace(microsecond=0)
        metric_utils.updateLastEmittedSampleDatetime(trackerKey,
                                                     currentDatetime)

        # Read the value back and compare against both reference points
        storedDatetime = metric_utils.queryLastEmittedSampleDatetime(
            trackerKey)

        self.assertEqual(currentDatetime, storedDatetime)
        self.assertLess(establishedDatetime, storedDatetime)
コード例 #3
0
def _resymbolTweetVolumeMetric(oldSymbol, newSymbol, aggPeriod):
    """ Perform the workflow of resymboling a tweet volume metric that consists
    of the following steps:
      1. Reassign buffered tweet samples in collectorsdb to the new metric.
      2. Forward the new metric data samples to HTM Engine
      3. Forward the tweet media to dynamodb

    If no last-emitted sample datetime has been recorded yet under the tweet
    volume tracker key, the function returns after step 1 and leaves the
    forwarding to the twitter agent.

    :param str oldSymbol: old stock symbol, upper case
    :param str newSymbol: new stock symbol, upper case
    :param int aggPeriod: metric aggregation period in seconds
    :raises AssertionError: if a tweet sample row already stored under the new
      metric name has an ``agg_ts`` at or before the maximum ``agg_ts`` found
      in the tweet samples table; the rename is not performed in that case
    """
    g_log.info(
        "Renaming tweet sample metric: oldSymbol=%s, newSymbol=%s, aggPeriod=%s",
        oldSymbol, newSymbol, aggPeriod)

    oldMetricName = gen_metrics_config.getTweetVolumeMetricName(oldSymbol)
    newMetricName = gen_metrics_config.getTweetVolumeMetricName(newSymbol)

    sqlEngine = collectorsdb.engineFactory()

    # Rename the metric in tweet sample rows

    with sqlEngine.begin() as conn:
        # Verify that metric samples with new symbol don't overlap with samples
        # corresponding to the old symbol
        g_log.info(
            "Verifying that newMetric=%s in table=%s doesn't overlap with "
            "the oldMetric=%s.", newMetricName, schema.twitterTweetSamples,
            oldMetricName)

        # NOTE(review): this max is taken over every row of the table, not just
        # the rows of oldMetricName, which makes the check below stricter than
        # the variable name suggests -- confirm that this is intended
        maxOldMetricAggTimestamp = conn.execute(
            sql.select([sql.func.max(schema.twitterTweetSamples.c.agg_ts)
                        ])).scalar()

        if maxOldMetricAggTimestamp is not None:
            overlappingRow = conn.execute(
                sql.select([
                    schema.twitterTweetSamples.c.metric
                ]).where(schema.twitterTweetSamples.c.metric == newMetricName).
                where(schema.twitterTweetSamples.c.agg_ts <=
                      maxOldMetricAggTimestamp).order_by(
                          schema.twitterTweetSamples.c.agg_ts.asc()).limit(
                              1)).first()
            # Raise explicitly instead of using an `assert` statement: asserts
            # are stripped when Python runs with -O, which would let the rename
            # below proceed unguarded. The exception type and payload are the
            # same as before, and it propagates out of the `begin()` block
            # before the UPDATE is issued.
            if overlappingRow is not None:
                raise AssertionError(overlappingRow)

        # Re-symbol the tweet sample metric rows
        g_log.info("Renaming tweet sample metric %s with %s", oldMetricName,
                   newMetricName)
        conn.execute(
            schema.twitterTweetSamples  # pylint: disable=E1120
            .update().where(
                schema.twitterTweetSamples.c.metric == oldMetricName).values(
                    metric=newMetricName))

    # Forward tweet metric samples to Taurus Engine

    g_log.info("Forwarding new tweet metric=%s samples to Taurus engine...",
               newMetricName)

    # Get the aggregation timestamp of the starting tweet sample to forward
    #
    # NOTE: prior to March 2015, tweet samples didn't have a consistent reference
    # between twitter agent's restarts. This issue was addressed with the
    # introduction of emitted_sample_tracker table.
    #
    timestampScanLowerBound = (datetime.utcnow() -
                               timedelta(days=MAX_METRIC_SAMPLE_BACKLOG_DAYS))

    # Earliest agg_ts of the renamed metric inside the backlog window.
    # NOTE(review): scalar() yields None when no row matches -- confirm that
    # aggregateAndForward accepts aggStartDatetime=None
    aggStartDatetime = sqlEngine.execute(
        sql.select(
            [schema.twitterTweetSamples.c.agg_ts],
            order_by=schema.twitterTweetSamples.c.agg_ts.asc()).where(
                schema.twitterTweetSamples.c.metric == newMetricName).where(
                    schema.twitterTweetSamples.c.agg_ts >
                    timestampScanLowerBound).limit(1)).scalar()

    # Get the timestamp of the most recent sample batch emitted to Taurus engine
    lastEmittedAggTime = metric_utils.queryLastEmittedSampleDatetime(
        key=_EMITTED_TWEET_VOLUME_SAMPLE_TRACKER_KEY)

    if lastEmittedAggTime is None:
        # Last emitted sample datetime has not been established yet; we'll rely
        # on the twitter agent to forward all metric samples to HTM engine
        g_log.info(
            "Last emitted sample datetime has not been established yet; "
            "deferring metric sample forwarding to Twitter Agent.")
        return

    metricDataForwarder = twitter_direct_agent.MetricDataForwarder(
        metricSpecs=twitter_direct_agent.loadMetricSpecs(), aggSec=aggPeriod)

    # The stop bound is one aggregation period past the last emitted batch
    metricDataForwarder.aggregateAndForward(aggStartDatetime=aggStartDatetime,
                                            stopDatetime=lastEmittedAggTime +
                                            timedelta(seconds=aggPeriod),
                                            metrics=[newMetricName])

    # Forward tweet media to dynamodb
    g_log.info("Forwarding twitter tweets to dynamodb using new symbol...")
    migrate_tweets_to_dynamodb.migrate(metrics=[newMetricName])
コード例 #4
0
def _resymbolTweetVolumeMetric(oldSymbol, newSymbol, aggPeriod):
  """ Perform the workflow of resymboling a tweet volume metric that consists of
  the following steps:
    1. Reassign buffered tweet samples in collectorsdb to the new metric.
    2. Forward the new metric data samples to HTM Engine
    3. Forward the tweet media to dynamodb

  If no last-emitted sample datetime has been recorded yet under the tweet
  volume tracker key, the function returns after step 1 and leaves the
  forwarding to the twitter agent.

  :param str oldSymbol: old stock symbol, upper case
  :param str newSymbol: new stock symbol, upper case
  :param int aggPeriod: metric aggregation period in seconds
  :raises AssertionError: if a tweet sample row already stored under the new
    metric name has an ``agg_ts`` at or before the maximum ``agg_ts`` found in
    the tweet samples table; the rename is not performed in that case
  """
  g_log.info(
    "Renaming tweet sample metric: oldSymbol=%s, newSymbol=%s, aggPeriod=%s",
    oldSymbol, newSymbol, aggPeriod)

  oldMetricName = gen_metrics_config.getTweetVolumeMetricName(oldSymbol)
  newMetricName = gen_metrics_config.getTweetVolumeMetricName(newSymbol)

  sqlEngine = collectorsdb.engineFactory()

  # Rename the metric in tweet sample rows

  with sqlEngine.begin() as conn:
    # Verify that metric samples with new symbol don't overlap with samples
    # corresponding to the old symbol
    g_log.info("Verifying that newMetric=%s in table=%s doesn't overlap with "
               "the oldMetric=%s.",
               newMetricName, schema.twitterTweetSamples, oldMetricName)

    # NOTE(review): this max is taken over every row of the table, not just the
    # rows of oldMetricName, which makes the check below stricter than the
    # variable name suggests -- confirm that this is intended
    maxOldMetricAggTimestamp = conn.execute(
      sql.select([sql.func.max(schema.twitterTweetSamples.c.agg_ts)])
    ).scalar()

    if maxOldMetricAggTimestamp is not None:
      overlappingRow = conn.execute(
        sql.select([schema.twitterTweetSamples.c.metric])
        .where(schema.twitterTweetSamples.c.metric == newMetricName)
        .where(schema.twitterTweetSamples.c.agg_ts <= maxOldMetricAggTimestamp)
        .order_by(schema.twitterTweetSamples.c.agg_ts.asc())
        .limit(1)).first()
      # Raise explicitly instead of using an `assert` statement: asserts are
      # stripped when Python runs with -O, which would let the rename below
      # proceed unguarded. The exception type and payload are the same as
      # before, and it propagates out of the `begin()` block before the UPDATE
      # is issued.
      if overlappingRow is not None:
        raise AssertionError(overlappingRow)

    # Re-symbol the tweet sample metric rows
    g_log.info("Renaming tweet sample metric %s with %s",
               oldMetricName, newMetricName)
    conn.execute(
      schema.twitterTweetSamples  # pylint: disable=E1120
      .update()
      .where(schema.twitterTweetSamples.c.metric == oldMetricName)
      .values(metric=newMetricName))


  # Forward tweet metric samples to Taurus Engine

  g_log.info("Forwarding new tweet metric=%s samples to Taurus engine...",
             newMetricName)

  # Get the aggregation timestamp of the starting tweet sample to forward
  #
  # NOTE: prior to March 2015, tweet samples didn't have a consistent reference
  # between twitter agent's restarts. This issue was addressed with the
  # introduction of emitted_sample_tracker table.
  #
  timestampScanLowerBound = (datetime.utcnow() -
                             timedelta(days=MAX_METRIC_SAMPLE_BACKLOG_DAYS))

  # Earliest agg_ts of the renamed metric inside the backlog window.
  # NOTE(review): scalar() yields None when no row matches -- confirm that
  # aggregateAndForward accepts aggStartDatetime=None
  aggStartDatetime = sqlEngine.execute(
    sql.select([schema.twitterTweetSamples.c.agg_ts],
               order_by=schema.twitterTweetSamples.c.agg_ts.asc())
    .where(schema.twitterTweetSamples.c.metric == newMetricName)
    .where(schema.twitterTweetSamples.c.agg_ts > timestampScanLowerBound)
    .limit(1)).scalar()

  # Get the timestamp of the most recent sample batch emitted to Taurus engine
  lastEmittedAggTime = metric_utils.queryLastEmittedSampleDatetime(
    key=_EMITTED_TWEET_VOLUME_SAMPLE_TRACKER_KEY)

  if lastEmittedAggTime is None:
    # Last emitted sample datetime has not been established yet; we'll rely
    # on the twitter agent to forward all metric samples to HTM engine
    g_log.info("Last emitted sample datetime has not been established yet; "
               "deferring metric sample forwarding to Twitter Agent.")
    return

  metricDataForwarder = twitter_direct_agent.MetricDataForwarder(
    metricSpecs=twitter_direct_agent.loadMetricSpecs(),
    aggSec=aggPeriod)

  # The stop bound is one aggregation period past the last emitted batch
  metricDataForwarder.aggregateAndForward(
    aggStartDatetime=aggStartDatetime,
    stopDatetime=lastEmittedAggTime + timedelta(seconds=aggPeriod),
    metrics=[newMetricName])


  # Forward tweet media to dynamodb
  g_log.info("Forwarding twitter tweets to dynamodb using new symbol...")
  migrate_tweets_to_dynamodb.migrate(metrics=[newMetricName])