def testEmittedSampleDatetime(self):
    """Exercise the establish/update/query cycle for a throwaway tracker key.

    Checks that establishing yields a datetime, that an update to the
    current time is what a subsequent query returns, and that the
    established value precedes the updated one.
    """
    trackerKey = "bogus-test-key"

    # Establish initial sample datetime
    initialDatetime = metric_utils.establishLastEmittedSampleDatetime(
        trackerKey, 300)

    # Remove whatever is stored under the test key once the test finishes
    executeSql = collectorsdb.engineFactory().execute
    trackerTable = schema.emittedSampleTracker
    deleteTestRow = trackerTable.delete().where(
        trackerTable.c.key == trackerKey)
    self.addCleanup(executeSql, deleteTestRow)

    self.assertIsInstance(initialDatetime, datetime)

    # Move the latest emitted sample datetime to the current time; the
    # microseconds are dropped before the value is stored and compared
    currentDatetime = datetime.utcnow().replace(microsecond=0)
    metric_utils.updateLastEmittedSampleDatetime(trackerKey, currentDatetime)

    # Verify that the stored value reflects the update
    storedDatetime = metric_utils.queryLastEmittedSampleDatetime(trackerKey)
    self.assertEqual(currentDatetime, storedDatetime)
    self.assertLess(initialDatetime, storedDatetime)
def testEmittedSampleDatetime(self):
    """Exercise the establish/update/query cycle for a throwaway tracker key.

    Checks that establishing yields a datetime, that an update to the
    current time is what a subsequent query returns, and that the
    established value precedes the updated one.
    """
    trackerKey = "bogus-test-key"

    # Establish initial sample datetime
    initialDatetime = metric_utils.establishLastEmittedSampleDatetime(
        trackerKey, 300)

    # Remove whatever is stored under the test key once the test finishes
    executeSql = collectorsdb.engineFactory().execute
    trackerTable = schema.emittedSampleTracker
    deleteTestRow = trackerTable.delete().where(
        trackerTable.c.key == trackerKey)
    self.addCleanup(executeSql, deleteTestRow)

    self.assertIsInstance(initialDatetime, datetime)

    # Move the latest emitted sample datetime to the current time; the
    # microseconds are dropped before the value is stored and compared
    currentDatetime = datetime.utcnow().replace(microsecond=0)
    metric_utils.updateLastEmittedSampleDatetime(trackerKey, currentDatetime)

    # Verify that the stored value reflects the update
    storedDatetime = metric_utils.queryLastEmittedSampleDatetime(trackerKey)
    self.assertEqual(currentDatetime, storedDatetime)
    self.assertLess(initialDatetime, storedDatetime)
def _resymbolTweetVolumeMetric(oldSymbol, newSymbol, aggPeriod):
    """Perform the workflow of resymboling a tweet volume metric.

    The workflow consists of the following steps:

    1. Reassign buffered tweet samples in collectorsdb to the new metric.
    2. Forward the new metric data samples to HTM Engine.
    3. Forward the tweet media to dynamodb.

    Steps 2 and 3 are skipped (the function returns early) when the last
    emitted sample datetime has not been established yet.

    :param str oldSymbol: old stock symbol, upper case
    :param str newSymbol: new stock symbol, upper case
    :param int aggPeriod: metric aggregation period in seconds
    :raises AssertionError: if a sample already stored under the new metric
        name has an aggregation timestamp at or before the latest sample of
        the old metric
    """
    g_log.info(
        "Renaming tweet sample metric: oldSymbol=%s, newSymbol=%s, aggPeriod=%s",
        oldSymbol, newSymbol, aggPeriod)

    oldMetricName = gen_metrics_config.getTweetVolumeMetricName(oldSymbol)
    newMetricName = gen_metrics_config.getTweetVolumeMetricName(newSymbol)

    sqlEngine = collectorsdb.engineFactory()

    # Rename the metric in tweet sample rows; the overlap check and the
    # update share one transaction
    with sqlEngine.begin() as conn:
        # Verify that metric samples with new symbol don't overlap with
        # samples corresponding to the old symbol
        g_log.info(
            "Verifying that newMetric=%s in table=%s doesn't overlap with "
            "the oldMetric=%s.",
            newMetricName, schema.twitterTweetSamples, oldMetricName)

        # The maximum must be restricted to the old metric's rows: a
        # table-wide maximum is never smaller than any new-metric timestamp,
        # which would flag every pre-existing new-metric row as overlapping.
        maxOldMetricAggTimestamp = conn.execute(
            sql.select([sql.func.max(schema.twitterTweetSamples.c.agg_ts)])
            .where(schema.twitterTweetSamples.c.metric == oldMetricName)
        ).scalar()

        if maxOldMetricAggTimestamp is not None:
            overlappingRow = conn.execute(
                sql.select([schema.twitterTweetSamples.c.metric])
                .where(schema.twitterTweetSamples.c.metric == newMetricName)
                .where(schema.twitterTweetSamples.c.agg_ts <=
                       maxOldMetricAggTimestamp)
                .order_by(schema.twitterTweetSamples.c.agg_ts.asc())
                .limit(1)).first()
            # Raised explicitly (rather than via the assert statement) so
            # the guard in front of the update below survives "python -O"
            if overlappingRow is not None:
                raise AssertionError(overlappingRow)

        # Re-symbol the tweet sample metric rows
        g_log.info("Renaming tweet sample metric %s with %s",
                   oldMetricName, newMetricName)
        conn.execute(
            schema.twitterTweetSamples  # pylint: disable=E1120
            .update()
            .where(schema.twitterTweetSamples.c.metric == oldMetricName)
            .values(metric=newMetricName))

    # Forward tweet metric samples to Taurus Engine
    g_log.info("Forwarding new tweet metric=%s samples to Taurus engine...",
               newMetricName)

    # Get the aggregation timestamp of the starting tweet sample to forward
    #
    # NOTE: prior to March 2015, tweet samples didn't have a consistent
    # reference between twitter agent's restarts. This issue was addressed
    # with the introduction of emitted_sample_tracker table.
    #
    timestampScanLowerBound = (
        datetime.utcnow() - timedelta(days=MAX_METRIC_SAMPLE_BACKLOG_DAYS))

    # NOTE(review): scalar() yields None when no sample of the new metric
    # falls inside the backlog window -- confirm aggregateAndForward accepts
    # aggStartDatetime=None.
    aggStartDatetime = sqlEngine.execute(
        sql.select([schema.twitterTweetSamples.c.agg_ts],
                   order_by=schema.twitterTweetSamples.c.agg_ts.asc())
        .where(schema.twitterTweetSamples.c.metric == newMetricName)
        .where(schema.twitterTweetSamples.c.agg_ts > timestampScanLowerBound)
        .limit(1)).scalar()

    # Get the timestamp of the most recent sample batch emitted to Taurus
    # engine
    lastEmittedAggTime = metric_utils.queryLastEmittedSampleDatetime(
        key=_EMITTED_TWEET_VOLUME_SAMPLE_TRACKER_KEY)
    if lastEmittedAggTime is None:
        # Last emitted sample datetime has not been established yet; we'll
        # rely on the twitter agent to forward all metric samples to HTM
        # engine
        g_log.info(
            "Last emitted sample datetime has not been established yet; "
            "deferring metric sample forwarding to Twitter Agent.")
        return

    metricDataForwarder = twitter_direct_agent.MetricDataForwarder(
        metricSpecs=twitter_direct_agent.loadMetricSpecs(),
        aggSec=aggPeriod)

    # The stop bound lies one aggregation period past the last emitted
    # aggregation time
    metricDataForwarder.aggregateAndForward(
        aggStartDatetime=aggStartDatetime,
        stopDatetime=lastEmittedAggTime + timedelta(seconds=aggPeriod),
        metrics=[newMetricName])

    # Forward tweet media to dynamodb
    g_log.info("Forwarding twitter tweets to dynamodb using new symbol...")
    migrate_tweets_to_dynamodb.migrate(metrics=[newMetricName])
def _resymbolTweetVolumeMetric(oldSymbol, newSymbol, aggPeriod):
    """Perform the workflow of resymboling a tweet volume metric.

    The workflow consists of the following steps:

    1. Reassign buffered tweet samples in collectorsdb to the new metric.
    2. Forward the new metric data samples to HTM Engine.
    3. Forward the tweet media to dynamodb.

    Steps 2 and 3 are skipped (the function returns early) when the last
    emitted sample datetime has not been established yet.

    :param str oldSymbol: old stock symbol, upper case
    :param str newSymbol: new stock symbol, upper case
    :param int aggPeriod: metric aggregation period in seconds
    :raises AssertionError: if a sample already stored under the new metric
        name has an aggregation timestamp at or before the latest sample of
        the old metric
    """
    g_log.info(
        "Renaming tweet sample metric: oldSymbol=%s, newSymbol=%s, aggPeriod=%s",
        oldSymbol, newSymbol, aggPeriod)

    oldMetricName = gen_metrics_config.getTweetVolumeMetricName(oldSymbol)
    newMetricName = gen_metrics_config.getTweetVolumeMetricName(newSymbol)

    sqlEngine = collectorsdb.engineFactory()

    # Rename the metric in tweet sample rows; the overlap check and the
    # update share one transaction
    with sqlEngine.begin() as conn:
        # Verify that metric samples with new symbol don't overlap with
        # samples corresponding to the old symbol
        g_log.info("Verifying that newMetric=%s in table=%s doesn't overlap with "
                   "the oldMetric=%s.",
                   newMetricName, schema.twitterTweetSamples, oldMetricName)

        # The maximum must be restricted to the old metric's rows: a
        # table-wide maximum is never smaller than any new-metric timestamp,
        # which would flag every pre-existing new-metric row as overlapping.
        maxOldMetricAggTimestamp = conn.execute(
            sql.select([sql.func.max(schema.twitterTweetSamples.c.agg_ts)])
            .where(schema.twitterTweetSamples.c.metric == oldMetricName)
        ).scalar()

        if maxOldMetricAggTimestamp is not None:
            overlappingRow = conn.execute(
                sql.select([schema.twitterTweetSamples.c.metric])
                .where(schema.twitterTweetSamples.c.metric == newMetricName)
                .where(schema.twitterTweetSamples.c.agg_ts <=
                       maxOldMetricAggTimestamp)
                .order_by(schema.twitterTweetSamples.c.agg_ts.asc())
                .limit(1)).first()
            # Raised explicitly (rather than via the assert statement) so
            # the guard in front of the update below survives "python -O"
            if overlappingRow is not None:
                raise AssertionError(overlappingRow)

        # Re-symbol the tweet sample metric rows
        g_log.info("Renaming tweet sample metric %s with %s",
                   oldMetricName, newMetricName)
        conn.execute(
            schema.twitterTweetSamples  # pylint: disable=E1120
            .update()
            .where(schema.twitterTweetSamples.c.metric == oldMetricName)
            .values(metric=newMetricName))

    # Forward tweet metric samples to Taurus Engine
    g_log.info("Forwarding new tweet metric=%s samples to Taurus engine...",
               newMetricName)

    # Get the aggregation timestamp of the starting tweet sample to forward
    #
    # NOTE: prior to March 2015, tweet samples didn't have a consistent
    # reference between twitter agent's restarts. This issue was addressed
    # with the introduction of emitted_sample_tracker table.
    #
    timestampScanLowerBound = (
        datetime.utcnow() - timedelta(days=MAX_METRIC_SAMPLE_BACKLOG_DAYS))

    # NOTE(review): scalar() yields None when no sample of the new metric
    # falls inside the backlog window -- confirm aggregateAndForward accepts
    # aggStartDatetime=None.
    aggStartDatetime = sqlEngine.execute(
        sql.select([schema.twitterTweetSamples.c.agg_ts],
                   order_by=schema.twitterTweetSamples.c.agg_ts.asc())
        .where(schema.twitterTweetSamples.c.metric == newMetricName)
        .where(schema.twitterTweetSamples.c.agg_ts > timestampScanLowerBound)
        .limit(1)).scalar()

    # Get the timestamp of the most recent sample batch emitted to Taurus
    # engine
    lastEmittedAggTime = metric_utils.queryLastEmittedSampleDatetime(
        key=_EMITTED_TWEET_VOLUME_SAMPLE_TRACKER_KEY)
    if lastEmittedAggTime is None:
        # Last emitted sample datetime has not been established yet; we'll
        # rely on the twitter agent to forward all metric samples to HTM
        # engine
        g_log.info("Last emitted sample datetime has not been established yet; "
                   "deferring metric sample forwarding to Twitter Agent.")
        return

    metricDataForwarder = twitter_direct_agent.MetricDataForwarder(
        metricSpecs=twitter_direct_agent.loadMetricSpecs(),
        aggSec=aggPeriod)

    # The stop bound lies one aggregation period past the last emitted
    # aggregation time
    metricDataForwarder.aggregateAndForward(
        aggStartDatetime=aggStartDatetime,
        stopDatetime=lastEmittedAggTime + timedelta(seconds=aggPeriod),
        metrics=[newMetricName])

    # Forward tweet media to dynamodb
    g_log.info("Forwarding twitter tweets to dynamodb using new symbol...")
    migrate_tweets_to_dynamodb.migrate(metrics=[newMetricName])