def _checkTwitterScreenNames(consumerKey,
                             consumerSecret,
                             accessToken,
                             accessTokenSecret,
                             errorReportEmailAwsRegion,
                             errorReportEmailSesEndpoint,
                             errorReportEmailSenderAddress,
                             awsAccessKeyId,
                             awsSecretAccessKey,
                             errorReportEmailRecipients):
  """ Check if twitter screen names are still valid. Email notifications are
  sent for unmapped screen names. Each time an unmapped screen name is
  reported successfully, we add it to a table that keeps track of unmapped
  screen names that were already reported -- to avoid duplicate emails
  reporting the same unmapped screen name.

  :param consumerKey: Twitter consumer key
  :param consumerSecret: Twitter consumer secret
  :param accessToken: Twitter access token
  :param accessTokenSecret: Twitter access token secret
  :param errorReportEmailAwsRegion: AWS region for error report email
  :type errorReportEmailAwsRegion: string
  :param errorReportEmailSesEndpoint: AWS/SES endpoint for error report email
  :type errorReportEmailSesEndpoint: string
  :param errorReportEmailSenderAddress: Sender address for error report email
  :type errorReportEmailSenderAddress: string
  :param awsAccessKeyId: AWS access key ID for error report email
  :type awsAccessKeyId: string
  :param awsSecretAccessKey: AWS secret access key for error report email
  :type awsSecretAccessKey: string
  :param errorReportEmailRecipients: Recipients of the error report email
  :type errorReportEmailRecipients: list of strings
  """
  authHandler = tweepy.OAuthHandler(consumerKey, consumerSecret)
  authHandler.set_access_token(accessToken, accessTokenSecret)

  tweepyApi = tweepy.API(authHandler)

  # Build the list of screen names to validate from the metric specs
  metricSpecs = loadMetricSpecs()
  screenNames = []
  for spec in metricSpecs:
    for screenName in spec.screenNames:
      screenNames.append(screenName.lower())

  unmappedScreenNames = _resolveUnmappedScreenNames(tweepyApi, screenNames)

  if unmappedScreenNames:
    g_log.error("No mappings for screenNames=%s", unmappedScreenNames)

    _reportUnmappedScreenNames(unmappedScreenNames=unmappedScreenNames,
                               awsRegion=errorReportEmailAwsRegion,
                               sesEndpoint=errorReportEmailSesEndpoint,
                               senderAddress=errorReportEmailSenderAddress,
                               awsAccessKeyId=awsAccessKeyId,
                               awsSecretAccessKey=awsSecretAccessKey,
                               recipients=errorReportEmailRecipients)
  else:
    # All screen names resolved; clear the twitter_handle_failures table
    _deleteScreenNameFailures()
    g_log.info("All screen names resolved successfully")
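# The sketch below illustrates the kind of resolution that
# _resolveUnmappedScreenNames (defined elsewhere in this module) performs. It
# is a minimal, hypothetical sketch -- not the authoritative implementation --
# assuming tweepy's users/lookup wrapper (tweepy.API.lookup_users), which
# accepts at most 100 screen names per call.
def _resolveUnmappedScreenNamesSketch(tweepyApi, screenNames):
  """ Determine which of the given screen names no longer map to twitter
  users.

  :param tweepyApi: authenticated tweepy.API instance
  :param screenNames: sequence of lower-case twitter screen names
  :returns: set of screen names that twitter could not resolve
  """
  names = list(set(screenNames))
  resolved = set()
  for i in xrange(0, len(names), 100):
    batch = names[i:i + 100]
    try:
      users = tweepyApi.lookup_users(screen_names=batch)
    except tweepy.TweepError:
      # tweepy raises when none of the screen names in the batch resolve
      users = []
    # users that no longer exist are simply omitted from the response
    resolved.update(user.screen_name.lower() for user in users)
  return set(names) - resolved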
def main():
  """
  NOTE: main also serves as entry point for "console script" generated by
  setup
  """
  logging_support.LoggingSupport().initTool()

  try:
    options = _parseArgs()

    g_log.info("Verifying that agents are in hot_standby mode")
    for section in config.sections():
      assert (config.get(section, "opmode") ==
              ApplicationConfig.OP_MODE_HOT_STANDBY)

    g_log.info("Verifying that the old symbol has been removed from the "
               "metrics configuration")
    for stockData in metric_utils.getMetricsConfiguration().itervalues():
      assert stockData["symbol"] != options.old_symbol

    if options.twitter and (not options.stocks):
      g_log.info("Migrating ONLY twitter data from old-symbol=%s "
                 "to new-symbol=%s",
                 options.old_symbol, options.new_symbol)
    elif options.stocks and (not options.twitter):
      g_log.info("Migrating ONLY xignite stock data from old-symbol=%s "
                 "to new-symbol=%s",
                 options.old_symbol, options.new_symbol)
      raise NotImplementedError
    else:
      g_log.info("Migrating BOTH twitter and xignite stock data from "
                 "old-symbol=%s to new-symbol=%s",
                 options.old_symbol, options.new_symbol)
      raise NotImplementedError

    oldSymbolTweetPrefix = "TWITTER.TWEET.HANDLE.{symbol}.".format(
      symbol=options.old_symbol)
    newSymbolTweetPrefix = "TWITTER.TWEET.HANDLE.{symbol}.".format(
      symbol=options.new_symbol)
    oldSymbolTweetMetricsList = []

    with collectorsdb.engineFactory().begin() as conn:
      g_log.info("Renaming metrics to new symbol")
      if options.twitter:
        oldSymbolTweetsQuery = (sql.select([tweetSamplesSchema])
                                .where(tweetSamplesSchema.c.metric
                                       .contains(oldSymbolTweetPrefix)))
        oldSymbolTweets = conn.execute(oldSymbolTweetsQuery)
        for tweetSample in oldSymbolTweets:
          newMetricName = "{newPrefix}{metric}".format(
            newPrefix=newSymbolTweetPrefix,
            metric=tweetSample.metric[len(oldSymbolTweetPrefix):])
          if tweetSample.metric not in oldSymbolTweetMetricsList:
            oldSymbolTweetMetricsList.append(tweetSample.metric)

          updateSampleQuery = (
            tweetSamplesSchema.update()
            .where(tweetSamplesSchema.c.seq == tweetSample.seq)
            .values(metric=newMetricName))

          conn.execute(updateSampleQuery)

      g_log.info("Forwarding new twitter metric data to Taurus engine...")
      if options.twitter:
        oldestRecordTs = conn.execute(
          sql.select([tweetSamplesSchema.c.agg_ts],
                     order_by=tweetSamplesSchema.c.agg_ts.asc())).first()[0]
        lastEmittedAggTime = metric_utils.establishLastEmittedSampleDatetime(
          key=_EMITTED_TWEET_VOLUME_SAMPLE_TRACKER_KEY,
          aggSec=options.aggPeriod)
        # Back up the emitted-sample tracker far enough to cover the oldest
        # buffered record, rounded to a whole number of aggregation periods
        aggOffset = (
          math.ceil(
            (epochFromNaiveUTCDatetime(lastEmittedAggTime) -
             epochFromNaiveUTCDatetime(oldestRecordTs)) /
            options.aggPeriod) *
          options.aggPeriod)
        aggStartDatetime = (lastEmittedAggTime -
                            timedelta(seconds=aggOffset) -
                            timedelta(seconds=options.aggPeriod))

        metric_utils.updateLastEmittedSampleDatetime(
          key=_EMITTED_TWEET_VOLUME_SAMPLE_TRACKER_KEY,
          sampleDatetime=aggStartDatetime)

        MetricDataForwarder.runInThread(
          metricSpecs=loadMetricSpecs(),
          aggSec=options.aggPeriod,
          symbolList=[options.new_symbol],
          forwardOnlyBacklog=True)

        metric_utils.updateLastEmittedSampleDatetime(
          key=_EMITTED_TWEET_VOLUME_SAMPLE_TRACKER_KEY,
          sampleDatetime=lastEmittedAggTime)

    g_log.info("Forwarding metrics to dynamodb using new symbol...")
    if options.twitter:
      migrate_tweets_to_dynamodb.main(symbolList=[options.new_symbol])

    g_log.info("Unmonitoring and deleting existing metrics associated with "
               "symbol=%s", options.old_symbol)
    oldModels = metric_utils.getSymbolModels(options.htmServer,
                                             options.apikey,
                                             options.old_symbol)
    for model in oldModels:
      metric_utils.unmonitorMetric(options.htmServer, options.apikey,
                                   model.uid)
      metric_utils.deleteMetric(options.htmServer, options.apikey,
                                model.name)
  except Exception:
    # Assumption: log and re-raise unhandled failures (the handler body was
    # not shown in the original)
    g_log.exception("Symbol migration failed")
    raise
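# A worked example (hypothetical values) of the backlog-offset arithmetic in
# main() above: the offset is rounded up to a whole number of aggregation
# periods so that aggStartDatetime lands on the emitted-sample grid at least
# one period before the oldest buffered record.
#
#   aggPeriod          = 300 (seconds)
#   oldestRecordTs     = 12:00:00
#   lastEmittedAggTime = 12:07:30   (elapsed: 450 sec)
#   aggOffset          = ceil(450.0 / 300) * 300 = 600
#   aggStartDatetime   = 12:07:30 - 600 sec - 300 sec = 11:52:30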
def _resymbolTweetVolumeMetric(oldSymbol, newSymbol, aggPeriod):
  """ Perform the workflow of resymboling a tweet volume metric, which
  consists of the following steps:

  1. Reassign buffered tweet samples in collectorsdb to the new metric.
  2. Forward the new metric data samples to HTM Engine.
  3. Forward the tweet media to dynamodb.

  :param str oldSymbol: old stock symbol, upper case
  :param str newSymbol: new stock symbol, upper case
  :param int aggPeriod: metric aggregation period in seconds
  """
  g_log.info("Renaming tweet sample metric: oldSymbol=%s, newSymbol=%s, "
             "aggPeriod=%s", oldSymbol, newSymbol, aggPeriod)

  oldMetricName = gen_metrics_config.getTweetVolumeMetricName(oldSymbol)
  newMetricName = gen_metrics_config.getTweetVolumeMetricName(newSymbol)

  sqlEngine = collectorsdb.engineFactory()

  # Rename the metric in tweet sample rows
  with sqlEngine.begin() as conn:
    # Verify that metric samples with the new symbol don't overlap with
    # samples corresponding to the old symbol
    g_log.info("Verifying that newMetric=%s in table=%s doesn't overlap with "
               "the oldMetric=%s.",
               newMetricName, schema.twitterTweetSamples, oldMetricName)

    maxOldMetricAggTimestamp = conn.execute(
      sql.select([sql.func.max(schema.twitterTweetSamples.c.agg_ts)])
      .where(schema.twitterTweetSamples.c.metric == oldMetricName)).scalar()

    if maxOldMetricAggTimestamp is not None:
      overlappingRow = conn.execute(
        sql.select([schema.twitterTweetSamples.c.metric])
        .where(schema.twitterTweetSamples.c.metric == newMetricName)
        .where(schema.twitterTweetSamples.c.agg_ts <=
               maxOldMetricAggTimestamp)
        .order_by(schema.twitterTweetSamples.c.agg_ts.asc())
        .limit(1)).first()

      assert overlappingRow is None, overlappingRow

    # Re-symbol the tweet sample metric rows
    g_log.info("Renaming tweet sample metric %s to %s",
               oldMetricName, newMetricName)
    conn.execute(
      schema.twitterTweetSamples  # pylint: disable=E1120
      .update()
      .where(schema.twitterTweetSamples.c.metric == oldMetricName)
      .values(metric=newMetricName))

  # Forward tweet metric samples to Taurus Engine
  g_log.info("Forwarding new tweet metric=%s samples to Taurus engine...",
             newMetricName)

  # Get the aggregation timestamp of the starting tweet sample to forward
  #
  # NOTE: prior to March 2015, tweet samples didn't have a consistent
  # reference between twitter agent's restarts. This issue was addressed with
  # the introduction of the emitted_sample_tracker table.
  #
  timestampScanLowerBound = (
    datetime.utcnow() - timedelta(days=MAX_METRIC_SAMPLE_BACKLOG_DAYS))

  aggStartDatetime = sqlEngine.execute(
    sql.select([schema.twitterTweetSamples.c.agg_ts],
               order_by=schema.twitterTweetSamples.c.agg_ts.asc())
    .where(schema.twitterTweetSamples.c.metric == newMetricName)
    .where(schema.twitterTweetSamples.c.agg_ts > timestampScanLowerBound)
    .limit(1)).scalar()

  # Get the timestamp of the most recent sample batch emitted to Taurus engine
  lastEmittedAggTime = metric_utils.queryLastEmittedSampleDatetime(
    key=_EMITTED_TWEET_VOLUME_SAMPLE_TRACKER_KEY)
  if lastEmittedAggTime is None:
    # Last emitted sample datetime has not been established yet; we'll rely
    # on the twitter agent to forward all metric samples to HTM engine
    g_log.info("Last emitted sample datetime has not been established yet; "
               "deferring metric sample forwarding to Twitter Agent.")
    return

  metricDataForwarder = twitter_direct_agent.MetricDataForwarder(
    metricSpecs=twitter_direct_agent.loadMetricSpecs(),
    aggSec=aggPeriod)

  metricDataForwarder.aggregateAndForward(
    aggStartDatetime=aggStartDatetime,
    stopDatetime=lastEmittedAggTime + timedelta(seconds=aggPeriod),
    metrics=[newMetricName])

  # Forward tweet media to dynamodb
  g_log.info("Forwarding twitter tweets to dynamodb using new symbol...")
  migrate_tweets_to_dynamodb.migrate(metrics=[newMetricName])
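# For reference, the re-symbol UPDATE issued by _resymbolTweetVolumeMetric
# above is equivalent to the following SQL (sketch; the actual table name is
# defined by the collectorsdb schema module):
#
#   UPDATE twitter_tweet_samples
#      SET metric = <newMetricName>
#    WHERE metric = <oldMetricName>;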
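# Usage sketch (hypothetical values): the resymboling workflow would invoke
# _resymbolTweetVolumeMetric after the old symbol has been removed from the
# metrics configuration, e.g.:
#
#   _resymbolTweetVolumeMetric(oldSymbol="FOO", newSymbol="BAR",
#                              aggPeriod=300)
#
# where aggPeriod matches the twitter agent's aggregation period in seconds.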