def establishLastEmittedSampleDatetime(key, aggSec):
    """Return the last-emitted sample timestamp for ``key``, seeding it if absent.

    When a timestamp is already stored for ``key`` it is returned as-is.
    Otherwise one is synthesized as the current UTC time (truncated to whole
    seconds) minus ``aggSec`` seconds, saved via a MySQL ``INSERT IGNORE``, and
    then read back from the database.

    :param str key: caller's key in schema.emittedSampleTracker
    :param int aggSec: aggregation period in seconds
    :returns: (possibly synthesized) UTC timestamp of the last
      successfully-emitted sample batch
    :rtype: datetime.datetime
    """
    storedTimestamp = queryLastEmittedSampleDatetime(key)
    if storedTimestamp is not None:
        return storedTimestamp

    # Nothing saved yet: start at the present (less one aggregation period) to
    # avoid re-sending metric data that we may have already sent to Taurus.
    nowUtc = datetime.utcnow().replace(microsecond=0)
    seedTimestamp = nowUtc - timedelta(seconds=aggSec)

    insertStmt = schema.emittedSampleTracker.insert()
    insertStmt = insertStmt.prefix_with("IGNORE", dialect="mysql")
    insertStmt = insertStmt.values(key=key, sample_ts=seedTimestamp)
    collectorsdb.engineFactory().execute(insertStmt)

    # Query again after saving to account for mysql's loss of accuracy
    return queryLastEmittedSampleDatetime(key)
def updateLastEmittedNonMetricSequence(key, seq):
    """Store ``seq`` as the last emitted non-metric sequence for ``key``.

    An UPDATE is attempted first; when it reports a rowcount of zero, a new
    row for ``key`` is inserted instead.

    :param str key: caller's key in schema.emittedNonMetricTracker
    :param int seq: sequence of last successfully-emitted non-metric
    """
    tracker = schema.emittedNonMetricTracker

    updateStmt = tracker.update()  # pylint: disable=E1120
    updateStmt = updateStmt.values(last_seq=seq)
    updateStmt = updateStmt.where(tracker.c.key == key)
    updateResult = collectorsdb.engineFactory().execute(updateStmt)

    if updateResult.rowcount != 0:
        # The existing row was updated; nothing more to do
        return

    # The update didn't find the key, so create the row.
    #
    # NOTE: sqlalchemy doesn't support "ON DUPLICATE KEY UPDATE" in its
    # syntactic sugar; see https://bitbucket.org/zzzeek/sqlalchemy/issue/960
    insertStmt = tracker.insert()  # pylint: disable=E1120
    insertStmt = insertStmt.values(key=key, last_seq=seq)
    collectorsdb.engineFactory().execute(insertStmt)
def testEngineFactorySingletonPattern(self, sqlalchemyMock):
    """engineFactory() reuses one engine per pid and replaces it on pid change.

    :param sqlalchemyMock: mock standing in for the sqlalchemy module; its
      ``create_engine`` hands out the two spec'd engine mocks in order
    """
    # Explicitly spec out sqlalchemy.create_engine()
    engineClass = sqlalchemy.engine.base.Engine
    firstCall = Mock(spec_set=engineClass)
    secondCall = Mock(spec_set=engineClass)
    sqlalchemyMock.create_engine.side_effect = iter([firstCall, secondCall])

    # The first call creates the engine
    initialEngine = collectorsdb.engineFactory()
    self.assertIs(initialEngine, firstCall)

    # The second call must return the very same engine without creating another
    repeatEngine = collectorsdb.engineFactory()
    self.assertIs(repeatEngine, firstCall)
    self.assertEqual(sqlalchemyMock.create_engine.call_count, 1)

    # Pretend to be a different process by reporting a different pid; expect
    # the old engine to be disposed and a new instance to be returned
    with patch("taurus.metric_collectors.collectorsdb.os",
               autospec=True) as osMock:
        osMock.getpid.return_value = collectorsdb._EngineSingleton._pid + 1
        otherProcessEngine = collectorsdb.engineFactory()
        self.assertTrue(initialEngine.dispose.called)
        self.assertIs(otherProcessEngine, secondCall)
def establishLastEmittedSampleDatetime(key, aggSec):
    """Return the last-emitted sample timestamp for ``key``, seeding it if absent.

    When a timestamp is already stored for ``key`` it is returned as-is.
    Otherwise one is synthesized as the current UTC time (truncated to whole
    seconds) minus ``aggSec`` seconds, saved via a MySQL ``INSERT IGNORE``, and
    then read back from the database.

    :param str key: caller's key in schema.emittedSampleTracker
    :param int aggSec: aggregation period in seconds
    :returns: (possibly synthesized) UTC timestamp of the last
      successfully-emitted sample batch
    :rtype: datetime.datetime
    """
    storedTimestamp = queryLastEmittedSampleDatetime(key)
    if storedTimestamp is not None:
        return storedTimestamp

    # Nothing saved yet: start at the present (less one aggregation period) to
    # avoid re-sending metric data that we may have already sent to Taurus.
    nowUtc = datetime.utcnow().replace(microsecond=0)
    seedTimestamp = nowUtc - timedelta(seconds=aggSec)

    insertStmt = schema.emittedSampleTracker.insert()
    insertStmt = insertStmt.prefix_with("IGNORE", dialect="mysql")
    insertStmt = insertStmt.values(key=key, sample_ts=seedTimestamp)
    collectorsdb.engineFactory().execute(insertStmt)

    # Query again after saving to account for mysql's loss of accuracy
    return queryLastEmittedSampleDatetime(key)
def updateLastEmittedNonMetricSequence(key, seq):
    """Store ``seq`` as the last emitted non-metric sequence for ``key``.

    An UPDATE is attempted first; when it reports a rowcount of zero, a new
    row for ``key`` is inserted instead.

    :param str key: caller's key in schema.emittedNonMetricTracker
    :param int seq: sequence of last successfully-emitted non-metric
    """
    tracker = schema.emittedNonMetricTracker

    updateStmt = tracker.update()  # pylint: disable=E1120
    updateStmt = updateStmt.values(last_seq=seq)
    updateStmt = updateStmt.where(tracker.c.key == key)
    updateResult = collectorsdb.engineFactory().execute(updateStmt)

    if updateResult.rowcount != 0:
        # The existing row was updated; nothing more to do
        return

    # The update didn't find the key, so create the row.
    #
    # NOTE: sqlalchemy doesn't support "ON DUPLICATE KEY UPDATE" in its
    # syntactic sugar; see https://bitbucket.org/zzzeek/sqlalchemy/issue/960
    insertStmt = tracker.insert()  # pylint: disable=E1120
    insertStmt = insertStmt.values(key=key, last_seq=seq)
    collectorsdb.engineFactory().execute(insertStmt)
def _flagUnknownSymbolAsReported(symbol):
    """Record an unknown company symbol in schema.companySymbolFailures.

    The row is added with a MySQL ``INSERT IGNORE``.

    :param str symbol: symbol of the company's security (e.g., "AAPL")
    """
    failuresTable = schema.companySymbolFailures
    insertStmt = failuresTable.insert()  # pylint: disable=E1120
    insertStmt = insertStmt.prefix_with("IGNORE", dialect="mysql")
    insertStmt = insertStmt.values(symbol=symbol)

    collectorsdb.engineFactory().execute(insertStmt)

    g_log.debug("Saved unknown company symbol=%s", symbol)
def testEngineFactorySingletonPattern(self):
    """engineFactory() returns a singleton and is expected to assert elsewhere.

    Two calls in this process must return the same object; calling it from a
    pool worker process is expected to surface an AssertionError.
    """
    # Call collectorsdb.engineFactory()
    engine = collectorsdb.engineFactory()

    # Call collectorsdb.engineFactory() again and assert singleton
    engine2 = collectorsdb.engineFactory()
    self.assertIs(engine2, engine)

    # Call collectorsdb.engineFactory() in different process, assert raises
    # AssertionError. The pool is always torn down so that the worker process
    # does not outlive the test.
    pool = multiprocessing.Pool(processes=1)
    try:
        with self.assertRaises(AssertionError):
            pool.apply(collectorsdb.engineFactory)
    finally:
        pool.terminate()
        pool.join()
def _flagUnknownSymbolAsReported(symbol):
    """Record an unknown company symbol in schema.companySymbolFailures.

    The row is added with a MySQL ``INSERT IGNORE``.

    :param str symbol: symbol of the company's security (e.g., "AAPL")
    """
    failuresTable = schema.companySymbolFailures
    insertStmt = failuresTable.insert()  # pylint: disable=E1120
    insertStmt = insertStmt.prefix_with("IGNORE", dialect="mysql")
    insertStmt = insertStmt.values(symbol=symbol)

    collectorsdb.engineFactory().execute(insertStmt)

    g_log.debug("Saved unknown company symbol=%s", symbol)
def testEngineFactorySingletonPattern(self):
    """engineFactory() returns a singleton and is expected to assert elsewhere.

    Two calls in this process must return the same object; calling it from a
    pool worker process is expected to surface an AssertionError.
    """
    # Call collectorsdb.engineFactory()
    engine = collectorsdb.engineFactory()

    # Call collectorsdb.engineFactory() again and assert singleton
    engine2 = collectorsdb.engineFactory()
    self.assertIs(engine2, engine)

    # Call collectorsdb.engineFactory() in different process, assert raises
    # AssertionError. The pool is always torn down so that the worker process
    # does not outlive the test.
    pool = multiprocessing.Pool(processes=1)
    try:
        with self.assertRaises(AssertionError):
            pool.apply(collectorsdb.engineFactory)
    finally:
        pool.terminate()
        pool.join()
def testEngineFactorySingletonPattern(self):
    """engineFactory() returns a singleton here and a new instance elsewhere.

    Two calls in this process must return the same object; the value obtained
    from a pool worker process is then compared against this process's engine
    id.
    """
    # Call collectorsdb.engineFactory()
    engine = collectorsdb.engineFactory()

    # Call collectorsdb.engineFactory() again and assert singleton
    engine2 = collectorsdb.engineFactory()
    self.assertIs(engine2, engine)

    # Call collectorsdb.engineFactory() in different process, assert new
    # instance. The pool is always torn down so that the worker process does
    # not outlive the test.
    originalEngineId = id(engine)
    pool = multiprocessing.Pool(processes=1)
    try:
        engine3 = pool.apply(_forkedEngineId)
    finally:
        pool.terminate()
        pool.join()

    # NOTE(review): _forkedEngineId is defined elsewhere; if it already returns
    # an id, then id(engine3) is the id of that returned value rather than of
    # the worker's engine - confirm that this comparison is the intended one.
    self.assertNotEqual(id(engine3), originalEngineId)
def testEngineFactorySingletonPattern(self):
    """engineFactory() returns a singleton here and a new instance elsewhere.

    Two calls in this process must return the same object; the value obtained
    from a pool worker process is then compared against this process's engine
    id.
    """
    # Call collectorsdb.engineFactory()
    engine = collectorsdb.engineFactory()

    # Call collectorsdb.engineFactory() again and assert singleton
    engine2 = collectorsdb.engineFactory()
    self.assertIs(engine2, engine)

    # Call collectorsdb.engineFactory() in different process, assert new
    # instance. The pool is always torn down so that the worker process does
    # not outlive the test.
    originalEngineId = id(engine)
    pool = multiprocessing.Pool(processes=1)
    try:
        engine3 = pool.apply(_forkedEngineId)
    finally:
        pool.terminate()
        pool.join()

    # NOTE(review): _forkedEngineId is defined elsewhere; if it already returns
    # an id, then id(engine3) is the id of that returned value rather than of
    # the worker's engine - confirm that this comparison is the intended one.
    self.assertNotEqual(id(engine3), originalEngineId)
def updateLastEmittedSampleDatetime(key, sampleDatetime):
    """Overwrite the stored last-emitted sample timestamp for ``key``.

    Issues an UPDATE against schema.emittedSampleTracker for the row whose key
    matches; no row is inserted here.

    :param str key: caller's key in schema.emittedSampleTracker
    :param datetime sampleDatetime: UTC datetime of last successfully-emitted
      sample batch
    """
    tracker = schema.emittedSampleTracker

    updateStmt = tracker.update()  # pylint: disable=E1120
    updateStmt = updateStmt.values(sample_ts=sampleDatetime)
    updateStmt = updateStmt.where(tracker.c.key == key)

    collectorsdb.engineFactory().execute(updateStmt)
def _saveScreenNameFailure(unmappedScreenName):
    """Record an unmapped twitter handle in schema.twitterHandleFailures.

    The row is added with a MySQL ``INSERT IGNORE``.

    :param unmappedScreenName: the twitter handle that is not valid anymore
    :type unmappedScreenName: string
    """
    failuresTable = collectorsdb.schema.twitterHandleFailures
    insertStmt = failuresTable.insert()  # pylint: disable=E1120
    insertStmt = insertStmt.prefix_with("IGNORE", dialect="mysql")
    insertStmt = insertStmt.values(handle=unmappedScreenName)

    collectorsdb.engineFactory().execute(insertStmt)

    g_log.info("Saved unmapped twitter handle; handle=%s", unmappedScreenName)
def updateLastEmittedSampleDatetime(key, sampleDatetime):
    """Overwrite the stored last-emitted sample timestamp for ``key``.

    Issues an UPDATE against schema.emittedSampleTracker for the row whose key
    matches; no row is inserted here.

    :param str key: caller's key in schema.emittedSampleTracker
    :param datetime sampleDatetime: UTC datetime of last successfully-emitted
      sample batch
    """
    tracker = schema.emittedSampleTracker

    updateStmt = tracker.update()  # pylint: disable=E1120
    updateStmt = updateStmt.values(sample_ts=sampleDatetime)
    updateStmt = updateStmt.where(tracker.c.key == key)

    collectorsdb.engineFactory().execute(updateStmt)
def _saveScreenNameFailure(unmappedScreenName):
    """Record an unmapped twitter handle in schema.twitterHandleFailures.

    The row is added with a MySQL ``INSERT IGNORE``.

    :param unmappedScreenName: the twitter handle that is not valid anymore
    :type unmappedScreenName: string
    """
    failuresTable = collectorsdb.schema.twitterHandleFailures
    insertStmt = failuresTable.insert()  # pylint: disable=E1120
    insertStmt = insertStmt.prefix_with("IGNORE", dialect="mysql")
    insertStmt = insertStmt.values(handle=unmappedScreenName)

    collectorsdb.engineFactory().execute(insertStmt)

    g_log.info("Saved unmapped twitter handle; handle=%s", unmappedScreenName)
def _purgeTweetsSlatedForDeletion(limit):
    """Delete tweets whose uid is listed in the schema.twitterDeletion table.

    The uids to delete are selected first (at most ``limit`` of them) and then
    deleted within the same transaction; a mismatch between the number selected
    and the number actually deleted is logged as an error.

    :param limit: max records to purge per call
    :returns: a sequence of id's of deleted tweets
    """
    tweetsTable = collectorsdb.schema.twitterTweets
    deletionTable = collectorsdb.schema.twitterDeletion

    # NOTE: we first query the row id's to delete, so we can return them for
    # accountability and debugging
    slatedUidsSel = sqlalchemy.select([deletionTable.c.tweet_uid])
    rowsToDeleteSel = sqlalchemy.select([tweetsTable.c.uid])
    rowsToDeleteSel = rowsToDeleteSel.where(
        tweetsTable.c.uid.in_(slatedUidsSel))
    rowsToDeleteSel = rowsToDeleteSel.limit(limit)

    numDeleted = 0
    with collectorsdb.engineFactory().begin() as conn:
        fetchedRows = conn.execute(rowsToDeleteSel).fetchall()
        rowIdsToDelete = tuple(str(row[0]) for row in fetchedRows)

        if rowIdsToDelete:
            deleteStmt = tweetsTable.delete()
            deleteStmt = deleteStmt.where(
                tweetsTable.c.uid.in_(rowIdsToDelete))
            numDeleted = conn.execute(deleteStmt).rowcount

    numExpected = len(rowIdsToDelete)
    if numExpected != numDeleted:
        g_log.error(
            "Expected to delete %d tweets, but actually deleted %d tweets",
            numExpected, numDeleted)

    return rowIdsToDelete
def _purgeStaleDeletionRecords(limit):
    """Delete stale rows from the schema.twitterDeletion table.

    A row is stale when its ``created_at`` is more than
    ``_DELETION_ROW_EXPIRY_DAYS`` days before the database's current timestamp.
    The tweet_uid's are selected first (at most ``limit`` of them) and then
    deleted within the same transaction; a count mismatch is logged as an
    error.

    :param limit: max records to purge per call
    :returns: a sequence of tweet_uid's of deleted schema.twitterDeletion rows
    """
    deletionTable = collectorsdb.schema.twitterDeletion

    expiryInterval = sqlalchemy.text(
        "INTERVAL %i DAY" % (_DELETION_ROW_EXPIRY_DAYS,))
    staleBefore = sqlalchemy.func.date_sub(
        sqlalchemy.func.current_timestamp(), expiryInterval)

    # NOTE: we first query the row id's to delete, so we can return them for
    # accountability and debugging
    rowsToDeleteSel = sqlalchemy.select([deletionTable.c.tweet_uid])
    rowsToDeleteSel = rowsToDeleteSel.where(
        deletionTable.c.created_at < staleBefore)
    rowsToDeleteSel = rowsToDeleteSel.limit(limit)

    numDeleted = 0
    with collectorsdb.engineFactory().begin() as conn:
        fetchedRows = conn.execute(rowsToDeleteSel).fetchall()
        rowIdsToDelete = tuple(str(row[0]) for row in fetchedRows)

        if rowIdsToDelete:
            deleteStmt = deletionTable.delete()
            deleteStmt = deleteStmt.where(
                deletionTable.c.tweet_uid.in_(rowIdsToDelete))
            numDeleted = conn.execute(deleteStmt).rowcount

    numExpected = len(rowIdsToDelete)
    if numExpected != numDeleted:
        g_log.error(
            "Expected to delete %d tweet delition request rows, but "
            "actually deleted %d rows", numExpected, numDeleted)

    return rowIdsToDelete
def _deleteSecurity(symbol):
    """Delete security from xignite_security table

    :param symbol: value matched against the ``symbol`` column of
      schema.xigniteSecurity
    """
    securityTable = schema.xigniteSecurity

    deleteStmt = securityTable.delete()  # pylint: disable=E1120
    deleteStmt = deleteStmt.where(securityTable.c.symbol == symbol)

    with collectorsdb.engineFactory().begin() as conn:
        conn.execute(deleteStmt)
def main():
    """Reset the emitted status of a stock symbol after interactive confirmation.

    Parses the required ``--symbol`` argument, asks the user to type a randomly
    numbered "Yes-N" answer, and on confirmation calls deleteFromEmitted for
    schema.emittedStockPrice and schema.emittedStockVolume inside one
    transaction.

    :returns: 1 if the user did not confirm; None otherwise
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--symbol", required=True)
    args = parser.parse_args()

    # Randomized confirmation string guards against a reflexive "yes"
    expectedAnswer = "Yes-%s" % (random.randint(1, 30),)

    promptTemplate = (
        "Attention! You are about to reset the emitted status for the \"{}\""
        " stock symbol at {}.\n"
        "\n"
        "To back out immediately without making any changes, feel free to type "
        "anything but \"{}\" in the prompt below, and press return.\n"
        "\n"
        "Are you sure you want to continue? ")

    with collectorsdb.engineFactory().begin() as conn:
        prompt = promptTemplate.format(
            args.symbol, str(conn.engine), str(expectedAnswer))
        answer = raw_input(prompt)

        if answer.strip() != expectedAnswer:
            print("Aborting - Wise choice, my friend. Bye.")
            return 1

        for emittedTable in (schema.emittedStockPrice,
                             schema.emittedStockVolume):
            deleteFromEmitted(conn, emittedTable, args.symbol)
def _queryNewsVolumes(aggStartDatetime, aggStopDatetime):
    """Count security headlines+releases per symbol discovered in a time window.

    Rows from schema.xigniteSecurityHeadline and schema.xigniteSecurityRelease
    whose ``discovered_at`` falls in the window are combined with UNION ALL and
    then grouped and counted by symbol.

    :param aggStartDatetime: inclusive start of aggregation interval as UTC
      datetime
    :param aggStopDatetime: non-inclusive upper bound of aggregation interval
      as UTC datetime
    :returns: a sparse sequence of two-tuples: (symbol, count); companies that
      have no detected news in the given aggregation period will be absent from
      the result.
    """
    headlines = schema.xigniteSecurityHeadline
    releases = schema.xigniteSecurityRelease

    headlineInWindow = (
        (headlines.c.discovered_at >= aggStartDatetime) &
        (headlines.c.discovered_at < aggStopDatetime))
    headlineSel = sql.select([headlines.c.symbol.label("symbol")])
    headlineSel = headlineSel.where(headlineInWindow)

    releaseInWindow = (
        (releases.c.discovered_at >= aggStartDatetime) &
        (releases.c.discovered_at < aggStopDatetime))
    releaseSel = sql.select([releases.c.symbol])
    releaseSel = releaseSel.where(releaseInWindow)

    allNewsUnion = sql.union_all(headlineSel, releaseSel)

    countCol = sql.func.count("symbol").label("sym_count")
    aggSel = sql.select(["symbol", countCol])
    aggSel = aggSel.select_from(allNewsUnion.alias("union_of_tables"))
    aggSel = aggSel.group_by("symbol")

    return collectorsdb.engineFactory().execute(aggSel).fetchall()
def main():
    """Purge rows older than the requested number of days from twitterTweets.

    NOTE: main also serves as entry point for "console script" generated by
    setup

    The age threshold comes from the "days" entry of the parsed options and is
    compared against the database's UTC timestamp. Failures are logged and then
    re-raised; SystemExit is always re-raised, and logged only when its code is
    non-zero.
    """
    logging_support.LoggingSupport().initTool()

    try:
        options = _parseArgs()
        days = options["days"]

        tweetsTable = collectorsdb.schema.twitterTweets

        g_log.info("Purging records from table=%s older than numDays=%s",
                   tweetsTable, days)

        ageInterval = sqlalchemy.text("INTERVAL %i DAY" % (days,))
        createdBefore = sqlalchemy.func.date_sub(
            sqlalchemy.func.utc_timestamp(), ageInterval)
        purgeStmt = tweetsTable.delete().where(
            tweetsTable.c.created_at < createdBefore)

        with collectorsdb.engineFactory().begin() as conn:
            result = conn.execute(purgeStmt)

        g_log.info("Purged numRows=%s from table=%s",
                   result.rowcount, tweetsTable)
    except SystemExit as e:
        # A zero exit code is a normal termination; only log real failures
        if e.code != 0:
            g_log.exception("Failed!")
        raise
    except Exception:
        g_log.exception("Failed!")
        raise
def _purgeStaleDeletionRecords(limit):
    """Delete stale rows from the schema.twitterDeletion table.

    A row is stale when its ``created_at`` is more than
    ``_DELETION_ROW_EXPIRY_DAYS`` days before the database's current timestamp.
    The tweet_uid's are selected first (at most ``limit`` of them) and then
    deleted within the same transaction; a count mismatch is logged as an
    error.

    :param limit: max records to purge per call
    :returns: a sequence of tweet_uid's of deleted schema.twitterDeletion rows
    """
    deletionTable = collectorsdb.schema.twitterDeletion

    expiryInterval = sqlalchemy.text(
        "INTERVAL %i DAY" % (_DELETION_ROW_EXPIRY_DAYS,))
    staleBefore = sqlalchemy.func.date_sub(
        sqlalchemy.func.current_timestamp(), expiryInterval)

    # NOTE: we first query the row id's to delete, so we can return them for
    # accountability and debugging
    rowsToDeleteSel = sqlalchemy.select([deletionTable.c.tweet_uid])
    rowsToDeleteSel = rowsToDeleteSel.where(
        deletionTable.c.created_at < staleBefore)
    rowsToDeleteSel = rowsToDeleteSel.limit(limit)

    numDeleted = 0
    with collectorsdb.engineFactory().begin() as conn:
        fetchedRows = conn.execute(rowsToDeleteSel).fetchall()
        rowIdsToDelete = tuple(str(row[0]) for row in fetchedRows)

        if rowIdsToDelete:
            deleteStmt = deletionTable.delete()
            deleteStmt = deleteStmt.where(
                deletionTable.c.tweet_uid.in_(rowIdsToDelete))
            numDeleted = conn.execute(deleteStmt).rowcount

    numExpected = len(rowIdsToDelete)
    if numExpected != numDeleted:
        g_log.error(
            "Expected to delete %d tweet delition request rows, but "
            "actually deleted %d rows", numExpected, numDeleted)

    return rowIdsToDelete
def testEmittedSampleDatetime(self):
    """Establish, update and re-query the last emitted sample datetime."""
    key = "bogus-test-key"
    tracker = schema.emittedSampleTracker

    # Establish initial sample datetime
    initialTimestamp = metric_utils.establishLastEmittedSampleDatetime(key, 300)

    # Cleanup: remove the tracker row for the test key when the test finishes
    cleanupStmt = tracker.delete().where(tracker.c.key == key)
    self.addCleanup(collectorsdb.engineFactory().execute, cleanupStmt)

    self.assertIsInstance(initialTimestamp, datetime)

    # Update latest emitted sample datetime to now
    now = datetime.utcnow().replace(microsecond=0)
    metric_utils.updateLastEmittedSampleDatetime(key, now)

    # Verify that it was updated, and that it moved forward
    storedTimestamp = metric_utils.queryLastEmittedSampleDatetime(key)
    self.assertEqual(now, storedTimestamp)
    self.assertLess(initialTimestamp, storedTimestamp)
def main():
    """Reset the emitted status of a stock symbol after interactive confirmation.

    Parses the required ``--symbol`` argument, asks the user to type a randomly
    numbered "Yes-N" answer, and on confirmation calls deleteFromEmitted for
    schema.emittedStockPrice and schema.emittedStockVolume inside one
    transaction.

    :returns: 1 if the user did not confirm; None otherwise
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--symbol", required=True)
    args = parser.parse_args()

    # Randomized confirmation string guards against a reflexive "yes"
    expectedAnswer = "Yes-%s" % (random.randint(1, 30),)

    promptTemplate = (
        "Attention! You are about to reset the emitted status for the \"{}\""
        " stock symbol at {}.\n"
        "\n"
        "To back out immediately without making any changes, feel free to type "
        "anything but \"{}\" in the prompt below, and press return.\n"
        "\n"
        "Are you sure you want to continue? ")

    with collectorsdb.engineFactory().begin() as conn:
        prompt = promptTemplate.format(
            args.symbol, str(conn.engine), str(expectedAnswer))
        answer = raw_input(prompt)

        if answer.strip() != expectedAnswer:
            print("Aborting - Wise choice, my friend. Bye.")
            return 1

        for emittedTable in (schema.emittedStockPrice,
                             schema.emittedStockVolume):
            deleteFromEmitted(conn, emittedTable, args.symbol)
def testEmittedSampleDatetime(self):
    """Establish, update and re-query the last emitted sample datetime."""
    key = "bogus-test-key"
    tracker = schema.emittedSampleTracker

    # Establish initial sample datetime
    initialTimestamp = metric_utils.establishLastEmittedSampleDatetime(key, 300)

    # Cleanup: remove the tracker row for the test key when the test finishes
    cleanupStmt = tracker.delete().where(tracker.c.key == key)
    self.addCleanup(collectorsdb.engineFactory().execute, cleanupStmt)

    self.assertIsInstance(initialTimestamp, datetime)

    # Update latest emitted sample datetime to now
    now = datetime.utcnow().replace(microsecond=0)
    metric_utils.updateLastEmittedSampleDatetime(key, now)

    # Verify that it was updated, and that it moved forward
    storedTimestamp = metric_utils.queryLastEmittedSampleDatetime(key)
    self.assertEqual(now, storedTimestamp)
    self.assertLess(initialTimestamp, storedTimestamp)
def _queryNewsVolumes(aggStartDatetime, aggStopDatetime):
    """Count security headlines+releases per symbol discovered in a time window.

    Rows from schema.xigniteSecurityHeadline and schema.xigniteSecurityRelease
    whose ``discovered_at`` falls in the window are combined with UNION ALL and
    then grouped and counted by symbol.

    :param aggStartDatetime: inclusive start of aggregation interval as UTC
      datetime
    :param aggStopDatetime: non-inclusive upper bound of aggregation interval
      as UTC datetime
    :returns: a sparse sequence of two-tuples: (symbol, count); companies that
      have no detected news in the given aggregation period will be absent from
      the result.
    """
    headlines = schema.xigniteSecurityHeadline
    releases = schema.xigniteSecurityRelease

    headlineInWindow = (
        (headlines.c.discovered_at >= aggStartDatetime) &
        (headlines.c.discovered_at < aggStopDatetime))
    headlineSel = sql.select([headlines.c.symbol.label("symbol")])
    headlineSel = headlineSel.where(headlineInWindow)

    releaseInWindow = (
        (releases.c.discovered_at >= aggStartDatetime) &
        (releases.c.discovered_at < aggStopDatetime))
    releaseSel = sql.select([releases.c.symbol])
    releaseSel = releaseSel.where(releaseInWindow)

    allNewsUnion = sql.union_all(headlineSel, releaseSel)

    countCol = sql.func.count("symbol").label("sym_count")
    aggSel = sql.select(["symbol", countCol])
    aggSel = aggSel.select_from(allNewsUnion.alias("union_of_tables"))
    aggSel = aggSel.group_by("symbol")

    return collectorsdb.engineFactory().execute(aggSel).fetchall()
def _purgeTweetsSlatedForDeletion(limit):
    """Delete tweets whose uid is listed in the schema.twitterDeletion table.

    The uids to delete are selected first (at most ``limit`` of them) and then
    deleted within the same transaction; a mismatch between the number selected
    and the number actually deleted is logged as an error.

    :param limit: max records to purge per call
    :returns: a sequence of id's of deleted tweets
    """
    tweetsTable = collectorsdb.schema.twitterTweets
    deletionTable = collectorsdb.schema.twitterDeletion

    # NOTE: we first query the row id's to delete, so we can return them for
    # accountability and debugging
    slatedUidsSel = sqlalchemy.select([deletionTable.c.tweet_uid])
    rowsToDeleteSel = sqlalchemy.select([tweetsTable.c.uid])
    rowsToDeleteSel = rowsToDeleteSel.where(
        tweetsTable.c.uid.in_(slatedUidsSel))
    rowsToDeleteSel = rowsToDeleteSel.limit(limit)

    numDeleted = 0
    with collectorsdb.engineFactory().begin() as conn:
        fetchedRows = conn.execute(rowsToDeleteSel).fetchall()
        rowIdsToDelete = tuple(str(row[0]) for row in fetchedRows)

        if rowIdsToDelete:
            deleteStmt = tweetsTable.delete()
            deleteStmt = deleteStmt.where(
                tweetsTable.c.uid.in_(rowIdsToDelete))
            numDeleted = conn.execute(deleteStmt).rowcount

    numExpected = len(rowIdsToDelete)
    if numExpected != numDeleted:
        g_log.error(
            "Expected to delete %d tweets, but actually deleted %d tweets",
            numExpected, numDeleted)

    return rowIdsToDelete
def testPurgeOldTweets(self):
    """purgeOldTweets() removes only tweets older than the threshold.

    Inserts two rows older than the threshold and three younger ones into a
    temporary repository, purges, and checks that exactly the young rows
    remain.
    """
    gcThresholdDays = 90
    now = datetime.utcnow()

    def _tweetRow(ageDays):
        """Build a twitterTweets row created ``ageDays`` days before now"""
        return dict(uid=uuid.uuid1().hex,
                    created_at=now - timedelta(days=ageDays),
                    retweet=False,
                    lang="en-us")

    oldRows = [
        _tweetRow(gcThresholdDays + 1),
        _tweetRow(gcThresholdDays + 2),
    ]

    youngRows = [
        _tweetRow(0),
        _tweetRow(gcThresholdDays - 1),
        _tweetRow(gcThresholdDays - 2),
    ]

    allRows = oldRows + youngRows

    # Patch collectorsdb config to use a temporary database
    with collectorsdb_test_utils.ManagedTempRepository("purgetweets"):
        engine = collectorsdb.engineFactory()

        insertResult = engine.execute(
            schema.twitterTweets.insert(),  # pylint: disable=E1120
            allRows)
        self.assertEqual(insertResult.rowcount, len(allRows))

        # Execute
        numDeleted = purge_old_tweets.purgeOldTweets(gcThresholdDays)

        # Verify
        self.assertEqual(numDeleted, len(oldRows))

        # Verify that only the old tweets got purged
        remainingSel = sql.select([schema.twitterTweets.c.uid])
        remainingRows = engine.execute(remainingSel).fetchall()

        self.assertEqual(len(remainingRows), len(youngRows))

        expectedUids = [row["uid"] for row in youngRows]
        remainingUids = [row.uid for row in remainingRows]  # pylint: disable=E1101
        self.assertItemsEqual(expectedUids, remainingUids)
def testTransientErrorRetryDecorator(self):
    """retryOnTransientErrors retries until the database is reachable again.

    The database is reached through a local proxy on port 6033. The decorated
    helper kills the proxy on its first attempt, restarts it on the second,
    and is expected to eventually return the result of "select 2".
    """
    # Setup proxy. We'll patch config later, so we need to cache the values
    # so that the original proxy may be restarted with the original params
    config = collectorsdb.CollectorsDbConfig()
    originalHost = config.get("repository", "host")
    originalPort = config.getint("repository", "port")

    def _startProxy():
        """Start a proxy to the original host/port and advance it once"""
        p = startProxy(originalHost, originalPort, 6033)
        p.next()
        return p

    proxy = _startProxy()
    self.addCleanup(proxy.send, "kill")

    proxyOverrides = (("repository", "host", "127.0.0.1"),
                      ("repository", "port", "6033"))

    # Patch collectorsdb config with local proxy
    with ConfigAttributePatch(config.CONFIG_NAME,
                              config.baseConfigDir,
                              proxyOverrides):
        # Force refresh of engine singleton
        collectorsdb.resetEngineSingleton()
        engine = collectorsdb.engineFactory()

        # First, make sure valid query returns expected results
        retryingExecute = collectorsdb.retryOnTransientErrors(engine.execute)
        res = retryingExecute("select 1")
        self.assertEqual(res.scalar(), 1)

        # One entry is appended per completed setup step, so the length tells
        # the helper which attempt it is on across retries
        attempts = []

        @collectorsdb.retryOnTransientErrors
        def _killProxyTryRestartProxyAndTryAgain():
            if not attempts:
                # Kill the proxy on first attempt
                proxy.send("kill")
                proxy.next()
                try:
                    engine.execute("select 1")
                    self.fail("Proxy did not terminate as expected...")
                except sqlalchemy.exc.OperationalError:
                    pass
                attempts.append(None)
            elif len(attempts) == 1:
                # Restore proxy in second attempt
                newProxy = _startProxy()
                self.addCleanup(newProxy.send, "kill")
                attempts.append(None)

            return engine.execute("select 2")

        # Try again w/ retry decorator
        result = _killProxyTryRestartProxyAndTryAgain()

        # Verify that the expected value is eventually returned
        self.assertEqual(result.scalar(), 2)
def testTransientErrorRetryDecorator(self):
    """retryOnTransientErrors retries until the database is reachable again.

    The database is reached through a local proxy on port 6033. The decorated
    helper kills the proxy on its first attempt, restarts it on the second,
    and is expected to eventually return the result of "select 2".
    """
    # Setup proxy. We'll patch config later, so we need to cache the values
    # so that the original proxy may be restarted with the original params
    config = collectorsdb.CollectorsDbConfig()
    originalHost = config.get("repository", "host")
    originalPort = config.getint("repository", "port")

    def _startProxy():
        """Start a proxy to the original host/port and advance it once"""
        p = startProxy(originalHost, originalPort, 6033)
        p.next()
        return p

    proxy = _startProxy()
    self.addCleanup(proxy.send, "kill")

    proxyOverrides = (("repository", "host", "127.0.0.1"),
                      ("repository", "port", "6033"))

    # Patch collectorsdb config with local proxy
    with ConfigAttributePatch(config.CONFIG_NAME,
                              config.baseConfigDir,
                              proxyOverrides):
        # Force refresh of engine singleton
        collectorsdb.resetEngineSingleton()
        engine = collectorsdb.engineFactory()

        # First, make sure valid query returns expected results
        retryingExecute = collectorsdb.retryOnTransientErrors(engine.execute)
        res = retryingExecute("select 1")
        self.assertEqual(res.scalar(), 1)

        # One entry is appended per completed setup step, so the length tells
        # the helper which attempt it is on across retries
        attempts = []

        @collectorsdb.retryOnTransientErrors
        def _killProxyTryRestartProxyAndTryAgain():
            if not attempts:
                # Kill the proxy on first attempt
                proxy.send("kill")
                proxy.next()
                try:
                    engine.execute("select 1")
                    self.fail("Proxy did not terminate as expected...")
                except sqlalchemy.exc.OperationalError:
                    pass
                attempts.append(None)
            elif len(attempts) == 1:
                # Restore proxy in second attempt
                newProxy = _startProxy()
                self.addCleanup(newProxy.send, "kill")
                attempts.append(None)

            return engine.execute("select 2")

        # Try again w/ retry decorator
        result = _killProxyTryRestartProxyAndTryAgain()

        # Verify that the expected value is eventually returned
        self.assertEqual(result.scalar(), 2)
def _clearUnknownSymbols():
    """Remove all rows from the company_symbol_failures table.

    Logs the number of deleted rows when at least one row was removed.
    """
    failuresTable = schema.companySymbolFailures

    deleteStmt = failuresTable.delete()  # pylint: disable=E1120
    numDeleted = collectorsdb.engineFactory().execute(deleteStmt).rowcount

    if numDeleted:
        g_log.info("Deleted %s rows from %s table", numDeleted, failuresTable)
def queryLastEmittedNonMetricSequence(key):
    """Look up the ``last_seq`` value stored for ``key``.

    :param str key: caller's key in schema.emittedNonMetricTracker
    :returns: last emitted sequence number for non-metric source; None if one
      hasn't been saved yet.
    :rtype: int if not None
    """
    tracker = schema.emittedNonMetricTracker

    sel = sql.select([tracker.c.last_seq])
    sel = sel.where(tracker.c.key == key)

    return collectorsdb.engineFactory().execute(sel).scalar()
def queryLastEmittedSampleDatetime(key):
    """Look up the ``sample_ts`` value stored for ``key``.

    :param str key: caller's key in schema.emittedSampleTracker
    :returns: UTC timestamp of the last successfully-emitted sample batch; None
      if one hasn't been set up yet; see establishLastEmittedSampleDatetime
    :rtype: datetime.datetime if not None
    """
    tracker = schema.emittedSampleTracker

    sel = sql.select([tracker.c.sample_ts])
    sel = sel.where(tracker.c.key == key)

    return collectorsdb.engineFactory().execute(sel).scalar()
def securityExists(symbol):
    """Report whether schema.xigniteSecurity has a row for ``symbol``.

    When a value is found it is also asserted (via the enclosing test's
    ``self``) to equal ``symbol``.

    :returns: True if a matching symbol was found; False otherwise
    """
    symbolCol = schema.xigniteSecurity.c.symbol
    sel = sql.select([symbolCol]).where(symbolCol == symbol)

    foundSymbol = collectorsdb.engineFactory().execute(sel).scalar()
    if foundSymbol is None:
        return False

    self.assertEqual(foundSymbol, symbol)
    return True
def queryLastEmittedNonMetricSequence(key):
    """Look up the ``last_seq`` value stored for ``key``.

    :param str key: caller's key in schema.emittedNonMetricTracker
    :returns: last emitted sequence number for non-metric source; None if one
      hasn't been saved yet.
    :rtype: int if not None
    """
    tracker = schema.emittedNonMetricTracker

    sel = sql.select([tracker.c.last_seq])
    sel = sel.where(tracker.c.key == key)

    return collectorsdb.engineFactory().execute(sel).scalar()
def queryLastEmittedSampleDatetime(key):
    """Look up the ``sample_ts`` value stored for ``key``.

    :param str key: caller's key in schema.emittedSampleTracker
    :returns: UTC timestamp of the last successfully-emitted sample batch; None
      if one hasn't been set up yet; see establishLastEmittedSampleDatetime
    :rtype: datetime.datetime if not None
    """
    tracker = schema.emittedSampleTracker

    sel = sql.select([tracker.c.sample_ts])
    sel = sel.where(tracker.c.key == key)

    return collectorsdb.engineFactory().execute(sel).scalar()
def _queryCachedCompanySymbols():
    """Get the cached security symbols from the xignite_security table

    :returns: A sequence of stock symbols from the xignite_security table
    :rtype: sequence
    """
    engine = collectorsdb.engineFactory()

    symbolSel = sql.select([schema.xigniteSecurity.c.symbol])
    fetchedRows = engine.execute(symbolSel).fetchall()

    return tuple(row.symbol for row in fetchedRows)
def _clearUnknownSymbols():
    """Remove all rows from the company_symbol_failures table.

    Logs the number of deleted rows when at least one row was removed.
    """
    failuresTable = schema.companySymbolFailures

    deleteStmt = failuresTable.delete()  # pylint: disable=E1120
    numDeleted = collectorsdb.engineFactory().execute(deleteStmt).rowcount

    if numDeleted:
        g_log.info("Deleted %s rows from %s table", numDeleted, failuresTable)
def _deleteScreenNameFailures():
    """Clear rows from the twitter_handle_failures table.

    Logs the number of deleted rows when at least one row was removed.
    """
    failuresTable = collectorsdb.schema.twitterHandleFailures

    deleteStmt = failuresTable.delete()  # pylint: disable=E1120
    numDeleted = collectorsdb.engineFactory().execute(deleteStmt).rowcount

    if numDeleted:
        g_log.info("Deleted %s rows from %s table", numDeleted, failuresTable)
def securityExists(symbol):
    """Report whether schema.xigniteSecurity has a row for ``symbol``.

    When a value is found it is also asserted (via the enclosing test's
    ``self``) to equal ``symbol``.

    :returns: True if a matching symbol was found; False otherwise
    """
    symbolCol = schema.xigniteSecurity.c.symbol
    sel = sql.select([symbolCol]).where(symbolCol == symbol)

    foundSymbol = collectorsdb.engineFactory().execute(sel).scalar()
    if foundSymbol is None:
        return False

    self.assertEqual(foundSymbol, symbol)
    return True
def _deleteScreenNameFailures():
    """Clear rows from the twitter_handle_failures table.

    Logs the number of deleted rows when at least one row was removed.
    """
    failuresTable = collectorsdb.schema.twitterHandleFailures

    deleteStmt = failuresTable.delete()  # pylint: disable=E1120
    numDeleted = collectorsdb.engineFactory().execute(deleteStmt).rowcount

    if numDeleted:
        g_log.info("Deleted %s rows from %s table", numDeleted, failuresTable)
def _unknownSymbolReported(symbol):
    """Check if a specific company symbol already exists in the
    company_symbol_failures table.

    :param str symbol: symbol of the company's security (e.g., "AAPL")
    :returns: True, if symbol is already in the table. False, otherwise
    :rtype: bool
    """
    failuresTable = schema.companySymbolFailures

    sel = failuresTable.select()
    sel = sel.where(failuresTable.c.symbol == symbol)

    matchingRows = collectorsdb.engineFactory().execute(sel).fetchall()
    return bool(matchingRows)
def _resymbolStockMetrics(oldSymbol, newSymbol):
    """Resymbol stock metrics

    Within one transaction: deletes the emitted stock price/volume rows of the
    old symbol, renames the xignite security row to the new symbol (ignoring
    integrity errors), and deletes any leftover security row for the old
    symbol. Then forwards the metric data of the new symbol via
    xignite_stock_agent.transmitMetricData.

    :param str oldSymbol: old stock symbol, upper case
    :param str newSymbol: new stock symbol, upper case
    """
    g_log.info("Renaming stock metrics: oldSymbol=%s, newSymbol=%s",
               oldSymbol, newSymbol)

    sqlEngine = collectorsdb.engineFactory()

    priceTable = schema.emittedStockPrice
    volumeTable = schema.emittedStockVolume
    securityTable = schema.xigniteSecurity

    with sqlEngine.begin() as conn:
        # NOTE: the foreign key cascade-on-update relationship between
        # emitted_stock_price, emitted_stock_volume, xignite_security_bars, and
        # the xignite_security tables causes the symbol to be automatically
        # updated or the corresponding rows to be deleted in the former tables
        # when the symbol in xignite_security table is updated or deleted.

        # Delete emitted stock price rows for old symbol
        deletePrices = priceTable.delete()  # pylint: disable=E1120
        deletePrices = deletePrices.where(priceTable.c.symbol == oldSymbol)
        conn.execute(deletePrices)

        # Delete emitted stock volume rows for old symbol
        deleteVolumes = volumeTable.delete()  # pylint: disable=E1120
        deleteVolumes = deleteVolumes.where(volumeTable.c.symbol == oldSymbol)
        conn.execute(deleteVolumes)

        # Re-symbol xignite security row associated with the old symbol
        #
        # NOTE: we use IGNORE to ignore integrity errors (most likely
        # duplicate), because stock agent might insert a security row for the
        # new symbol before we do.
        renameSecurity = securityTable.update()  # pylint: disable=E1120
        renameSecurity = renameSecurity.prefix_with("IGNORE", dialect="mysql")
        renameSecurity = renameSecurity.where(
            securityTable.c.symbol == oldSymbol)
        renameSecurity = renameSecurity.values(symbol=newSymbol)
        conn.execute(renameSecurity)

        # Delete old xignite security row just in case the rename aborted due
        # to integrity error
        deleteOldSecurity = securityTable.delete()  # pylint: disable=E1120
        deleteOldSecurity = deleteOldSecurity.where(
            securityTable.c.symbol == oldSymbol)
        conn.execute(deleteOldSecurity)

    # Forward stock metric data samples to Taurus Engine
    g_log.info(
        "Forwarding new stock metric data samples for symbol=%s to Taurus "
        "engine...", newSymbol)

    newSymbolSpecs = [
        spec for spec in xignite_stock_agent.loadMetricSpecs()
        if spec.symbol == newSymbol
    ]
    xignite_stock_agent.transmitMetricData(
        metricSpecs=newSymbolSpecs,
        symbol=newSymbol,
        engine=sqlEngine)
def _unknownSymbolReported(symbol):
    """Check if a specific company symbol already exists in the
    company_symbol_failures table.

    :param str symbol: symbol of the company's security (e.g., "AAPL")
    :returns: True, if symbol is already in the table. False, otherwise
    :rtype: bool
    """
    failuresTable = schema.companySymbolFailures

    sel = failuresTable.select()
    sel = sel.where(failuresTable.c.symbol == symbol)

    matchingRows = collectorsdb.engineFactory().execute(sel).fetchall()
    return bool(matchingRows)
def testEngineFactorySingletonPattern(self, sqlalchemyMock):
    """engineFactory() reuses one engine per pid and asserts on pid change.

    :param sqlalchemyMock: mock standing in for the sqlalchemy module; its
      ``create_engine`` hands out a single spec'd engine mock
    """
    # Explicitly spec out sqlalchemy.create_engine()
    firstCall = Mock(spec_set=sqlalchemy.engine.base.Engine)
    sqlalchemyMock.create_engine.side_effect = [firstCall]

    # The first call creates the engine
    initialEngine = collectorsdb.engineFactory()
    self.assertIs(initialEngine, firstCall)

    # The second call must return the very same engine without creating another
    repeatEngine = collectorsdb.engineFactory()
    self.assertIs(repeatEngine, firstCall)
    self.assertEqual(sqlalchemyMock.create_engine.call_count, 1)

    # Pretend to be a different process by reporting a different pid; expect
    # engineFactory() to raise an assertion error
    foreignPid = collectorsdb._EngineSingleton._pid + 1
    getpidPatch = patch("taurus.metric_collectors.collectorsdb.os.getpid",
                        return_value=foreignPid,
                        autospec=True)
    with getpidPatch:
        with self.assertRaises(AssertionError):
            collectorsdb.engineFactory()
def queryEndDates():
    """ Build a mapping from each symbol in srcSchema to the maximum
    local_pub_date stored for that symbol.

    NOTE(review): srcSchema is not defined in this function; it is presumably
    supplied by the enclosing scope - confirm.

    :returns: dict that maps symbol to its max local_pub_date
    :rtype: dict
    """
    symbolColumn = srcSchema.c.symbol
    latestPubDate = sql.func.max(srcSchema.c.local_pub_date)

    query = sql.select([symbolColumn, latestPubDate]).group_by(symbolColumn)
    rows = collectorsdb.engineFactory().execute(query)

    # Column 0 is the symbol, column 1 the aggregated max publication date
    endDateMap = {row[0]: row[1] for row in rows}

    g_log.debug("%s endDateMap=%s", srcSchema, endDateMap)
    return endDateMap
def testEngineFactorySingletonPattern(self, sqlalchemyMock):
    """ engineFactory() must reuse one engine while the pid is unchanged and,
    once os.getpid() reports a different pid, dispose of that engine and
    return a newly created one.
    """
    # Explicitly spec out sqlalchemy.create_engine()
    engineSpec = sqlalchemy.engine.base.Engine
    firstCall = Mock(spec_set=engineSpec)
    secondCall = Mock(spec_set=engineSpec)
    sqlalchemyMock.create_engine.side_effect = iter([firstCall, secondCall])

    # Initial call creates the engine
    engine = collectorsdb.engineFactory()
    self.assertIs(engine, firstCall)

    # Repeated call in the same process: same engine, no additional creation
    engine2 = collectorsdb.engineFactory()
    self.assertIs(engine2, firstCall)
    self.assertEqual(sqlalchemyMock.create_engine.call_count, 1)

    # Call collectorsdb.engineFactory() under a different pid and expect a
    # new instance
    with patch("taurus.metric_collectors.collectorsdb.os",
               autospec=True) as osMock:
        osMock.getpid.return_value = collectorsdb._EngineSingleton._pid + 1
        engine3 = collectorsdb.engineFactory()

    # The engine of the "previous process" must have been disposed of
    self.assertTrue(engine.dispose.called)
    self.assertIs(engine3, secondCall)
def testEmittedNonMetricSequence(self):
    """ A sequence saved via updateLastEmittedNonMetricSequence() must be
    returned by queryLastEmittedNonMetricSequence() for the same key.
    """
    key = "bogus-test-key"

    metric_utils.updateLastEmittedNonMetricSequence(key, 1)

    # Cleanup: remove the tracker row of the test key when the test finishes
    trackerTable = schema.emittedNonMetricTracker
    removeTestRow = trackerTable.delete().where(trackerTable.c.key == key)
    self.addCleanup(collectorsdb.engineFactory().execute, removeTestRow)

    self.assertEqual(1, metric_utils.queryLastEmittedNonMetricSequence(key))
def queryEndDates():
    """ Query, per symbol in srcSchema, the maximum local_pub_date.

    NOTE(review): srcSchema is not defined in this function; it is presumably
    supplied by the enclosing scope - confirm.

    :returns: dict with symbol as key and the symbol's max local_pub_date as
      value
    :rtype: dict
    """
    selectedColumns = [srcSchema.c.symbol,
                       sql.func.max(srcSchema.c.local_pub_date)]
    perSymbolQuery = sql.select(selectedColumns).group_by(srcSchema.c.symbol)

    resultRows = collectorsdb.engineFactory().execute(perSymbolQuery)

    # Each row holds the symbol at index 0 and its max date at index 1
    endDateMap = {resultRow[0]: resultRow[1] for resultRow in resultRows}

    g_log.debug("%s endDateMap=%s", srcSchema, endDateMap)
    return endDateMap
def _screenNameFailureReported(screenName):
    """ Check if a specific twitter handle already exists in the
    twitterHandleFailures table of collectorsdb.schema.

    :param screenName: twitter handle
    :type screenName: string
    :returns: True, if twitter handle is already in the table. False, otherwise
    :rtype: bool
    """
    table = collectorsdb.schema.twitterHandleFailures

    sel = table.select().where(table.c.handle == screenName)

    # NOTE: don't rely on ResultProxy.rowcount here: it is only documented as
    # meaningful for UPDATE/DELETE, and its value for a SELECT depends on the
    # DBAPI driver. first() fetches at most one row and then closes the result,
    # which also releases the underlying connection right away.
    firstMatch = collectorsdb.engineFactory().execute(sel).first()

    return firstMatch is not None
def _screenNameFailureReported(screenName):
    """ Check if a specific twitter handle already exists in the
    twitterHandleFailures table of collectorsdb.schema.

    :param screenName: twitter handle
    :type screenName: string
    :returns: True, if twitter handle is already in the table. False, otherwise
    :rtype: bool
    """
    table = collectorsdb.schema.twitterHandleFailures

    sel = table.select().where(table.c.handle == screenName)

    # NOTE: ResultProxy.rowcount is only documented as meaningful for
    # UPDATE/DELETE; for a SELECT its value depends on the DBAPI driver, and
    # reading it leaves the result (and its connection) open. first() fetches
    # at most one row and closes the result unconditionally.
    result = collectorsdb.engineFactory().execute(sel)

    return result.first() is not None
def testEmittedNonMetricSequence(self):
    """ Round-trip check: the sequence stored by
    updateLastEmittedNonMetricSequence() is what
    queryLastEmittedNonMetricSequence() reports for that key.
    """
    key = "bogus-test-key"
    expectedSeq = 1

    metric_utils.updateLastEmittedNonMetricSequence(key, expectedSeq)

    # Cleanup: schedule deletion of the tracker row created for the test key
    tracker = schema.emittedNonMetricTracker
    self.addCleanup(collectorsdb.engineFactory().execute,
                    tracker.delete().where(tracker.c.key == key))

    lastEmittedSeq = metric_utils.queryLastEmittedNonMetricSequence(key)
    self.assertEqual(expectedSeq, lastEmittedSeq)
def _queryExistingSecurityNewsRows(cls, table, symbol, startDate, endDate):
    """ Query the given headline or release table for rows matching the given
    ticker symbol within the given date range

    :param sqlalchemy.Table table: table to query
    :param symbol: symbol of the security (e.g., stock ticker symbol)
    :param datetime.date startDate: UTC start date for the operation; inclusive
    :param datetime.date endDate: UTC end date for the operation; inclusive
    :returns: (possibly empty) sequence of matching
      sqlalchemy.engine.RowProxy objects with the following fields:
      local_pub_date, url, source
    """
    # Restrict the result to the requested security; without this condition
    # the query would return rows of every symbol in the date range.
    # NOTE(review): assumes the news tables expose a `symbol` column - confirm
    # against the schema.
    sel = sql.select(
        [table.c.local_pub_date, table.c.url, table.c.source]
    ).where(
        (table.c.symbol == symbol) &
        (table.c.local_pub_date >= startDate) &
        (table.c.local_pub_date <= endDate))

    return collectorsdb.engineFactory().execute(sel).fetchall()
def _queryExistingSecurityNewsRows(cls, table, symbol, startDate, endDate):
    """ Query the given headline or release table for rows matching the given
    ticker symbol within the given date range

    :param sqlalchemy.Table table: table to query
    :param symbol: symbol of the security (e.g., stock ticker symbol)
    :param datetime.date startDate: UTC start date for the operation; inclusive
    :param datetime.date endDate: UTC end date for the operation; inclusive
    :returns: (possibly empty) sequence of matching
      sqlalchemy.engine.RowProxy objects with the following fields:
      local_pub_date, url, source
    """
    # The symbol condition keeps rows of other securities out of the result,
    # as promised by the docstring; the date bounds are both inclusive.
    # NOTE(review): assumes the news tables expose a `symbol` column - confirm
    # against the schema.
    matchesSymbol = table.c.symbol == symbol
    withinDateRange = ((table.c.local_pub_date >= startDate) &
                       (table.c.local_pub_date <= endDate))

    sel = sql.select(
        [table.c.local_pub_date, table.c.url, table.c.source]
    ).where(matchesSymbol & withinDateRange)

    return collectorsdb.engineFactory().execute(sel).fetchall()
def _saveSecurityNews(self, headlineRows, xigniteSecurity):
    """ Insert security news rows into the table referenced by the
    _NEWS_SCHEMA member variable.

    If the insert fails with an integrity error, the security row for
    xigniteSecurity is inserted first and the news insert is attempted once
    more.

    :param headlineRows: rows of field values for target security news table
    :type headlineRows: sequence of dicts
    :param dict xigniteSecurity: Security info from xignite API results (e.g.,
      global security news, security bars, etc.)
    :returns: The count of new news rows that were saved; 0 if headlineRows is
      empty or if running in dry-run mode.
    """
    destSchema = self._NEWS_SCHEMA

    # Nothing to save
    if not headlineRows:
        return 0

    # Dry run: report what would have been saved, but don't touch the database
    if self.dryRun:
        g_log.info("%r.process(dryRun=True): security=%s, news=%s", self,
                   xigniteSecurity, headlineRows)
        return 0

    engine = collectorsdb.engineFactory()

    # IGNORE makes mysql skip rows that would otherwise fail the insert
    newsIns = destSchema.insert().prefix_with("IGNORE", dialect="mysql")

    @collectorsdb.retryOnTransientErrors
    def saveNews():
        # Save headlines in a transaction of their own
        with engine.begin() as conn:
            return conn.execute(newsIns, headlineRows).rowcount

    try:
        savedCount = saveNews()
    except sql.exc.IntegrityError:
        # Most likely foreign key constraint violation against the
        # xignite_security table
        g_log.info("Inserting security row for symbol=%s",
                   xigniteSecurity["Symbol"])
        xignite_agent_utils.insertSecurity(engine, xigniteSecurity)

        # Re-insert news after resolving IntegrityError
        savedCount = saveNews()

    return savedCount
def addSecurity(symbol):
    """ Insert a placeholder security row for the given symbol, after
    registering _deleteSecurity as cleanup for it, and assert that the row can
    be found afterwards.

    :param symbol: symbol of the security to insert
    """
    # Register cleanup before inserting
    self.addCleanup(_deleteSecurity, symbol)

    engine = collectorsdb.engineFactory()

    # Only Symbol and Name depend on the argument; the rest are placeholders
    placeholderSecurity = {
        "Symbol": symbol,
        "CIK": "CIK",
        "CUSIP": "CUSIP",
        "ISIN": "ISIN",
        "Valoren": "Valoren",
        "Name": "{sym} Inc.".format(sym=symbol),
        "Market": "Market",
        "MarketIdentificationCode": "mic1",
        "MostLiquidExchange": True,
        "CategoryOrIndustry": "CategoryOrIndustry"
    }

    xignite_agent_utils.insertSecurity(engine=engine,
                                       xigniteSecurity=placeholderSecurity)

    notFoundMsg = "inserted {symbol} not found".format(symbol=symbol)
    self.assertTrue(securityExists(symbol), notFoundMsg)
def _saveSecurityNews(self, headlineRows, xigniteSecurity):
    """ Save the given news rows in the destination table referenced by the
    _NEWS_SCHEMA member variable.

    On an integrity error, the security described by xigniteSecurity is
    inserted and the news insert is repeated one time.

    :param headlineRows: rows of field values for target security news table
    :type headlineRows: sequence of dicts
    :param dict xigniteSecurity: Security info from xignite API results (e.g.,
      global security news, security bars, etc.)
    :returns: The count of new news rows that were saved; 0 if there are no
      headline rows or if dryRun is set.
    """
    destSchema = self._NEWS_SCHEMA

    if not headlineRows:
        # No headlines, nothing to do
        return 0

    if self.dryRun:
        # Only log the would-be insert in dry-run mode
        g_log.info("%r.process(dryRun=True): security=%s, news=%s", self,
                   xigniteSecurity, headlineRows)
        return 0

    engine = collectorsdb.engineFactory()

    @collectorsdb.retryOnTransientErrors
    def saveNews():
        # Each attempt runs in a transaction of its own
        with engine.begin() as conn:
            insertIgnore = destSchema.insert().prefix_with("IGNORE",
                                                           dialect="mysql")
            insertResult = conn.execute(insertIgnore, headlineRows)
            return insertResult.rowcount

    try:
        return saveNews()
    except sql.exc.IntegrityError:
        # Most likely foreign key constraint violation against the
        # xignite_security table
        missingSymbol = xigniteSecurity["Symbol"]
        g_log.info("Inserting security row for symbol=%s", missingSymbol)
        xignite_agent_utils.insertSecurity(engine, xigniteSecurity)

    # Re-insert news after resolving IntegrityError
    return saveNews()