def _checkCompanySymbols(xigniteApiToken):
  """ Check if company security symbols are valid. Email notifications are
  sent for invalid symbols. Each time an invalid symbol is reported
  successfully, we add it to a table keeping track of invalid symbols that
  were already reported -- to avoid duplicate emails about the same symbol.

  :param xigniteApiToken: Xignite API Token
  :type xigniteApiToken: string
  """
  _selfCheck(xigniteApiToken)

  allSymbols = [sec[0] for sec in metric_utils.getAllMetricSecurities()]

  unknownSymbols = _validateSymbols(allSymbols, xigniteApiToken)

  if unknownSymbols:
    # Report unknown symbols
    g_log.error("Unknown=%s", unknownSymbols)

    for symbol in unknownSymbols:
      if not _unknownSymbolReported(symbol):
        subject = "%s: Company symbol=%s is unknown" % (__name__, symbol,)
        body = "%s: Company symbol=%s is unknown." % (__name__, symbol,)
        error_reporting.sendErrorEmail(subject=subject, body=body)

        # Flag it, so it won't be reported again
        _flagUnknownSymbolAsReported(symbol)
  else:
    # Remove all rows of company_symbol_failures table
    _clearUnknownSymbols()
    g_log.info("All company security symbols passed validation")
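# The bookkeeping helpers referenced above (_unknownSymbolReported,
# _flagUnknownSymbolAsReported, _clearUnknownSymbols) are not shown in this
# section. Below is a minimal, hypothetical sketch of the report-once pattern
# they imply, backed by the company_symbol_failures table mentioned in the
# comments. The table layout and the sqlite3 backing are assumptions for
# illustration only; the real code presumably goes through the project's
# collectorsdb layer instead.
import sqlite3

_conn = sqlite3.connect(":memory:")
_conn.execute("CREATE TABLE IF NOT EXISTS company_symbol_failures "
              "(symbol TEXT PRIMARY KEY)")


def _unknownSymbolReportedSketch(symbol):
  """Return True if an error email was already sent for this symbol."""
  cursor = _conn.execute(
    "SELECT 1 FROM company_symbol_failures WHERE symbol = ?", (symbol,))
  return cursor.fetchone() is not None


def _flagUnknownSymbolAsReportedSketch(symbol):
  """Record that the symbol was reported, suppressing duplicate emails."""
  _conn.execute("INSERT OR IGNORE INTO company_symbol_failures VALUES (?)",
                (symbol,))
  _conn.commit()


def _clearUnknownSymbolsSketch():
  """Remove all rows; called once every symbol validates again."""
  _conn.execute("DELETE FROM company_symbol_failures")
  _conn.commit()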
def testGetAllMetricSecurities(self):
  securities = metric_utils.getAllMetricSecurities()

  self.assertIsInstance(securities, tuple)
  self.assertTrue(securities)

  for security in securities:
    self.assertEqual(len(security), 2)
    symbol, exchange = security
    self.assertIsInstance(symbol, basestring)
    self.assertIsInstance(exchange, basestring)
    self.assertIn(exchange, ("NASDAQ", "NYSE"))
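# A minimal sketch of the contract this test pins down: a non-empty tuple of
# two-element (symbol, exchange) string pairs, with exchange one of
# NASDAQ/NYSE. The config shape below is hypothetical; the real loader is
# metric_utils.getAllMetricSecurities, which reads the metrics configuration.
def getAllMetricSecuritiesSketch(metricsConfig):
  """Derive (symbol, exchange) pairs from a hypothetical config mapping."""
  return tuple((symbol, companyConfig["stockExchange"])
               for symbol, companyConfig in sorted(metricsConfig.items()))


print(getAllMetricSecuritiesSketch(
  {"AAPL": {"stockExchange": "NASDAQ"},
   "IBM": {"stockExchange": "NYSE"}}))
# (('AAPL', 'NASDAQ'), ('IBM', 'NYSE'))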
def _purgeDeprecatedCompanies():
  """Purge cached data and Taurus Engine metrics/models corresponding to
  symbols that are in xignite_security table, but not in metrics
  configuration.
  """
  activeCompanySymbols = set(security[0] for security in
                             metric_utils.getAllMetricSecurities())

  deprecatedSymbols = set(_queryCachedCompanySymbols()) - activeCompanySymbols

  if deprecatedSymbols:
    delete_companies.deleteCompanies(
      tickerSymbols=deprecatedSymbols,
      engineServer=_TAURUS_HTM_SERVER,
      engineApiKey=_TAURUS_API_KEY,
      warnAboutDestructiveAction=False)
  else:
    g_log.info("There were no deprecated companies to remove")
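# _queryCachedCompanySymbols is not shown in this section. A plausible sketch,
# assuming it simply reads the cached symbols out of the xignite_security
# table via the same collectorsdb/SQLAlchemy layer used by deleteCompanies
# below; the import path is an assumption for illustration.
import sqlalchemy
from taurus.metric_collectors import collectorsdb  # path assumed


def _queryCachedCompanySymbolsSketch():
  """Return a tuple of all symbols currently cached in xignite_security."""
  engine = collectorsdb.engineFactory()
  with engine.connect() as conn:
    rows = conn.execute(
      sqlalchemy.select([collectorsdb.schema.xigniteSecurity.c.symbol]))
    return tuple(row.symbol for row in rows)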
def main():
  """
  NOTE: main also serves as entry point for "console script" generated by
  setup
  """
  logging_support.LoggingSupport.initService()

  options = _parseArgs()

  # See OP_MODE_ACTIVE, etc. in ApplicationConfig
  opMode = config.get("xignite_security_news_agent", "opmode")
  g_log.info("Starting: opMode=%s", opMode)

  aggSec = options.aggIntervalSec

  # Load metric specs from metric configuration
  metricSpecs = _loadNewsVolumeMetricSpecs()

  # Load securities from metric configuration
  securities = getAllMetricSecurities()

  g_log.info("Collecting headlines and releases for %s", securities)

  # Maps security symbols to the datetime.date of most recently-stored
  # headlines
  lastSecurityHeadlineEndDates = _querySecurityNewsEndDates(
    schema.xigniteSecurityHeadline)

  # Maps security symbols to the datetime.date of most recently-stored
  # releases
  lastSecurityReleaseEndDates = _querySecurityNewsEndDates(
    schema.xigniteSecurityRelease)

  # Establish/retrieve datetime of last successfully-emitted metric data batch
  lastEmittedAggTime = metric_utils.establishLastEmittedSampleDatetime(
    key=_EMITTED_NEWS_VOLUME_SAMPLE_TRACKER_KEY,
    aggSec=aggSec)

  # Calculate next aggregation start time using lastEmittedAggTime as base
  lastAggStart = date_time_utils.epochFromNaiveUTCDatetime(lastEmittedAggTime)
  nextAggEnd = lastAggStart + (
    int((time.time() - lastAggStart + aggSec - 1) / aggSec) * aggSec) + aggSec

  # Poll, store and emit samples
  pollingIntervalSec = aggSec / 2.0
  numPoolWorkers = max(_MIN_POOL_CONCURRENCY, multiprocessing.cpu_count())
  g_log.info("Entering main loop: pollingIntervalSec=%s; numPoolWorkers=%d",
             pollingIntervalSec, numPoolWorkers)

  pool = multiprocessing.Pool(processes=numPoolWorkers)
  try:
    while True:
      pollingIntervalEnd = time.time() + pollingIntervalSec

      # Retrieve all headlines and releases of interest
      headlineTasks = _generateTasks(securities,
                                     lastSecurityHeadlineEndDates,
                                     options.backfillDays,
                                     taskClass=_HistoricalHeadlinesTask,
                                     dryRun=options.dryRun)

      releaseTasks = _generateTasks(securities,
                                    lastSecurityReleaseEndDates,
                                    options.backfillDays,
                                    taskClass=_HistoricalReleasesTask,
                                    dryRun=options.dryRun)

      allTasks = itertools.chain(headlineTasks, releaseTasks)

      _processNewsCollectionTasks(pool=pool,
                                  tasksIter=allTasks,
                                  headlineEndDates=lastSecurityHeadlineEndDates,
                                  releaseEndDates=lastSecurityReleaseEndDates,
                                  options=options)

      # Aggregate and forward metric samples to htmengine's Metric Listener
      if time.time() >= nextAggEnd:
        if opMode == config.OP_MODE_ACTIVE and not options.dryRun:
          lastEmittedAggTime = _forwardNewsVolumeMetrics(
            metricSpecs=metricSpecs,
            lastEmittedAggTime=lastEmittedAggTime,
            stopDatetime=datetime.utcfromtimestamp(nextAggEnd),
            periodSec=aggSec,
            metricDestAddr=options.metricDestAddr)

        nextAggEnd += aggSec

      sleepSec = pollingIntervalEnd - time.time()
      if sleepSec > 0:
        g_log.info("Sleeping for %f seconds. zzzzzzzz...", sleepSec)
        time.sleep(sleepSec)
      elif sleepSec < 0:
        g_log.warning("Processing exceeded pollingInterval=%ss by overage=%ss",
                      pollingIntervalSec, -sleepSec)
  except KeyboardInterrupt:
    # Log with exception info to help debug deadlocks
    g_log.info("Observed KeyboardInterrupt", exc_info=True)
  finally:
    g_log.info("Closing multiprocessing.Pool")
    pool.close()

    g_log.info("Terminating multiprocessing.Pool")
    pool.terminate()

    g_log.info("Multiprocessing.Pool terminated")
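# A worked example of the nextAggEnd arithmetic in main() above, with made-up
# numbers. The expression ceiling-divides the time elapsed since lastAggStart
# to find the first aggregation boundary at or after "now", then adds one
# more period so nextAggEnd is that interval's end.
aggSec = 300              # 5-minute aggregation period (illustrative)
lastAggStart = 1000       # epoch of the last emitted aggregation start
now = 1750                # stand-in for time.time()

nextAggEnd = lastAggStart + (
  int((now - lastAggStart + aggSec - 1) / aggSec) * aggSec) + aggSec

# (1750 - 1000 + 299) // 300 == 3, so the boundary is 1000 + 3*300 == 1900
# (the first multiple of aggSec at or past "now"), and its end is 1900 + 300:
assert nextAggEnd == 2200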
def deleteCompanies(tickerSymbols,
                    engineServer,
                    engineApiKey,
                    warnAboutDestructiveAction=True,
                    warningTimeout=_DEFAULT_WARNING_PROMPT_TIMEOUT_SEC):
  """Delete companies from Taurus Collector and their metrics/models from
  Taurus Engine.

  :param sequence tickerSymbols: stock ticker symbols of companies to be
    deleted

  :param str engineServer: dns name or ip address of Taurus API server

  :param str engineApiKey: API Key of Taurus HTM Engine

  :param bool warnAboutDestructiveAction: whether to warn about destructive
    action; defaults to True.

  :param float warningTimeout: Timeout for the warning prompt; ignored if
    warnAboutDestructiveAction is False

  :raises WarningPromptTimeout: if warning prompt timed out
  :raises UserAbortedOperation: if user chose to abort the operation
  :raises FlusherMetricNotFound:
  """
  tickerSymbols = tuple(symbol.upper() for symbol in tickerSymbols)

  # Check for duplicate symbols
  repeatedSymbols = set(sym for sym in tickerSymbols
                        if tickerSymbols.count(sym) > 1)
  if repeatedSymbols:
    raise ValueError("{numRepeats} symbol(s) are present more than once in "
                     "tickerSymbols arg: {repeats}"
                     .format(numRepeats=len(repeatedSymbols),
                             repeats=repeatedSymbols))

  # Set will be handier going forward
  tickerSymbols = set(tickerSymbols)

  if warnAboutDestructiveAction:
    _warnAboutDestructiveAction(timeout=warningTimeout,
                                tickerSymbols=tickerSymbols,
                                engineServer=engineServer)

  # If any of the ticker symbols still appear in the collector's metrics
  # config, abort the operation as a precautionary measure.
  allSymbols = set(security[0].upper() for security in
                   metric_utils.getAllMetricSecurities())

  problemSymbols = tickerSymbols & allSymbols

  assert not problemSymbols, (
    "Can't delete - {numProblem} of the specified companies [{symbols}] are "
    "in active metrics configuration".format(numProblem=len(problemSymbols),
                                             symbols=problemSymbols))

  # First, we need to synchronize with Taurus Engine's metric data path. If
  # any of the data still in the pipeline is for any of the companies being
  # deleted, then the metrics may be re-created in the Engine after we delete
  # them. This is a yet-unresolved subtlety with custom metrics in htmengine.
  _flushTaurusEngineMetricDataPath(engineServer, engineApiKey)

  # NOTE: We must query custom metrics after flushing the metric data path,
  # since metrics may get created as a side-effect of processing metric data.
  allMetricsMap = {
    obj["name"]: obj
    for obj in metric_utils.getAllCustomMetrics(host=engineServer,
                                                apiKey=engineApiKey)
  }

  allMetricNames = allMetricsMap.keys()

  for symbolNum, symbol in enumerate(tickerSymbols, 1):
    # Delete corresponding metrics from Taurus Engine
    metricNamesToDelete = metric_utils.filterCompanyMetricNamesBySymbol(
      allMetricNames,
      symbol)
    if not metricNamesToDelete:
      g_log.info("No metrics to delete for symbol=%s (%d of %d)", symbol,
                 symbolNum, len(tickerSymbols))
      continue

    g_log.info("Deleting metrics and models for ticker symbol=%s from Taurus "
               "Engine=%s (%d of %d)", symbol, engineServer, symbolNum,
               len(tickerSymbols))

    for metricName in metricNamesToDelete:
      metric_utils.deleteMetric(host=engineServer,
                                apiKey=engineApiKey,
                                metricName=metricName)
      g_log.info("Deleted metric name=%s, uid=%s", metricName,
                 allMetricsMap[metricName]["uid"])

    # Delete the symbol from xignite_security table last; this cascades to
    # delete related rows in other tables via cascading delete relationship.
    #
    # NOTE: garbage collection from other tables not tied to xignite_security
    # symbols presently depends on aging of the rows (e.g., twitter tables).
    # After ENG-83, all company-specific rows from all tables will be cleaned
    # up and THIS NOTE SHOULD THEN BE REMOVED
    with collectorsdb.engineFactory().begin() as conn:
      numDeleted = (
        conn.execute(
          collectorsdb.schema.xigniteSecurity  # pylint: disable=E1120
          .delete()
          .where(collectorsdb.schema.xigniteSecurity.c.symbol == symbol))
        ).rowcount

      if numDeleted:
        g_log.info("Deleted row=%s from table=%s", symbol,
                   collectorsdb.schema.xigniteSecurity)
      else:
        g_log.warning("Couldn't delete security row=%s: not found in "
                      "table=%s", symbol, collectorsdb.schema.xigniteSecurity)
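# Hedged usage sketch for deleteCompanies as defined above: the symbols, host,
# and API key below are hypothetical placeholders. Passing
# warnAboutDestructiveAction=False skips the interactive prompt, as
# _purgeDeprecatedCompanies does above.
deleteCompanies(tickerSymbols=["FOO", "BAR"],
                engineServer="taurus.example.com",
                engineApiKey="hypothetical-api-key",
                warnAboutDestructiveAction=False)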