def _checkCompanySymbols(xigniteApiToken):
  """
  Check if company security symbols are valid.
  Email notifications are sent for invalid symbols.
  Each time an invalid symbol is reported successfully, we add it to a table
  keeping track of invalid symbols that were already reported -- to avoid
  duplicate emails about the same symbol.

  :param xigniteApiToken: Xignite API Token
  :type xigniteApiToken: string
  """
  _selfCheck(xigniteApiToken)

  allSymbols = [sec[0] for sec in metric_utils.getAllMetricSecurities()]

  unknownSymbols = _validateSymbols(allSymbols, xigniteApiToken)
  if unknownSymbols:
    # Report unknown symbol
    g_log.error("Unknown=%s", unknownSymbols)

    for symbol in unknownSymbols:
      if not _unknownSymbolReported(symbol):
        subject = "%s: Company symbol=%s is unknown" % (__name__, symbol,)
        body = "%s: Company symbol=%s is unknown." % (__name__, symbol,)
        error_reporting.sendErrorEmail(subject=subject, body=body)

        # Flag it, so it won't be reported again
        _flagUnknownSymbolAsReported(symbol)
  else:
    # Remove all rows of company_symbol_failures table
    _clearUnknownSymbols()
    g_log.info("All company security symbols passed validation")
  def testGetAllMetricSecurities(self):
    securities = metric_utils.getAllMetricSecurities()
    self.assertIsInstance(securities, tuple)
    self.assertTrue(securities)
    for security in securities:
      self.assertEqual(len(security), 2)
      symbol, exchange = security
      self.assertIsInstance(symbol, basestring)
      self.assertIsInstance(exchange, basestring)
      self.assertIn(exchange, ("NASDAQ", "NYSE"))
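The test above pins down the shape of getAllMetricSecurities()'s return value:
a non-empty tuple of (symbol, exchange) pairs. An illustrative value, with
assumed symbols rather than ones from the real metrics configuration:

securities = (("AAPL", "NASDAQ"), ("IBM", "NYSE"))  # illustrative only
for symbol, exchange in securities:
  assert exchange in ("NASDAQ", "NYSE")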
def _purgeDeprecatedCompanies():
  """Purge cached data and Taurus Engine metrics/models corresponding to symbols
  that are in xignite_security table, but not in metrics configuration.
  """
  activeCompanySymbols = set(security[0] for security in
                             metric_utils.getAllMetricSecurities())

  deprecatedSymbols = set(_queryCachedCompanySymbols()) - activeCompanySymbols

  if deprecatedSymbols:
    delete_companies.deleteCompanies(
      tickerSymbols=deprecatedSymbols,
      engineServer=_TAURUS_HTM_SERVER,
      engineApiKey=_TAURUS_API_KEY,
      warnAboutDestructiveAction=False)
  else:
    g_log.info("There were no deprecated companies to remove")
def main():
  """
  NOTE: main also serves as entry point for "console script" generated by setup
  """
  logging_support.LoggingSupport.initService()

  options = _parseArgs()

  # See OP_MODE_ACTIVE, etc. in ApplicationConfig
  opMode = config.get("xignite_security_news_agent", "opmode")
  g_log.info("Starting: opMode=%s", opMode)

  aggSec = options.aggIntervalSec

  # Load metric specs from metric configuration
  metricSpecs = _loadNewsVolumeMetricSpecs()

  # Load securities from metric configuration
  securities = getAllMetricSecurities()
  g_log.info("Collecting headlines and releases for %s", securities)

  # Map security symbols to the datetime.date of most recently-stored headlines
  lastSecurityHeadlineEndDates = _querySecurityNewsEndDates(
    schema.xigniteSecurityHeadline)

  # Map security symbols to the datetime.date of most recently-stored releases
  lastSecurityReleaseEndDates = _querySecurityNewsEndDates(
    schema.xigniteSecurityRelease)

  # Establish/retrieve datetime of last successfully-emitted metric data batch
  lastEmittedAggTime = metric_utils.establishLastEmittedSampleDatetime(
    key=_EMITTED_NEWS_VOLUME_SAMPLE_TRACKER_KEY,
    aggSec=aggSec)

  # Calculate next aggregation start time using lastEmittedAggTime as base
  lastAggStart = date_time_utils.epochFromNaiveUTCDatetime(lastEmittedAggTime)
  nextAggEnd = lastAggStart + (
    int((time.time() - lastAggStart + aggSec - 1) / aggSec) * aggSec) + aggSec
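  # The int((elapsed + aggSec - 1) / aggSec) expression is the ceiling-division
  # idiom: it rounds the elapsed time up to a whole number of aggSec periods.
  # Worked example with assumed numbers: lastAggStart=1000, aggSec=60, and
  # time.time()=1130 give int((130 + 59) / 60) = 3, so nextAggEnd =
  # 1000 + 3*60 + 60 = 1240, i.e., one full interval past the next aggregation
  # boundary (1180) at or after the current time.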

  # Poll, store and emit samples
  pollingIntervalSec = aggSec / 2.0
  numPoolWorkers = max(_MIN_POOL_CONCURRENCY, multiprocessing.cpu_count())
  g_log.info("Entering main loop: pollingIntervalSec=%s; numPoolWorkers=%d",
             pollingIntervalSec, numPoolWorkers)
  pool = multiprocessing.Pool(processes=numPoolWorkers)
  try:
    while True:
      pollingIntervalEnd = time.time() + pollingIntervalSec

      # Retrieve all headlines and releases of interest
      headlineTasks = _generateTasks(
        securities,
        lastSecurityHeadlineEndDates,
        options.backfillDays,
        taskClass=_HistoricalHeadlinesTask,
        dryRun=options.dryRun)

      releaseTasks = _generateTasks(
        securities,
        lastSecurityReleaseEndDates,
        options.backfillDays,
        taskClass=_HistoricalReleasesTask,
        dryRun=options.dryRun)

      allTasks = itertools.chain(headlineTasks, releaseTasks)

      _processNewsCollectionTasks(pool=pool,
                                  tasksIter=allTasks,
                                  headlineEndDates=lastSecurityHeadlineEndDates,
                                  releaseEndDates=lastSecurityReleaseEndDates,
                                  options=options)

      # Aggregate and forward metric samples to htmengine's Metric Listener
      if time.time() >= nextAggEnd:
        if opMode == config.OP_MODE_ACTIVE and not options.dryRun:
          lastEmittedAggTime = _forwardNewsVolumeMetrics(
            metricSpecs=metricSpecs,
            lastEmittedAggTime=lastEmittedAggTime,
            stopDatetime=datetime.utcfromtimestamp(nextAggEnd),
            periodSec=aggSec,
            metricDestAddr=options.metricDestAddr)

        nextAggEnd += aggSec

      sleepSec = pollingIntervalEnd - time.time()
      if sleepSec > 0:
        g_log.info("Sleeping for %f seconds. zzzzzzzz...", sleepSec)
        time.sleep(sleepSec)
      elif sleepSec < 0:
        g_log.warning("Processing exceeded pollingInterval=%ss by overage=%ss",
                      pollingIntervalSec, -sleepSec)
  except KeyboardInterrupt:
    # Log with exception info to help debug deadlocks
    g_log.info("Observed KeyboardInterrupt", exc_info=True)
  finally:
    g_log.info("Closing multiprocessing.Pool")
    pool.close()
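    # NOTE: close() stops new task submissions; the terminate() below then
    # stops the worker processes without waiting for outstanding work,
    # presumably so that shutdown can't hang on a wedged task.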

    g_log.info("Terminating multiprocessing.Pool")
    pool.terminate()
    g_log.info("Multiprocessing.Pool terminated")
def deleteCompanies(
    tickerSymbols,
    engineServer,
    engineApiKey,
    warnAboutDestructiveAction=True,
    warningTimeout=_DEFAULT_WARNING_PROMPT_TIMEOUT_SEC,
):
    """Delete companies from Taurus Collector and their metrics/models from
  Taurus Engine.

  :param sequence tickerSymbols: stock ticker symbols of companies to be
    deleted

  :param str engineServer: DNS name or IP address of the Taurus API server

  :param str engineApiKey: API Key of Taurus HTM Engine

  :param bool warnAboutDestructiveAction: whether to warn about destructive
    action; defaults to True.

  :param float warningTimeout: Timeout for the warning prompt; ignored if
    warnAboutDestructiveAction is False

  :raises WarningPromptTimeout: if warning prompt timed out
  :raises UserAbortedOperation: if user chose to abort the operation
  :raises FlusherMetricNotFound: if the metric data path flusher metric was
    not found
  """
    tickerSymbols = tuple(symbol.upper() for symbol in tickerSymbols)

    # Check for duplicate symbols
    repeatedSymbols = set(sym for sym in tickerSymbols if tickerSymbols.count(sym) > 1)
    if repeatedSymbols:
        raise ValueError(
            "{numRepeats} symbol(s) are present more than once in "
            "tickerSymbols arg: {repeats}".format(numRepeats=len(repeatedSymbols), repeats=repeatedSymbols)
        )

    # Set will be handier going forward
    tickerSymbols = set(tickerSymbols)

    if warnAboutDestructiveAction:
        _warnAboutDestructiveAction(timeout=warningTimeout, tickerSymbols=tickerSymbols, engineServer=engineServer)

    # If any of the ticker symbols still appear in the collector's metrics config,
    # abort the operation as a precautionary measure.
    allSymbols = set(security[0].upper() for security in metric_utils.getAllMetricSecurities())

    problemSymbols = tickerSymbols & allSymbols
    assert not problemSymbols, (
        "Can't delete - {numProblem} of the specified companies [{symbols}] are "
        "in active metrics configuration".format(numProblem=len(problemSymbols), symbols=problemSymbols)
    )

    # First, we need to synchronize with Taurus Engine's metric data path.
    # If any of the data still in the pipeline is for any of the companies being
    # deleted, then the metrics may be re-created in the Engine after we delete
    # them. This is an as-yet-unresolved subtlety with custom metrics in
    # htmengine.
    _flushTaurusEngineMetricDataPath(engineServer, engineApiKey)

    # NOTE: We must query custom metrics after flushing the metric data path,
    # since metrics may get created as a side-effect of processing metric data.
    allMetricsMap = {
        obj["name"]: obj for obj in metric_utils.getAllCustomMetrics(host=engineServer, apiKey=engineApiKey)
    }

    allMetricNames = allMetricsMap.keys()

    for symbolNum, symbol in enumerate(tickerSymbols, 1):
        # Delete corresponding metrics from Taurus Engine
        metricNamesToDelete = metric_utils.filterCompanyMetricNamesBySymbol(allMetricNames, symbol)
        if not metricNamesToDelete:
            g_log.info("No metrics to delete for symbol=%s (%d of %d)", symbol, symbolNum, len(tickerSymbols))
            continue

        g_log.info(
            "Deleting metrics and models for ticker symbol=%s from Taurus " "Engine=%s (%d of %d)",
            symbol,
            engineServer,
            symbolNum,
            len(tickerSymbols),
        )

        for metricName in metricNamesToDelete:
            metric_utils.deleteMetric(host=engineServer, apiKey=engineApiKey, metricName=metricName)
            g_log.info("Deleted metric name=%s, uid=%s", metricName, allMetricsMap[metricName]["uid"])

        # Delete the symbol from the xignite_security table last; this
        # cascades to related rows in other tables via a cascading-delete
        # relationship.
        #
        # NOTE: garbage collection from other tables not tied to
        # xignite_security symbols presently depends on aging of the rows
        # (e.g., twitter tables). After ENG-83, all company-specific rows from
        # all tables will be cleaned up and THIS NOTE SHOULD THEN BE REMOVED.
        with collectorsdb.engineFactory().begin() as conn:
            numDeleted = (
                conn.execute(
                    collectorsdb.schema.xigniteSecurity.delete().where(  # pylint: disable=E1120
                        collectorsdb.schema.xigniteSecurity.c.symbol == symbol
                    )
                )
            ).rowcount

            if numDeleted:
                g_log.info("Deleted row=%s from table=%s", symbol, collectorsdb.schema.xigniteSecurity)
            else:
                g_log.warning(
                    "Couldn't delete security row=%s: not found in table=%s",
                    symbol,
                    collectorsdb.schema.xigniteSecurity,
                )
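A minimal invocation sketch; the server address, API key, and ticker symbols
below are placeholders for illustration, not values from this project:

deleteCompanies(
    tickerSymbols=["YHOO", "DELL"],
    engineServer="taurus.example.com",
    engineApiKey="hypothetical-api-key",
    warnAboutDestructiveAction=True)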