Example #1
def main():
  try:
      logger = setupLog()
      configContext = setupConfig()
      logger.info("current configuration\n%s", str(configContext))
      conn = None
      try:
        testConfig(configContext)
        databaseDSN = "host=%(databaseHost)s dbname=%(databaseName)s user=%(databaseUserName)s password=%(databasePassword)s" % configContext
        # Be sure self.connection is closed before you quit!
        conn = psycopg2.connect(databaseDSN)
        cursor = conn.cursor()
        cursor.execute(all_tables_sql)
        tables = cursor.fetchall()
        for reports in tables:
          logger.info("Processing %s" % reports[0])
          plugins_reports = "plugins_%s" % reports[0]
          params = migrate_process_type_params(reports[0], plugins_reports)
          try:
            cursor.execute(migrate_process_type_sql % params)
            logger.info("%d rows updated" % cursor.rowcount)
            conn.commit()
          except psycopg2.ProgrammingError, x:
            logging.warn("Skipping %s as %s doesn't exist" % (reports[0], plugins_reports))
            conn.rollback()
        conn.close()
      except (psycopg2.OperationalError, AssertionError),x:
        lib_util.reportExceptionAndAbort(logger)
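The DSN string above is produced by %-formatting against the configuration mapping, so each %(key)s placeholder is filled from the matching config entry. A minimal sketch with made-up values:

configContext = {
    "databaseHost": "localhost",
    "databaseName": "breakpad",
    "databaseUserName": "breakpad_rw",
    "databasePassword": "secret",
}
databaseDSN = ("host=%(databaseHost)s dbname=%(databaseName)s "
               "user=%(databaseUserName)s password=%(databasePassword)s" % configContext)
# databaseDSN == "host=localhost dbname=breakpad user=breakpad_rw password=secret"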
Example #3
def getProductId(aProduct, aVersion, aCursor, logger):
  logger.debug("getProductId")
  if not aProduct or not aVersion:
    return None
  try:
    return psy.singleValueSql(aCursor, "select id from productdims where product = %s and version = %s", (aProduct, aVersion))
  except psy.SQLDidNotReturnSingleValue:
    lib_util.reportExceptionAndAbort(logger)
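getProductId leans on psy.singleValueSql, which, judging by its name and the exception it raises, runs a query and hands back the single value of a single row. A rough sketch of such a helper, assuming that behaviour (not the project's actual implementation):

class SQLDidNotReturnSingleValue(Exception):
    pass

def singleValueSql(cursor, sql, params=None):
    # Execute the query and insist on exactly one row with one column.
    cursor.execute(sql, params)
    rows = cursor.fetchall()
    if len(rows) != 1 or len(rows[0]) != 1:
        raise SQLDidNotReturnSingleValue("expected one value from: %s" % sql)
    return rows[0][0]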
Example #4
    def registration(self):
        """This function accomplishes the actual registration in the table
        inside Postgres.  There are four types of registrations, selected
        by the value of the configuration parameter 'processorId'.
            assume_new_identity - when processorId is 0.  The processor should
                                  just register in the table as a brand new
                                  processor.  A new id will be assigned to this
                                  processor.
            assume_specific_identity - when processorId is a non-zero integer.
                                       the processor should take over for a
                                       defunct processor with a specific ID.
            assume_any_identity - when the processorId is "auto".  The
                                  processor should look for any other
                                  registered processor that has died and take
                                  over for it.
            assume_identity_by_host - when the processorId is "host".
                                      the processor should look for another
                                      registered processor that has a name
                                      indicating that it came from the same
                                      host, and then take over for it.

        Each of the aforementioned registration methods is implemented by
        a function of the same name.  These are called via a dispatch table
        called 'dispatch_table'.  Since they are all called this way, they
        must all have the same parameters, hence a fat interface.  Not all
        parameters will be used by all methods."""
        self.logger.info("connecting to database")
        db_conn, db_cur = self.db_pool.connectionCursorPair()

        requested_id = self.requested_processor_id(self.config.processorId)
        hostname = self.os_module.uname()[1]
        self.processor_name = "%s_%d" % (hostname, self.os_module.getpid())
        threshold = self.sdb_module.singleValueSql(db_cur,
                                            self.NOW_SQL,
                                            (self.config.processorCheckInTime,)
                                            )
        dispatch_table = col.defaultdict(
            lambda: self.assume_specific_identity,
            {'auto': self.assume_any_identity,
             'host': self.assume_identity_by_host,
             'forcehost': self.force_assume_identity_by_host,
             0: self.assume_new_identity}
            )

        self.logger.info("registering with 'processors' table")
        try:
            self.processor_id = dispatch_table[requested_id](db_cur,
                                                            threshold,
                                                            hostname,
                                                            requested_id)
            db_conn.commit()
        except sdb.exceptions_eligible_for_retry:
            raise
        except Exception:
            db_conn.rollback()
            self.logger.critical('unable to complete registration')
            sutil.reportExceptionAndAbort(self.logger)
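The dispatch table described in the docstring works because collections.defaultdict takes a callable default: any key other than 'auto', 'host', 'forcehost', or 0 (i.e. a concrete processor id) falls through to assume_specific_identity. A toy sketch of the pattern with stand-in functions:

import collections as col

def assume_new_identity(*args):
    return "new"

def assume_any_identity(*args):
    return "any"

def assume_specific_identity(*args):
    return "specific"

dispatch_table = col.defaultdict(
    lambda: assume_specific_identity,
    {"auto": assume_any_identity, 0: assume_new_identity})

print(dispatch_table["auto"]())  # -> any
print(dispatch_table[0]())       # -> new
print(dispatch_table[42]())      # -> specific (unknown key uses the default factory)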
Example #5
 def testReportExceptionAndAbort(self):
   logger = TestingLogger()
   try:
     util.reportExceptionAndAbort(logger)
     assert(False)
   except SystemExit,e:
     assert(True)
     assert(5 == len(logger.levels))
     assert([logging.CRITICAL,logging.CRITICAL,logging.CRITICAL,logging.CRITICAL,logging.CRITICAL] == logger.levels)
     assert("cannot continue - quitting" == logger.buffer[4])
Example #6
def getProcessingWindow(configContext,tableName, productVersionRestriction,cursor,logger, **kwargs):
  """
  ProcessingWindow is a single time window over which to aggregate materialized view data.

  Returns (startWindow,deltaWindow,endWindow) using this heuristic:
  kwargs beats configContext which beats latest table row
  if two among startWindow, endWindow, deltaWindow in config or kwargs: they are used.
    if all three: assert startWindow + deltaWindow == endWindow
  Backward compatibility: if processingDay is present and windowXxx are not:
    startWindow = midnight of given day, deltaWindow = timedelta(days=1)
  else: try to read window_end and window_size from the given table
  if one is available from config/kwargs it beats the same (or calculated) one from the table
  On inconsistency or failure, logs the problem and aborts
  BEWARE: You can get an inconsistency by having one item in config and the other two in kwargs: BEWARE
  """
  config = {}
  config.update(configContext)
  config.update(kwargs)
  startWindow = config.get('startWindow')
  if type(startWindow) is str:
    startWindow = cm.dateTimeConverter(startWindow)
  deltaWindow = config.get('deltaWindow')
  if type(deltaWindow) is str:
    deltaWindow = cm.timeDeltaConverter(deltaWindow)
  endWindow = config.get('endWindow')
  if type(endWindow) is str:
    endWindow = cm.dateTimeConverter(endWindow)
  processingDay = config.get('processingDay')
  if type(processingDay) is str:
    processingDay = cm.dateTimeConverter(processingDay)
  try:
    if startWindow or deltaWindow or endWindow:
      if startWindow and endWindow and deltaWindow:
        assert startWindow + deltaWindow == endWindow,"inconsistent: %s + %s != %s"%(startWindow,deltaWindow,endWindow)
      elif startWindow and endWindow:
        deltaWindow = endWindow - startWindow
      elif startWindow and deltaWindow:
        endWindow = startWindow + deltaWindow
      elif deltaWindow and endWindow:
        startWindow = endWindow - deltaWindow
      else:
        assert not (startWindow or deltaWindow or endWindow), "insufficient: Need two of window ...Start: %s, ...Delta: %s, ...End:%s"%(startWindow,deltaWindow,endWindow)
    elif processingDay:
      dayt = datetime.datetime.fromtimestamp(time.mktime(processingDay.timetuple()))
      startWindow = dayt.replace(hour=0,minute=0,second=0,microsecond=0)
      assert startWindow == dayt,'processingDay must be some midnight, but was %s'%dayt
      deltaWindow = datetime.timedelta(days=1)
      endWindow = startWindow + deltaWindow
    else: # no params: try table
      startWindow,deltaWindow = getLastWindowAndSizeFromTable(cursor,tableName, productVersionRestriction,logger)
      if startWindow:
        endWindow = startWindow+deltaWindow
    return (startWindow,deltaWindow,endWindow)
  except:
    lib_util.reportExceptionAndAbort(logger)
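A tiny worked example (made-up timestamps) of the two-of-three rule the docstring describes: supply any two of start, delta and end and the third is derived; supply all three and they must agree:

import datetime

startWindow = datetime.datetime(2011, 1, 1, 0, 0, 0)
deltaWindow = datetime.timedelta(hours=12)
endWindow = startWindow + deltaWindow            # derived when only two are given
assert startWindow + deltaWindow == endWindow    # the consistency check when all three are given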
Example #7
 def __init__(self,configContext):
   super(TopCrashesBySignature,self).__init__()
   try:
     assert "databaseHost" in configContext, "databaseHost is missing from the configuration"
     assert "databaseName" in configContext, "databaseName is missing from the configuration"
     assert "databaseUserName" in configContext, "databaseUserName is missing from the configuration"
     assert "databasePassword" in configContext, "databasePassword is missing from the configuration"
     databaseDSN = "host=%(databaseHost)s dbname=%(databaseName)s user=%(databaseUserName)s password=%(databasePassword)s" % configContext
     # Be sure self.connection is closed before you quit!
     self.connection = psycopg2.connect(databaseDSN)
   except (psycopg2.OperationalError, AssertionError),x:
     lib_util.reportExceptionAndAbort(logger)
Example #10
def getLastWindowAndSizeFromTable(cursor, table, productVersionRestriction, logger):
  """
  cursor: database cursor
  table: name of table to check
  logger: in case trouble needs to be reported,
  Extracts and returns the most recent (window_end, window_size)
    - If there is no such table (or it has no such columns), logs failure and exits
    - If there is no such row, return (None,None)
  Checks that either:
    - window_size is a whole number of minutes, an integral number of which make a full day, or
    - window_size is an integral number of days (probably exactly one)
  If window_size is incorrect, logs failure and exits: The database is corrupt.
  """
  lastEnd, lastSize = None,None
  try:
    if productVersionRestriction:
      cursor.execute("SELECT window_end,window_size FROM %s where productdims_id = %s ORDER BY window_end DESC LIMIT 1" % (table, productVersionRestriction))
    else:
      cursor.execute("SELECT window_end,window_size FROM %s ORDER BY window_end DESC LIMIT 1" % table)
    cursor.connection.rollback()
  except:
    cursor.connection.rollback()
    lib_util.reportExceptionAndAbort(logger)
  try:
    lastEnd, lastSize = None,None
    (lastEnd,lastSize) = cursor.fetchone()
  except TypeError: # Don't log "NoneType object is not iterable"
    return lastEnd,lastSize
  except:
    lib_util.reportExceptionAndContinue(logger)
    return lastEnd,lastSize
  try:
    if 0 == lastSize.days:
      min = lastSize.seconds/60.0
      assert min > 0, 'Negative processing interval is not allowed, but got %s minutes'%min
      assert int(min) == min, 'processingInterval must be whole number of minutes, but got %s'%min
      assert 0 == (24*60)%min, 'Minutes in processing interval must divide evenly into a day, but got %d'%min
    else:
      day = lastSize.days
      assert day > 0, 'Negative processing interval is not allowed, but got %s days'%day
      assert 0 == lastSize.seconds, 'processing interval of days must have no left over seconds, but got %s'%lastSize.seconds
    usec = lastSize.microseconds
    assert 0 == usec, 'processing interval must have no fractional seconds, but got %s usecs'%usec
  except:
    lib_util.reportExceptionAndAbort(logger)
  return (lastEnd,lastSize)
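Restated outside the database plumbing, the window_size checks boil down to this: a sub-day interval must be a positive whole number of minutes that divides a day evenly, and a multi-day interval must be whole days with nothing left over. A stand-alone sketch of those assertions:

import datetime

def checkWindowSize(lastSize):
    if lastSize.days == 0:
        minutes = lastSize.seconds / 60.0
        assert minutes > 0, 'Zero or negative interval: %s minutes' % minutes
        assert int(minutes) == minutes, 'Not a whole number of minutes: %s' % minutes
        assert (24 * 60) % minutes == 0, '%s minutes does not divide a day evenly' % minutes
    else:
        assert lastSize.days > 0, 'Negative interval: %s days' % lastSize.days
        assert lastSize.seconds == 0, 'Leftover seconds: %s' % lastSize.seconds
    assert lastSize.microseconds == 0, 'Fractional seconds: %s usec' % lastSize.microseconds

checkWindowSize(datetime.timedelta(minutes=15))  # passes: 15 divides 1440
checkWindowSize(datetime.timedelta(days=1))      # passes: a whole day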
Example #11
def insertBuild(cursor, product_name, version, platform, build_id, build_type,
                beta_number, repository):
    """ Insert a particular build into the database """
    if not buildExists(cursor, product_name, version, platform, build_id,
                       build_type, beta_number, repository):
        sql = """ INSERT INTO releases_raw
                  (product_name, version, platform, build_id, build_type,
                   beta_number, repository)
                  VALUES (%s, %s, %s, %s, %s, %s, %s)"""

        try:
            params = (product_name, version, platform, build_id, build_type,
                      beta_number, repository)
            cursor.execute(sql, params)
            cursor.connection.commit()
            logger.info("Inserted: %s %s %s %s %s %s %s" % params)
        except Exception:
            cursor.connection.rollback()
            util.reportExceptionAndAbort(logger)
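A hypothetical invocation, assuming cursor comes from an open psycopg2 connection and using entirely made-up release data:

insertBuild(cursor, 'Firefox', '4.0b7', 'linux-i686', '20101101123456',
            'Beta', 7, 'mozilla-beta')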
 def storeFacts(self, crashData, intervalString):
   """
   Store crash data in the top_crashes_by_signature table
     crashData: List of {productdims_id:id,osdims_id:id,signature:signatureString,'count':c,'uptime':u,'hang_count':hc,'plugin_count':pc} as produced by self.fixupCrashData()
   """
   if not crashData or 0 == len(crashData):
     logger.warn("%s - No data for interval %s",threading.currentThread().getName(),intervalString)
     return 0
   # else
   if self.debugging:
     logger.debug('Storing %s rows into table %s at %s',len(crashData),resultTable,intervalString)
   sql = """INSERT INTO %s
         (count, uptime, signature, productdims_id, osdims_id, window_end, window_size, hang_count, plugin_count)
         VALUES (%%(count)s,%%(uptime)s,%%(signature)s,%%(productdims_id)s,%%(osdims_id)s,%%(windowEnd)s,%%(windowSize)s,%%(hang_count)s,%%(plugin_count)s)
         """%(resultTable)
   cursor = self.connection.cursor()
   try:
     cursor.executemany(sql,crashData)
     self.connection.commit()
     return len(crashData)
   except Exception,x:
     self.connection.rollback()
     lib_util.reportExceptionAndAbort(logger)
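storeFacts hands executemany a list of dicts; each %(name)s placeholder in the INSERT is filled from the matching key of each dict in turn. A minimal sketch of that pattern (hypothetical table and rows; cursor is an open psycopg2 cursor):

rows = [
    {'count': 3, 'uptime': 120, 'signature': 'js::GC'},
    {'count': 1, 'uptime': 45,  'signature': 'nsAppShell::Run'},
]
sql = ("INSERT INTO my_counts (count, uptime, signature) "
       "VALUES (%(count)s, %(uptime)s, %(signature)s)")
cursor.executemany(sql, rows)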
Example #13
    elif deltaDate and endDate:
      assert deltaDate > delta0, 'inconsistent: deltaDate %s <= 0'%(deltaDate)
      startDate = endDate - deltaDate
    else:
      assert not (startDate or deltaDate or endDate), "insufficient: Need two xxxDate: start: %s, delta: %s, end:%s"%(startDate,deltaDate,endDate)
      startDate = startDateFromTable
      endDate = endDateFromTable
      deltaDate = endDate - startDate
    if latestWindowEnd and startDate < latestWindowEnd:
      logger.info("given/calculated startDate: %s < latest row in %s. Changing to %s",startDate,tableName,latestWindowEnd)
      startDate = latestWindowEnd
      deltaDate = endDate - startDate
      assert deltaDate > delta0, 'inconsistent (after check with db table %s): deltaDate %s <= 0'%(tableName,deltaDate)
    return (startDate,deltaDate,endDate)
  except:
    lib_util.reportExceptionAndAbort(logger)

def getProcessingWindow(configContext,tableName, productVersionRestriction,cursor,logger, **kwargs):
  """
  ProcessingWindow is a single time window over which to aggregate materialized view data.

  Returns (startWindow,deltaWindow,endWindow) using this heuristic:
  kwargs beats configContext which beats latest table row
  if two among startWindow, endWindow, deltaWindow in config or kwargs: they are used.
    if all three: assert startWindow + deltaWindow == endWindow
  Backward compatibility: if processingDay is present and windowXxx are not:
    startWindow = midnight of given day, deltaWindow = timedelta(days=1)
  else: try to read window_end and window_size from the given table
  if one is available from config/kwargs it beats the same (or calculated) one from the table
  On inconsistency or failure, logs the problem and aborts
  BEWARE: You can get an inconsistency by having one item in config and the other two in kwargs: BEWARE
Example #14
 def saveData(self, windowStart, countUrlData):
   """
   given a time-window (start) and a list of (count,fullUrl), for each fullUrl:
      - ensure that the fullUrl is legal and available in urldims
     - collect count, window_end, window_size, productdims_id,osdims_id,urldims_id,signature for all quads that match
     - merge the counts for all the signatures with the same (product,os,url) and insert that data into top_crashes_by_url...
     - ...collecting all the signatures for the merged data.
     - Insert the new row id and each signature into top_crashes_by_url_signature
     - return the number of rows added to top_crashes_by_url
   """
   if not countUrlData:
     return 0
   # No need to get the count: It is always exactly 1 because uuid is unique and we group by it.
   selectSql = """SELECT %%(windowEnd)s, %%(windowSize)s, p.id as prod, o.id as os, r.signature, r.uuid, r.user_comments
                    FROM %(reportsTable)s r
                    JOIN productdims p on r.product = p.product AND r.version = p.version
                    JOIN osdims o on r.os_name = o.os_name AND r.os_version = o.os_version
                   WHERE %%(windowStart)s <= r.%(dateColumn)s AND r.%(dateColumn)s < %%(windowEnd)s
                     AND r.url = %%(fullUrl)s
                     %(productVersionSqlRestrictionPhrase)s
                   GROUP BY prod, os, r.signature, r.uuid, r.user_comments
                   """ % (self.configContext)
   getIdSql = """SELECT lastval()"""
   insertUrlSql = """INSERT INTO %(resultTable)s (count, urldims_id, productdims_id, osdims_id, window_end, window_size)
                     VALUES (%%(count)s,%%(urldimsId)s,%%(productdimsId)s,%%(osdimsId)s,%%(windowEnd)s,%%(windowSize)s)""" % (self.configContext)
   insertSigSql = """INSERT INTO %(resultSignatureTable)s (top_crashes_by_url_id,signature,count)
                     VALUES(%%s,%%s,%%s)""" % (self.configContext)
   windowData= {
     'windowStart': windowStart,
     'windowEnd': windowStart + self.configContext.deltaWindow,
     'windowSize': self.configContext.deltaWindow,
     }
   cursor = self.connection.cursor()
   insData = {}
   urldimsIdSet = set()
   urldimsIdCounter = {}
   for expectedCount,fullUrl in countUrlData:
     urldimsId = self.getUrlId(fullUrl) # updates urldims if needed
     if not urldimsId:
       continue
     urldimsIdCounter.setdefault(urldimsId,0)
     urldimsIdCounter[urldimsId] += 1
     if(urldimsIdCounter[urldimsId]) >= self.configContext.minimumHitsPerUrl:
       urldimsIdSet.add(urldimsId)
     selector = {'fullUrl':fullUrl}
     selector.update(windowData)
     cursor.execute(selectSql,selector)
     self.connection.rollback() # didn't modify, so rollback is enough
     data = cursor.fetchall()
     for (windowEnd, windowSize, productdimsId, osdimsId, signature, uuid, comment) in data:
       key = (productdimsId,urldimsId,osdimsId)
       insData.setdefault(key,{'count':0,'signatures':{}, 'uuidAndComments':[]})
       insData[key]['count'] += 1 # Count all urls that had a crash
       if signature: #don't handle empty signatures
         insData[key]['signatures'].setdefault(signature,0)
         insData[key]['signatures'][signature] += 1
       if uuid or comment: # always True, because uuid, but what the heck.
         insData[key]['uuidAndComments'].append((uuid,comment))
     if len(urldimsIdSet) > self.configContext.maximumUrls:
       break
   insertCount = 0
   try:
     # Looping 'quite awhile' without closing transaction. Rollback *should* revert everything except urldims which is benign
     # 'quite awhile' is up to 500 urls with up to (maybe nine or ten thousand?) correlations total. So ~~ 10K rows awaiting commit()
     signatureCorrelationData = []
     uuidCommentCorrelationData = []
     aKey = None
     stage = "in pre-loop"
     for key in insData:
       aKey = key
       stage = "inserting url crash for %s"%(str(aKey))
       if key[1] in urldimsIdSet: # then this urlid has at least minimumHitsPerUrl
         # the next line overwrites prior values (except the first time through)  with current values
         selector.update({'count':insData[key]['count'],'productdimsId':key[0],'urldimsId':key[1],'osdimsId':key[2]})
         # save the 'main' facts
         cursor.execute(insertUrlSql,selector)
         # grab the new row id
         stage = "getting new row id for %s"%(str(aKey))
         cursor.execute(getIdSql)
         newId = cursor.fetchone()[0]
         stage = "calculating secondary data for %s"%(str(aKey))
         # update data for correlations tables
         for signature,count in insData[key]['signatures'].items():
           signatureCorrelationData.append([newId,signature,count])
         for uuid,comment in insData[key]['uuidAndComments']:
           uuidCommentCorrelationData.append([uuid,comment,newId])
         insertCount += 1
       #end if key[1] in urldimsIdSet
     # end of loop over keys in insData
     # update the two correlation tables
     stage = "inserting signature correlations for %s"%(str(aKey))
     cursor.executemany(insertSigSql,signatureCorrelationData)
     stage = "commiting updates for %s"%(str(aKey))
     self.connection.commit()
     logger.info("Committed data for %s crashes for period %s up to %s",insertCount,windowStart,windowData['windowEnd'])
   except Exception,x:
     logger.warn("Exception while %s",stage)
     self.connection.rollback()
     socorro_util.reportExceptionAndAbort(logger)
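Notice the stage variable in the method above: it is updated before each risky step so the warning logged just before reportExceptionAndAbort says exactly what the code was doing when it failed. The pattern in isolation, with hypothetical stand-in steps:

import logging
logger = logging.getLogger(__name__)

def step_one():
    pass

def step_two():
    raise RuntimeError('boom')

stage = 'in pre-loop'
try:
    stage = 'running step one'
    step_one()
    stage = 'running step two'
    step_two()
except Exception:
    # stage pinpoints where things went wrong before bailing out.
    logger.warning('Exception while %s', stage)
    raise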