Example #1
 def testSingleValueEmpty(self):
   try:
     cur = TestEmptyCursor()
     ppghelper.singleValueSql(cur,"")
     assert False, "must raise SQLDidNotReturnSingleValue"
   except ppghelper.SQLDidNotReturnSingleValue:
     pass
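Every example on this page exercises the same helper: singleValueSql runs a query and hands back the first column of the first row, raising SQLDidNotReturnSingleValue when the query produces no rows at all (Example #7 below shows that extra rows are tolerated, not rejected). A minimal sketch of what the helper presumably looks like, inferred from these call sites rather than copied from socorro - the real ppghelper implementation may differ:

  class SQLDidNotReturnSingleValue(Exception):
    pass

  def singleValueSql(cursor, sql, parameters=None):
    # execute the query and return the first column of the first row;
    # an empty result set violates the single-value expectation
    cursor.execute(sql, parameters)
    result = cursor.fetchall()
    try:
      return result[0][0]
    except (IndexError, TypeError):
      raise SQLDidNotReturnSingleValue("%s: %s" % (sql, parameters))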
Example #2
def signature_is_found(signature, databaseCursor):
    try:
        psy.singleValueSql(
            databaseCursor,
            "select id from reports where signature = %s limit 1",
            (signature, ))
        return True
    except psy.SQLDidNotReturnSingleValue:
        return False
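signature_is_found turns the exception into a boolean existence test. Since a psycopg2-style cursor's fetchone() returns None for an empty result set, the same check can be written without the helper - an equivalent sketch:

  def signature_is_found(signature, databaseCursor):
    # existence check without the helper: fetchone() yields None
    # when the query produced no rows
    databaseCursor.execute(
        "select id from reports where signature = %s limit 1",
        (signature,))
    return databaseCursor.fetchone() is not None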
Example #3
def getProductId(aProduct, aVersion, aCursor, logger):
  logger.debug("getProductId")
  if not aProduct or not aVersion:
    return None
  try:
    return psy.singleValueSql(aCursor, "select id from productdims where product = %s and version = %s", (aProduct, aVersion))
  except psy.SQLDidNotReturnSingleValue:
    lib_util.reportExceptionAndAbort(logger)
Example #4
 def unbalancedJobSchedulerIter(self, aCursor):
   """ This generator returns a sequence of active processorId without regard to job balance
   """
   logger.debug("unbalancedJobSchedulerIter: compiling list of active processors")
   try:
     threshold = psy.singleValueSql( aCursor, "select now() - interval '%s'" % self.config.processorCheckInTime)
     aCursor.execute("select id from processors where lastSeenDateTime > '%s'" % threshold)
     listOfProcessorIds = [aRow[0] for aRow in aCursor.fetchall()]
     if not listOfProcessorIds:
       raise Monitor.NoProcessorsRegisteredException("There are no active processors registered")
     while True:
       for aProcessorId in listOfProcessorIds:
         yield aProcessorId
   except Monitor.NoProcessorsRegisteredException:
     self.quit = True
     socorro.lib.util.reportExceptionAndAbort(logger)
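The while True loop makes this a round-robin generator over a snapshot of processor ids: once the list is fetched it never re-queries the database, it just cycles. The cycling half is equivalent to itertools.cycle:

  import itertools

  # the hand-rolled while True / for loop above, expressed with the
  # standard library; ids stands in for listOfProcessorIds
  ids = [17, 18, 19]
  roundRobin = itertools.cycle(ids)
  assignments = [next(roundRobin) for _ in range(7)]
  # assignments == [17, 18, 19, 17, 18, 19, 17]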
Example #5
 def lookForPriorityJobsAlreadyInQueue(self, databaseCursor, setOfPriorityUuids):
   # check for uuids already in the queue
   for uuid in list(setOfPriorityUuids):
     self.quitCheck()
     try:
       prexistingJobOwner = psy.singleValueSql(databaseCursor, "select owner from jobs where uuid = '%s'" % uuid)
       logger.info("priority job %s was already in the queue, assigned to %d", uuid, prexistingJobOwner)
       try:
         databaseCursor.execute("insert into priority_jobs_%d (uuid) values ('%s')" % (prexistingJobOwner, uuid))
       except psycopg2.ProgrammingError:
         logger.debug("%s assigned to dead processor %d - wait for reassignment", uuid, prexistingJobOwner)
         # likely that the job is assigned to a dead processor
         # skip processing it this time around - by next time hopefully it will have been
         # reassigned to a live processor
         databaseCursor.connection.rollback()
         setOfPriorityUuids.remove(uuid)
         continue
       databaseCursor.execute("delete from priorityjobs where uuid = %s", (uuid,))
       databaseCursor.connection.commit()
       setOfPriorityUuids.remove(uuid)
     except psy.SQLDidNotReturnSingleValue:
       #logger.debug("priority job %s was not already in the queue", uuid)
       pass
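Note that this call site interpolates the uuid directly into the SQL string, while Examples #2 and #3 pass a parameter tuple; the comment in Example #10's constructor ("singleValueSql should actually accept sql with placeholders and an array of values instead of just a string") suggests the two styles coexisted while the helper grew parameter support. A sketch of the same lookup in the parameterized style, assuming the helper forwards the tuple to cursor.execute:

  prexistingJobOwner = psy.singleValueSql(
      databaseCursor,
      "select owner from jobs where uuid = %s",
      (uuid,))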
Example #6
 def testSingleValueSingle(self):
   try:
     cur = TestSingleCursor()
     assert "Row 0, Column 0" == ppghelper.singleValueSql(cur,"")
   except Exception as e:
     assert False, "must not raise an exception for this %s" % e
Example #7
 def testSingleValueMulti(self):
   try:
     cur = TestMultiCursor(numRows=5)
     assert "Row 0, Column 0" == ppghelper.singleValueSql(cur,"")
   except Exception as e:
     assert False, "must not raise an exception for this %s" % e
Example #8
    try:
      last_crash = int(jsonDocument['SecondsSinceLastCrash'])
    except:
      last_crash = None
    newReportsRowTuple = (uuid, crash_date, date_processed, product, version, buildID, url, install_age, last_crash, uptime, email, build_date, user_id, user_comments, app_notes, distributor, distributor_version, log_file)
    try:
      logger.debug("%s - inserting for %s, %s", threading.currentThread().getName(), uuid, str(date_processed))
      self.reportsTable.insert(threadLocalCursor, newReportsRowTuple, self.databaseConnectionPool.connectToDatabase, date_processed=date_processed)
    except psycopg2.IntegrityError as x:
      logger.debug("%s - psycopg2.IntegrityError %s", threading.currentThread().getName(), str(x))
      logger.debug("%s - %s: this report already exists for date: %s",  threading.currentThread().getName(), uuid, str(date_processed))
      threadLocalCursor.connection.rollback()
      previousTrialWasSuccessful = psy.singleValueSql(threadLocalCursor, "select success from reports where uuid = '%s' and date_processed = timestamp without time zone '%s'" % (uuid, date_processed))
      if previousTrialWasSuccessful:
        raise DuplicateEntryException(uuid)
      threadLocalCursor.execute("delete from reports where uuid = '%s' and date_processed = timestamp without time zone '%s'" % (uuid, date_processed))
      processorErrorMessages.append("INFO: This record is a replacement for a previous record with the same uuid")
      self.reportsTable.insert(threadLocalCursor, newReportsRowTuple, self.databaseConnectionPool.connectToDatabase, date_processed=date_processed)
    reportId = psy.singleValueSql(threadLocalCursor, "select id from reports where uuid = '%s' and date_processed = timestamp without time zone '%s'" % (uuid, date_processed))
    return reportId

  #-----------------------------------------------------------------------------------------------------------------
  def doBreakpadStackDumpAnalysis (self, reportId, uuid, dumpfilePathname, databaseCursor, date_processed, processorErrorMessages):
    """ This function is run only by a worker thread.
        This function must be overridden in a subclass - this method will invoke the breakpad_stackwalk process
        (if necessary) and then do the analysis of the output
    """
    raise Exception("No breakpad_stackwalk invocation method specified")
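The IntegrityError branch in the fragment above implements a replace-on-duplicate protocol: roll back, check whether an earlier attempt already processed this uuid successfully, give up if it did, otherwise delete the stale row and retry the insert exactly once. A condensed sketch of that control flow (insertFn stands in for the reportsTable.insert call; psy, psycopg2, and DuplicateEntryException are the names the example itself uses):

  def insertOrReplaceReport(cursor, insertFn, uuid, date_processed):
    try:
      insertFn()
    except psycopg2.IntegrityError:
      cursor.connection.rollback()
      # a previous *successful* run wins; only failed rows are replaced
      whereClause = (
          "uuid = '%s' and date_processed = timestamp without time zone '%s'"
          % (uuid, date_processed))
      if psy.singleValueSql(cursor, "select success from reports where " + whereClause):
        raise DuplicateEntryException(uuid)
      cursor.execute("delete from reports where " + whereClause)
      insertFn()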


Example #9
 def cleanUpDeadProcessors (self, aCursor):
   """ look for dead processors - find all the jobs of dead processors and assign them to live processors
       then delete the dead processors
   """
   logger.info("looking for dead processors")
   try:
     logger.info("threshold %s", self.config.processorCheckInTime)
     threshold = psy.singleValueSql(aCursor, "select now() - interval '%s' * 2" % self.config.processorCheckInTime)
     #sql = "select id from processors where lastSeenDateTime < '%s'" % (threshold,)
     #logger.info("dead processors sql: %s", sql)
     aCursor.execute("select id from processors where lastSeenDateTime < '%s'" % (threshold,))
     deadProcessors = aCursor.fetchall()
     aCursor.connection.commit()
     logger.info("dead processors: %s", str(deadProcessors))
     if deadProcessors:
       logger.info("found dead processor(s):")
       for aDeadProcessorTuple in deadProcessors:
         logger.info("%d is dead", aDeadProcessorTuple[0])
       stringOfDeadProcessorIds = ", ".join([str(x[0]) for x in deadProcessors])
       logger.info("getting list of live processor(s):")
       aCursor.execute("select id from processors where lastSeenDateTime >= '%s'" % threshold)
       liveProcessors = aCursor.fetchall()
       if not liveProcessors:
         raise Monitor.NoProcessorsRegisteredException("There are no processors registered")
       numberOfLiveProcessors = len(liveProcessors)
       logger.info("getting range of queued date for jobs associated with dead processor(s):")
       aCursor.execute("select min(queueddatetime), max(queueddatetime) from jobs where owner in (%s)" % stringOfDeadProcessorIds)
       earliestDeadJob, latestDeadJob = aCursor.fetchall()[0]
       if earliestDeadJob is not None and latestDeadJob is not None:
         timeIncrement = (latestDeadJob - earliestDeadJob) / numberOfLiveProcessors
         for x, liveProcessorId in enumerate(liveProcessors):
           lowQueuedTime = x * timeIncrement + earliestDeadJob
           highQueuedTime = (x + 1) * timeIncrement + earliestDeadJob
           logger.info("assigning jobs from %s to %s to processor %s:", str(lowQueuedTime), str(highQueuedTime), liveProcessorId)
           # why is the range >= at both ends? the range must be inclusive, the risk of moving a job twice is low and consequences low, too.
           # 1st step: take any jobs of a dead processor that were in progress and reset them to unprocessed
           aCursor.execute("""update jobs
                                 set starteddatetime = NULL
                              where
                                 %%s >= queueddatetime
                                 and queueddatetime >= %%s
                                 and owner in (%s)
                                 and success is NULL""" % stringOfDeadProcessorIds, (highQueuedTime, lowQueuedTime))
           # 2nd step: take all jobs of a dead processor and give them to a new owner
           aCursor.execute("""update jobs
                                 set owner = %%s
                              where
                                 %%s >= queueddatetime
                                 and queueddatetime >= %%s
                                 and owner in (%s)""" % stringOfDeadProcessorIds, (liveProcessorId, highQueuedTime, lowQueuedTime))
           aCursor.connection.commit()
       #3rd step - transfer stalled priority jobs to new processor
       for deadProcessorTuple in deadProcessors:
         logger.info("re-assigning priority jobs from processor %d:", deadProcessorTuple[0])
         try:
           aCursor.execute("""insert into priorityjobs (uuid) select uuid from priority_jobs_%d""" % deadProcessorTuple)
           aCursor.connection.commit()
         except:
           aCursor.connection.rollback()
       logger.info("removing all dead processors")
       aCursor.execute("delete from processors where lastSeenDateTime < '%s'" % threshold)
       aCursor.connection.commit()
       # remove dead processors' priority tables
       for aDeadProcessorTuple in deadProcessors:
         try:
           aCursor.execute("drop table priority_jobs_%d" % aDeadProcessorTuple[0])
           aCursor.connection.commit()
         except:
           logger.warning("cannot clean up dead processor in database: the table 'priority_jobs_%d' may need manual deletion", aDeadProcessorTuple[0])
           aCursor.connection.rollback()
   except Monitor.NoProcessorsRegisteredException:
     self.quit = True
     socorro.lib.util.reportExceptionAndAbort(logger, showTraceback=False)
   except:
     socorro.lib.util.reportExceptionAndContinue(logger)
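The reassignment loop splits the dead processors' queued-time span into equal slices, one per live processor, so each survivor inherits a contiguous chunk of the backlog. A small worked example of the bucketing arithmetic, with illustrative values:

  import datetime

  # 3 live processors inherit jobs queued across a 6 hour window
  earliestDeadJob = datetime.datetime(2010, 1, 1, 0, 0)
  latestDeadJob = datetime.datetime(2010, 1, 1, 6, 0)
  numberOfLiveProcessors = 3
  timeIncrement = (latestDeadJob - earliestDeadJob) / numberOfLiveProcessors

  for x in range(numberOfLiveProcessors):
    lowQueuedTime = x * timeIncrement + earliestDeadJob
    highQueuedTime = (x + 1) * timeIncrement + earliestDeadJob
    print(lowQueuedTime, "->", highQueuedTime)
  # slice 0: 00:00 -> 02:00, slice 1: 02:00 -> 04:00, slice 2: 04:00 -> 06:00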
Example #10
  def __init__ (self, config):
    """
    """
    super(Processor, self).__init__()

    assert "databaseHost" in config, "databaseHost is missing from the configuration"
    assert "databaseName" in config, "databaseName is missing from the configuration"
    assert "databaseUserName" in config, "databaseUserName is missing from the configuration"
    assert "databasePassword" in config, "databasePassword is missing from the configuration"
    assert "storageRoot" in config, "storageRoot is missing from the configuration"
    assert "deferredStorageRoot" in config, "deferredStorageRoot is missing from the configuration"
    assert "jsonFileSuffix" in config, "jsonFileSuffix is missing from the configuration"
    assert "dumpFileSuffix" in config, "dumpFileSuffix is missing from the configuration"
    assert "processorCheckInTime" in config, "processorCheckInTime is missing from the configuration"
    assert "processorCheckInFrequency" in config, "processorCheckInFrequency is missing from the configuration"
    assert "processorId" in config, "processorId is missing from the configuration"
    assert "numberOfThreads" in config, "numberOfThreads is missing from the configuration"
    assert "batchJobLimit" in config, "batchJobLimit is missing from the configuration"
    assert "irrelevantSignatureRegEx" in config, "irrelevantSignatureRegEx is missing from the configuration"
    assert "prefixSignatureRegEx" in config, "prefixSignatureRegEx is missing from the configuration"

    self.databaseConnectionPool = psy.DatabaseConnectionPool(config.databaseHost, config.databaseName, config.databaseUserName, config.databasePassword, logger)

    self.processorLoopTime = config.processorLoopTime.seconds

    self.config = config
    self.quit = False
    signal.signal(signal.SIGTERM, Processor.respondToSIGTERM)
    signal.signal(signal.SIGHUP, Processor.respondToSIGTERM)

    self.irrelevantSignatureRegEx = re.compile(self.config.irrelevantSignatureRegEx)
    self.prefixSignatureRegEx = re.compile(self.config.prefixSignatureRegEx)

    self.reportsTable = sch.ReportsTable(logger=logger)
    self.dumpsTable = sch.DumpsTable(logger=logger)
    self.extensionsTable = sch.ExtensionsTable(logger=logger)
    self.framesTable = sch.FramesTable(logger=logger)

    logger.info("%s - connecting to database", threading.currentThread().getName())
    try:
      databaseConnection, databaseCursor = self.databaseConnectionPool.connectionCursorPair()
    except:
      self.quit = True
      logger.critical("%s - cannot connect to the database", threading.currentThread().getName())
      socorro.lib.util.reportExceptionAndAbort(logger) # can't continue without a database connection

    # register self with the processors table in the database
    # Must request 'auto' id, or an id number that is in the processors table AND not alive
    logger.info("%s - registering with 'processors' table", threading.currentThread().getName())
    priorityCreateRuberic = "Since we took over, it probably exists."
    self.processorId = None
    legalOption = False
    try:
      requestedId = 0
      try:
        requestedId = int(self.config.processorId)
      except ValueError:
        if 'auto' == self.config.processorId:
          requestedId = 'auto'
        else:
          raise socorro.lib.ConfigurationManager.OptionError("%s is not a valid option for processorId" % self.config.processorId)
      self.processorName = "%s_%d" % (os.uname()[1], os.getpid())
      threshold = psy.singleValueSql(databaseCursor, "select now() - interval '%s'" % self.config.processorCheckInTime)
      if requestedId == 'auto':  # take over for an existing processor
        logger.debug("%s - looking for a dead processor", threading.currentThread().getName())
        try:
          self.processorId = psy.singleValueSql(databaseCursor, "select id from processors where lastseendatetime < '%s' limit 1" % threshold)
          logger.info("%s - will step in for processor %d", threading.currentThread().getName(), self.processorId)
        except psy.SQLDidNotReturnSingleValue:
          logger.debug("%s - no dead processor found", threading.currentThread().getName())
          requestedId = 0 # signal that we found no dead processors
      else: # requestedId is an integer: We already raised OptionError if not
        try:
          # singleValueSql should actually accept sql with placeholders and an array of values instead of just a string. Enhancement needed...
          checkSql = "select id from processors where lastSeenDateTime < '%s' and id = %s" % (threshold,requestedId)
          self.processorId = psy.singleValueSql(databaseCursor, checkSql)
          logger.info("%s - stepping in for processor %d", threading.currentThread().getName(), self.processorId)
        except psy.SQLDidNotReturnSingleValue:
          raise socorro.lib.ConfigurationManager.OptionError("ProcessorId %s is not in processors table or is still live."%requestedId)
      if requestedId == 0:
        try:
          databaseCursor.execute("insert into processors (name, startdatetime, lastseendatetime) values (%s, now(), now())", (self.processorName,))
          self.processorId = psy.singleValueSql(databaseCursor, "select id from processors where name = '%s'" % (self.processorName,))
        except:
          databaseConnection.rollback()
          raise
        logger.info("%s - initializing as processor %d", threading.currentThread().getName(), self.processorId)
        priorityCreateRuberic = "Does it already exist?"
        # We have a good processorId and a name. Register self with database
      try:
        databaseCursor.execute("update processors set name = %s, startdatetime = now(), lastseendatetime = now() where id = %s", (self.processorName, self.processorId))
        databaseCursor.execute("update jobs set starteddatetime = NULL where id in (select id from jobs where starteddatetime is not null and success is null and owner = %s)", (self.processorId, ))
      except Exception as x:
        logger.critical("Constructor: Unable to update processors or jobs table: %s: %s",type(x),x)
        databaseConnection.rollback()
        raise
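The registration logic above reduces to a three-way decision on config.processorId: 'auto' adopts any dead processor's id and falls back to a fresh registration, while an explicit integer must name a processor that is both registered and dead. A hedged distillation of that branching (the helper callables are hypothetical, not part of socorro):

  def resolveProcessorId(requestedId, deadProcessorIds, registerNewProcessor):
    # deadProcessorIds: ids whose lastseendatetime is older than the
    # check-in threshold; registerNewProcessor() inserts a row, returns its id
    if requestedId == 'auto':
      if deadProcessorIds:
        return deadProcessorIds[0]   # step in for a dead processor
      return registerNewProcessor()  # nothing to take over: register fresh
    if requestedId in deadProcessorIds:
      return requestedId             # take over the requested dead id
    raise ValueError("ProcessorId %s is not in processors table or is still live." % requestedId)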
Example #11
 def insertReportIntoDatabase(self, threadLocalCursor, uuid, jsonDocument, jobPathname, date_processed, processorErrorMessages):
   """
   This function is run only by a worker thread.
     Create the record for the current job in the 'reports' table
     input parameters:
       threadLocalCursor: a database cursor for exclusive use by the calling thread
       uuid: the unique id identifying the job - corresponds with the uuid column in the 'jobs' and the 'reports' tables
       jsonDocument: an object with a dictionary interface for fetching the components of the json document
       jobPathname:  the complete pathname for the json document
       date_processed: when job came in (a key used in partitioning)
       processorErrorMessages: list of strings of error messages
     jsonDocument MUST contain (to be useful)                       : stored in table `reports`
       BuildID: 10-character date, as: datetime.strftime('%Y%m%d%H'): in column `build`
       ProductName: Any string with length <= 30                    : in column `product`
       Version: Any string with length <= 16                        : in column `version`
       CrashTime(preferred), or
       timestamp (deprecated): decimal unix timestamp               : in column `client_crash_date`
       logfile                                                      : in column `logfile`
     jsonDocument SHOULD contain:
       StartupTime: decimal unix timestamp of 10 or fewer digits    : in column `uptime` = crash_time - startupTime
       InstallTime: decimal unix timestamp of 10 or fewer digits    : in column `install_age` = crash_time - installTime
       SecondsSinceLastCrash: some integer value                    : in column `last_crash`
     jsonDocument MAY contain:
       Comments: Length <= 500                                      : in column `user_comments`
       Notes:    Length <= 1000                                     : in column `app_notes`
       Distributor: Length <= 20                                    : in column `distributor`
       Distributor_version: Length <= 20                            : in column `distributor_version`
   """
   logger.debug("%s - starting insertReportIntoDatabase", threading.currentThread().getName())
   product = Processor.getJsonOrWarn(jsonDocument,'ProductName',processorErrorMessages,"no product", 30)
   version = Processor.getJsonOrWarn(jsonDocument,'Version', processorErrorMessages,'no version',16)
   buildID =   Processor.getJsonOrWarn(jsonDocument,'BuildID', processorErrorMessages,None,16)
   url = socorro.lib.util.lookupLimitedStringOrNone(jsonDocument, 'URL', 255)
   email = None   # we stopped collecting user email per user privacy concerns
   user_id = None # we stopped collecting user id too
   user_comments = socorro.lib.util.lookupLimitedStringOrNone(jsonDocument, 'Comments', 500)
   app_notes = socorro.lib.util.lookupLimitedStringOrNone(jsonDocument, 'Notes', 1000)
   distributor = socorro.lib.util.lookupLimitedStringOrNone(jsonDocument, 'Distributor', 20)
   distributor_version = socorro.lib.util.lookupLimitedStringOrNone(jsonDocument, 'Distributor_version', 20)
   log_file = jsonDocument["logfile"]
   crash_time = None
   install_age = None
   uptime = 0
   crash_date = date_processed
   defaultCrashTime = int(time.mktime(date_processed.timetuple())) # must have crashed before date processed
   timestampTime = int(jsonDocument.get('timestamp',defaultCrashTime)) # the old name for crash time
   crash_time = int(Processor.getJsonOrWarn(jsonDocument,'CrashTime',processorErrorMessages,timestampTime,10))
   startupTime = int(jsonDocument.get('StartupTime',crash_time)) # must have started up some time before crash
   installTime = int(jsonDocument.get('InstallTime',startupTime)) # must have installed some time before startup
   crash_date = datetime.datetime.fromtimestamp(crash_time, Processor.utctz)
   install_age = crash_time - installTime
   uptime = max(0, crash_time - startupTime)
   if crash_time == defaultCrashTime:
     logger.warning("%s - no 'crash_time' calculated in %s: Using date_processed", threading.currentThread().getName(), jobPathname)
     #socorro.lib.util.reportExceptionAndContinue(logger, logging.WARNING)
     processorErrorMessages.append("WARNING: No 'client_crash_date' could be determined from the Json file")
   build_date = None
   if buildID:
     try:
       build_date = datetime.datetime(*[int(x) for x in Processor.buildDatePattern.match(str(buildID)).groups()])
     except (AttributeError, ValueError, KeyError):
       logger.warning("%s - no 'build_date' calculated in %s", threading.currentThread().getName(), jobPathname)
       processorErrorMessages.append("WARNING: No 'build_date' could be determined from the Json file")
       socorro.lib.util.reportExceptionAndContinue(logger, logging.WARNING)
   try:
     last_crash = int(jsonDocument['SecondsSinceLastCrash'])
   except:
     last_crash = None
   newReportsRowTuple = (uuid, crash_date, date_processed, product, version, buildID, url, install_age, last_crash, uptime, email, build_date, user_id, user_comments, app_notes, distributor, distributor_version, log_file)
   try:
     logger.debug("%s - inserting for %s, %s", threading.currentThread().getName(), uuid, str(date_processed))
     self.reportsTable.insert(threadLocalCursor, newReportsRowTuple, self.databaseConnectionPool.connectToDatabase, date_processed=date_processed)
   except psycopg2.IntegrityError as x:
     logger.debug("%s - psycopg2.IntegrityError %s", threading.currentThread().getName(), str(x))
     logger.debug("%s - %s: this report already exists for date: %s",  threading.currentThread().getName(), uuid, str(date_processed))
     threadLocalCursor.connection.rollback()
     previousTrialWasSuccessful = psy.singleValueSql(threadLocalCursor, "select success from reports where uuid = '%s' and date_processed = timestamp without time zone '%s'" % (uuid, date_processed))
     if previousTrialWasSuccessful:
       raise DuplicateEntryException(uuid)
     threadLocalCursor.execute("delete from reports where uuid = '%s' and date_processed = timestamp without time zone '%s'" % (uuid, date_processed))
     processorErrorMessages.append("INFO: This record is a replacement for a previous record with the same uuid")
     self.reportsTable.insert(threadLocalCursor, newReportsRowTuple, self.databaseConnectionPool.connectToDatabase, date_processed=date_processed)