Python getProcessingWindow Examples, socorro.cron.util.getProcessingWindow Python Examples

Example #1

0

Show file

File: topCrashesByUrl.py Project: Meghashyamt/socorro

 def countCrashesByUrlInWindow(self,**kwargs):
   """
   Count crashes per url inside a single processing window, without regard to platform or os.

   The window is resolved by cron_util.getProcessingWindow from self.configContext and kwargs.
   Side effect: the resolved window size is stored in self.configContext['deltaWindow'].
   Returns the fetched rows, each (count, url), ordered by descending count and limited by the
   configContext value fatMaximumUrls. Returns [] when no window start could be resolved.
   """
   cursor = self.connection.cursor()
   windowStart,windowSize,_windowEnd = cron_util.getProcessingWindow(self.configContext,resultTable,self.productVersionRestriction,cursor,logger,**kwargs)
   if not windowStart:
     # nothing to do; the reason the window is missing is irrelevant here
     return []
   self.configContext['deltaWindow'] = windowSize
   # half-open interval [startDate, endDate) as used by the WHERE clause below
   selector = dict(startDate=windowStart, endDate=windowStart + windowSize)
   # Table/column names come from configContext via the first (python) substitution;
   # the doubled %% placeholders survive it and are bound by the db driver at execute time.
   # fatMaximumUrls is a HACK to assure enough cooked urls
   sqlTemplate = """SELECT COUNT(r.id), r.url FROM %(reportsTable)s r
                    JOIN productdims p ON r.product = p.product AND r.version = p.version
                    JOIN product_visibility cfg ON p.id = cfg.productdims_id
                    WHERE r.url IS NOT NULL AND r.url <> '' AND %%(startDate)s <= r.%(dateColumn)s AND r.%(dateColumn)s < %%(endDate)s
                    AND cfg.start_date <= r.%(dateColumn)s AND r.%(dateColumn)s <= cfg.end_date
                    %(productVersionSqlRestrictionPhrase)s
                    GROUP BY r.url
                    ORDER BY COUNT(r.id) desc
                    LIMIT %(fatMaximumUrls)s"""
   topUrlSql = sqlTemplate%(self.configContext)
   cursor.execute(topUrlSql,selector)
   rows = cursor.fetchall() # each row is (count, url); position in the list is the implicit rank
   # per suggestion in psycopg mailing list: Rollback if no db modification
   self.connection.rollback()
   if rows:
     return rows
   logger.warn("No url crash data collected between %(startDate)s and %(endDate)s",selector)
   return rows

Example #2

0

Show file

File: topCrashesBySignature.py Project: Meghashyamt/socorro

 def processDateInterval(self,**kwargs):
   """
   Step through consecutive windows from self.startDate while a whole window still fits before
   (or exactly at) self.endDate; for each window extract, fix up and store the summary data.

   kwargs used directly here:
     dateColumnName: temporarily replaces self.dateColumnName for the duration of the call
     summaryCrashes: a map handed to extractDataForPeriod for the first window only
   All kwargs are also forwarded to cron_util.getProcessingWindow, whose deltaWindow result
   sets the step size.
   Returns the sum of the values returned by storeFacts.
   The connection is closed when the loop finishes or fails.
   """
   summaryCrashes = kwargs.get('summaryCrashes',{})
   savedColumnName = self.dateColumnName
   self.dateColumnName = kwargs.get('dateColumnName',savedColumnName)
   mustRestoreColumnName = (savedColumnName != self.dateColumnName)
   cursor = self.connection.cursor()
   # NOTE(review): only the window size from getProcessingWindow is used; the returned start
   # is replaced by self.startDate just below and the returned end is ignored - confirm intent.
   _ignoredStart,step,_ignoredEnd = cron_util.getProcessingWindow(self.configContext,resultTable,self.productVersionRestriction,cursor,logger,**kwargs)
   windowStart = self.startDate
   try:
     storedTotal = 0
     windowEnd = windowStart + step
     while windowEnd <= self.endDate:
       logger.debug("%s - Processing with interval from %s, size=%s)",threading.currentThread().getName(),windowStart,step)
       summaryCrashes = self.extractDataForPeriod(windowStart, windowEnd, summaryCrashes)
       facts = self.fixupCrashData(summaryCrashes,windowEnd,step)
       storedTotal += self.storeFacts(facts, "Start: %s, size=%s"%(windowStart,step))
       # later windows start from an empty summary
       summaryCrashes = {}
       windowStart = windowEnd
       windowEnd = windowStart + step
   finally:
     self.connection.close()
     if mustRestoreColumnName:
       self.dateColumnName = savedColumnName
   return storedTotal

Example #3

0

Show file

File: topCrashesByUrl.py Project: Meghashyamt/socorro

 def processDateInterval(self, **kwargs):
   """
   Walk the processing period window by window, saving the per-url crash counts of each window.

   The period comes from cron_util.getProcessingDates and the first window / step from
   cron_util.getProcessingWindow; kwargs (plus a 'defaultDeltaWindow' default) go to both.
   A missing window start falls back to the period start; a missing step falls back to
   defaultDeltaWindow. A window is processed only while its end is strictly before endDate.
   """
   cursor = self.connection.cursor()
   kwargs.setdefault('defaultDeltaWindow',defaultDeltaWindow)
   startDate,deltaDate,endDate = cron_util.getProcessingDates(self.configContext, resultTable, self.productVersionRestriction, cursor, logger, **kwargs)
   startWindow,deltaWindow,endWindow = cron_util.getProcessingWindow(self.configContext, resultTable,self.productVersionRestriction,cursor, logger, **kwargs)
   # NOTE(review): startDate/endDate are dereferenced here before any None check - confirm
   # getProcessingDates cannot return None for them on this path.
   logger.info("Starting loop from %s up to %s step (%s)",startDate.isoformat(),endDate.isoformat(),deltaWindow)
   windowStart = startWindow or startDate
   step = deltaWindow or defaultDeltaWindow
   while windowStart + step < endDate:
     rows = self.countCrashesByUrlInWindow(startWindow=windowStart,deltaWindow=step)
     if not rows:
       logger.info("Window starting at %s had no data",windowStart)
     else:
       logger.info("Saving %s items in window starting at %s",len(rows),windowStart)
       self.saveData(windowStart,rows)
     # advance to the next slot whether or not anything was saved
     windowStart += step
   logger.info("Done processIntervals")

Example #4

0

Show file

File: mtbf.py Project: AlinT/socorro

 def processDateInterval(self, **kwargs):
   """
   Call processOneMtbfWindow once for each window in a date range.

   The range comes from cron_util.getProcessingDates and the window from
   cron_util.getProcessingWindow, both fed self.configContext and kwargs.
   Defaults applied here: a missing window size becomes one day; a missing end date, or one
   later than midnight of the current day (per datetime.datetime.now()), becomes that midnight.
   kwargs is passed on to processOneMtbfWindow with 'deltaWindow' and 'startWindow' set per step.
   Returns the number of windows processed, or 0 when the run was skipped.
   """
   cur = self.connection.cursor()
   now = datetime.datetime.now()
   startDate, deltaDate, endDate = cron_util.getProcessingDates(self.configContext,resultTable,cur,self.logger,**kwargs)
   startWindow,deltaWindow,endWindow = cron_util.getProcessingWindow(self.configContext,resultTable,cur,self.logger,**kwargs)
   if not (startDate or startWindow):
     self.logger.warn("MTBF (%s): No startDate, no startWindow. Did not run.",now)
     return 0
   # from here on at least one of startDate / startWindow is set
   startWindow = startWindow or startDate
   if not startDate or startDate > startWindow:
     startDate = startWindow
   deltaWindow = deltaWindow or datetime.timedelta(days=1)
   thisMidnight = now.replace(hour=0,minute=0,second=0,microsecond=0)
   if not endDate or endDate > thisMidnight:
     endDate = thisMidnight
   if startDate + deltaWindow > endDate:
     self.logger.warn("MTBF (%s) startDate (%s) too close to endDate (%s). Did not run.",now, startDate,endDate)
     return 0
   if self.debugging:
     self.logger.debug("""mtbf.processDateInterval:
     startDate: %s, deltaDate: %s, endDate: %s
     startWindow: %s, deltaWindow: %s, endWindow: %s """,startDate,deltaDate,endDate,startWindow,deltaWindow,endWindow)
   kwargs['deltaWindow'] = deltaWindow
   # NOTE(review): the guard above lets startDate + deltaWindow == endDate through, but the
   # strict '<' below then runs zero windows for that case - confirm whether '<=' was intended.
   processed = 0
   windowStart = startDate
   while windowStart + deltaWindow < endDate:
     kwargs['startWindow'] = windowStart
     self.processOneMtbfWindow(**kwargs)
     windowStart += deltaWindow
     processed += 1
   return processed

Example #5

0

Show file

File: testUtil.py Project: AlinT/socorro

  def testGetProcessingWindow(self):
    # Exercises cron_util.getProcessingWindow with combinations of config entries and kwargs:
    # under-specified or inconsistent windows must raise SystemExit, while two or three
    # consistent values (or a plain-date processingDay) must yield (start, delta, end).
    cursor = self.connection.cursor()
    config = {}

    def getWindow(cfg,**kw):
      # every call in this test shares the same table, restriction (None), cursor and logger
      return cron_util.getProcessingWindow(cfg,self.tableName,None,cursor,me.fileLogger,**kw)

    def assertExits(cfg,**kw):
      assert_raises(SystemExit,getWindow,cfg,**kw)

    # check that a really empty system fails
    assertExits(config)

    self.createBunny()
    # check that nothing useful yields nothing
    mm = getWindow(config)
    assert (None,None,None) == mm, 'Expected None*3, got %s'%(str(mm))

    start = datetime.datetime(2000,1,2,12,12)
    delta = datetime.timedelta(seconds=300)
    end = start+delta

    procDay = datetime.date(2001,9,8)
    procStart = datetime.datetime(2001,9,8)
    procDelta = datetime.timedelta(days=1)
    procEnd = procStart+procDelta

    # check that just one kwarg raises SystemExit
    for lone in ({'startWindow':start}, {'endWindow':start}, {'deltaWindow':delta}):
      assertExits(config,**lone)

    # check that just one config raises SystemExit
    for loneConfig in ({'startWindow':start}, {'endWindow':end}, {'deltaWindow':delta}):
      assertExits(loneConfig)

    # check that processingDay doesn't help
    for lone in ({'deltaWindow':delta}, {'startWindow':start}, {'endWindow':start}):
      assertExits(config,processingDay=procDay,**lone)
    # ... with config either
    for loneConfig in ({'startWindow':start}, {'endWindow':end}):
      assertExits(loneConfig,processingDay=procDay)
    assertExits({'deltaWindow':delta,'processingDay':procDay})

    # check that any two kwargs work correctly
    expected = (start,delta,end)
    pairs = (
      {'endWindow':end,'startWindow':start},
      {'deltaWindow':delta,'startWindow':start},
      {'endWindow':end,'deltaWindow':delta},
    )
    for pair in pairs:
      mm = getWindow(config,**pair)
      assert expected == mm, 'But got %s'%(str(mm))

    # and that two configs work
    mm = getWindow({'endWindow':end,'startWindow':start})
    assert expected == mm, 'But got %s'%(str(mm))

    # and that one of each works  (not full test because using transparent box testing)
    # only checks that the call completes; the result is deliberately not asserted
    mm = getWindow({'deltaWindow':delta},startWindow=start)

    # check that three good kwargs works
    mm = getWindow(config,endWindow=end,deltaWindow=delta,startWindow=start)
    assert expected == mm, 'But got %s'%(str(mm))

    # check that three incompatible kwargs fails
    badDelta = delta + delta
    assertExits(config,endWindow=end,deltaWindow=badDelta,startWindow=start)

    # check that good processingDay works as expected
    expectedDay = (procStart,procDelta,procEnd)
    mm = getWindow(config,processingDay=procDay)
    assert expectedDay == mm, 'But got %s'%(str(mm))

    #check that invalid date (because it is a datetime) fails
    extraProcDay = datetime.datetime(2001,9,8,7,6,5)
    assertExits(config,processingDay=extraProcDay)

    # check that kwargs beats config (not full test because using transparent box testing)
    otherProcDay = datetime.datetime(2003,9,8,7,6,5)
    mm = getWindow({'processingDay':otherProcDay},processingDay=procDay)
    assert expectedDay == mm, 'But got %s'%(str(mm))

Example #6

0

Show file

File: mtbf.py Project: AlinT/socorro

  def processOneMtbfWindow(self, **kwargs):
    """
    Aggregate uptime from reports for one time window and insert the result rows into resultTable.

    The window (start, size, end) is resolved by cron_util.getProcessingWindow from
    self.configContext and kwargs.

    The calculation may be limited with one or more of the following. Each is taken from kwargs
    when present there, otherwise from a non-empty self.configContext entry of the same name:
    - product: name the product to be looked at
    - version: name the version of the product to be looked at
    - os_name: name the OS to be looked at
    - os_version: substring of the OS version to be looked at (matched with LIKE %value%)
    NOTE(review): these limits are only examined when kwargs is non-empty, so configContext-only
    limits are ignored for a call made without any kwargs - confirm that is intended.

    Rows whose (os_name, os_version) has no id according to socorro_cia.IdCache are dropped.
    A psycopg2.IntegrityError is rolled back and logged as a warning rather than raised.
    """
    cur = self.connection.cursor()
    # NOTE(review): confirm this four-argument call matches the signature of the
    # cron_util.getProcessingWindow in use.
    startWindow,deltaWindow,endWindow = cron_util.getProcessingWindow(self.configContext,resultTable,cur,self.logger,**kwargs)
    if self.debugging:
      self.logger.debug("startWindow: %s, deltaWindow: %s, endWindow: %s",startWindow,deltaWindow,endWindow)
    # sqlDict serves twice: python %-substitution of identifiers into the sql text below, and
    # bound parameters (the doubled %% placeholders) when the statement is executed.
    sqlDict = {
      'configTable': configTable, 'startDate':'start_date', 'endDate':'end_date',
      'windowStart':startWindow, 'windowEnd':endWindow, 'windowSize':deltaWindow,
      }
    extraWhereClause = ''
    if kwargs:
      # (name, where-clause fragment, True when the value is wrapped as a LIKE pattern)
      restrictions = (
        ('product',    'p.product = %(product)s',          False),
        ('version',    'p.version = %(version)s',          False),
        ('os_name',    'r.os_name = %(os_name)s',          False),
        ('os_version', 'r.os_version LIKE %(os_version)s', True),
        )
      specs = []
      for name,clause,isPattern in restrictions:
        if name in kwargs:
          value = kwargs[name]
        elif name in self.configContext and getattr(self.configContext,name): # ignore empty
          value = getattr(self.configContext,name)
        else:
          continue
        if isPattern:
          value = "%%%s%%"%(value)
        specs.append(clause)
        sqlDict[name] = value
      if specs:
        extraWhereClause = ' AND '+' AND '.join(specs)
    sqlDict['extraWhereClause'] = extraWhereClause
    # per ss: mtbf runs for 60 days from the time it starts: Ignore cfg.end
    sql = """SELECT SUM(cast(r.uptime as float)) AS sum_uptime_seconds,
                  COUNT(r.*) AS report_count,
                  p.id,
                  -- o.id, -- would go here
                  timestamp %%(windowEnd)s,
                  interval %%(windowSize)s,
                  r.os_name,
                  r.os_version
           FROM reports r JOIN productdims p ON r.product = p.product AND r.version = p.version
                          JOIN %(configTable)s cfg ON p.id = productdims_id
           WHERE NOT cfg.ignore
                 AND cfg.%(startDate)s <= %%(windowStart)s
                 AND %%(windowStart)s <= cfg.%(startDate)s+interval '60 days' -- per ss
                 AND %%(windowStart)s <= r.date_processed AND r.date_processed < %%(windowEnd)s
                 %(extraWhereClause)s
              GROUP BY p.id, r.os_name,r.os_version
          """%(sqlDict)
    inSql = """INSERT INTO %s
                  (sum_uptime_seconds, report_count, productdims_id, osdims_id, window_end, window_size)
            VALUES(%%s,%%s,%%s,%%s,%%s,%%s)"""%resultTable
    try:
      idCache = socorro_cia.IdCache(cur)
      if self.debugging:
        self.logger.debug("Using select sql:\n%s",cur.mogrify(sql,sqlDict))
      cur.execute(sql,sqlDict)
      self.connection.rollback()
      data = cur.fetchall()
      # Reorder each select row into insert column order, replacing (os_name, os_version) with
      # its id; the id is looked up once per row and rows without an id are skipped.
      idData = []
      for d in data:
        osId = idCache.getOsId(d[5],d[6])
        if osId:
          idData.append([d[0],d[1],d[2],osId,d[3],d[4]])
      if self.debugging:
        # count the rows that will actually be inserted, not the unfiltered select result
        self.logger.debug("Will insert %s new rows",len(idData))
      cur.executemany(inSql,idData)
      self.connection.commit()

    except psycopg2.IntegrityError:
      # Original rationale: an aggregate over no matching reports is null and so violates a
      # not-null constraint on the result table; this is relied on to keep facts with 0 out of db.
      # For properly configured products, this shouldn't happen very often
      self.connection.rollback()
      # was "%sWindow" with an undefined name, which raised NameError inside this handler
      self.logger.warn("No facts aggregated for day %s",startWindow)