def countCrashesByUrlInWindow(self,**kwargs):
    """
    Count crashes per url inside a single processing window, regardless of
    platform or operating system.

    The window is resolved by cron_util.getProcessingWindow() from
    self.configContext plus any overrides given in kwargs. The resolved
    window size is written back to self.configContext['deltaWindow'].

    Returns the fetched rows, each holding (count, url), ordered by
    descending count and limited to configContext's fatMaximumUrls.
    Returns [] when no window start could be determined.
    """
    cursor = self.connection.cursor()
    windowStart, windowSize, _unusedEnd = cron_util.getProcessingWindow(
        self.configContext, resultTable, self.productVersionRestriction,
        cursor, logger, **kwargs)
    if not windowStart:
        # the reason does not matter here: without a start there is no window
        return []
    self.configContext['deltaWindow'] = windowSize
    bounds = {
        'startDate': windowStart,
        'endDate': windowStart + windowSize,
    }
    # Table/column names and the row limit are filled in from configContext
    # here; the doubled-percent date bounds are left for the driver to bind.
    # fatMaximumUrls is a HACK to assure enough cooked urls
    queryTemplate = (
        "SELECT COUNT(r.id), r.url FROM %(reportsTable)s r "
        "JOIN productdims p ON r.product = p.product AND r.version = p.version "
        "JOIN product_visibility cfg ON p.id = cfg.productdims_id "
        "WHERE r.url IS NOT NULL AND r.url <> '' "
        "AND %%(startDate)s <= r.%(dateColumn)s AND r.%(dateColumn)s < %%(endDate)s "
        "AND cfg.start_date <= r.%(dateColumn)s AND r.%(dateColumn)s <= cfg.end_date "
        "%(productVersionSqlRestrictionPhrase)s "
        "GROUP BY r.url ORDER BY COUNT(r.id) desc LIMIT %(fatMaximumUrls)s"
    )
    topUrlSql = queryTemplate % self.configContext
    cursor.execute(topUrlSql, bounds)
    rows = cursor.fetchall()  # count (implicit rank) and url
    # per suggestion in psycopg mailing list: rollback if no db modification
    self.connection.rollback()
    if rows:
        return rows
    logger.warn("No url crash data collected between %(startDate)s and %(endDate)s", bounds)
    return rows
def processDateInterval(self,**kwargs):
    """
    Walk from self.startDate to self.endDate in steps of one processing
    window, gathering, organizing and storing the summary data for each step.

    kwargs read directly here:
      dateColumnName: temporarily replaces self.dateColumnName for this call
      summaryCrashes: a map handed to the first window's extraction only;
                      later windows start from an empty map
    All kwargs are also forwarded to cron_util.getProcessingWindow(), from
    which only the window size is used.

    The connection is closed when the loop ends, whether or not it failed.
    Returns the sum of the values returned by storeFacts().
    """
    summaryCrashes = kwargs.get('summaryCrashes',{})
    originalColumn = self.dateColumnName
    self.dateColumnName = kwargs.get('dateColumnName', originalColumn)
    mustRestoreColumn = (originalColumn != self.dateColumnName)
    cursor = self.connection.cursor()
    _ignoredStart, deltaWindow, _ignoredEnd = cron_util.getProcessingWindow(
        self.configContext, resultTable, self.productVersionRestriction,
        cursor, logger, **kwargs)
    # the loop always begins at the configured start date
    startWindow = self.startDate
    try:
        fullCount = 0
        windowEnd = startWindow + deltaWindow
        while windowEnd <= self.endDate:
            logger.debug("%s - Processing with interval from %s, size=%s)",
                         threading.currentThread().getName(), startWindow, deltaWindow)
            summaryCrashes = self.extractDataForPeriod(startWindow, windowEnd, summaryCrashes)
            data = self.fixupCrashData(summaryCrashes, windowEnd, deltaWindow)
            label = "Start: %s, size=%s"%(startWindow,deltaWindow)
            fullCount += self.storeFacts(data, label)
            # only the first window may be seeded by the caller
            summaryCrashes = {}
            startWindow = windowEnd
            windowEnd = startWindow + deltaWindow
    finally:
        self.connection.close()
        if mustRestoreColumn:
            self.dateColumnName = originalColumn
    return fullCount
def processDateInterval(self, **kwargs):
    """
    Count and save crashes-by-url for every window in a date range.

    The range (startDate, endDate) comes from cron_util.getProcessingDates()
    and the window (startWindow, deltaWindow) from
    cron_util.getProcessingWindow(); both are given self.configContext and
    kwargs. kwargs gains a 'defaultDeltaWindow' entry if it has none.

    When no startWindow is resolved the loop starts at startDate; when no
    deltaWindow is resolved the module's defaultDeltaWindow is the step.
    A window is handled only while startWindow + deltaWindow < endDate.
    Windows that yield no data are logged and skipped. Returns None.
    """
    cursor = self.connection.cursor()
    kwargs.setdefault('defaultDeltaWindow',defaultDeltaWindow)
    startDate,deltaDate,endDate = cron_util.getProcessingDates(
        self.configContext, resultTable, self.productVersionRestriction,
        cursor, logger, **kwargs)
    startWindow,deltaWindow,endWindow = cron_util.getProcessingWindow(
        self.configContext, resultTable, self.productVersionRestriction,
        cursor, logger, **kwargs)
    if not startWindow:
        startWindow = startDate
    if not deltaWindow:
        deltaWindow = defaultDeltaWindow
    # Log only after the fallbacks above, so the message shows the start and
    # step the loop really uses rather than a possibly empty raw value.
    logger.info("Starting loop from %s up to %s step (%s)",
                startWindow.isoformat(), endDate.isoformat(), deltaWindow)
    while startWindow + deltaWindow < endDate:
        data = self.countCrashesByUrlInWindow(startWindow=startWindow,deltaWindow=deltaWindow)
        if data:
            logger.info("Saving %s items in window starting at %s",len(data),startWindow)
            self.saveData(startWindow,data)
        else:
            logger.info("Window starting at %s had no data",startWindow)
        # whether or not we saved some data, advance to next slot
        startWindow += deltaWindow
    logger.info("Done processIntervals")
def processDateInterval(self, **kwargs):
    """
    Call processOneMtbfWindow once for each window in a date range.

    The range comes from cron_util.getProcessingDates() and the window from
    cron_util.getProcessingWindow(); both are given self.configContext and
    kwargs. The values are then normalized:
      - with neither a startDate nor a startWindow, nothing runs
      - startDate is moved back to startWindow if it is missing or later
      - deltaWindow defaults to one day
      - endDate is capped at the most recent local midnight
    Every window that fits entirely inside [startDate, endDate] is
    processed, including one that ends exactly on endDate.

    Each call to processOneMtbfWindow receives kwargs with 'startWindow' and
    'deltaWindow' set for that window; other entries are passed unchanged.

    Returns the number of windows processed, or 0 if nothing ran.
    """
    cur = self.connection.cursor()
    now = datetime.datetime.now()
    startDate, deltaDate, endDate = cron_util.getProcessingDates(self.configContext,resultTable,cur,self.logger,**kwargs)
    startWindow,deltaWindow,endWindow = cron_util.getProcessingWindow(self.configContext,resultTable,cur,self.logger,**kwargs)
    if not startDate and not startWindow:
        self.logger.warn("MTBF (%s): No startDate, no startWindow. Did not run.",now)
        return 0
    if not startWindow: # we are guaranteed a startDate after the test above
        startWindow = startDate
    if not startDate or startDate > startWindow:
        startDate = startWindow
    if not deltaWindow:
        deltaWindow = datetime.timedelta(days=1)
    thisMidnight = now.replace(hour=0,minute=0,second=0,microsecond=0)
    if not endDate or endDate > thisMidnight:
        endDate = thisMidnight
    if startDate + deltaWindow > endDate:
        self.logger.warn("MTBF (%s) startDate (%s) too close to endDate (%s). Did not run.",now, startDate,endDate)
        return 0
    count = 0
    if self.debugging:
        self.logger.debug(
            "mtbf.processDateInterval: startDate: %s, deltaDate: %s, endDate: %s "
            "startWindow: %s, deltaWindow: %s, endWindow: %s ",
            startDate,deltaDate,endDate,startWindow,deltaWindow,endWindow)
    kwargs['deltaWindow'] = deltaWindow
    startWindow = startDate
    # '<=' matches the guard above: a window that ends exactly on endDate
    # passed the "too close" test, so it must be processed rather than
    # silently dropped.
    while startWindow + deltaWindow <= endDate:
        kwargs['startWindow'] = startWindow
        self.processOneMtbfWindow(**kwargs)
        startWindow += deltaWindow
        count += 1
    return count
def testGetProcessingWindow(self):
    """
    Exercise cron_util.getProcessingWindow() with combinations of
    startWindow / deltaWindow / endWindow / processingDay supplied through
    the config map, through kwargs, or both.

    Expectations checked here: fewer than two window values exits, any two
    consistent values yield the full (start, delta, end) triple, three
    inconsistent values exit, and a date-valued processingDay yields that
    whole day.
    """
    cursor = self.connection.cursor()
    config = {}

    def getWindow(cfg, **kw):
        # the call under test, with the arguments that never vary
        return cron_util.getProcessingWindow(cfg, self.tableName, None, cursor, me.fileLogger, **kw)

    def assertExits(cfg, **kw):
        # the same call, expected to terminate with SystemExit
        assert_raises(SystemExit, cron_util.getProcessingWindow,
                      cfg, self.tableName, None, cursor, me.fileLogger, **kw)

    # check that a really empty system fails
    assertExits(config)
    self.createBunny()

    # check that nothing useful yields nothing
    mm = getWindow(config)
    assert (None,None,None) == mm, 'Expected None*3, got %s'%(str(mm))

    start = datetime.datetime(2000,1,2,12,12)
    delta = datetime.timedelta(seconds=300)
    end = start+delta
    procDay = datetime.date(2001,9,8)
    procStart = datetime.datetime(2001,9,8)
    procDelta = datetime.timedelta(days=1)
    procEnd = procStart+procDelta

    # check that just one kwarg raises SystemExit
    assertExits(config, startWindow=start)
    assertExits(config, endWindow=start)
    assertExits(config, deltaWindow=delta)

    # check that just one config raises SystemExit
    assertExits({'startWindow':start})
    assertExits({'endWindow':end})
    assertExits({'deltaWindow':delta})

    # check that processingDay doesn't help
    assertExits(config, deltaWindow=delta, processingDay=procDay)
    assertExits(config, startWindow=start, processingDay=procDay)
    assertExits(config, endWindow=start, processingDay=procDay)

    # ... with config either
    assertExits({'startWindow':start}, processingDay=procDay)
    assertExits({'endWindow':end}, processingDay=procDay)
    assertExits({'deltaWindow':delta,'processingDay':procDay})

    # check that any two kwargs work correctly
    mm = getWindow(config, endWindow=end, startWindow=start)
    assert (start,delta,end) == mm, 'But got %s'%(str(mm))
    mm = getWindow(config, deltaWindow=delta, startWindow=start)
    assert (start,delta,end) == mm, 'But got %s'%(str(mm))
    mm = getWindow(config, endWindow=end, deltaWindow=delta)
    assert (start,delta,end) == mm, 'But got %s'%(str(mm))

    # and that two configs work
    mm = getWindow({'endWindow':end,'startWindow':start})
    assert (start,delta,end) == mm, 'But got %s'%(str(mm))

    # and that one of each works (not full test because using transparent box testing)
    mm = getWindow({'deltaWindow':delta}, startWindow=start)
    # this result used to be computed and then discarded without a check
    assert (start,delta,end) == mm, 'But got %s'%(str(mm))

    # check that three good kwargs works
    mm = getWindow(config, endWindow=end, deltaWindow=delta, startWindow=start)
    assert (start,delta,end) == mm, 'But got %s'%(str(mm))

    # check that three incompatible kwargs fails
    badDelta = delta + delta
    assertExits(config, endWindow=end, deltaWindow=badDelta, startWindow=start)

    # check that good processingDay works as expected
    mm = getWindow(config, processingDay=procDay)
    assert (procStart,procDelta,procEnd) == mm, 'But got %s'%(str(mm))

    # check that invalid date (because it is a datetime) fails
    extraProcDay = datetime.datetime(2001,9,8,7,6,5)
    assertExits(config, processingDay=extraProcDay)

    # check that kwargs beats config (not full test because using transparent box testing)
    otherProcDay = datetime.datetime(2003,9,8,7,6,5)
    mm = getWindow({'processingDay':otherProcDay}, processingDay=procDay)
    assert (procStart,procDelta,procEnd) == mm, 'But got %s'%(str(mm))
def processOneMtbfWindow(self, **kwargs):
    """
    Aggregate uptime from reports for one window and insert the result rows
    into the result table.

    The window (startWindow, deltaWindow, endWindow) is resolved by
    cron_util.getProcessingWindow() from self.configContext and kwargs.

    The aggregation may be limited by any of the following. Each is taken
    from kwargs when present there, otherwise from a non-empty value of the
    same name in self.configContext:
      - product: name of the product to be looked at
      - version: name of the version of the product to be looked at
      - os_name: name of the OS to be looked at
      - os_version: matched as a substring (SQL LIKE) of the OS version

    Rows whose (os_name, os_version) has no id in the IdCache are skipped.
    A psycopg2.IntegrityError is rolled back and logged as a warning instead
    of being raised. Returns None.
    """
    cur = self.connection.cursor()
    startWindow,deltaWindow,endWindow = cron_util.getProcessingWindow(self.configContext,resultTable,cur,self.logger,**kwargs)
    if self.debugging:
        self.logger.debug("startWindow: %s, deltaWindow: %s, endWindow: %s",startWindow,deltaWindow,endWindow)
    sqlDict = {
        'configTable': configTable,
        'startDate': 'start_date',
        'endDate': 'end_date',
        'windowStart': startWindow,
        'windowEnd': endWindow,
        'windowSize': deltaWindow,
    }
    extraWhereClause = ''
    # NOTE(review): as before, the configContext restrictions are only
    # consulted when at least one kwarg was passed -- confirm this is intended.
    if kwargs:
        restrictions = (
            ('product', 'p.product = %(product)s'),
            ('version', 'p.version = %(version)s'),
            ('os_name', 'r.os_name = %(os_name)s'),
            ('os_version', 'r.os_version LIKE %(os_version)s'),
        )
        specs = []
        for name, clause in restrictions:
            if name in kwargs:
                value = kwargs[name]
            elif name in self.configContext and getattr(self.configContext, name): # ignore empty
                value = getattr(self.configContext, name)
            else:
                continue
            if name == 'os_version':
                # substring match: surround the value with LIKE wildcards
                value = "%%%s%%" % (value,)
            specs.append(clause)
            sqlDict[name] = value
        if specs:
            extraWhereClause = ' AND '+' AND '.join(specs)
    sqlDict['extraWhereClause'] = extraWhereClause

    # Single-percent names are filled in from sqlDict right here; the
    # doubled-percent ones are left for the driver to bind at execute time.
    # The SQL '--' comments run to end of line, so the line breaks inside
    # this statement are significant.
    # per ss: mtbf runs for 60 days from the time it starts: Ignore cfg.end
    sql = """SELECT SUM(cast(r.uptime as float)) AS sum_uptime_seconds,
                    COUNT(r.*) AS report_count,
                    p.id,
                    -- o.id, -- would go here
                    timestamp %%(windowEnd)s,
                    interval %%(windowSize)s,
                    r.os_name,
                    r.os_version
             FROM reports r JOIN productdims p ON r.product = p.product AND r.version = p.version
                            JOIN %(configTable)s cfg ON p.id = productdims_id
             WHERE NOT cfg.ignore
               AND cfg.%(startDate)s <= %%(windowStart)s
               AND %%(windowStart)s <= cfg.%(startDate)s+interval '60 days' -- per ss
               AND %%(windowStart)s <= r.date_processed AND r.date_processed < %%(windowEnd)s
               %(extraWhereClause)s
             GROUP BY p.id, r.os_name,r.os_version
             """%(sqlDict)
    inSql = """INSERT INTO %s
                 (sum_uptime_seconds, report_count, productdims_id, osdims_id, window_end, window_size)
               VALUES(%%s,%%s,%%s,%%s,%%s,%%s)"""%resultTable
    try:
        idCache = socorro_cia.IdCache(cur)
        if self.debugging:
            self.logger.debug("Using select sql:\n%s",cur.mogrify(sql,sqlDict))
        cur.execute(sql,sqlDict)
        # read the rows before ending the read-only transaction
        data = cur.fetchall()
        self.connection.rollback()
        idData = []
        for d in data:
            # look the OS id up once per row; rows without an id are dropped
            osId = idCache.getOsId(d[5],d[6])
            if osId:
                idData.append([d[0],d[1],d[2],osId,d[3],d[4]])
        if self.debugging:
            self.logger.debug("Will insert %s new rows",len(idData))
        cur.executemany(inSql,idData)
        self.connection.commit()
    except psycopg2.IntegrityError:
        # if the inner select has no matches then AVG(uptime) is null, thus
        # violating the avg_seconds not-null constraint.
        # This is good, we depend on this to keep facts with 0 out of db
        # For properly configured products, this shouldn't happen very often
        self.connection.rollback()
        self.logger.warn("No facts aggregated for day %s", startWindow)