def testProcessDateInterval(self):
  """
  testTopCrashesByUrl:TestTopCrashesByUrl.testProcessDateInterval(slow=7)
  Takes a long time, first to set up the data (about 1.5 seconds), then to process it several times
  """
  global me
  cursor = self.connection.cursor()
  ## Set up: seed the reports table once; each interval below re-processes that same data
  dbtutil.fillReportsTable(cursor, createUrls=True, multiplier=2, signatureCount=83) # just some data...
  self.connection.commit()
  regressionText = 'This is (just) a regression test. Did you change the data somehow? (%s)'

  def scalarQuery(sql):
    # Fetch one aggregate value; roll back so the transaction stays clean for the next query.
    cursor.execute(sql)
    self.connection.rollback()
    return cursor.fetchone()[0]

  def truncateSummaryTables():
    # Empty both summary tables between interval runs.
    cursor.execute("delete from top_crashes_by_url; delete from top_crashes_by_url_signature")
    self.connection.commit()

  ## assure we have an empty playing field
  assert 0 == scalarQuery("SELECT COUNT(*) from top_crashes_by_url")
  # (startDate, endDate, expected top_crashes_by_url rows, expected top_crashes_by_url_signature rows)
  intervalExpectations = [
    (datetime.datetime(2008, 1, 1), datetime.datetime(2008, 1, 6), 35, 38),
    (datetime.datetime(2008, 1, 4), datetime.datetime(2008, 1, 8), 31, 32),
    (datetime.datetime(2008, 1, 1), datetime.datetime(2008, 3, 3), 483, 514),
  ]
  for index, (startDate, endDate, expectedUrls, expectedSignatures) in enumerate(intervalExpectations):
    if index:
      # every interval after the first starts from freshly emptied summary tables
      truncateSummaryTables()
    processor = tcbu.TopCrashesByUrl(copy.copy(me.config))
    processor.processDateInterval(startDate=startDate, endDate=endDate)
    urlCount = scalarQuery("SELECT COUNT(id) from top_crashes_by_url")
    assert expectedUrls == urlCount, regressionText % (urlCount)
    signatureCount = scalarQuery("SELECT COUNT(*) from top_crashes_by_url_signature")
    assert expectedSignatures == signatureCount, regressionText % (signatureCount)
def createData(config,logger): # Now do the work in several steps connection = sdatabase.Database(config).connection() #connection = psycopg2.connect("host=%(databaseHost)s dbname=%(databaseName)s user=%(databaseUserName)s password=%(databasePassword)s"%config) cursor = connection.cursor() testDB = TestDB() try: testDB.removeDB(config,logger) if config.get('drop-all'): print "Dropped the database tables ..." return print "Creating the database tables..." testDB.createDB(config,logger) print "populating the dimensions tables..." processingDays,ignore = dbtestutil.fillMtbfTables(cursor,limit=int(config.get('product-os-count'))) startDays = [x[0] for x in processingDays] multiplier = int(config.get('repeat-count')) print "populating the reports table (takes about %d seconds)..."%(int(1.7+1.2*multiplier)) dbtestutil.fillReportsTable(cursor,createUrls=True,doFillMtbfTables=False, numUrls=100, multiplier=multiplier,signatureCount=int(config.get('signature-count'))) connection.commit() extras = [] print "All done populating the 'raw' data" if config.get('mtbf-fill'): blurb = "" cost = 0.20 + multiplier*0.15 if cost > 1.0: blurb = ("(takes about %2.1f seconds)"%cost) print "Filling the time_before_failure table %s..."%blurb # R=1: .35 seconds; 2:0.49s; 3:.064s; 4:.9 ## = .20 + R*.15 starter = None ender = None mtbfInstance = mtbf.Mtbf(config,logger) for startDay in startDays: if not starter: starter = startDay ender = startDay mtbfInstance.processOneMtbfWindow(processingDay=startDay) extras.append(" - Time before fail: for days in %s through %s"%(starter,ender)) if config.get('sig-fill'): print "Filling the top_crashes_by_signature table (takes about %s seconds)..."%(20+11*multiplier) # R=1: 27.3 secs; 2:38.5s; 3=48.3 ## = 20 +R*11 tc = topcrasher.TopCrashesBySignature(config) tc.processDateInterval(startDate=startDays[0],endDate = startDays[-1]) extras.append(" - Top crash by sig: for days in %s through %s"%(startDays[0],startDays[-1])) if config.get('url-fill'): print 
"Filling the top_crashes_by_url table (takes about %s seconds)..."%(4+multiplier*2) # R=1: 4 secs; 2: 5s, 3: 7.6 ## = 4+R*2 logger.info("Filling the top_crashes_by_url table (takes about %s seconds)..."%(4+multiplier*2)) tu = topcrashbyurl.TopCrashesByUrl(config) tu.processDateInterval(startDate=startDays[0],endDate = startDays[-1]) extras.append(" - Top crash by url: for days in %s through %s"%(startDays[0],startDays[-1])) print "DONE populating the database tables" if extras: print "\n".join(extras) finally: logger.info("All done. Closing connection") connection.close()
def testCountCrashesByUrlInWindow(self):
  """testTopCrashesByUrl:TestTopCrashesByUrl.testCountCrashesByUrlInWindow(self):"""
  global me
  cursor = self.connection.cursor()
  # Seed the reports table with url-bearing crash rows to count against.
  dbtutil.fillReportsTable(cursor, createUrls=True, multiplier=2, signatureCount=83) # just some data...
  self.connection.commit()
  oneDay = datetime.timedelta(days=1)
  windowStart = datetime.datetime(2008, 1, 1)
  windowEnd = windowStart + oneDay
  # test /w/ 'normal' params
  counter = tcbu.TopCrashesByUrl(copy.copy(me.config))
  hitRows = counter.countCrashesByUrlInWindow(startWindow=windowStart, deltaWindow=oneDay)
  # the following are JUST regression tests: The data has been only very lightly examined to be sure it makes sense.
  assert 24 == len(hitRows), 'This is (just) a regression test. Did you change the data somehow? (%s)' % len(hitRows)
  for hitRow in hitRows:
    assert 1 == hitRow[0]
  # test /w/ small maximumUrls
  counter = tcbu.TopCrashesByUrl(copy.copy(me.config), maximumUrls=50)
  hitRows = counter.countCrashesByUrlInWindow(startWindow=datetime.datetime(2008, 1, 1), endWindow=windowEnd)
  assert 24 == len(hitRows), 'This is (just) a regression test. Did you change the data somehow? (%s)' % len(hitRows)
  for hitRow in hitRows:
    assert 1 == hitRow[0]
  # test /w/ minimumHitsPerUrl larger
  counter = tcbu.TopCrashesByUrl(copy.copy(me.config), minimumHitsPerUrl=2)
  hitRows = counter.countCrashesByUrlInWindow(startWindow=datetime.datetime(2008, 1, 1), endWindow=windowEnd)
  assert 24 == len(hitRows), len(hitRows)
  for hitRow in hitRows:
    assert 1 == hitRow[0]
  # test /w/ shorter window
  halfDay = datetime.timedelta(hours=12)
  counter = tcbu.TopCrashesByUrl(copy.copy(me.config), deltaWindow=halfDay)
  hitRows = counter.countCrashesByUrlInWindow(startWindow=datetime.datetime(2008, 1, 1))
  assert 12 == len(hitRows), 'This is (just) a regression test. Did you change the data somehow? (%s)' % len(hitRows)
  for hitRow in hitRows:
    assert 1 == hitRow[0]
  # test a different day, to be sure we get different data
  counter = tcbu.TopCrashesByUrl(copy.copy(me.config))
  hitRows = counter.countCrashesByUrlInWindow(startWindow=datetime.datetime(2008, 1, 11), deltaWindow=oneDay)
  assert 57 == len(hitRows), 'This is (just) a regression test. Did you change the data somehow? (%s)' % len(hitRows)
  for hitRow in hitRows[:3]:
    assert 2 == hitRow[0]
  for hitRow in hitRows[3:]:
    assert 1 == hitRow[0]
def testSaveTruncatedData(self):
  """
  testTopCrashesByUrl:TestTopCrashesByUrl.testSaveTruncatedData(slow=2)
  This is a reasonably realistic amount of time (about 1.5 seconds) to handle about 150 reports
  """
  global me
  cursor = self.connection.cursor()
  ## Set up
  dbtutil.fillReportsTable(cursor,createUrls=True,multiplier=2,signatureCount=83) # just some data...
  self.connection.commit()
  # ... now assure some duplicates: grab 4 existing rows and derive near-copies from them
  sqls = "SELECT uuid, client_crash_date, install_age, last_crash, uptime, date_processed, success, signature, url, product, version, os_name, os_version from reports where date_processed >= '2008-01-01' and date_processed < '2008-01-02' LIMIT 4"
  cursor.execute(sqls)
  self.connection.rollback() # db not altered
  rows3 = cursor.fetchall()
  add11 = datetime.timedelta(seconds=1,microseconds=1000)
  addData = []
  for i in range(3):
    # NOTE(review): `r` is appended to addData twice and mutated in between AND after the
    # first append, so BOTH appended entries are the same list object and end up holding the
    # second round of values ('-fead-' uuid, 'js_blatherskytes' signature). The '-f00f-'
    # variant built first never reaches the database. This looks unintentional, but the
    # regression counts asserted below were evidently recorded against this behavior —
    # confirm intent before "fixing" (a fix would be `addData.append(list(r))`).
    # NOTE(review): the '-dead-' -> '-f00f-' replace also presumably matches the uuids
    # produced by dbtutil.fillReportsTable — verify; if not, it is a silent no-op.
    r = list(rows3[i])
    r[0] = r[0].replace('-dead-','-f00f-')
    r[1] += add11
    r[2] += 1
    r[3] += 1
    r[7] = rows3[i+1][7]  # borrow the next row's signature
    addData.append(r)
    r[0] = r[0].replace('-f00f-','-fead-')
    r[1] += add11
    r[2] += 1
    r[3] += 1
    r[7] = 'js_blatherskytes'
    addData.append(r)
  sqli = """INSERT INTO reports (uuid, client_crash_date, install_age, last_crash, uptime, date_processed, success, signature, url, product, version, os_name, os_version) VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
  # Two fully hand-built rows: one with a long url (will be truncated at 25 chars) and one short.
  addData.extend([ ['b965de73-ae90-b936-deaf-03ae20081225','2007-12-31 23:59:50',9000,110,222,'2008-01-01 11:12:13',True,'UserCallWinProcCheckWow','http://www.mozilla.org/projects/minefield/a','Firefox','3.0.9','Windows NT','5.1.2600 Service Pack 2'], ['b965de73-ae90-b935-deaf-03ae20081225','2007-12-31 23:59:40',9009,220,333,'2008-01-01 11:12:14',True,'UserCallWinProcCheckWow','http://yachats/uncwiki/LarsLocalPortal/b', 'Firefox','3.0.9','Windows NT','5.1.2600 Service Pack 2'], ])
  cursor.executemany(sqli,addData)
  self.connection.commit()
  config = copy.copy(me.config)
  startWindow = datetime.datetime(2008,1,1)
  deltaWindow = datetime.timedelta(days=1)
  ## On your mark... count crashes with urls truncated to 25 characters
  t = tcbu.TopCrashesByUrl(config,truncateUrlLength=25)
  data = t.countCrashesByUrlInWindow(startWindow = startWindow, deltaWindow = deltaWindow)
  ## assure we have an empty playing field
  cursor.execute("SELECT COUNT(*) from top_crashes_by_url")
  self.connection.rollback()
  assert 0 == cursor.fetchone()[0]
  ## Call the method
  t.saveData(startWindow,data)
  # NOTE(review): stale comment said "expect 99 rows"; the assert pins 30.
  cursor.execute("SELECT COUNT(id) from top_crashes_by_url")
  self.connection.rollback()
  count = cursor.fetchone()[0]
  assert 30 == count, 'This is (just) a regression test. Did you change the data somehow? (%s)'%(count)
  # NOTE(review): stale comment said "expect 80 distinct urls"; the assert pins 17.
  cursor.execute("SELECT COUNT(distinct urldims_id) from top_crashes_by_url")
  self.connection.rollback()
  count = cursor.fetchone()[0]
  assert 17 == count, 'This is (just) a regression test. Did you change the data somehow? (%s)'%(count)
  # Only six urls have more than one hit; truncation merged one group of four.
  cursor.execute("SELECT count from top_crashes_by_url where count > 1 order by count")
  self.connection.rollback()
  data = cursor.fetchall()
  assert [(2,), (2,), (2,), (2,), (2,), (4,)] == data, 'This is (just) a regression test. Did you change the data somehow? (%s)'%(str(data))
  cursor.execute("SELECT COUNT(top_crashes_by_url_id) from top_crashes_by_url_signature")
  self.connection.rollback()
  count = cursor.fetchone()[0]
  assert 38 == count, 'This is (just) a regression test. Did you change the data somehow? (%s)'%(count)
  cursor.execute("SELECT COUNT(distinct top_crashes_by_url_id) from top_crashes_by_url_signature")
  self.connection.rollback()
  count = cursor.fetchone()[0]
  assert 30 == count, 'This is (just) a regression test. Did you change the data somehow? (%s)'%(count)
  # Expect 3 rows with sums of 2 and three rows with counts of 2, none with both
  cursor.execute("SELECT count, COUNT(top_crashes_by_url_id) AS sum FROM top_crashes_by_url_signature GROUP BY top_crashes_by_url_id, count ORDER BY sum DESC, count DESC LIMIT 6")
  self.connection.rollback()
  data = cursor.fetchall()
  assert 6 == len(data)
  assert [(1, 4L), (1, 2L), (1, 2L), (1, 2L), (1, 2L), (1, 2L)] == data, 'This is (just) a regression test. Did you change the data somehow? (%s)'%(str(data))