def FetchTimeseriesData(args):
  def _MatchesAllFilters(test_path):
    return all(f in test_path for f in args.filters)

  api = dashboard_api.PerfDashboardCommunicator(args)
  with tables.DbSession(args.database_file) as con:
    # Get test_paths.
    if args.benchmark is not None:
      test_paths = api.ListTestPaths(args.benchmark, sheriff=args.sheriff)
    elif args.input_file is not None:
      test_paths = list(_ReadTestPathsFromFile(args.input_file))
    elif args.study is not None:
      test_paths = list(args.study.IterTestPaths(api))
    else:
      raise ValueError('No source for test paths specified')

    # Apply --filter's to test_paths.
    if args.filters:
      test_paths = filter(_MatchesAllFilters, test_paths)

    num_found = len(test_paths)
    print '%d test paths found!' % num_found

    # Filter out test_paths already in cache.
    if args.use_cache:
      test_paths = list(_IterStaleTestPaths(con, test_paths))
      num_skipped = num_found - len(test_paths)
      if num_skipped:
        print '(skipping %d test paths already in the database)' % num_skipped

  # Use worker pool to fetch test path data.
  total_seconds = worker_pool.Run(
      'Fetching data of %d timeseries: ' % len(test_paths),
      _FetchTimeseriesWorker, args, test_paths)
  print '[%.1f test paths per second]' % (len(test_paths) / total_seconds)

  if args.output_csv is not None:
    print
    print 'Post-processing data for study ...'
    dfs = []
    with tables.DbSession(args.database_file) as con:
      for test_path in test_paths:
        df = tables.timeseries.GetTimeSeries(con, test_path)
        dfs.append(df)
    df = studies.PostProcess(pandas.concat(dfs, ignore_index=True))
    with utils.OpenWrite(args.output_csv) as f:
      df.to_csv(f, index=False)
    print 'Wrote timeseries data to:', args.output_csv
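# _ReadTestPathsFromFile is referenced above but not defined in this listing.
# A minimal sketch, assuming the --input-file format is one test path per
# line with blank lines ignored (hypothetical; the real helper may differ):
def _ReadTestPathsFromFile(input_file):
  with open(input_file) as f:
    for line in f:
      line = line.strip()
      if line:
        yield line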
def FetchAlertsData(args):
  api = dashboard_api.PerfDashboardCommunicator(args)
  with tables.DbSession(args.database_file) as con:
    # Get alerts.
    num_alerts = 0
    bug_ids = set()
    # TODO: This loop may be slow when fetching thousands of alerts, needs a
    # better progress indicator.
    for data in api.IterAlertData(args.benchmark, args.sheriff, args.days):
      alerts = tables.alerts.DataFrameFromJson(data)
      pandas_sqlite.InsertOrReplaceRecords(con, 'alerts', alerts)
      num_alerts += len(alerts)
      bug_ids.update(alerts['bug_id'].unique())
    print '%d alerts found!' % num_alerts

    # Get set of bugs associated with those alerts.
    bug_ids.discard(0)  # A bug_id of 0 means untriaged.
    print '%d bugs found!' % len(bug_ids)

    # Filter out bugs already in cache.
    if args.use_cache:
      known_bugs = set(
          b for b in bug_ids if tables.bugs.Get(con, b) is not None)
      if known_bugs:
        print '(skipping %d bugs already in the database)' % len(known_bugs)
        bug_ids.difference_update(known_bugs)

  # Use worker pool to fetch bug data.
  total_seconds = worker_pool.Run(
      'Fetching data of %d bugs: ' % len(bug_ids),
      _FetchBugsWorker, args, bug_ids)
  print '[%.1f bugs per second]' % (len(bug_ids) / total_seconds)
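# tables.DbSession is used above but defined elsewhere in the repo. A minimal
# sketch of what it presumably provides, mirroring the _ApiAndDbSession helper
# shown further below (assumption: connect, enable sqlite's write-ahead log so
# many workers can write concurrently, create the tables on first use, and
# always close the connection):
import contextlib
import sqlite3


@contextlib.contextmanager
def DbSession(database_file):
  con = sqlite3.connect(database_file)
  con.execute('PRAGMA journal_mode=WAL')  # Sticky; see _ApiAndDbSession below.
  try:
    tables.CreateIfNeeded(con)
    yield con
  finally:
    con.close()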
def FetchTimeseriesData(args):
  def _MatchesAllFilters(test_path):
    return all(f in test_path for f in args.filters)

  api = dashboard_api.PerfDashboardCommunicator(args)
  con = sqlite3.connect(args.database_file)
  try:
    tables.CreateIfNeeded(con)
    test_paths = api.ListTestPaths(args.benchmark, sheriff=args.sheriff)
    if args.filters:
      test_paths = filter(_MatchesAllFilters, test_paths)
    num_found = len(test_paths)
    print '%d test paths found!' % num_found
    if args.use_cache:
      test_paths = list(_IterStaleTestPaths(con, test_paths))
      num_skipped = num_found - len(test_paths)
      if num_skipped:
        print '(skipping %d test paths already in the database)' % num_skipped
    for test_path in test_paths:
      data = api.GetTimeseries(test_path, days=args.days)
      timeseries = tables.timeseries.DataFrameFromJson(data)
      pandas_sqlite.InsertOrReplaceRecords(con, 'timeseries', timeseries)
  finally:
    con.close()
def FetchTimeseriesData(args):
  def _MatchesAllFilters(test_path):
    return all(f in test_path for f in args.filters)

  if args.benchmark is not None:
    api = dashboard_api.PerfDashboardCommunicator(args)
    test_paths = api.ListTestPaths(args.benchmark, sheriff=args.sheriff)
  elif args.input_file is not None:
    test_paths = list(_ReadTestPathsFromFile(args.input_file))
  else:
    raise NotImplementedError('Expected --benchmark or --input-file')

  if args.filters:
    test_paths = filter(_MatchesAllFilters, test_paths)

  num_found = len(test_paths)
  print '%d test paths found!' % num_found

  con = sqlite3.connect(args.database_file)
  try:
    tables.CreateIfNeeded(con)
    if args.use_cache:
      test_paths = list(_IterStaleTestPaths(con, test_paths))
      num_skipped = num_found - len(test_paths)
      if num_skipped:
        print '(skipping %d test paths already in the database)' % num_skipped
  finally:
    con.close()

  total_seconds = worker_pool.Run(
      'Fetching data of %d timeseries: ' % len(test_paths),
      _FetchTimeseriesWorker, args, test_paths)
  print '[%.1f test paths per second]' % (len(test_paths) / total_seconds)
def FetchAlertsData(args):
  api = dashboard_api.PerfDashboardCommunicator(args)
  con = sqlite3.connect(args.database_file)
  try:
    tables.CreateIfNeeded(con)
    alerts = tables.alerts.DataFrameFromJson(
        api.GetAlertData(args.benchmark, args.sheriff, args.days))
    print '%d alerts found!' % len(alerts)
    pandas_sqlite.InsertOrReplaceRecords(con, 'alerts', alerts)

    bug_ids = set(alerts['bug_id'].unique())
    bug_ids.discard(0)  # A bug_id of 0 means untriaged.
    print '%d bugs found!' % len(bug_ids)

    if args.use_cache:
      known_bugs = set(
          b for b in bug_ids if tables.bugs.Get(con, b) is not None)
      if known_bugs:
        print '(skipping %d bugs already in the database)' % len(known_bugs)
        bug_ids.difference_update(known_bugs)
  finally:
    con.close()

  total_seconds = worker_pool.Run(
      'Fetching data of %d bugs: ' % len(bug_ids),
      _FetchBugsWorker, args, bug_ids)
  print '[%.1f bugs per second]' % (len(bug_ids) / total_seconds)
def FetchTimeseriesData(args):
  def _MatchesAllFilters(test_path):
    return all(f in test_path for f in args.filters)

  with _ApiAndDbSession(args) as (api, con):
    # Get test_paths.
    if args.benchmark is not None:
      test_paths = api.ListTestPaths(args.benchmark, sheriff=args.sheriff)
    elif args.input_file is not None:
      test_paths = list(_ReadTestPathsFromFile(args.input_file))
    else:
      raise NotImplementedError('Expected --benchmark or --input-file')

    # Apply --filter's to test_paths.
    if args.filters:
      test_paths = filter(_MatchesAllFilters, test_paths)

    num_found = len(test_paths)
    print '%d test paths found!' % num_found

    # Filter out test_paths already in cache.
    if args.use_cache:
      test_paths = list(_IterStaleTestPaths(con, test_paths))
      num_skipped = num_found - len(test_paths)
      if num_skipped:
        print '(skipping %d test paths already in the database)' % num_skipped

  # Use worker pool to fetch test path data.
  total_seconds = worker_pool.Run(
      'Fetching data of %d timeseries: ' % len(test_paths),
      _FetchTimeseriesWorker, args, test_paths)
  print '[%.1f test paths per second]' % (len(test_paths) / total_seconds)
def FetchTimeseriesData(args):
  dashboard_communicator = dashboard_api.PerfDashboardCommunicator(args)
  with open(args.output_path, 'wb') as fp:
    csv_writer = csv.writer(fp)
    for row in dashboard_communicator.GetAllTimeseriesForBenchmark(
        args.benchmark, args.days, args.filters):
      csv_writer.writerow(row)
def _FetchBugsWorker(args):
  api = dashboard_api.PerfDashboardCommunicator(args)
  con = sqlite3.connect(args.database_file, timeout=10)

  def Process(bug_id):
    bugs = tables.bugs.DataFrameFromJson(api.GetBugData(bug_id))
    pandas_sqlite.InsertOrReplaceRecords(con, 'bugs', bugs)

  # Install the callable on the worker_pool module; Run() will invoke it for
  # each bug_id handed to this worker.
  worker_pool.Process = Process
def _FetchTimeseriesWorker(args):
  api = dashboard_api.PerfDashboardCommunicator(args)
  con = sqlite3.connect(args.database_file, timeout=10)

  def Process(test_path):
    data = api.GetTimeseries(test_path, days=args.days)
    timeseries = tables.timeseries.DataFrameFromJson(data)
    pandas_sqlite.InsertOrReplaceRecords(con, 'timeseries', timeseries)

  worker_pool.Process = Process
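# The two workers above share a contract with the worker_pool module that is
# not shown in this listing. A minimal sketch of that contract, assuming this
# file is saved as worker_pool.py (hypothetical implementation; only Run's
# call shape and the Process attribute are taken from the code above): each
# subprocess runs the setup function once, which opens its own sqlite
# connection and installs a Process callable on this module, and Run() maps
# Process over the items and returns the elapsed wall-clock seconds.
import multiprocessing
import sys
import time

Process = None  # Installed in each subprocess by the setup function.


def _Init(setup_fn, args):
  setup_fn(args)  # e.g. _FetchBugsWorker; sets worker_pool.Process.


def _Invoke(item):
  Process(item)


def Run(label, setup_fn, args, items, processes=4):
  sys.stdout.write(label)
  start = time.time()
  pool = multiprocessing.Pool(
      processes, initializer=_Init, initargs=(setup_fn, args))
  try:
    for _ in pool.imap_unordered(_Invoke, list(items)):
      sys.stdout.write('.')  # Crude progress indicator.
      sys.stdout.flush()
  finally:
    pool.close()
    pool.join()
  print
  return time.time() - start

# Giving each subprocess its own connection (with timeout=10, plus the WAL
# journal mode set at session creation) is what keeps concurrent writers from
# hitting 'database is locked' errors.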
def FetchAlertsData(args):
  dashboard_communicator = dashboard_api.PerfDashboardCommunicator(args)
  conn = sqlite3.connect(args.database_file)
  try:
    alerts = tables.alerts.DataFrameFromJson(
        dashboard_communicator.GetAlertData(args.benchmark, args.days))
    print '%s alerts found!' % len(alerts)
    # TODO: Make this update rather than replace the existing table.
    # Note that if_exists='append' does not work since there is no way to
    # specify in pandas' |to_sql| a primary key or, more generally, uniqueness
    # constraints on columns. So this would lead to duplicate entries for
    # alerts with the same |key|.
    alerts.to_sql('alerts', conn, if_exists='replace')
  finally:
    conn.close()
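# The to_sql limitation described in the TODO above is what the later
# pandas_sqlite.InsertOrReplaceRecords calls work around. A hedged sketch of
# that approach (hypothetical implementation; the real pandas_sqlite module
# defines the actual one), assuming a frame with a single, named index column
# holding the primary key (e.g. the alert |key|): create the table with an
# explicit PRIMARY KEY, then upsert rows with INSERT OR REPLACE so re-fetching
# the same alert overwrites its row instead of duplicating it.
def InsertOrReplaceRecords(con, name, frame):
  keys = list(frame.index.names)
  columns = keys + list(frame.columns)
  con.execute('CREATE TABLE IF NOT EXISTS %s (%s, PRIMARY KEY (%s))' % (
      name, ', '.join(columns), ', '.join(keys)))
  con.executemany(
      'INSERT OR REPLACE INTO %s VALUES (%s)' % (
          name, ', '.join('?' * len(columns))),
      list(frame.itertuples()))
  con.commit()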
def FetchTimeseriesData(args):
  def _MatchesAllFilters(test_path):
    return all(f in test_path for f in args.filters)

  api = dashboard_api.PerfDashboardCommunicator(args)
  con = sqlite3.connect(args.database_file)
  try:
    test_paths = api.ListTestPaths(args.benchmark, sheriff=args.sheriff)
    if args.filters:
      test_paths = filter(_MatchesAllFilters, test_paths)
    print '%d test paths found!' % len(test_paths)
    for test_path in test_paths:
      data = api.GetTimeseries(test_path, days=args.days)
      timeseries = tables.timeseries.DataFrameFromJson(data)
      pandas_sqlite.InsertOrReplaceRecords(timeseries, 'timeseries', con)
  finally:
    con.close()
@contextlib.contextmanager
def _ApiAndDbSession(args):
  """Context manager for a session with API and DB connections.

  Ensures the API has the necessary credentials and the DB tables have been
  initialized.
  """
  api = dashboard_api.PerfDashboardCommunicator(args)
  con = sqlite3.connect(args.database_file)
  # Tell sqlite to use a write-ahead log, which drastically increases its
  # concurrency capabilities. This helps prevent 'database is locked'
  # exceptions when we have many workers writing to a single database. This
  # mode is sticky, so we only need to set it once and future connections
  # will automatically use the log. More details are available at
  # https://www.sqlite.org/wal.html.
  con.execute('PRAGMA journal_mode=WAL')
  try:
    tables.CreateIfNeeded(con)
    yield api, con
  finally:
    con.close()
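# Hypothetical usage of the helper above (CountStaleTestPaths is not part of
# the original code): the with-statement hands back both handles, and the
# sqlite connection is closed even if the API call raises.
def CountStaleTestPaths(args):
  with _ApiAndDbSession(args) as (api, con):
    test_paths = api.ListTestPaths(args.benchmark, sheriff=args.sheriff)
    stale = list(_IterStaleTestPaths(con, test_paths))
    print '%d of %d test paths need fetching' % (len(stale), len(test_paths))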
def FetchAlertsData(args):
  dashboard_communicator = dashboard_api.PerfDashboardCommunicator(args)
  alerts = dashboard_communicator.GetAlertData(
      args.benchmark, args.days)['anomalies']
  print '%s alerts found!' % len(alerts)

  bug_ids = set()
  with database.Database(args.database_file) as db:
    for alert in alerts:
      alert = models.Alert.FromJson(alert)
      db.Put(alert)
      if alert.bug_id is not None:
        bug_ids.add(alert.bug_id)

    # TODO(#4281): Do not fetch data for bugs already in the db.
    print 'Collecting data for %d bugs.' % len(bug_ids)
    for bug_id in bug_ids:
      data = dashboard_communicator.GetBugData(bug_id)
      bug = models.Bug.FromJson(data['bug'])
      db.Put(bug)
def FetchAlertsData(args):
  api = dashboard_api.PerfDashboardCommunicator(args)
  con = sqlite3.connect(args.database_file)
  try:
    alerts = tables.alerts.DataFrameFromJson(
        api.GetAlertData(args.benchmark, args.days))
    print '%d alerts found!' % len(alerts)
    pandas_sqlite.InsertOrReplaceRecords(con, 'alerts', alerts)

    bug_ids = set(alerts['bug_id'].unique())
    bug_ids.discard(0)  # A bug_id of 0 means untriaged.
    print '%d bugs found!' % len(bug_ids)

    if args.use_cache and tables.bugs.HasTable(con):
      known_bugs = set(
          b for b in bug_ids if tables.bugs.Get(con, b) is not None)
      if known_bugs:
        print '(skipping %d bugs already in the database)' % len(known_bugs)
        bug_ids.difference_update(known_bugs)

    bugs = tables.bugs.DataFrameFromJson(api.GetBugData(bug_ids))
    pandas_sqlite.InsertOrReplaceRecords(con, 'bugs', bugs)
  finally:
    con.close()
def FetchAlertsData(args):
  # TODO(#4293): Add test coverage.
  dashboard_communicator = dashboard_api.PerfDashboardCommunicator(args)
  alerts = dashboard_communicator.GetAlertData(
      args.benchmark, args.days)['anomalies']
  print '%s alerts found!' % len(alerts)
  with database.Database(args.database_file) as db:
    for alert in alerts:
      db.Put(alert_model.Alert.FromJson(alert))
  return

  # pylint: disable=unreachable
  # TODO(#4281): Also fetch and store bug data.
  bug_list = set([a.get('bug_id') for a in alerts])
  print 'Collecting data for %d bugs.' % len(bug_list)
  bugs = {}
  for bug in bug_list:
    bugs[bug] = GetBugData(dashboard_communicator, bug)['bug']
  data = {'bugs': bugs, 'alerts': alerts}
  with open(args.output_path, 'w') as fp:
    print 'Saving data to %s.' % args.output_path
    json.dump(data, fp, sort_keys=True, indent=2)