Example 1
def FetchTimeseriesData(args):
    def _MatchesAllFilters(test_path):
        return all(f in test_path for f in args.filters)

    api = dashboard_api.PerfDashboardCommunicator(args)
    with tables.DbSession(args.database_file) as con:
        # Get test_paths.
        if args.benchmark is not None:
            test_paths = api.dashboard.ListTestPaths(args.benchmark,
                                                     sheriff=args.sheriff)
        elif args.input_file is not None:
            test_paths = list(_ReadTestPathsFromFile(args.input_file))
        elif args.study is not None:
            test_paths = list(args.study.IterTestPaths(api))
        else:
            raise ValueError('No source for test paths specified')

        # Apply --filter arguments to test_paths.
        if args.filters:
            test_paths = filter(_MatchesAllFilters, test_paths)
        num_found = len(test_paths)
        print '%d test paths found!' % num_found

        # Filter out test_paths already in cache.
        if args.use_cache:
            test_paths = list(_IterStaleTestPaths(con, test_paths))
            num_skipped = num_found - len(test_paths)
            if num_skipped:
                print '(skipping %d test paths already in the database)' % num_skipped

    # Use worker pool to fetch test path data.
    total_seconds = worker_pool.Run(
        'Fetching data of %d timeseries: ' % len(test_paths),
        _FetchTimeseriesWorker, args, test_paths)
    print '[%.1f test paths per second]' % (len(test_paths) / total_seconds)

    if args.output_csv is not None:
        print
        print 'Post-processing data for study ...'
        dfs = []
        with tables.DbSession(args.database_file) as con:
            for test_path in test_paths:
                df = tables.timeseries.GetTimeSeries(con, test_path)
                dfs.append(df)
        df = studies.PostProcess(pandas.concat(dfs, ignore_index=True))
        with utils.OpenWrite(args.output_csv) as f:
            df.to_csv(f, index=False)
        print 'Wrote timeseries data to:', args.output_csv
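Several of the examples rely on an _IterStaleTestPaths helper whose body is not shown. The sketch below is only a hypothetical reading of it, assuming the timeseries table has a test_path column and that "stale" simply means "not cached yet"; the real helper may also take the freshness of the cached data into account.

def _IterStaleTestPaths(con, test_paths):
    # Hypothetical sketch: yield only test paths that have no cached rows in
    # the timeseries table. Column name and staleness rule are assumptions.
    for test_path in test_paths:
        cur = con.execute(
            'SELECT 1 FROM timeseries WHERE test_path = ? LIMIT 1',
            (test_path,))
        if cur.fetchone() is None:
            yield test_path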
Example 2
def FetchAlertsData(args):
    api = dashboard_api.PerfDashboardCommunicator(args)
    with tables.DbSession(args.database_file) as con:
        # Get alerts.
        num_alerts = 0
        bug_ids = set()
        # TODO: This loop may be slow when fetching thousands of alerts; it
        # needs a better progress indicator.
        for data in api.IterAlertData(args.benchmark, args.sheriff, args.days):
            alerts = tables.alerts.DataFrameFromJson(data)
            pandas_sqlite.InsertOrReplaceRecords(con, 'alerts', alerts)
            num_alerts += len(alerts)
            bug_ids.update(alerts['bug_id'].unique())
        print '%d alerts found!' % num_alerts

        # Get set of bugs associated with those alerts.
        bug_ids.discard(0)  # A bug_id of 0 means untriaged.
        print '%d bugs found!' % len(bug_ids)

        # Filter out bugs already in cache.
        if args.use_cache:
            known_bugs = set(b for b in bug_ids
                             if tables.bugs.Get(con, b) is not None)
            if known_bugs:
                print '(skipping %d bugs already in the database)' % len(
                    known_bugs)
                bug_ids.difference_update(known_bugs)

    # Use worker pool to fetch bug data.
    total_seconds = worker_pool.Run(
        'Fetching data of %d bugs: ' % len(bug_ids), _FetchBugsWorker, args,
        bug_ids)
    print '[%.1f bugs per second]' % (len(bug_ids) / total_seconds)
Example 3
def FetchTimeseriesData(args):
  def _MatchesAllFilters(test_path):
    return all(f in test_path for f in args.filters)

  api = dashboard_api.PerfDashboardCommunicator(args)
  con = sqlite3.connect(args.database_file)
  try:
    tables.CreateIfNeeded(con)
    test_paths = api.ListTestPaths(args.benchmark, sheriff=args.sheriff)
    if args.filters:
      test_paths = filter(_MatchesAllFilters, test_paths)
    num_found = len(test_paths)
    print '%d test paths found!' % num_found

    if args.use_cache:
      test_paths = list(_IterStaleTestPaths(con, test_paths))
      num_skipped = num_found - len(test_paths)
      if num_skipped:
        print '(skipping %d test paths already in the database)' % num_skipped

    for test_path in test_paths:
      data = api.GetTimeseries(test_path, days=args.days)
      timeseries = tables.timeseries.DataFrameFromJson(data)
      pandas_sqlite.InsertOrReplaceRecords(con, 'timeseries', timeseries)
  finally:
    con.close()
Example 4
def FetchTimeseriesData(args):
    def _MatchesAllFilters(test_path):
        return all(f in test_path for f in args.filters)

    if args.benchmark is not None:
        api = dashboard_api.PerfDashboardCommunicator(args)
        test_paths = api.ListTestPaths(args.benchmark, sheriff=args.sheriff)
    elif args.input_file is not None:
        test_paths = list(_ReadTestPathsFromFile(args.input_file))
    else:
        raise NotImplementedError('Expected --benchmark or --input-file')

    if args.filters:
        test_paths = filter(_MatchesAllFilters, test_paths)
    num_found = len(test_paths)
    print '%d test paths found!' % num_found

    con = sqlite3.connect(args.database_file)
    try:
        tables.CreateIfNeeded(con)
        if args.use_cache:
            test_paths = list(_IterStaleTestPaths(con, test_paths))
            num_skipped = num_found - len(test_paths)
            if num_skipped:
                print '(skipping %d test paths already in the database)' % num_skipped
    finally:
        con.close()

    total_seconds = worker_pool.Run(
        'Fetching data of %d timeseries: ' % len(test_paths),
        _FetchTimeseriesWorker, args, test_paths)
    print '[%.1f test paths per second]' % (len(test_paths) / total_seconds)
Example 5
def FetchAlertsData(args):
    api = dashboard_api.PerfDashboardCommunicator(args)
    con = sqlite3.connect(args.database_file)
    try:
        tables.CreateIfNeeded(con)
        alerts = tables.alerts.DataFrameFromJson(
            api.GetAlertData(args.benchmark, args.sheriff, args.days))
        print '%d alerts found!' % len(alerts)
        pandas_sqlite.InsertOrReplaceRecords(con, 'alerts', alerts)

        bug_ids = set(alerts['bug_id'].unique())
        bug_ids.discard(0)  # A bug_id of 0 means untriaged.
        print '%d bugs found!' % len(bug_ids)
        if args.use_cache:
            known_bugs = set(b for b in bug_ids
                             if tables.bugs.Get(con, b) is not None)
            if known_bugs:
                print '(skipping %d bugs already in the database)' % len(
                    known_bugs)
                bug_ids.difference_update(known_bugs)
    finally:
        con.close()

    total_seconds = worker_pool.Run(
        'Fetching data of %d bugs: ' % len(bug_ids), _FetchBugsWorker, args,
        bug_ids)
    print '[%.1f bugs per second]' % (len(bug_ids) / total_seconds)
Example 6
def FetchTimeseriesData(args):
    def _MatchesAllFilters(test_path):
        return all(f in test_path for f in args.filters)

    with _ApiAndDbSession(args) as (api, con):
        # Get test_paths.
        if args.benchmark is not None:
            test_paths = api.ListTestPaths(args.benchmark,
                                           sheriff=args.sheriff)
        elif args.input_file is not None:
            test_paths = list(_ReadTestPathsFromFile(args.input_file))
        else:
            raise NotImplementedError('Expected --benchmark or --input-file')

        # Apply --filter arguments to test_paths.
        if args.filters:
            test_paths = filter(_MatchesAllFilters, test_paths)
        num_found = len(test_paths)
        print '%d test paths found!' % num_found

        # Filter out test_paths already in cache.
        if args.use_cache:
            test_paths = list(_IterStaleTestPaths(con, test_paths))
            num_skipped = num_found - len(test_paths)
            if num_skipped:
                print '(skipping %d test paths already in the database)' % num_skipped

    # Use worker pool to fetch test path data.
    total_seconds = worker_pool.Run(
        'Fetching data of %d timeseries: ' % len(test_paths),
        _FetchTimeseriesWorker, args, test_paths)
    print '[%.1f test paths per second]' % (len(test_paths) / total_seconds)
Example 7
def FetchTimeseriesData(args):
    dashboard_communicator = dashboard_api.PerfDashboardCommunicator(args)
    with open(args.output_path, 'wb') as fp:
        csv_writer = csv.writer(fp)
        for row in dashboard_communicator.GetAllTimeseriesForBenchmark(
                args.benchmark, args.days, args.filters):
            csv_writer.writerow(row)
Example 8
def _FetchBugsWorker(args):
    api = dashboard_api.PerfDashboardCommunicator(args)
    con = sqlite3.connect(args.database_file, timeout=10)

    def Process(bug_id):
        bugs = tables.bugs.DataFrameFromJson(api.GetBugData(bug_id))
        pandas_sqlite.InsertOrReplaceRecords(con, 'bugs', bugs)

    worker_pool.Process = Process
Example 9
def _FetchTimeseriesWorker(args):
    api = dashboard_api.PerfDashboardCommunicator(args)
    con = sqlite3.connect(args.database_file, timeout=10)

    def Process(test_path):
        data = api.GetTimeseries(test_path, days=args.days)
        timeseries = tables.timeseries.DataFrameFromJson(data)
        pandas_sqlite.InsertOrReplaceRecords(con, 'timeseries', timeseries)

    worker_pool.Process = Process
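Example 8 and Example 9 follow the same worker protocol: each worker process builds its own API client and database connection, then installs a Process callback on the worker_pool module for the pool to call on every work item. The worker_pool module itself is not shown here; the following is only a sketch of a pool with that shape, where the Run signature is taken from its call sites above and everything else is an assumption.

# Hypothetical sketch of a worker_pool module matching the usage above; not
# the actual implementation.
import multiprocessing
import sys
import time

Process = None  # Installed per worker process by the setup function.


def _Init(setup, args):
    # Runs once in each worker process; the setup function (e.g.
    # _FetchTimeseriesWorker) sets the module-level Process callback.
    setup(args)


def _Dispatch(item):
    return Process(item)


def Run(label, setup, args, items):
    # Processes items in parallel and returns the elapsed time in seconds.
    start = time.time()
    pool = multiprocessing.Pool(initializer=_Init, initargs=(setup, args))
    try:
        sys.stdout.write(label)
        for _ in pool.imap_unordered(_Dispatch, list(items)):
            sys.stdout.write('.')  # Minimal progress indicator.
        sys.stdout.write('\n')
    finally:
        pool.close()
        pool.join()
    return time.time() - start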
Example 10
def FetchAlertsData(args):
    dashboard_communicator = dashboard_api.PerfDashboardCommunicator(args)
    conn = sqlite3.connect(args.database_file)
    try:
        alerts = tables.alerts.DataFrameFromJson(
            dashboard_communicator.GetAlertData(args.benchmark, args.days))
        print '%s alerts found!' % len(alerts)
        # TODO: Make this update rather than replace the existing table.
        # Note that if_exists='append' does not work since there is no way to
        # specify in pandas' |to_sql| a primary key or, more generally, uniqueness
        # constraints on columns. So this would lead to duplicate entries for
        # alerts with the same |key|.
        alerts.to_sql('alerts', conn, if_exists='replace')
    finally:
        conn.close()
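The TODO above describes the limitation that the InsertOrReplaceRecords helper used in the other examples works around: pandas' to_sql cannot declare a primary key, so if_exists='append' would duplicate alerts that share the same key. Below is a minimal sketch of the insert-or-replace idea, assuming the target table already exists with the appropriate PRIMARY KEY; it is not necessarily how pandas_sqlite implements it.

def InsertOrReplaceRecords(con, name, df):
    # Sketch only: upsert a DataFrame into an existing sqlite table whose
    # schema (including its PRIMARY KEY) has already been created, so rows
    # sharing a key are replaced rather than duplicated.
    columns = list(df.columns)
    sql = 'INSERT OR REPLACE INTO %s (%s) VALUES (%s)' % (
        name, ', '.join(columns), ', '.join('?' for _ in columns))
    rows = df.astype(object).values.tolist()  # Plain Python values for sqlite3.
    con.executemany(sql, rows)
    con.commit()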
Example 11
def FetchTimeseriesData(args):
    def _MatchesAllFilters(test_path):
        return all(f in test_path for f in args.filters)

    api = dashboard_api.PerfDashboardCommunicator(args)
    con = sqlite3.connect(args.database_file)
    try:
        test_paths = api.ListTestPaths(args.benchmark, sheriff=args.sheriff)
        if args.filters:
            test_paths = filter(_MatchesAllFilters, test_paths)
        print '%d test paths found!' % len(test_paths)
        for test_path in test_paths:
            data = api.GetTimeseries(test_path, days=args.days)
            timeseries = tables.timeseries.DataFrameFromJson(data)
            pandas_sqlite.InsertOrReplaceRecords(timeseries, 'timeseries', con)
    finally:
        con.close()
Example 12
@contextlib.contextmanager
def _ApiAndDbSession(args):
    """Context manager for a session with API and DB connections.

    Ensures the API has the necessary credentials and that the DB tables have
    been initialized.
    """
    api = dashboard_api.PerfDashboardCommunicator(args)
    con = sqlite3.connect(args.database_file)

    # Tell sqlite to use a write-ahead log, which drastically increases its
    # concurrency capabilities. This helps prevent 'database is locked' exceptions
    # when we have many workers writing to a single database. This mode is sticky,
    # so we only need to set it once and future connections will automatically
    # use the log. More details are available at https://www.sqlite.org/wal.html.
    con.execute('PRAGMA journal_mode=WAL')

    try:
        tables.CreateIfNeeded(con)
        yield api, con
    finally:
        con.close()
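As the comment above notes, the WAL setting is sticky for the database file, so any later connection can confirm it; the pragma, issued without a value, reports the journal mode currently in effect.

# Reads back the journal mode; it is 'wal' once the write-ahead log has been
# enabled for this database file.
mode = con.execute('PRAGMA journal_mode').fetchone()[0]
assert mode == 'wal'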
Example 13
def FetchAlertsData(args):
    dashboard_communicator = dashboard_api.PerfDashboardCommunicator(args)
    alerts = dashboard_communicator.GetAlertData(args.benchmark,
                                                 args.days)['anomalies']
    print '%s alerts found!' % len(alerts)

    bug_ids = set()
    with database.Database(args.database_file) as db:
        for alert in alerts:
            alert = models.Alert.FromJson(alert)
            db.Put(alert)
            if alert.bug_id is not None:
                bug_ids.add(alert.bug_id)

        # TODO(#4281): Do not fetch data for bugs already in the db.
        print 'Collecting data for %d bugs.' % len(bug_ids)
        for bug_id in bug_ids:
            data = dashboard_communicator.GetBugData(bug_id)
            bug = models.Bug.FromJson(data['bug'])
            db.Put(bug)
Example 14
def FetchAlertsData(args):
  api = dashboard_api.PerfDashboardCommunicator(args)
  con = sqlite3.connect(args.database_file)
  try:
    alerts = tables.alerts.DataFrameFromJson(
        api.GetAlertData(args.benchmark, args.days))
    print '%d alerts found!' % len(alerts)
    pandas_sqlite.InsertOrReplaceRecords(con, 'alerts', alerts)

    bug_ids = set(alerts['bug_id'].unique())
    bug_ids.discard(0)  # A bug_id of 0 means untriaged.
    print '%d bugs found!' % len(bug_ids)
    if args.use_cache and tables.bugs.HasTable(con):
      known_bugs = set(
          b for b in bug_ids if tables.bugs.Get(con, b) is not None)
      if known_bugs:
        print '(skipping %d bugs already in the database)' % len(known_bugs)
        bug_ids.difference_update(known_bugs)
    bugs = tables.bugs.DataFrameFromJson(api.GetBugData(bug_ids))
    pandas_sqlite.InsertOrReplaceRecords(con, 'bugs', bugs)
  finally:
    con.close()
Example 15
def FetchAlertsData(args):
  # TODO(#4293): Add test coverage.
  dashboard_communicator = dashboard_api.PerfDashboardCommunicator(args)
  alerts = dashboard_communicator.GetAlertData(
      args.benchmark, args.days)['anomalies']
  print '%s alerts found!' % len(alerts)

  with database.Database(args.database_file) as db:
    for alert in alerts:
      db.Put(alert_model.Alert.FromJson(alert))

  return
  # pylint: disable=unreachable
  # TODO(#4281): Also fetch and store bug data.
  bug_list = set([a.get('bug_id') for a in alerts])
  print 'Collecting data for %d bugs.' % len(bug_list)
  bugs = {}
  for bug in bug_list:
    bugs[bug] = GetBugData(dashboard_communicator, bug)['bug']

  data = {'bugs': bugs, 'alerts': alerts}
  with open(args.output_path, 'w') as fp:
    print 'Saving data to %s.' % args.output_path
    json.dump(data, fp, sort_keys=True, indent=2)