Code example #1
    def testGetLastCommitOfDate_failed(self, find_commit):
        commit_before = ('2a66bac4', '2019-03-17T23:50:16-07:00')
        find_commit.side_effect = [None, commit_before]

        date = pd.Timestamp('2019-03-17 04:01:01', tz=pinboard.TZ)
        with self.assertRaises(ValueError):
            pinboard.GetLastCommitOfDate(date)

        cutoff_date = pd.Timestamp('2019-03-18 00:00:00', tz=pinboard.TZ)
        find_commit.assert_has_calls([mock.call(after_date=cutoff_date)])
Code example #2
  def testGetLastCommitOfDate_simple(self, find_commit):
    commit_before = ('2a66bac4', '2019-03-17T23:50:16-07:00')
    commit_after = ('5aefdb31', '2019-03-18T02:41:58-07:00')
    find_commit.side_effect = [commit_after, commit_before]

    date = pd.Timestamp('2019-03-17 04:01:01', tz=pinboard.TZ)
    return_value = pinboard.GetLastCommitOfDate(date)

    cutoff_date = pd.Timestamp('2019-03-18 00:00:00', tz=pinboard.TZ)
    find_commit.assert_has_calls([
        mock.call(after_date=cutoff_date),
        mock.call(before_date=cutoff_date)])
    self.assertEqual(return_value, commit_before)
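Taken together, the two tests above pin down the contract of GetLastCommitOfDate: compute a cutoff at midnight of the day after the requested date, require that at least one commit has already landed after that cutoff (otherwise the day is not yet complete and a ValueError is raised), and return the last commit before the cutoff. A minimal sketch consistent with those tests, reusing FindCommit from example #9 below (the real helper in pinboard.py may differ in details such as the error message):

import pandas as pd

def GetLastCommitOfDate(date):
    """Return the last commit landed on the given (tz-aware) date."""
    # Midnight at the start of the following day, matching cutoff_date in the tests.
    cutoff = (date + pd.DateOffset(days=1)).normalize()
    if FindCommit(after_date=cutoff) is None:
        # Nothing has landed after the cutoff yet, so the day may not be over.
        raise ValueError('Date is too recent; no commits found after %s' % cutoff)
    return FindCommit(before_date=cutoff)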
Code example #3
    def testGetRevisionResults_simple(self):
        item = StateItem('2a66ba', timestamp='2019-03-17T23:50:16-07:00')
        csv = [
            'change,benchmark,story,name,unit,mean\n',
            '2a66ba,loading,story1,Total:duration,ms_smallerIsBetter,300.0\n',
            '2a66ba,loading,story2,Total:duration,ms_smallerIsBetter,400.0\n',
            '2a66ba+patch,loading,story1,Total:duration,ms_smallerIsBetter,100.0\n',
            '2a66ba+patch,loading,story2,Total:duration,ms_smallerIsBetter,200.0\n',
            '2a66ba,loading,story1,Other:metric,count_smallerIsBetter,1.0\n'
        ]
        expected_results = [
            ('without_patch', 0.35, '2018-03-17T12:00:00'),
            ('with_patch', 0.15, '2019-03-17T12:00:00'),
        ]

        filename = pinboard.RevisionResultsFile(item)
        with open(filename, 'w') as f:
            f.writelines(csv)

        with mock.patch('cli_tools.pinboard.pinboard.ACTIVE_STORIES',
                        new=['story1', 'story2']):
            df = pinboard.GetRevisionResults(item)

        self.assertEqual(len(df.index), 2)  # Only two rows of output.
        self.assertTrue((df['revision'] == '2a66ba').all())
        self.assertTrue((df['benchmark'] == 'loading').all())
        self.assertTrue((df['name'] == 'Total:duration').all())
        self.assertTrue((df['count'] == 2).all())
        df = df.set_index('label', verify_integrity=True)
        for label, value, timestamp in expected_results:
            self.assertEqual(df.loc[label, 'mean'], value)
            self.assertEqual(df.loc[label, 'timestamp'],
                             pd.Timestamp(timestamp))
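The expected values above follow directly from GetRevisionResults (example #7 below): per-story means are averaged per change, ms_* units are divided by 1000, the 'Other:metric' row is dropped because only measurements such as Total:duration are kept, and the without_patch timestamp is pushed back one year. A quick check of the arithmetic:

(300.0 + 400.0) / 2 / 1000  # 0.35, the expected without_patch mean in seconds
(100.0 + 200.0) / 2 / 1000  # 0.15, the expected with_patch mean in seconds
# The without_patch rows also have their timestamp shifted from 2019 to 2018-03-17T12:00:00.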
Code example #4
File: pinboard.py  Project: PublicOnce/chromium
def Main():
    SetUpLogging(level=logging.INFO)
    actions = ('start', 'collect', 'upload')
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'actions',
        metavar='ACTION',
        nargs='+',
        choices=actions + ('auto', ),
        help=("select action to perform: 'start' pinpoint jobs, 'collect' job "
              "results, 'upload' aggregated data, or 'auto' to do all in "
              "sequence."))
    parser.add_argument(
        '--date',
        type=lambda s: pd.Timestamp(s, tz=TZ),
        default=Yesterday(),
        help=(
            'Run jobs for the last commit landed on the given date (assuming '
            'MTV time). Defaults to the last commit landed yesterday.'))
    args = parser.parse_args()
    if 'auto' in args.actions:
        logging.info('=== auto run for %s ===', args.date)
        args.actions = actions

    state = LoadJobsState()
    try:
        if 'start' in args.actions:
            StartPinpointJobs(state, args.date)
        if 'collect' in args.actions:
            CollectPinpointResults(state)
    finally:
        UpdateJobsState(state)

    if 'upload' in args.actions:
        AggregateAndUploadResults(state)
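As a small illustration of the --date option above: the type= lambda turns a plain date string into a timezone-aware pandas Timestamp. TZ is assumed here to be 'America/Los_Angeles', consistent with the "MTV time" wording and the -07:00 offsets in the tests; the actual constant is defined elsewhere in pinboard.py and not shown in this excerpt.

import pandas as pd

TZ = 'America/Los_Angeles'  # assumption: stands in for pinboard.TZ
parse_date = lambda s: pd.Timestamp(s, tz=TZ)
parse_date('2019-03-17')
# -> Timestamp('2019-03-17 00:00:00-0700', tz='America/Los_Angeles')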
Code example #5
 def GetFakeResults(item):
     df = pd.DataFrame(index=[0])
     df['revision'] = item['revision']
     df['label'] = 'with_patch'
     df['benchmark'] = 'loading'
     df['name'] = 'Total:duration'
     df['timestamp'] = pd.Timestamp(item['timestamp'])
     df['count'] = 1 if item['revision'] != 'a400' else 0
     return df
Code example #6
 def testFindCommit_notFound(self):
   self.subprocess.check_output.return_value = ''
   date = pd.Timestamp('2019-03-18T00:00:00', tz=pinboard.TZ)
   return_value = pinboard.FindCommit(after_date=date)
   self.subprocess.check_output.assert_called_once_with(
       ['git', 'log', '--max-count', '1', '--format=format:%H:%ct',
        '--after', '2019-03-18T00:00:00-07:00', 'origin/master'],
       cwd=pinboard.TOOLS_PERF_DIR)
   self.assertIsNone(return_value)
Code example #7
def GetRevisionResults(item):
    """Aggregate the results from jobs that ran on a particular revision."""
    # First load pinpoint csv results into a DataFrame. The dtype arg is needed
    # to ensure that job_id values are always read as strings (even if some of
    # them look like large numbers).
    # look like large numbers).
    df = pd.read_csv(RevisionResultsFile(item), dtype={'job_id': str})
    assert df['change'].str.contains(item['revision']).all(), (
        'Not all results match the expected git revision')

    # Filter out and keep only the measurements and stories that we want.
    df = df[df['name'].isin(MEASUREMENTS)]
    df = df[df['story'].isin(ACTIVE_STORIES)]

    if not df.empty:
        # Aggregate over the results of individual stories.
        df = df.groupby(['change', 'name', 'benchmark',
                         'unit'])['mean'].agg(['mean', 'count']).reset_index()
    else:
        # Otherwise build a single row with an "empty" aggregate for this revision.
        # This is needed so we can remember in the cache that this revision has
        # been processed.
        df = pd.DataFrame(index=[0])
        df['change'] = item['revision']
        df['name'] = '(missing)'
        df['benchmark'] = '(missing)'
        df['unit'] = ''
        df['mean'] = np.nan
        df['count'] = 0

    # Convert time units from milliseconds to seconds. This is what Data Studio
    # dashboards expect.
    is_ms_unit = df['unit'].str.startswith('ms_')
    df.loc[is_ms_unit, 'mean'] = df['mean'] / 1000

    # Distinguish jobs that ran with/without the tested patch.
    df['label'] = df['change'].str.contains(r'\+').map({
        False: 'without_patch',
        True: 'with_patch'
    })

    # Add timestamp and revision information. We snap the date to noon and make
    # it naive (i.e. no timezone), so the dashboard doesn't get confused with
    # dates close to the end of day.
    date = item['timestamp'].split('T')[0] + 'T12:00:00'
    df['timestamp'] = pd.Timestamp(date)
    df['revision'] = item['revision']

    # Fake the timestamp of jobs without the patch to appear as if they ran a
    # year ago; this makes it easier to visualize and compare timeseries from
    # runs with/without the patch in Data Studio dashboards.
    df.loc[df['label'] == 'without_patch',
           'timestamp'] = (df['timestamp'] - pd.DateOffset(years=1))

    return df[[
        'revision', 'timestamp', 'label', 'benchmark', 'name', 'mean', 'count'
    ]]
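The groupby step in the function above averages the per-story means and counts how many stories contributed to each (change, name, benchmark, unit) group. A standalone sketch of just that step, fed with the same values as the CSV rows in example #3:

import pandas as pd

df = pd.DataFrame({
    'change': ['2a66ba', '2a66ba', '2a66ba+patch', '2a66ba+patch'],
    'benchmark': ['loading'] * 4,
    'name': ['Total:duration'] * 4,
    'unit': ['ms_smallerIsBetter'] * 4,
    'mean': [300.0, 400.0, 100.0, 200.0],
})
agg = df.groupby(['change', 'name', 'benchmark',
                  'unit'])['mean'].agg(['mean', 'count']).reset_index()
# agg['mean'] is [350.0, 150.0] and agg['count'] is [2, 2]: the values the test in
# example #3 expects before the ms -> s conversion and the label mapping.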
Code example #8
 def testFindCommit_simple(self):
   self.subprocess.check_output.return_value = '2a66bac4:1552891816\n'
   date = pd.Timestamp('2019-03-18T00:00:00', tz=pinboard.TZ)
   revision, timestamp = pinboard.FindCommit(before_date=date)
   self.subprocess.check_output.assert_called_once_with(
       ['git', 'log', '--max-count', '1', '--format=format:%H:%ct',
        '--before', '2019-03-18T00:00:00-07:00', 'origin/master'],
       cwd=pinboard.TOOLS_PERF_DIR)
   self.assertEqual(revision, '2a66bac4')
   self.assertEqual(timestamp, '2019-03-17T23:50:16-07:00')
Code example #9
def FindCommit(before_date=None, after_date=None):
    """Find latest commit with optional before/after date constraints."""
    cmd = ['git', 'log', '--max-count', '1', '--format=format:%H:%ct']
    if before_date is not None:
        cmd.extend(['--before', before_date.isoformat()])
    if after_date is not None:
        cmd.extend(['--after', after_date.isoformat()])
    cmd.append('origin/master')
    line = subprocess.check_output(cmd, cwd=TOOLS_PERF_DIR).strip()
    if line:
        revision, commit_time = line.split(':')
        commit_time = pd.Timestamp(int(commit_time), unit='s',
                                   tz=TZ).isoformat()
        return revision, commit_time
    else:
        return None
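The epoch-to-ISO conversion at the end of FindCommit can be checked against the values in testFindCommit_simple (example #8 above): git reports the commit time as 1552891816 seconds since the epoch, and the test expects '2019-03-17T23:50:16-07:00'. Assuming TZ is 'America/Los_Angeles' (not shown in this excerpt, but consistent with the -07:00 offsets):

import pandas as pd

TZ = 'America/Los_Angeles'  # assumption: stands in for pinboard.TZ
pd.Timestamp(1552891816, unit='s', tz=TZ).isoformat()
# -> '2019-03-17T23:50:16-07:00', matching the assertion in testFindCommit_simple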
Code example #10
    def testAggregateAndUploadResults(self, time_ago, get_revision_results):
        state = [
            StateItem('a100', timestamp='2019-03-15', job1='completed'),
            StateItem('a200', timestamp='2019-03-16', job2='completed'),
            StateItem('a300', timestamp='2019-03-17', job3='failed'),
            StateItem('a400', timestamp='2019-03-18', job4='completed'),
            StateItem('a500', timestamp='2019-03-19', job5='completed'),
        ]

        def GetFakeResults(item):
            df = pd.DataFrame(index=[0])
            df['revision'] = item['revision']
            df['label'] = 'with_patch'
            df['benchmark'] = 'loading'
            df['name'] = 'Total:duration'
            df['timestamp'] = pd.Timestamp(item['timestamp'])
            df['count'] = 1 if item['revision'] != 'a400' else 0
            return df

        get_revision_results.side_effect = GetFakeResults
        time_ago.return_value = pd.Timestamp('2018-10-20')

        # Only process first few revisions.
        new_items, cached_df = pinboard.GetItemsToUpdate(state[:3])
        pinboard.AggregateAndUploadResults(new_items, cached_df)
        dataset_file = pinboard.CachedFilePath(pinboard.DATASET_CSV_FILE)
        df = pd.read_csv(dataset_file)
        self.assertEqual(set(df['revision']), set(['a100', 'a200']))
        self.assertTrue((df[df['reference']]['revision'] == 'a200').all())

        # Incrementally process the rest.
        new_items, cached_df = pinboard.GetItemsToUpdate(state)
        pinboard.AggregateAndUploadResults(new_items, cached_df)
        dataset_file = pinboard.CachedFilePath(pinboard.DATASET_CSV_FILE)
        df = pd.read_csv(dataset_file)
        self.assertEqual(set(df['revision']), set(['a100', 'a200', 'a500']))
        self.assertTrue((df[df['reference']]['revision'] == 'a500').all())

        # No new revisions. This should be a no-op.
        new_items, cached_df = pinboard.GetItemsToUpdate(state)
        pinboard.AggregateAndUploadResults(new_items, cached_df)

        self.assertEqual(get_revision_results.call_count, 4)
        # Uploads twice (the pkl and csv) on each of the two calls that had new
        # results to aggregate; the final no-op call uploads nothing.
        self.assertEqual(self.upload_to_cloud.call_count, 2 * 2)