Esempio n. 1
0
def PostProcess(df):
    """Normalize a table of benchmark results before it is written out.

    The steps are, in order: snap timestamps within a test run, drop rows
    older than four months, flag the reference runs of each quarter, rescale
    values by unit prefix, and drop columns that are not needed downstream.

    Args:
        df: DataFrame with at least the columns 'test_suite', 'bot',
            'point_id', 'timestamp', 'units', 'value', 'chromium_rev',
            'clank_rev' and 'trace_url'. The 'timestamp' column is compared
            against a timezone-naive cutoff, so it is expected to be naive
            (presumably UTC -- confirm against the caller). Note that the
            'timestamp' column of the passed frame is overwritten in place.

    Returns:
        A new DataFrame restricted to the last four months, with the extra
        columns 'quarter' and 'reference' and without 'point_id',
        'chromium_rev', 'clank_rev' and 'trace_url'.

    Raises:
        KeyError: if any of the required columns is missing.
    """
    # Snap stories on the same test run to the same timestamp.
    df['timestamp'] = df.groupby(['test_suite', 'bot',
                                  'point_id'])['timestamp'].transform('min')

    # Prevent the size of the output from growing without bounds. Limit for
    # DataStudio input appears to be around 100MiB.
    # Timestamp.now('UTC') is the supported spelling of the deprecated
    # Timestamp.utcnow(); the cutoff is made naive to match the column.
    four_months_ago = pandas.Timestamp.now('UTC') - pandas.DateOffset(months=4)
    df = df[df['timestamp'] > four_months_ago.tz_convert(None)].copy()

    # We use all runs on the latest day for each quarter as reference.
    df['quarter'] = df['timestamp'].dt.to_period('Q')
    latest_in_quarter = df.groupby(
        ['quarter', 'test_suite', 'bot'])['timestamp'].transform('max')
    df['reference'] = df['timestamp'].dt.date == latest_in_quarter.dt.date

    # Change units for values in ms to seconds, and percent values.
    # Missing units become '' so the prefix tests below yield plain booleans.
    df['units'] = df['units'].fillna('')
    is_ms_unit = df['units'].str.startswith('ms_')
    df.loc[is_ms_unit, 'value'] = df['value'] / 1000

    is_percentage = df['units'].str.startswith('n%_')
    df.loc[is_percentage, 'value'] = df['value'] * 100

    # Remove unused columns to save space in the output csv.
    df = df.drop(
        columns=['point_id', 'chromium_rev', 'clank_rev', 'trace_url'])

    return df
Esempio n. 2
0
def GetRevisionResults(item):
    """Build the aggregated results table for the jobs of one revision.

    Args:
        item: mapping with a 'revision' entry (matched against the 'change'
            column of the csv) and a 'timestamp' string whose date part
            precedes a 'T' separator. It is also passed to
            RevisionResultsFile to locate the csv to load.

    Returns:
        A DataFrame with the columns 'revision', 'timestamp', 'label',
        'benchmark', 'name', 'mean' and 'count'. When no wanted measurement
        survives the filtering, a single placeholder row is returned.

    Raises:
        AssertionError: if some row of the csv does not mention the
            expected revision in its 'change' column.
    """
    revision = item['revision']

    # Read the pinpoint csv; job_id is forced to str so that ids which look
    # like large numbers are never parsed as numeric values.
    results = pd.read_csv(RevisionResultsFile(item), dtype={'job_id': str})
    assert results['change'].str.contains(revision).all(), (
        'Not all results match the expected git revision')

    # Keep only the measurements and stories we care about.
    wanted = (results['name'].isin(MEASUREMENTS)
              & results['story'].isin(ACTIVE_STORIES))
    results = results[wanted]

    if results.empty:
        # Nothing to aggregate: emit one "empty" row so the cache can still
        # record that this revision has already been processed.
        results = pd.DataFrame(
            {
                'change': revision,
                'name': '(missing)',
                'benchmark': '(missing)',
                'unit': '',
                'mean': np.nan,
                'count': 0,
            },
            index=[0])
    else:
        # Collapse the per-story results into one mean/count per group.
        per_group = results.groupby(['change', 'name', 'benchmark', 'unit'])
        results = per_group['mean'].agg(['mean', 'count']).reset_index()

    # Data Studio dashboards expect seconds, so rescale millisecond values.
    in_milliseconds = results['unit'].str.startswith('ms_')
    mean_in_seconds = results['mean'] / 1000
    results.loc[in_milliseconds, 'mean'] = mean_in_seconds

    # A '+' in the change marks a job that ran with the tested patch.
    label_for = {False: 'without_patch', True: 'with_patch'}
    results['label'] = results['change'].str.contains(r'\+').map(label_for)

    # Attach revision and timestamp. The date is pinned to noon and kept
    # naive (no timezone) so the dashboard does not get confused by dates
    # close to the end of the day.
    day, _, _ = item['timestamp'].partition('T')
    results['revision'] = revision
    results['timestamp'] = pd.Timestamp(day + 'T12:00:00')

    # Pretend the jobs without the patch ran a year earlier; this makes the
    # with/without timeseries easier to compare in Data Studio dashboards.
    unpatched = results['label'] == 'without_patch'
    a_year_earlier = results['timestamp'] - pd.DateOffset(years=1)
    results.loc[unpatched, 'timestamp'] = a_year_earlier

    output_columns = [
        'revision', 'timestamp', 'label', 'benchmark', 'name', 'mean', 'count'
    ]
    return results[output_columns]
Esempio n. 3
0
def TimeAgo(**kwargs):
    """Return the current time in the module's TZ, moved back by an offset.

    Args:
        **kwargs: forwarded unchanged to pd.DateOffset to describe how far
            back to go (for example days=1 or months=4).

    Returns:
        The pd.Timestamp obtained by subtracting that offset from now.
    """
    current = pd.Timestamp.now(TZ)
    step_back = pd.DateOffset(**kwargs)
    return current - step_back
Esempio n. 4
0
def Yesterday():
    """Return the current time in the module's TZ minus one calendar day."""
    current = pd.Timestamp.now(TZ)
    one_day = pd.DateOffset(days=1)
    return current - one_day