Example #1
0
def get_short_and_long_spinners(pings):
    """Aggregate tab-switch spinner data into long and short severity buckets.

    The pings are reduced to the spinner histograms plus identifying
    properties, filtered with ``windows_only`` and ``e10s_enabled_only``,
    keyed via ``long_spinners_keyed_by_build_and_client`` and merged per key
    with ``add_tuple_series``.

    Arguments:
        pings: collection of raw pings handed to ``get_pings_properties``.

    Returns:
        A dict with the keys ``'long'`` and ``'short'``, each holding the
        result of ``collect_aggregated_spinners`` for the matching
        bucketing function.
    """
    wanted_paths = [
        "clientId",
        "payload/histograms/FX_TAB_SWITCH_SPINNER_VISIBLE_LONG_MS",
        "payload/histograms/FX_TAB_SWITCH_SPINNER_VISIBLE_MS",
        "environment/system/os/name",
        "application/buildId",
        "environment/settings/e10sEnabled",
    ]

    eligible = (
        get_pings_properties(pings, wanted_paths)
        .filter(windows_only)
        .filter(e10s_enabled_only)
    )

    per_build_and_client = (
        eligible
        .repartition(200)
        .map(long_spinners_keyed_by_build_and_client)
        .reduceByKey(add_tuple_series)
    )

    long_summary = collect_aggregated_spinners(
        per_build_and_client, bucket_by_long_severity_per_client)
    short_summary = collect_aggregated_spinners(
        per_build_and_client, bucket_by_short_severity_per_client)

    # Sanity check on the first entry of each result: the short buckets from
    # index 2 onwards should add up to the long bucket at index 1.
    short_total = round(short_summary[0][1][2:].sum(), 3)
    long_total = round(long_summary[0][1][1], 3)
    if short_total == long_total:
        verdict = "Short and long counts match"
    else:
        verdict = "Error: Short and long counts do not match"
    print(verdict)

    return {'long': long_summary, 'short': short_summary}
Example #2
0
def convert_pings(sqlContext, pings, data_frame_config):
    """Run the basic extract / filter / convert pipeline on raw pings.

    Arguments:
        sqlContext: passed through unchanged to ``convert_rdd``.
        pings: raw telemetry pings handed to ``get_pings_properties``.
        data_frame_config: object supplying ``get_paths()`` (the properties
            to extract) and ``ping_filter`` (the predicate applied to the
            extracted pings); it is also forwarded to ``convert_rdd``.

    Returns:
        Whatever ``convert_rdd`` returns for the filtered pings.
    """
    wanted_paths = data_frame_config.get_paths()
    extracted = get_pings_properties(pings, wanted_paths)
    kept = extracted.filter(data_frame_config.ping_filter)
    return convert_rdd(sqlContext, kept, data_frame_config)
def get_data(sc, config, date):
    """Fetch crash pings submitted on ``date`` and map them to hang format.

    Arguments:
        sc: context object passed to ``Dataset.records``.
        config: mapping providing ``'channel'`` and ``'sample_size'``.
        date: object with ``strftime``; formatted as ``%Y%m%d`` and matched
            as a prefix of each ping's submission date.

    Returns:
        The extracted pings mapped through ``map_to_hang_format``, or
        ``None`` when ``get_pings_properties`` raises ``ValueError``.
    """
    day_prefix = date.strftime("%Y%m%d")

    def submitted_on_day(submission_date):
        return submission_date.startswith(day_prefix)

    crash_pings = (
        Dataset.from_source("telemetry")
        .where(docType='crash')
        .where(submissionDate=submitted_on_day)
        .where(appUpdateChannel=config['channel'])
        .records(sc, sample=config['sample_size'])
    )

    wanted_paths = [
        "environment/system/os/name",
        "environment/system/os/version",
        "application/architecture",
        "application/buildId",
        "payload/processType",
        "payload/stackTraces/crash_info/crashing_thread",
        "payload/stackTraces/threads",
        "payload/stackTraces/modules",
    ]

    try:
        extracted = get_pings_properties(
            crash_pings, wanted_paths, with_processes=True)
    except ValueError:
        return None
    else:
        return extracted.map(map_to_hang_format)
Example #4
0
def get_pings_for_channel(channel, fraction):
    """Fetch recent Firefox pings for a channel, one per client.

    The submission window covers the last ``DaysPerWeek * 3`` days up to now.
    Only pings whose OS name is ``'Windows_NT'`` and that pass
    ``has_valid_adapter`` are kept.

    Arguments:
        channel: channel value forwarded to ``get_pings``.
        fraction: sampling fraction forwarded to ``get_pings``.

    Returns:
        The filtered pings collection.
    """
    now = datetime.datetime.now()
    window_start = now - datetime.timedelta(DaysPerWeek * 3)

    raw = get_pings(
        sc,
        app='Firefox',
        schema='v4',
        submission_date=(fmt_date(window_start), fmt_date(now)),
        channel=channel,
        fraction=fraction,
    )

    wanted_paths = ['clientId', GfxAdaptersKey, SystemOsNameKey]
    per_client = get_one_ping_per_client(
        get_pings_properties(raw, wanted_paths))

    def is_windows(ping):
        # Only care about Windows for now.
        return ping.get(SystemOsNameKey) == 'Windows_NT'

    return per_client.filter(is_windows).filter(has_valid_adapter)
Example #5
0
def pings_to_df(sqlContext, pings, data_frame_config):
    """Performs simple data pipelining on raw pings.

    Arguments:
        sqlContext: object whose ``createDataFrame`` builds the result.
        pings: raw pings handed to ``get_pings_properties``.
        data_frame_config: configuration object exposing ``columns`` (each
            column having ``path`` and ``cleaning_func`` attributes),
            ``get_paths()`` and ``toStructType()``. Presumably assembled
            from (name, path, cleaning_func, column_type) entries --
            confirm against the config class.

    Returns:
        The result of ``sqlContext.createDataFrame`` over one row per ping,
        using ``data_frame_config.toStructType()`` as the schema.
    """
    def build_cell(ping, column_config):
        """Takes a json ping and a column config and returns a cleaned cell"""
        raw_value = ping[column_config.path]
        func = column_config.cleaning_func
        if func is None:
            return raw_value
        try:
            return func(raw_value)
        except Exception:
            # Best effort: a value that cannot be cleaned becomes a null
            # cell. Catch Exception rather than using a bare ``except`` so
            # KeyboardInterrupt and SystemExit still propagate.
            return None

    def ping_to_row(ping):
        """Builds one row: a cleaned cell for every configured column."""
        return [build_cell(ping, col) for col in data_frame_config.columns]

    filtered_pings = get_pings_properties(pings, data_frame_config.get_paths())

    return sqlContext.createDataFrame(filtered_pings.map(ping_to_row),
                                      schema=data_frame_config.toStructType())
Example #6
0
def get_data(sc):
    """Fetch a 10% sample of yesterday's nightly 'main' pings.

    Arguments:
        sc: context object passed to ``Dataset.records``.

    Returns:
        The result of ``get_pings_properties`` restricted to the client id
        and the OS name.
    """
    yesterday = (date.today() - timedelta(1)).strftime("%Y%m%d")

    selection = Dataset.from_source("telemetry").where(
        docType='main',
        submissionDate=yesterday,
        appUpdateChannel="nightly",
    )
    sampled = selection.records(sc, sample=0.1)

    wanted_paths = ["clientId", "environment/system/os/name"]
    return get_pings_properties(sampled, wanted_paths)
Example #7
0
def ReduceRawPings(pings):
    """Reduce raw pings to the client, creation-date and system properties.

    Arguments:
        pings: raw pings handed to ``get_pings_properties``.

    Returns:
        The result of ``get_pings_properties`` for the selected paths.
    """
    wanted_paths = [
        'clientId',
        'creationDate',
        ArchKey,
        Wow64Key,
        CpuKey,
        FxVersionKey,
        GfxAdaptersKey,
        GfxFeaturesKey,
        OSNameKey,
        OSVersionKey,
        OSServicePackMajorKey,
    ]
    return get_pings_properties(pings, wanted_paths)
Example #8
0
def reduce_pings(pings):
    """Reduce raw pings to a fixed set of properties plus ``PropertyList``.

    Arguments:
        pings: raw pings handed to ``get_pings_properties``.

    Returns:
        The result of ``get_pings_properties`` for the fixed paths below
        followed by the entries of the module-level ``PropertyList``.
    """
    fixed_paths = [
        "clientId",
        "creationDate",
        "environment/build/version",
        "environment/build/buildId",
        "environment/system/memoryMB",
        "environment/system/isWow64",
        "environment/system/cpu",
        "environment/system/os/name",
        "environment/system/os/version",
        "environment/system/os/servicePackMajor",
        "environment/system/gfx/adapters",
        "payload/info/revision",
    ]
    all_paths = fixed_paths + PropertyList
    return get_pings_properties(pings, all_paths)
def get_data(sc, config):
    """Fetch nightly 'main' pings whose build id falls in the recent window.

    The window runs from ``config['days_to_aggregate']`` days ago up to and
    including today, compared against the ``%Y%m%d`` prefix of the build id.

    Arguments:
        sc: context object passed to ``Dataset.records``.
        config: mapping providing ``'days_to_aggregate'`` and
            ``'sample_size'``.

    Returns:
        The result of ``get_pings_properties`` (with ``with_processes=True``)
        for the hang-related properties listed below.
    """
    # Read the clock once so both ends of the window derive from the same
    # instant; two separate reads could disagree if the day rolls over
    # between them.
    today = datetime.today()
    start_date_str = (
        today - timedelta(days=config['days_to_aggregate'])
    ).strftime("%Y%m%d")
    end_date_str = today.strftime("%Y%m%d")

    def build_id_in_window(build_id):
        # A build id is in range when it starts with a boundary day or
        # sorts on the inner side of it. Build ids are compared as strings.
        return (
            (build_id.startswith(start_date_str)
             or build_id > start_date_str)
            and (build_id.startswith(end_date_str)
                 or build_id < end_date_str)
        )

    pings = (
        Dataset.from_source("telemetry")
        .where(docType='main')
        .where(appBuildId=build_id_in_window)
        .where(appUpdateChannel="nightly")
        .records(sc, sample=config['sample_size'])
    )

    properties = [
        "environment/system/os/name",
        "application/buildId",
        "payload/info/subsessionLength",
        "payload/childPayloads",
        "payload/threadHangStats",
    ]

    return get_pings_properties(pings, properties, with_processes=True)
Example #10
0
def get_data(sc, config, date, end_date=None):
    """Fetch 'bhr' pings for builds dated between ``date`` and ``end_date``.

    When ``config['TMP_use_crashes']`` is truthy the call is delegated to
    ``crashes.get_data(sc, config, date)`` instead.

    Arguments:
        sc: context object passed to ``Dataset.records``.
        config: mapping providing ``'TMP_use_crashes'``, ``'channel'``,
            ``'sample_size'`` and ``'exclude_modules'``.
        date: first build day (inclusive); must support ``strftime``.
        end_date: last build day (inclusive); defaults to ``date``.

    Returns:
        The result of ``get_pings_properties`` (with ``with_processes=True``),
        or ``None`` when it raises ``ValueError``.
    """
    if config['TMP_use_crashes']:
        return crashes.get_data(sc, config, date)

    last_day = date if end_date is None else end_date
    first_day_str = date.strftime("%Y%m%d")
    last_day_str = last_day.strftime("%Y%m%d")

    def built_in_range(build_id):
        # Only the leading YYYYMMDD part of the build id is compared.
        return first_day_str <= build_id[:8] <= last_day_str

    def is_bhr(ping):
        return ping.get('meta', {}).get('docType', {}) == 'bhr'

    candidates = (
        Dataset.from_source("telemetry")
        .where(docType='OTHER')
        .where(appBuildId=built_in_range)
        .where(appUpdateChannel=config['channel'])
        .records(sc, sample=config['sample_size'])
    )
    bhr_pings = candidates.filter(is_bhr)

    wanted_paths = [
        "environment/system/os/name",
        "environment/system/os/version",
        "application/architecture",
        "application/buildId",
    ]
    # Module information is only requested when it has not been excluded.
    if not config['exclude_modules']:
        wanted_paths.append("payload/modules")
    wanted_paths.extend(["payload/hangs", "payload/timeSinceLastPing"])

    try:
        return get_pings_properties(
            bhr_pings, wanted_paths, with_processes=True)
    except ValueError:
        return None