def get_short_and_long_spinners(pings):
    """Aggregate the short and long tab-switch spinner histograms.

    The pings are reduced to the spinner histograms plus client, build, OS
    and e10s fields, filtered with the ``windows_only`` and
    ``e10s_enabled_only`` predicates, keyed and reduced per build/client,
    and then bucketed twice: once with the long-severity bucketer and once
    with the short-severity bucketer.

    A consistency message is printed comparing the two results (both sides
    rounded to 3 decimals); a mismatch is only reported, not raised.

    Returns:
        A dict with keys ``'long'`` and ``'short'`` holding the respective
        outputs of ``collect_aggregated_spinners``.
    """
    wanted_fields = [
        "clientId",
        "payload/histograms/FX_TAB_SWITCH_SPINNER_VISIBLE_LONG_MS",
        "payload/histograms/FX_TAB_SWITCH_SPINNER_VISIBLE_MS",
        "environment/system/os/name",
        "application/buildId",
        "environment/settings/e10sEnabled",
    ]

    eligible_pings = (
        get_pings_properties(pings, wanted_fields)
        .filter(windows_only)
        .filter(e10s_enabled_only)
    )

    per_build_and_client = eligible_pings.repartition(200)
    per_build_and_client = per_build_and_client.map(
        long_spinners_keyed_by_build_and_client)
    per_build_and_client = per_build_and_client.reduceByKey(add_tuple_series)

    long_result = collect_aggregated_spinners(
        per_build_and_client, bucket_by_long_severity_per_client)
    short_result = collect_aggregated_spinners(
        per_build_and_client, bucket_by_short_severity_per_client)

    # Sanity check on the first entry of each result: the short series from
    # index 2 onwards should add up to element 1 of the long series.
    short_total = round(short_result[0][1][2:].sum(), 3)
    long_total = round(long_result[0][1][1], 3)
    message = ("Short and long counts match"
               if short_total == long_total
               else "Error: Short and long counts do not match")
    print(message)

    return {
        'long': long_result,
        'short': short_result,
    }
def convert_pings(sqlContext, pings, data_frame_config):
    """Reduce, filter and convert raw telemetry pings.

    The pings are narrowed to the paths reported by
    ``data_frame_config.get_paths()``, filtered with
    ``data_frame_config.ping_filter`` and handed to ``convert_rdd``,
    whose result is returned unchanged.
    """
    paths = data_frame_config.get_paths()
    reduced = get_pings_properties(pings, paths)
    kept = reduced.filter(data_frame_config.ping_filter)
    return convert_rdd(sqlContext, kept, data_frame_config)
def get_data(sc, config, date):
    """Load crash pings submitted on ``date`` and map them to hang format.

    Arguments:
        sc: context object passed through to ``Dataset.records``.
        config: mapping providing ``'channel'`` and ``'sample_size'``.
        date: object with ``strftime``; formatted as ``YYYYMMDD`` and
            matched as a prefix of the ping's ``submissionDate``.

    Returns:
        The reduced pings mapped through ``map_to_hang_format``, or ``None``
        when ``get_pings_properties`` raises ``ValueError``.
    """
    wanted_fields = [
        "environment/system/os/name",
        "environment/system/os/version",
        "application/architecture",
        "application/buildId",
        "payload/processType",
        "payload/stackTraces/crash_info/crashing_thread",
        "payload/stackTraces/threads",
        "payload/stackTraces/modules",
    ]
    date_str = date.strftime("%Y%m%d")

    def submitted_on_date(submission_date):
        return submission_date.startswith(date_str)

    query = Dataset.from_source("telemetry")
    query = query.where(docType='crash')
    query = query.where(submissionDate=submitted_on_date)
    query = query.where(appUpdateChannel=config['channel'])
    pings = query.records(sc, sample=config['sample_size'])

    try:
        reduced = get_pings_properties(pings, wanted_fields,
                                       with_processes=True)
    except ValueError:
        return None
    else:
        # Kept outside the try body so a ValueError raised by the mapping
        # step itself is not swallowed.
        return reduced.map(map_to_hang_format)
def get_pings_for_channel(channel, fraction):
    """Fetch recent Firefox pings for ``channel``, one per client.

    The submission window runs from ``DaysPerWeek * 3`` days ago up to now.
    Pings are reduced to the client id, graphics adapter and OS name
    fields, de-duplicated with ``get_one_ping_per_client``, and then
    restricted to pings whose OS name is ``'Windows_NT'`` and which pass
    ``has_valid_adapter``.

    Relies on a module-level ``sc`` for the ``get_pings`` call.
    """
    window_end = datetime.datetime.now()
    window_start = window_end - datetime.timedelta(DaysPerWeek * 3)
    submission_window = (fmt_date(window_start), fmt_date(window_end))

    raw_pings = get_pings(
        sc,
        app='Firefox',
        schema='v4',
        submission_date=submission_window,
        channel=channel,
        fraction=fraction,
    )
    reduced = get_pings_properties(raw_pings, [
        'clientId',
        GfxAdaptersKey,
        SystemOsNameKey,
    ])
    per_client = get_one_ping_per_client(reduced)

    def is_windows(ping):
        return ping.get(SystemOsNameKey, None) == 'Windows_NT'

    # Only care about Windows for now.
    windows_pings = per_client.filter(is_windows)
    return windows_pings.filter(has_valid_adapter)
def pings_to_df(sqlContext, pings, data_frame_config):
    """Performs simple data pipelining on raw pings

    Arguments:
        sqlContext: object whose ``createDataFrame`` builds the result.
        pings: raw pings, passed to ``get_pings_properties``.
        data_frame_config: configuration object providing
            ``columns`` (iterable of column configs, each with a ``path``
            and an optional ``cleaning_func``), ``get_paths()`` (the paths
            to extract from every ping) and ``toStructType()`` (the schema
            of the resulting data frame).

    Returns:
        The data frame created by ``sqlContext.createDataFrame`` with one
        row per ping and one cell per configured column.
    """
    def build_cell(ping, column_config):
        """Takes a json ping and a column config and returns a cleaned cell"""
        raw_value = ping[column_config.path]
        func = column_config.cleaning_func
        if func is None:
            return raw_value
        try:
            return func(raw_value)
        except Exception:
            # Best effort: a value the cleaning function cannot handle
            # becomes a null cell. Deliberately not a bare ``except`` so
            # KeyboardInterrupt / SystemExit still propagate.
            return None

    def ping_to_row(ping):
        return [build_cell(ping, col) for col in data_frame_config.columns]

    filtered_pings = get_pings_properties(pings, data_frame_config.get_paths())
    return sqlContext.createDataFrame(
        filtered_pings.map(ping_to_row),
        schema=data_frame_config.toStructType())
def get_data(sc):
    """Load a 10% sample of yesterday's nightly ``main`` pings.

    Returns the pings reduced to ``clientId`` and
    ``environment/system/os/name`` by ``get_pings_properties``.
    """
    yesterday = date.today() - timedelta(days=1)
    submission_date = yesterday.strftime("%Y%m%d")

    query = Dataset.from_source("telemetry").where(
        docType='main',
        submissionDate=submission_date,
        appUpdateChannel="nightly",
    )
    pings = query.records(sc, sample=0.1)

    wanted_fields = ["clientId", "environment/system/os/name"]
    return get_pings_properties(pings, wanted_fields)
def ReduceRawPings(pings):
    """Reduce raw pings to the fixed set of fields listed below.

    Returns whatever ``get_pings_properties`` produces for those fields.
    """
    wanted_fields = [
        'clientId',
        'creationDate',
        ArchKey,
        Wow64Key,
        CpuKey,
        FxVersionKey,
        GfxAdaptersKey,
        GfxFeaturesKey,
        OSNameKey,
        OSVersionKey,
        OSServicePackMajorKey,
    ]
    return get_pings_properties(pings, wanted_fields)
def reduce_pings(pings):
    """Reduce pings to the base fields below plus ``PropertyList``.

    Returns whatever ``get_pings_properties`` produces for the combined
    list (base fields first, then the entries of ``PropertyList``).
    """
    base_fields = [
        'clientId',
        "creationDate",
        "environment/build/version",
        "environment/build/buildId",
        "environment/system/memoryMB",
        "environment/system/isWow64",
        "environment/system/cpu",
        "environment/system/os/name",
        "environment/system/os/version",
        "environment/system/os/servicePackMajor",
        "environment/system/gfx/adapters",
        "payload/info/revision",
    ]
    all_fields = base_fields + PropertyList
    return get_pings_properties(pings, all_fields)
def get_data(sc, config):
    """Load nightly ``main`` pings whose build id falls in a recent window.

    The window starts ``config['days_to_aggregate']`` days before today and
    ends today; both bounds are compared against ``appBuildId`` as
    ``YYYYMMDD`` strings.

    Arguments:
        sc: context object passed through to ``Dataset.records``.
        config: mapping providing ``'days_to_aggregate'`` and
            ``'sample_size'``.

    Returns:
        The result of ``get_pings_properties`` (called with
        ``with_processes=True``) for the fields listed below.
    """
    window_start = datetime.today() - timedelta(
        days=config['days_to_aggregate'])
    start_date_str = window_start.strftime("%Y%m%d")
    end_date_str = datetime.today().strftime("%Y%m%d")

    def build_in_window(build_id):
        # A build id that merely starts with a bound counts as inside the
        # window, in addition to strict string ordering.
        on_or_after_start = (build_id.startswith(start_date_str)
                             or build_id > start_date_str)
        if not on_or_after_start:
            return False
        return build_id.startswith(end_date_str) or build_id < end_date_str

    query = Dataset.from_source("telemetry")
    query = query.where(docType='main')
    query = query.where(appBuildId=build_in_window)
    query = query.where(appUpdateChannel="nightly")
    pings = query.records(sc, sample=config['sample_size'])

    wanted_fields = [
        "environment/system/os/name",
        "application/buildId",
        "payload/info/subsessionLength",
        "payload/childPayloads",
        "payload/threadHangStats",
    ]
    return get_pings_properties(pings, wanted_fields, with_processes=True)
def get_data(sc, config, date, end_date=None):
    """Load ``bhr`` pings for builds dated between ``date`` and ``end_date``.

    When ``config['TMP_use_crashes']`` is truthy the call is delegated to
    ``crashes.get_data(sc, config, date)`` instead.

    Arguments:
        sc: context object passed through to ``Dataset.records``.
        config: mapping providing ``'TMP_use_crashes'``, ``'channel'``,
            ``'sample_size'`` and ``'exclude_modules'``.
        date: first build date (object with ``strftime``).
        end_date: last build date, inclusive; defaults to ``date``.

    Returns:
        The result of ``get_pings_properties`` (called with
        ``with_processes=True``), or ``None`` when that call raises
        ``ValueError``.
    """
    if config['TMP_use_crashes']:
        return crashes.get_data(sc, config, date)

    last_day = date if end_date is None else end_date
    date_str = date.strftime("%Y%m%d")
    end_date_str = last_day.strftime("%Y%m%d")

    def build_in_range(build_id):
        # Only the leading YYYYMMDD part of the build id is compared.
        build_day = build_id[:8]
        return build_day >= date_str and build_day <= end_date_str

    def is_bhr_ping(ping):
        return ping.get('meta', {}).get('docType', {}) == 'bhr'

    query = Dataset.from_source("telemetry")
    query = query.where(docType='OTHER')
    query = query.where(appBuildId=build_in_range)
    query = query.where(appUpdateChannel=config['channel'])
    pings = query.records(sc, sample=config['sample_size'])
    pings = pings.filter(is_bhr_ping)

    properties = [
        "environment/system/os/name",
        "environment/system/os/version",
        "application/architecture",
        "application/buildId",
    ]
    if not config['exclude_modules']:
        properties.append("payload/modules")
    properties.extend(["payload/hangs", "payload/timeSinceLastPing"])

    try:
        return get_pings_properties(pings, properties, with_processes=True)
    except ValueError:
        return None