def __dag_details(self, read_stream):
    """
    :param read_stream: input stream with events from dag kafka topic
    :return: list of aggregated metrics
    """
    details = ".details"

    number_of_unique_tasks_in_the_dags = read_stream \
        .filter("dag is not NULL") \
        .filter("task is not NULL") \
        .aggregate(DistinctCount(group_fields=["dag"],
                                 aggregation_field="task",
                                 aggregation_name=self._component_name + details))

    dag_host_task_count = read_stream \
        .filter("dag is not NULL") \
        .filter("hostname is not NULL") \
        .filter("task is not NULL") \
        .aggregate(Count(group_fields=["dag", "hostname", "task"],
                         aggregation_name=self._component_name + details))

    bbc_dag_subtask_message_itv_generated_with_task_count = read_stream \
        .filter("dag is not NULL") \
        .filter("task is not NULL") \
        .where("dag like '%bbc%' and subtask_message like '%ITV generated%'") \
        .aggregate(Count(group_fields=["dag", "task"],
                         aggregation_name=self._component_name + ".highres.itv_gen"))

    return [
        number_of_unique_tasks_in_the_dags,
        dag_host_task_count,
        bbc_dag_subtask_message_itv_generated_with_task_count
    ]
def _process_pipeline(self, json_stream):
    stream = json_stream \
        .filter(col("VoiceReport.voiceReport.sessionId").isNotNull()) \
        .select(
            col("@timestamp"),
            col("header.viewerID").alias("viewerID"),
            col("VoiceReport.voiceReport.sessionId").alias("sessionId"),
            col("VoiceReport.voiceReport.sessionCreationTime").alias("sessionCreationTime"),
            col("VoiceReport.voiceReport.audioPacketLoss").alias("audioPacketLoss"),
            col("VoiceReport.voiceReport.audioTransferTime").alias("audioTransferTime"),
            col("VoiceReport.voiceReport.transactionResult").alias("transactionResult")
        )

    aggregation_fields = ["sessionCreationTime", "audioPacketLoss", "audioTransferTime"]
    aggregations = []

    for field in aggregation_fields:
        kwargs = {'aggregation_field': field}
        aggregations.extend([
            Count(**kwargs), Max(**kwargs), Min(**kwargs),
            P01(**kwargs), P05(**kwargs), P10(**kwargs), P25(**kwargs),
            P50(**kwargs), P75(**kwargs), P90(**kwargs), P95(**kwargs),
            P99(**kwargs)
        ])

    return [
        stream.aggregate(
            CompoundAggregation(aggregations=aggregations,
                                group_fields=self.__dimensions,
                                aggregation_name=self._component_name)),
        stream.aggregate(
            Count(group_fields=["viewerID", "sessionId", "transactionResult"],
                  aggregation_name=self._component_name))
    ]
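# The per-field Count/Max/Min/percentile list built above recurs in several
# pipelines in this module. A hypothetical helper that could factor it out;
# the name _statistical_aggregations and the with_stddev/with_sum flags are
# assumptions, not part of the project (it assumes the same aggregation
# imports this module already uses):
def _statistical_aggregations(field, with_stddev=False, with_sum=False):
    kwargs = {'aggregation_field': field}
    aggregations = [Count(**kwargs), Max(**kwargs), Min(**kwargs),
                    P01(**kwargs), P05(**kwargs), P10(**kwargs),
                    P25(**kwargs), P50(**kwargs), P75(**kwargs),
                    P90(**kwargs), P95(**kwargs), P99(**kwargs)]
    if with_stddev:
        aggregations.append(Stddev(**kwargs))
    if with_sum:
        aggregations.append(Sum(**kwargs))
    return aggregations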
def __tva_backup_deleted(self, read_stream):
    return read_stream.where(
        "message like '%TvaManagementFullOnlineIngest%' and "
        "message like '%TvaBackupHelper%' and "
        "message like '%Deleted%'") \
        .aggregate(Count(aggregation_field="message",
                         aggregation_name=self._component_name + ".tva_backup_deleted"))
def __count_requests_by_content_source_id_and_methods_and_status(self, read_stream):
    return read_stream \
        .where("method is not null") \
        .withColumn("response_successful",
                    col("response_code").between(200, 299).cast("string")) \
        .aggregate(Count(group_fields=["content_source_id", "method", "response_successful"],
                         aggregation_name=self._component_name))
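# A quick illustration of the between(...).cast("string") flag above, run on
# hypothetical response codes in a local batch session (not project data):
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

spark = SparkSession.builder.master("local[1]").getOrCreate()
codes = spark.createDataFrame([(204,), (500,), (None,)], ["response_code"])
codes.withColumn("response_successful",
                 col("response_code").between(200, 299).cast("string")).show()
# 204 -> "true", 500 -> "false", missing response_code -> null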
def __trace_metrics(self, events):
    return events \
        .withColumn("counter", custom_translate_like(
            source_field=col("message"),
            mappings_pair=[
                (["HTTP request received", "Referer: cdvr-bs", "<Result>success</Result>"],
                 "vrm_success_recorded"),
                (["HTTP request received", "Referer: cdvr-bs", "<Result>failed</Result>"],
                 "vrm_failed_recorded"),
                (["HTTP request", ":8080/RE/", "learnAction"], "reng_success_action"),
                (["HTTP request received", "IsAuthorized.traxis"], "irdeto_success_request"),
                (["HTTP request received", "User-Agent", "vod-service"], "vod_service_success"),
                (["HTTP request received", "x-application-name: purchase-service"],
                 "purchase_service_success"),
                (["HTTP request received", "x-application-name: discovery-service"],
                 "discovery_service_success"),
                (["HTTP request received", "x-application-name: epg-packager"], "epg_success"),
                (["HTTP request received", "x-application-name: recording-service"],
                 "recording_service_success"),
                (["HTTP request received", "x-application-name: session-service"],
                 "session_service_success")
            ],
            default_value="unclassified")) \
        .where("counter != 'unclassified'") \
        .aggregate(Count(group_fields=["hostname", "counter"],
                         aggregation_name=self._component_name))
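# A minimal sketch of what custom_translate_like might do, inferred from its
# call sites here: each mappings_pair entry seems to mean "label applies when
# ALL listed substrings occur in the source field". This is an assumption
# about the library's semantics, not its actual implementation:
from functools import reduce

from pyspark.sql.functions import lit, when


def custom_translate_like_sketch(source_field, mappings_pair, default_value):
    result = lit(default_value)
    # Iterate in reverse so the first mapping wins, mirroring a chain of
    # when(...).otherwise(...) built from top to bottom.
    for substrings, label in reversed(mappings_pair):
        condition = reduce(lambda acc, s: acc & source_field.contains(s),
                           substrings, lit(True))
        result = when(condition, label).otherwise(result)
    return result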
def __count_content_on_demand_errors(self, error_report_stream):
    return error_report_stream \
        .where((col("code") == 3200) | (col("code") == 3300) | (col("code") == 3400)) \
        .aggregate(Count(group_fields=["code"],
                         aggregation_name=self._component_name + ".content_on_demand_errors"))
def _process_pipeline(self, read_stream):
    """Defines the aggregation fields and reuses the statistical functions from aggregation.py."""
    stream = read_stream \
        .withColumn("VMStat_idlePct", col("VMStat_idlePct").cast(IntegerType())) \
        .withColumn("VMStat_systemPct", col("VMStat_systemPct").cast(IntegerType())) \
        .withColumn("VMStat_iowaitPct", col("VMStat_iowaitPct").cast(IntegerType())) \
        .withColumn("VMStat_hwIrqPct", col("VMStat_hwIrqPct").cast(IntegerType())) \
        .withColumn("MemoryUsage_freeKb", col("MemoryUsage_freeKb").cast(IntegerType())) \
        .withColumn("MemoryUsage_cachedKb", col("MemoryUsage_cachedKb").cast(IntegerType())) \
        .withColumn("MemoryUsage_usedKb", col("MemoryUsage_usedKb").cast(IntegerType())) \
        .withColumn("VMStat_nicePct", col("VMStat_nicePct").cast(IntegerType())) \
        .withColumn("VMStat_userPct", col("VMStat_userPct").cast(IntegerType())) \
        .withColumn("VMStat_swIrqPct", col("VMStat_swIrqPct").cast(IntegerType())) \
        .withColumn("VMStat_loadAverage", col("VMStat_loadAverage").cast(IntegerType()))

    aggregation_fields = ["VMStat_idlePct", "VMStat_systemPct", "VMStat_iowaitPct",
                          "VMStat_hwIrqPct", "MemoryUsage_usedKb", "MemoryUsage_freeKb",
                          "MemoryUsage_cachedKb", "VMStat_nicePct", "VMStat_userPct",
                          "VMStat_swIrqPct", "VMStat_loadAverage"]
    aggregation_fields_with_sum = ["MemoryUsage_usedKb", "MemoryUsage_freeKb",
                                   "MemoryUsage_cachedKb"]
    aggregations = []

    for field in aggregation_fields:
        kwargs = {'aggregation_field': field}
        aggregations.extend([Count(**kwargs), Max(**kwargs), Min(**kwargs), Stddev(**kwargs),
                             P01(**kwargs), P05(**kwargs), P10(**kwargs), P25(**kwargs),
                             P50(**kwargs), P75(**kwargs), P90(**kwargs), P95(**kwargs),
                             P99(**kwargs)])
        if field in aggregation_fields_with_sum:
            aggregations.append(Sum(**kwargs))

    return [stream.aggregate(CompoundAggregation(aggregations=aggregations,
                                                 group_fields=self.__dimensions,
                                                 aggregation_name=self._component_name))]
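# The eleven cast(...) calls above all follow one pattern; a behaviour-
# preserving loop form could look like this (a sketch assuming the same col
# and IntegerType imports as the surrounding module):
for field in ["VMStat_idlePct", "VMStat_systemPct", "VMStat_iowaitPct",
              "VMStat_hwIrqPct", "MemoryUsage_freeKb", "MemoryUsage_cachedKb",
              "MemoryUsage_usedKb", "VMStat_nicePct", "VMStat_userPct",
              "VMStat_swIrqPct", "VMStat_loadAverage"]:
    read_stream = read_stream.withColumn(field, col(field).cast(IntegerType()))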
def _process_pipeline(self, json_stream):
    schema = StructType([
        StructField("TUNER", StringType()),
        StructField("BOARD", StringType()),
        StructField("WIFI", StringType()),
        StructField("CPU", StringType()),
        StructField("HDD", StringType())
    ])

    # "-274" is below absolute zero and marks an invalid temperature reading,
    # so it is replaced with null before aggregation.
    stream = json_stream \
        .withColumn("jsonHW", from_json(col("TemperatureReport_value"), schema).alias("jsonHW")) \
        .withColumn("TUNER", when(col("jsonHW.TUNER") == "-274", None).otherwise(col("jsonHW.TUNER"))) \
        .withColumn("BOARD", when(col("jsonHW.BOARD") == "-274", None).otherwise(col("jsonHW.BOARD"))) \
        .withColumn("WIFI", when(col("jsonHW.WIFI") == "-274", None).otherwise(col("jsonHW.WIFI"))) \
        .withColumn("CPU", when(col("jsonHW.CPU") == "-274", None).otherwise(col("jsonHW.CPU"))) \
        .withColumn("HDD", when(col("jsonHW.HDD") == "-274", None).otherwise(col("jsonHW.HDD"))) \
        .drop("jsonHW") \
        .drop("TemperatureReport_value")

    aggregation_fields = ["TUNER", "BOARD", "WIFI", "CPU", "HDD"]
    aggregations = []

    for field in aggregation_fields:
        kwargs = {'aggregation_field': field}
        aggregations.extend([Count(**kwargs), Max(**kwargs), Min(**kwargs), Stddev(**kwargs),
                             P01(**kwargs), P05(**kwargs), P10(**kwargs), P25(**kwargs),
                             P50(**kwargs), P75(**kwargs), P90(**kwargs), P95(**kwargs),
                             P99(**kwargs)])

    return [stream.aggregate(CompoundAggregation(aggregations=aggregations,
                                                 group_fields=self.__dimensions,
                                                 aggregation_name=self._component_name))]
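# The five sentinel-replacement withColumn calls above could be collapsed
# into a loop; a behaviour-preserving sketch (the helper name
# _null_out_sentinel is hypothetical, and col/when come from the module's
# existing imports):
def _null_out_sentinel(stream, fields, sentinel="-274"):
    for field in fields:
        stream = stream.withColumn(
            field,
            when(col("jsonHW." + field) == sentinel, None)
            .otherwise(col("jsonHW." + field)))
    return stream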
def _process_pipeline(self, read_stream):
    return read_stream \
        .aggregate(Count(
            group_fields=["hardwareVersion", "firmwareVersion", "asVersion",
                          "appVersion", "ErrorReport_level"],
            aggregation_name=self._component_name))
def __count_network_errors(self, error_report_stream):
    return error_report_stream \
        .where((col("code").between(9003, 9006)) |
               (col("code").between(9993, 9996)) |
               (col("code") == 9031)) \
        .aggregate(Count(group_fields=["code"],
                         aggregation_name=self._component_name + ".network_errors"))
def _process_pipeline(self, read_stream):
    # Drop zero-duration requests and health-check endpoints before aggregating.
    filtered_stream = read_stream.where(
        (col("duration_ms").cast("long") != 0)
        & ~(col("requested_url").startswith("GET /info")
            | col("requested_url").startswith("GET /prometheus"))
    )
    mapped_stream = filtered_stream \
        .withColumn("country",
                    when(col("stack").isNotNull(), regexp_extract("stack", r".*-(\w+)$", 1))
                    .otherwise("undefined"))

    average_duration = mapped_stream.aggregate(
        Avg(group_fields=["country", "host", "app", "app_version", "api_method"],
            aggregation_field="duration_ms",
            aggregation_name=self._component_name))

    count_by_status = mapped_stream.aggregate(
        Count(group_fields=["country", "host", "app", "app_version", "api_method", "status"],
              aggregation_name=self._component_name))

    request_stream = read_stream \
        .where(col("header_x-dev").isNotNull()) \
        .withColumn("country",
                    when(col("stack").isNotNull(), regexp_extract("stack", r".*-(\w+)$", 1))
                    .otherwise("undefined"))

    count_by_app = request_stream.aggregate(
        Count(group_fields=["country", "app"],
              aggregation_name=self._component_name + ".requests"))

    count_by_app_with_status = request_stream \
        .where(col("status").isNotNull()) \
        .withColumn("status", custom_translate_regex(
            source_field=col("status"),
            mapping={r"^2\d\d": "successful"},
            default_value="failure")) \
        .aggregate(Count(group_fields=["country", "app", "status"],
                         aggregation_name=self._component_name + ".requests"))

    count_stb_requests = request_stream \
        .aggregate(Count(group_fields=["country", "header_x-dev"],
                         aggregation_name=self._component_name + ".requests"))

    return [average_duration, count_by_status, count_stb_requests,
            count_by_app, count_by_app_with_status]
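# A hedged sketch of what custom_translate_regex might do, inferred from the
# call above (a mapping of regex pattern -> label, with a fallback default);
# this is an assumption about the library, not its actual implementation:
from pyspark.sql.functions import lit, when


def custom_translate_regex_sketch(source_field, mapping, default_value):
    result = lit(default_value)
    for pattern, label in mapping.items():
        result = when(source_field.rlike(pattern), label).otherwise(result)
    return result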
def __ring_status_node_errors(self, error_events):
    return error_events \
        .where("message like '%Eventis.Cassandra.Service."
               "CassandraServiceException+HostRingException%'") \
        .withColumn("host",
                    regexp_extract("message",
                                   r".*Eventis\.Cassandra\.Service\.CassandraServiceException\+"
                                   r"HostRingException.*'(\S+)'.*", 1)) \
        .aggregate(Count(group_fields=["hostname", "host"],
                         aggregation_name=self._component_name + ".ring_status_node_errors"))
def __cassandra_errors(self, error_events):
    return error_events \
        .where("message like '%Exception with cassandra node%'") \
        .withColumn("host",
                    regexp_extract("message",
                                   r".*Exception\s+with\s+cassandra\s+node\s+'([\d\.]+).*", 1)) \
        .aggregate(Count(group_fields=["hostname", "host"],
                         aggregation_name=self._component_name + ".cassandra_errors"))
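# A quick sanity check of the node-address regex above, against a
# hypothetical log line (the sample message is made up for illustration):
import re

sample = "ERROR Exception with cassandra node '10.0.0.12' (connection refused)"
pattern = r".*Exception\s+with\s+cassandra\s+node\s+'([\d\.]+).*"
assert re.match(pattern, sample).group(1) == "10.0.0.12"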
def _process_pipeline(self, read_stream):
    return read_stream.aggregate(
        Count(group_fields=["hardwareVersion", "firmwareVersion", "asVersion",
                            "appVersion", "UsageCollectorReport_event_Type"],
              aggregation_name=self._component_name))
def __undefined_warnings(self, warn_events):
    return warn_events.where(
        "message not like '%Unable to use alias%because alias is already used by%' and "
        "message not like '%One or more validation errors detected during tva ingest%'"
    ).aggregate(
        Count(group_fields=["hostname"],
              aggregation_name=self._component_name + ".undefined_warnings"))
def __ring_status_node_warnings(self, events):
    return events \
        .where("message like '%Unable to determine external address "
               "of node with internal address %'") \
        .withColumn("host",
                    regexp_extract("message",
                                   r".*Unable\s+to\s+determine\s+external\s+address\s+of\s+"
                                   r"node\s+with\s+internal\s+address\s+'(\S+)'.*", 1)) \
        .aggregate(Count(group_fields=["hostname", "host"],
                         aggregation_name=self._component_name + ".ring_status_node_warnings"))
def __process_hi_res_events(self, read_stream):
    """
    Aggregations for events about loading high-resolution images.
    :param read_stream: input stream with events from dag kafka topic
    :return: list of aggregated metrics
    """
    perform_high_res_images_events = read_stream \
        .where("task == 'perform_high_resolution_images_qc'")

    def __process_images_processed_status(column_name, regex_group_number, component_suffix):
        """
        Calculate an aggregated metric for a specific column.
        :param column_name: new column name for the count of processed images
        :param regex_group_number: index of the group in the regex pattern
        :param component_suffix: suffix for the metric name
        :return: aggregated metric for the specific column
        """
        return perform_high_res_images_events \
            .where("subtask_message like 'Images processed:%'") \
            .withColumn(column_name,
                        regexp_extract("subtask_message",
                                       r"^Images processed: qc_success: (\d+), "
                                       r"qc_retry: (\d+), qc_error: (\d+).*",
                                       regex_group_number)) \
            .aggregate(Sum(group_fields=["dag", "task"],
                           aggregation_field=column_name,
                           aggregation_name=self._component_name + "." + component_suffix))

    perform_high_res_images_processed_success_sum = \
        __process_images_processed_status("images_success", 1, "hi_res_images_processed_success")
    perform_high_res_images_processed_retry_sum = \
        __process_images_processed_status("images_retry", 2, "hi_res_images_processed_retry")
    perform_high_res_images_processed_error_sum = \
        __process_images_processed_status("images_error", 3, "hi_res_images_processed_error")

    __mapping_image_type = [
        (["image_type='HighResPortrait'", "status='qc_success'"], "hi_res_images_portrait"),
        (["image_type='HighResLandscape'", "status='qc_success'"], "hi_res_images_landscape")
    ]

    perform_high_res_images_type_count = perform_high_res_images_events \
        .withColumn("image_type",
                    custom_translate_like(source_field=col("subtask_message"),
                                          mappings_pair=__mapping_image_type,
                                          default_value="unclassified")) \
        .where("image_type != 'unclassified'") \
        .aggregate(Count(group_fields=["dag", "task", "image_type"],
                         aggregation_name=self._component_name))

    return [
        perform_high_res_images_processed_success_sum,
        perform_high_res_images_processed_retry_sum,
        perform_high_res_images_processed_error_sum,
        perform_high_res_images_type_count
    ]
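# An example of the subtask_message format the regex above expects, and the
# three groups it extracts (the message and counts are hypothetical):
import re

msg = "Images processed: qc_success: 40, qc_retry: 3, qc_error: 1"
pattern = r"^Images processed: qc_success: (\d+), qc_retry: (\d+), qc_error: (\d+).*"
assert re.match(pattern, msg).groups() == ("40", "3", "1")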
def _process_pipeline(self, read_stream):
    stb_ids = read_stream.withColumn("stb_id", col("header.viewerID"))

    requests_count = stb_ids.aggregate(
        Count(aggregation_name=self._component_name + ".request"))

    stb_ids_distinct_count = stb_ids.aggregate(
        DistinctCount(aggregation_field="stb_id",
                      aggregation_name=self._component_name))

    return [requests_count, stb_ids_distinct_count]
def __count_player_live_tv_errors(self, error_report_stream):
    return error_report_stream \
        .where((col("code").between(2000, 2002)) | (col("code") == 2004) |
               (col("code") == 2006) | (col("code") == 2010) |
               (col("code") == 2020) | (col("code") == 2050)) \
        .aggregate(Count(group_fields=["code"],
                         aggregation_name=self._component_name + ".player_live_tv_errors"))
def _agg_uservice2component_count(self, stream):
    """
    Aggregate uservice to HE component call counts.
    :param stream: input stream of call records
    :return: aggregated call counts
    """
    aggregation = Count(group_fields=["tenant", "app", "dest", "calls"],
                        aggregation_field="requests",
                        aggregation_name=self._component_name)
    return stream.aggregate(aggregation)
def _agg_count(self, stream, type):
    """
    Aggregate call counts by app and the given grouping field.
    :param stream: input stream of call records
    :param type: name of the additional field to group by
    :return: aggregated call counts
    """
    aggregation = Count(group_fields=["app", type],
                        aggregation_field="status",
                        aggregation_name=self._component_name)
    return stream.aggregate(aggregation)
def __memory_flushing(self, events):
    return events \
        .where("message like '%Flushing%'") \
        .withColumn("column_family", custom_translate_like(
            source_field=col("message"),
            mappings_pair=[(["Channels"], "channels"),
                           (["Titles"], "titles"),
                           (["Groups"], "groups")],
            default_value="unclassified")) \
        .where("column_family != 'unclassified'") \
        .aggregate(Count(group_fields=["column_family"],
                         aggregation_name=self._component_name + ".memory_flushing"))
def _process_pipeline(self, json_stream):
    stream = json_stream \
        .withColumn("UsageCollectorReport_missed_events",
                    col("UsageCollectorReport_missed_events").cast(IntegerType()))

    kwargs = {"aggregation_field": "UsageCollectorReport_missed_events"}
    aggregations = [Sum(**kwargs), Count(**kwargs), Max(**kwargs), Min(**kwargs),
                    Stddev(**kwargs), P01(**kwargs), P05(**kwargs), P10(**kwargs),
                    P25(**kwargs), P50(**kwargs), P75(**kwargs), P90(**kwargs),
                    P95(**kwargs), P99(**kwargs)]

    return [stream.aggregate(CompoundAggregation(aggregations=aggregations,
                                                 group_fields=self.__dimensions,
                                                 aggregation_name=self._component_name))]
def __count_replay_playback_errors(self, error_report_stream):
    return error_report_stream \
        .where((col("code") == 2505) | (col("code") == 2507) | (col("code") == 2510) |
               (col("code") == 2511) | (col("code") == 2512) | (col("code") == 2514) |
               (col("code") == 2517) | (col("code") == 2518)) \
        .aggregate(Count(group_fields=["code"],
                         aggregation_name=self._component_name + ".replay_playback_errors"))
def __count_player_recording_errors(self, error_report_stream):
    return error_report_stream \
        .where((col("code") == 2200) | (col("code") == 2205) | (col("code") == 2207) |
               (col("code") == 2211) | (col("code") == 2212) | (col("code") == 2214) |
               (col("code") == 2217) | (col("code") == 2218)) \
        .aggregate(Count(group_fields=["code"],
                         aggregation_name=self._component_name + ".player_recording_errors"))
def __info_metrics(self, events):
    return events \
        .withColumn("counter", custom_translate_like(
            source_field=col("message"),
            mappings_pair=[
                (["Loading tva version", "took"], "metadata_success")
            ],
            default_value="unclassified")) \
        .where("counter != 'unclassified'") \
        .aggregate(Count(group_fields=["hostname", "counter"],
                         aggregation_name=self._component_name))
def __process_hi_res_on_mpx_events(self, read_stream):
    """
    Aggregations for events about high-resolution images being loaded to MPX.
    :param read_stream: input stream with events from dag kafka topic
    :return: list of aggregated metrics
    """
    upload_high_res_images_created_on_mpx_count = read_stream \
        .where("task == 'upload_high_resolution_images_to_mpx'") \
        .where("subtask_message like '%Image was created on MPX:%'") \
        .aggregate(Count(group_fields=["dag", "task"],
                         aggregation_name=self._component_name + ".hi_res_images_created_on_mpx"))

    return [upload_high_res_images_created_on_mpx_count]
def __count_player_review_buffer_errors(self, error_report_stream):
    return error_report_stream \
        .where((col("code") == 2100) | (col("code") == 2105) | (col("code") == 2107) |
               (col("code") == 2111) | (col("code") == 2112) | (col("code") == 2114) |
               (col("code") == 2117) | (col("code") == 2118) | (col("code") == 2120) |
               (col("code") == 2130)) \
        .aggregate(Count(group_fields=["code"],
                         aggregation_name=self._component_name + ".player_review_buffer_errors"))
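# The chained (col("code") == N) comparisons in the error-count methods above
# could be expressed with Column.isin; a behaviour-equivalent sketch for the
# review-buffer case (col comes from the module's existing imports):
review_buffer_error_codes = [2100, 2105, 2107, 2111, 2112,
                             2114, 2117, 2118, 2120, 2130]
error_report_stream.where(col("code").isin(review_buffer_error_codes))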
def __warn_metrics(self, events):
    return events \
        .withColumn("counter", custom_translate_like(
            source_field=col("message"),
            mappings_pair=[
                (["Error", ":8080/RE"], "reng_error_action"),
                (["Genre", "is not known"], "metadata_warning"),
                (["Invalid parameter"], "invalid_parameter_warning")
            ],
            default_value="unclassified")) \
        .where("counter != 'unclassified'") \
        .aggregate(Count(group_fields=["hostname", "counter"],
                         aggregation_name=self._component_name))
def _process_pipeline(self, read_stream):
    pre_result_df = self._prepare_input_data_frame(read_stream)

    aggregation_fields_without_sum = TunerPerfReport.get_column_names("TunerReport_SNR")
    aggregation_fields_without_sum.extend(
        TunerPerfReport.get_column_names("TunerReport_signalLevel"))

    aggregation_fields_with_sum = TunerPerfReport.get_column_names("TunerReport_erroreds")
    aggregation_fields_with_sum.extend(
        TunerPerfReport.get_column_names("TunerReport_unerroreds"))
    aggregation_fields_with_sum.extend(
        TunerPerfReport.get_column_names("TunerReport_correcteds"))

    aggregations_ls = []
    aggregations_ls.extend(aggregation_fields_without_sum)
    aggregations_ls.extend(aggregation_fields_with_sum)

    aggregations = []
    for field in aggregations_ls:
        kwargs = {'aggregation_field': field}
        aggregations.extend([
            Count(**kwargs), Max(**kwargs), Min(**kwargs),
            P01(**kwargs), P05(**kwargs), P10(**kwargs), P25(**kwargs),
            P50(**kwargs), P75(**kwargs), P90(**kwargs), P95(**kwargs),
            P99(**kwargs)
        ])
        if field in aggregation_fields_with_sum:
            aggregations.append(Sum(**kwargs))

    return [
        pre_result_df.aggregate(
            CompoundAggregation(aggregations=aggregations,
                                group_fields=self.__dimensions,
                                aggregation_name=self._component_name))
    ]
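# With the hypothetical _statistical_aggregations helper sketched earlier,
# the loop above could reduce to (again, a sketch, not the project's code):
aggregations = []
for field in aggregations_ls:
    aggregations.extend(_statistical_aggregations(
        field, with_sum=field in aggregation_fields_with_sum))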