Beispiel #1
0
    def _process_pipeline(self, read_stream):
        """
        Build distinct viewer-id aggregations from the incoming stream.

        :param read_stream: input stream whose ``header`` column is read for
            ``viewerID``, ``modelName``, ``hardwareVersion`` and ``softwareVersions``
        :return: list of aggregated streams in this order: per model name,
            per hardware version, per software version, overall
        """
        header = col("header")
        viewers = read_stream.withColumn("viewer_id", header.getItem("viewerID"))

        def count_viewers_by(source, field):
            # Distinct viewer ids of ``source`` grouped by the single column ``field``.
            return source.aggregate(
                DistinctCount(group_fields=[field],
                              aggregation_field="viewer_id",
                              aggregation_name=self._component_name))

        # Only the ungrouped aggregation passes an explicit window.
        overall = viewers.aggregate(
            DistinctCount(aggregation_field="viewer_id",
                          aggregation_name=self._component_name,
                          aggregation_window=self._get_interval_duration("uniqCountWindow")))

        per_model_name = count_viewers_by(
            viewers.withColumn("model_name", header.getItem("modelName")),
            "model_name")

        per_hardware_version = count_viewers_by(
            viewers.withColumn("hardware_version", header.getItem("hardwareVersion")),
            "hardware_version")

        # One row per entry of header.softwareVersions before reading its version.
        exploded = viewers.withColumn("software_versions",
                                      explode(header.getItem("softwareVersions")))
        per_software_version = count_viewers_by(
            exploded.withColumn("software_version",
                                col("software_versions").getItem("version")),
            "software_version")

        return [per_model_name,
                per_hardware_version,
                per_software_version,
                overall]
 def distinct_total_cpe_with_audio_dolby_digital_not_accepted_app_user_agreement(
         self, common_settings_pipeline):
     """
     Aggregate distinct viewer ids among rows whose ``customer.appsOptIn`` equals 'false'.

     NOTE(review): the method name mentions audio / Dolby Digital, but the only
     condition applied here is on ``customer.appsOptIn`` - confirm the intent.

     :param common_settings_pipeline: settings stream with ``customer.appsOptIn`` and ``viewer_id``
     :return: aggregated stream named ``<component>.cpe_with_not_accepted_app_user_agreement``
     """
     metric_name = self._component_name + ".cpe_with_not_accepted_app_user_agreement"
     not_opted_in = common_settings_pipeline.filter(col("`customer.appsOptIn`") == 'false')
     return not_opted_in.aggregate(
         DistinctCount(aggregation_field="viewer_id", aggregation_name=metric_name))
 def _process_pipeline(self, stream):
     """
     Aggregate distinct viewer ids of usage collector reports with at least one retry.

     :param stream: input stream with ``@timestamp``, ``UsageCollectorReport`` and ``header.viewerID``
     :return: aggregated stream named ``<component>.with_retries``
     """
     reports = stream.select(
         "@timestamp",
         "UsageCollectorReport.*",
         col("header.viewerID").alias("viewer_id"))
     retried = reports.filter(col("UsageCollectorReport.retries") >= 1)
     return retried.aggregate(
         DistinctCount(aggregation_field="viewer_id",
                       aggregation_name=self._component_name + ".with_retries"))
 def __distinct_event_type_by_channel(self, read_stream):
     """
     Aggregate distinct viewer ids per channel for 'TUNE_IN' events.

     :param read_stream: input stream with ``event_type``, ``channel`` and ``viewer_id``
     :return: aggregated stream named ``<component>.tune_in``
     """
     tune_ins = read_stream.where("event_type = 'TUNE_IN'")
     per_channel = DistinctCount(
         group_fields=["channel"],
         aggregation_field="viewer_id",
         aggregation_name=self._component_name + ".tune_in",
         aggregation_window=self._get_interval_duration("uniqCountWindow"))
     return tune_ins.aggregate(per_channel)
Beispiel #5
0
 def __viewer_id_distinct_count_per_loading_screens(
         self, common_screen_load_pipeline):
     """
     Aggregate distinct viewer ids per loading screen, skipping rows without one.

     :param common_screen_load_pipeline: stream with ``loading_screen`` and ``viewer_id``
     :return: aggregated stream named after the component
     """
     with_screen = common_screen_load_pipeline.where("loading_screen is not NULL")
     per_screen = DistinctCount(
         group_fields=["loading_screen"],
         aggregation_field="viewer_id",
         aggregation_name=self._component_name,
         aggregation_window=self._get_interval_duration("uniqCountWindow"))
     return with_screen.aggregate(per_screen)
 def distinct_cpe_with_age_restriction_enabled(self,
                                               common_settings_pipeline):
     """
     Aggregate distinct viewer ids per ``profile.ageLock`` value.

     :param common_settings_pipeline: settings stream with ``profile.ageLock`` and ``viewer_id``
     :return: aggregated stream named ``<component>.cpe_with_age_restriction``
     """
     age_locked = (
         common_settings_pipeline
         .where("`profile.ageLock` is not NULL")
         # Copy into a dot-free column name to use as the grouping field.
         .withColumn("profile_age_lock", col("`profile.ageLock`"))
     )
     return age_locked.aggregate(
         DistinctCount(group_fields=["profile_age_lock"],
                       aggregation_field="viewer_id",
                       aggregation_name=self._component_name + ".cpe_with_age_restriction"))
 def __restarted_stbs_total_count(self, common_vm_stat_pipeline):
     """
     Aggregate distinct viewer ids whose reported ``uptime`` lies in [0, 100].

     :param common_vm_stat_pipeline: stream with ``@timestamp``, ``uptime`` and ``viewer_id``
     :return: aggregated stream named ``<component>.restarted_stbs``
     """
     uptime = col("uptime")
     recently_started = (
         common_vm_stat_pipeline
         .select("@timestamp", "uptime", "viewer_id")
         .where((uptime >= 0) & (uptime <= 100))
     )
     return recently_started.aggregate(
         DistinctCount(aggregation_field="viewer_id",
                       aggregation_name=self._component_name + ".restarted_stbs",
                       aggregation_window=self._get_interval_duration("uniqCountWindow")))
    def __dag_details(self, read_stream):
        """
        Build the detailed per-DAG metrics.

        :param read_stream: input stream with events from dag kafka topic
        :return: list of three aggregated streams: distinct tasks per dag,
            event count per (dag, hostname, task), and event count per (dag, task)
            for dags matching '%bbc%' with a subtask message matching '%ITV generated%'
        """
        details_name = self._component_name + ".details"

        unique_tasks_per_dag = (
            read_stream
            .filter("dag is not NULL")
            .filter("task is not NULL")
            .aggregate(DistinctCount(group_fields=["dag"],
                                     aggregation_field="task",
                                     aggregation_name=details_name))
        )

        events_per_dag_host_task = (
            read_stream
            .filter("dag is not NULL")
            .filter("hostname is not NULL")
            .filter("task is not NULL")
            .aggregate(Count(group_fields=["dag", "hostname", "task"],
                             aggregation_name=details_name))
        )

        bbc_itv_generated_per_task = (
            read_stream
            .filter("dag is not NULL")
            .filter("task is not NULL")
            .where("dag like '%bbc%' and subtask_message like '%ITV generated%'")
            .aggregate(Count(group_fields=["dag", "task"],
                             aggregation_name=self._component_name + ".highres.itv_gen"))
        )

        return [unique_tasks_per_dag,
                events_per_dag_host_task,
                bbc_itv_generated_per_task]
 def distinct_cpe_factory_reset_report(self, common_settings_pipeline):
     """
     Aggregate distinct viewer ids per ``cpe.factoryResetState`` value.

     :param common_settings_pipeline: settings stream with ``@timestamp``,
         ``cpe.factoryResetState`` and ``viewer_id``
     :return: aggregated stream named after the component
     """
     reset_state = col("`cpe.factoryResetState`").alias("cpe_factory_reset_state")
     with_state = (
         common_settings_pipeline
         .select("@timestamp", reset_state, "viewer_id")
         .where("cpe_factory_reset_state is not NULL")
     )
     return with_state.aggregate(
         DistinctCount(group_fields=["cpe_factory_reset_state"],
                       aggregation_field="viewer_id",
                       aggregation_name=self._component_name))
 def __restarted_stbs_count_per_firmware(self, common_vm_stat_pipeline):
     """
     Aggregate distinct viewer ids per software version for rows with ``uptime`` in [0, 100].

     :param common_vm_stat_pipeline: stream with ``@timestamp``, ``uptime``,
         ``viewer_id`` and ``software_versions.version``
     :return: aggregated stream named ``<component>.restarted_stbs_per_frimware``
     """
     uptime = col("uptime")
     # One output row per entry of software_versions.version.
     software_version = explode("software_versions.version").alias("software_version")
     recently_started = (
         common_vm_stat_pipeline
         .select("@timestamp", "uptime", "viewer_id", software_version)
         .where((uptime >= 0) & (uptime <= 100))
     )
     # NOTE(review): "frimware" looks like a typo of "firmware"; kept as-is because
     # the string is the emitted metric name and consumers may depend on it.
     metric_name = self._component_name + ".restarted_stbs_per_frimware"
     return recently_started.aggregate(
         DistinctCount(group_fields=["software_version"],
                       aggregation_field="viewer_id",
                       aggregation_name=metric_name,
                       aggregation_window=self._get_interval_duration("uniqCountWindow")))
 def distinct_cpe_with_selected_subtitles_track_language(
         self, common_settings_pipeline):
     """
     Aggregate distinct viewer ids per ``profile.subLang`` value.

     :param common_settings_pipeline: settings stream with ``@timestamp``,
         ``profile.subLang`` and ``viewer_id``
     :return: aggregated stream named ``<component>.cpe_with_selected_subtitles_language``
     """
     subtitle_language = col("`profile.subLang`").alias("profile_sub_lang")
     with_language = (
         common_settings_pipeline
         .select("@timestamp", subtitle_language, "viewer_id")
         .where("profile_sub_lang is not NULL")
     )
     metric_name = self._component_name + ".cpe_with_selected_subtitles_language"
     return with_language.aggregate(
         DistinctCount(group_fields=["profile_sub_lang"],
                       aggregation_field="viewer_id",
                       aggregation_name=metric_name))
 def distinct_cpe_count_recently_used_settings_items(
         self, common_settings_pipeline):
     """
     Aggregate distinct viewer ids per recently used settings item.

     :param common_settings_pipeline: settings stream with ``@timestamp``, ``viewer_id``
         and ``profile.recentlyUsedSettingsItems``
     :return: aggregated stream named ``<component>.recently_used``
     """
     # One output row per entry of profile.recentlyUsedSettingsItems.
     settings_item = explode("`profile.recentlyUsedSettingsItems`").alias("settings_items")
     per_item_rows = common_settings_pipeline.select("@timestamp", "viewer_id", settings_item)
     return per_item_rows.aggregate(
         DistinctCount(group_fields=["settings_items"],
                       aggregation_field="viewer_id",
                       aggregation_name=self._component_name + ".recently_used"))
    def _process_pipeline(self, read_stream):
        """
        Count all requests and the distinct STB ids found in the stream.

        :param read_stream: input stream exposing ``header.viewerID``
        :return: list ``[request count stream, distinct stb_id count stream]``
        """
        with_stb_id = read_stream.withColumn("stb_id", col("header.viewerID"))

        total_requests = Count(aggregation_name=self._component_name + ".request")
        unique_stb_ids = DistinctCount(aggregation_field="stb_id",
                                       aggregation_name=self._component_name)

        return [with_stb_id.aggregate(total_requests),
                with_stb_id.aggregate(unique_stb_ids)]
 def distinct_cpe_count_with_upgrade_status(self, common_settings_pipeline):
     """
     Aggregate distinct viewer ids per (``cpe.country``, ``cpe.upgradeStatus``) pair.

     :param common_settings_pipeline: settings stream with ``@timestamp``, ``viewer_id``,
         ``cpe.country`` and ``cpe.upgradeStatus``
     :return: aggregated stream named ``<component>.cpe_count_with_upgrade_status``
     """
     country = col("`cpe.country`").alias("cpe_country")
     upgrade_status = col("`cpe.upgradeStatus`").alias("upgrade_status")
     complete_rows = (
         common_settings_pipeline
         .select("@timestamp", "viewer_id", country, upgrade_status)
         .where("cpe_country is not NULL")
         .where("upgrade_status is not NULL")
     )
     return complete_rows.aggregate(
         DistinctCount(group_fields=["cpe_country", "upgrade_status"],
                       aggregation_field="viewer_id",
                       aggregation_name=self._component_name + ".cpe_count_with_upgrade_status"))
 def distinct_audio_brands_paired_with_each_cpe(self,
                                                common_settings_pipeline):
     """
     Aggregate distinct viewer ids per (``is_paired``, ``brand``) of the 'amp' paired device.

     Rows with a NULL brand are dropped; an empty brand is reported as "unknown_brands".

     :param common_settings_pipeline: settings stream with ``@timestamp``, ``viewer_id``
         and ``cpe.quicksetPairedDevicesInfo``
     :return: aggregated stream named ``<component>.audio_brands_paired_with_each_cpe``
     """
     brand_of_amp = col("`cpe.quicksetPairedDevicesInfo`").getItem("amp").getItem("brand")
     pairing_of_amp = col("`cpe.quicksetPairedDevicesInfo`").getItem("amp").getItem("isPaired")
     normalized_brand = when(col("brand") == "", "unknown_brands").otherwise(col("brand"))

     amp_rows = (
         common_settings_pipeline
         .select("@timestamp",
                 brand_of_amp.alias("brand"),
                 pairing_of_amp.alias("is_paired"),
                 "viewer_id")
         .where("brand is not NULL")
         .withColumn("brand", normalized_brand)
     )
     return amp_rows.aggregate(
         DistinctCount(group_fields=["is_paired", "brand"],
                       aggregation_field="viewer_id",
                       aggregation_name=self._component_name + ".audio_brands_paired_with_each_cpe"))
    def _agg_unique_count(self, stream, type):
        """
        Aggregate the distinct count of ``status`` grouped by ``app`` and the ``type`` column.

        The column named by ``type`` is set to the literal "all" before aggregating.

        :param stream: input stream with ``app`` and ``status`` columns
        :param type: name of the column to fill with "all" and group by
            (shadows the builtin; kept because it is part of the signature)
        :return: aggregated stream named after the component
        """
        status_per_app = DistinctCount(
            aggregation_name=self._component_name,
            aggregation_field="status",
            group_fields=["app", type])

        tagged = stream.withColumn(type, lit("all"))
        return tagged.aggregate(status_per_app)
    def __process_common_events(self, read_stream):
        """
        Build the common DAG metrics from the event stream.

        :param read_stream: input stream with events from dag kafka topic
        :return: list ``[distinct dags per hostname, task exit count per (dag, task, status)]``
        """
        dags_per_host = (
            read_stream
            .select(col("hostname"), col("@timestamp"), col("dag"))
            .aggregate(DistinctCount(group_fields=["hostname"],
                                     aggregation_field="dag",
                                     aggregation_name=self._component_name))
        )

        # Return code 0 is reported as "success", any other exit message as "failure".
        exit_status = when(col("message").like("Task exited with return code 0%"), lit("success")) \
            .otherwise(lit("failure"))
        task_exits = (
            read_stream
            .select(col("@timestamp"), col("task"), col("dag"), col("message"))
            .where(col("message").like("Task exited with return code%"))
            .withColumn("status", exit_status)
        )
        exits_per_status = task_exits.aggregate(
            Count(group_fields=["dag", "task", "status"],
                  aggregation_name=self._component_name))

        return [dags_per_host, exits_per_status]
 def __distinct_active_hosts(self, read_stream):
     """
     Aggregate the distinct ``hostname`` values of the stream.

     :param read_stream: input stream with a ``hostname`` column
     :return: aggregated stream named after the component
     """
     unique_hosts = DistinctCount(aggregation_field="hostname",
                                  aggregation_name=self._component_name)
     return read_stream.aggregate(unique_hosts)
 def __distinct_stb_high_memory_usage(self, read_stream):
     """
     Aggregate distinct viewer ids for rows with ``usedKb`` above 1677721.

     :param read_stream: input stream with ``usedKb`` and ``viewer_id``
     :return: aggregated stream named ``<component>.high``
     """
     # NOTE(review): 1677721 is about 1.6 * 1024 * 1024 - presumably 1.6 GiB in kB; confirm.
     high_usage = read_stream.where("usedKb > 1677721")
     return high_usage.aggregate(
         DistinctCount(aggregation_field="viewer_id",
                       aggregation_name=self._component_name + ".high"))
 def __distinct_stb_med_memory_usage(self, read_stream):
     """
     Aggregate distinct viewer ids for rows with ``usedKb`` in [1468006, 1677721].

     :param read_stream: input stream with ``usedKb`` and ``viewer_id``
     :return: aggregated stream named ``<component>.med``
     """
     # NOTE(review): bounds are about 1.4 and 1.6 * 1024 * 1024 - presumably GiB in kB; confirm.
     medium_usage = read_stream.where("usedKb >= 1468006 and usedKb <= 1677721")
     return medium_usage.aggregate(
         DistinctCount(aggregation_field="viewer_id",
                       aggregation_name=self._component_name + ".med"))
 def __distinct_stb_low_memory_usage(self, read_stream):
     """
     Aggregate distinct viewer ids for rows with ``usedKb`` below 1468006.

     :param read_stream: input stream with ``usedKb`` and ``viewer_id``
     :return: aggregated stream named ``<component>.low``
     """
     # NOTE(review): 1468006 is about 1.4 * 1024 * 1024 - presumably 1.4 GiB in kB; confirm.
     low_usage = read_stream.where("usedKb < 1468006")
     return low_usage.aggregate(
         DistinctCount(aggregation_field="viewer_id",
                       aggregation_name=self._component_name + ".low"))
Beispiel #22
0
 def __search_failures(self, error_report_stream):
     """
     Aggregate distinct viewer ids for error code 8400 rows that carry ``ctxt.search``.

     :param error_report_stream: stream with ``code``, ``ctxt.search`` and ``viewer_id``
     :return: aggregated stream named ``<component>.search_failures``
     """
     is_search_error = (col("code") == 8400) & (col("ctxt.search").isNotNull())
     failures = error_report_stream.where(is_search_error)
     return failures.aggregate(
         DistinctCount(aggregation_field="viewer_id",
                       aggregation_name=self._component_name + ".search_failures",
                       aggregation_window=self._uniq_count_window))
Beispiel #23
0
 def __rev_buffer_playouts(self, error_report_stream):
     """
     Aggregate distinct viewer ids for rows with ``code`` in [2100, 2199].

     :param error_report_stream: stream with ``code`` and ``viewer_id``
     :return: aggregated stream named ``<component>.rev_buffer_playouts``
     """
     code = col("code")
     in_code_range = (code >= 2100) & (code <= 2199)
     matching = error_report_stream.where(in_code_range)
     return matching.aggregate(
         DistinctCount(aggregation_field="viewer_id",
                       aggregation_name=self._component_name + ".rev_buffer_playouts",
                       aggregation_window=self._uniq_count_window))
Beispiel #24
0
 def __ltv_playout_errors(self, error_report_stream):
     """
     Aggregate distinct viewer ids for rows with ``code`` 2004 or 2002.

     :param error_report_stream: stream with ``code`` and ``viewer_id``
     :return: aggregated stream named ``<component>.ltv_playout_errors``
     """
     code = col("code")
     is_playout_error = (code == 2004) | (code == 2002)
     matching = error_report_stream.where(is_playout_error)
     return matching.aggregate(
         DistinctCount(aggregation_field="viewer_id",
                       aggregation_name=self._component_name + ".ltv_playout_errors",
                       aggregation_window=self._uniq_count_window))
 def __host_names_unique_count(self, events):
     """
     Aggregate the distinct ``hostname`` values of the events.

     :param events: input stream with a ``hostname`` column
     :return: aggregated stream named after the component
     """
     unique_hosts = DistinctCount(aggregation_name=self._component_name,
                                  aggregation_field="hostname")
     return events.aggregate(unique_hosts)
Beispiel #26
0
 def total_cpe_net_config_for_wifi_ethernet_channels(
         self, common_net_configuration_pipeline):
     """
     Aggregate distinct viewer ids per ``type`` value, skipping rows without one.

     :param common_net_configuration_pipeline: stream with ``type`` and ``viewer_id``
     :return: aggregated stream named after the component
     """
     typed_rows = common_net_configuration_pipeline.where("type is not NULL")
     per_type = DistinctCount(group_fields=["type"],
                              aggregation_field="viewer_id",
                              aggregation_name=self._component_name)
     return typed_rows.aggregate(per_type)
Beispiel #27
0
 def distinct_total_net_config_enabled(self,
                                       common_net_configuration_pipeline):
     """
     Aggregate distinct viewer ids per ``enabled`` value, skipping rows without one.

     :param common_net_configuration_pipeline: stream with ``enabled`` and ``viewer_id``
     :return: aggregated stream named after the component
     """
     flagged_rows = common_net_configuration_pipeline.where("enabled is not NULL")
     per_flag = DistinctCount(group_fields=["enabled"],
                              aggregation_field="viewer_id",
                              aggregation_name=self._component_name)
     return flagged_rows.aggregate(per_flag)
Beispiel #28
0
 def __count_distinct_active_stb_wifi(self, common_wifi_pipeline):
     """
     Aggregate distinct viewer ids for rows with a positive ``rxKbps``.

     :param common_wifi_pipeline: wifi stream with ``rxKbps`` and ``viewer_id``
     :return: aggregated stream named ``<component>.active``
     """
     receiving = common_wifi_pipeline.where("rxKbps > 0")
     return receiving.aggregate(
         DistinctCount(aggregation_field="viewer_id",
                       aggregation_name=self._component_name + ".active"))
Beispiel #29
0
 def __distinct_total_wifi_network_types_count(self, common_wifi_pipeline):
     """
     Aggregate distinct viewer ids for rows with a positive ``rxKbps`` or ``txKbps``.

     :param common_wifi_pipeline: wifi stream with ``rxKbps``, ``txKbps`` and ``viewer_id``
     :return: aggregated stream named ``<component>.network``
     """
     has_traffic = (col("rxKbps") > 0) | (col("txKbps") > 0)
     with_traffic = common_wifi_pipeline.where(has_traffic)
     return with_traffic.aggregate(
         DistinctCount(aggregation_field="viewer_id",
                       aggregation_name=self._component_name + ".network"))
 def __hostname_unique_count(self, read_stream):
     """
     Aggregate the distinct ``hostname`` values of the stream.

     :param read_stream: input stream with a ``hostname`` column
     :return: aggregated stream named after the component
     """
     unique_hostnames = DistinctCount(aggregation_field="hostname",
                                      aggregation_name=self._component_name)
     return read_stream.aggregate(unique_hostnames)