def _process_pipeline(self, read_stream):
        """
        Average the duration of ContentIngest ``ingest.sh`` runs per host.

        Only rows whose ``started_script`` equals the ingest script path are
        kept; ``duration`` is then averaged grouped by ``hostname``.

        :param read_stream: stream to filter and aggregate
        :return: single-element list holding the aggregated stream
        """
        ingest_runs = read_stream.where(
            "started_script == '/apps/ThinkAnalytics/ContentIngest/bin/ingest.sh'")
        ingest_duration_avg = Avg(group_fields=["hostname"],
                                  aggregation_field="duration",
                                  aggregation_name=self._component_name + ".ingest")

        return [ingest_runs.aggregate(ingest_duration_avg)]
 def __avg_frequency_stb_by_report_index(self, read_stream):
     """
     Average ``frequency`` per ``index`` over locked rows.

     Rows are kept only when ``locked = true`` and ``frequency`` is not NULL.

     :param read_stream: stream to filter and aggregate
     :return: aggregated stream
     """
     locked_rows = read_stream.where("locked = true")
     locked_with_frequency = locked_rows.where("frequency is not NULL")
     frequency_avg = Avg(group_fields=["index"],
                         aggregation_field="frequency",
                         aggregation_name=self._component_name + ".locked")
     return locked_with_frequency.aggregate(frequency_avg)
Esempio n. 3
0
    def _process_pipeline(self, read_stream):
        """
        Average per-process figures grouped by process name.

        Every element of ``TopProcesses.processes`` is exploded into its own
        row and flattened next to ``@timestamp``; one averaged stream is then
        built for each of ``rss``, ``fds``, ``threads`` and ``vsz``.

        :param read_stream: stream to transform and aggregate
        :return: list of aggregated streams, one per field, in that order
        """
        process_entries = col("TopProcesses").getItem("processes")
        exploded = read_stream.withColumn("process", explode(process_entries))
        per_process = exploded.selectExpr("process.*", "`@timestamp`")

        averaged_streams = []
        for field in ("rss", "fds", "threads", "vsz"):
            # NOTE(review): group_fields is a bare string here; confirm Avg
            # accepts that as well as a list of field names.
            field_avg = Avg(group_fields="name", aggregation_field=field,
                            aggregation_name=self._component_name)
            averaged_streams.append(per_process.aggregate(field_avg))
        return averaged_streams
    def _agg_end2end(self, stream):
        """
        Aggregate end to end calls from STB - uservice.

        Combines the maximum, minimum and average of ``duration_ms`` in one
        compound aggregation grouped by ``tenant``, ``app`` and ``status``.

        :param stream: stream to aggregate
        :return: aggregated stream
        """
        duration_aggregations = [aggregation(aggregation_field="duration_ms")
                                 for aggregation in (Max, Min, Avg)]
        compound = CompoundAggregation(aggregations=duration_aggregations,
                                       aggregation_name=self._component_name,
                                       group_fields=["tenant", "app", "status"])

        return stream.aggregate(compound)
    def _agg_uservice2component_duration(self, stream):
        """
        Aggregate the duration of uservice - he component calls.

        Combines the maximum, minimum and average of ``duration_ms`` in one
        compound aggregation grouped by ``tenant``, ``app``, ``dest`` and
        ``host``.

        :param stream: stream to aggregate
        :return: aggregated stream
        """
        duration_aggregations = [aggregation(aggregation_field="duration_ms")
                                 for aggregation in (Max, Min, Avg)]
        compound = CompoundAggregation(aggregations=duration_aggregations,
                                       aggregation_name=self._component_name,
                                       group_fields=["tenant", "app", "dest", "host"])

        return stream.aggregate(compound)
    def _process_pipeline(self, read_stream):
        """
        Build request duration and request count aggregations.

        Two branches are derived from ``read_stream``:

        * rows with a non-zero ``duration_ms`` whose ``requested_url`` does
          not start with ``GET /info`` or ``GET /prometheus`` feed the average
          duration and the count by status;
        * rows with a non-null ``header_x-dev`` feed the ``.requests`` counts.

        Both branches receive a ``country`` column extracted from the last
        ``-<word>`` segment of ``stack`` ("undefined" when ``stack`` is null).

        :param read_stream: stream to transform and aggregate
        :return: list of aggregated streams in the order: average duration,
                 count by status, count by ``header_x-dev``, count by app,
                 count by app and translated status
        """
        def with_country(stream):
            # Shared by both branches so the country rule is written once.
            country = when(col("stack").isNotNull(),
                           regexp_extract("stack", r".*-(\w+)$", 1)) \
                .otherwise("undefined")
            return stream.withColumn("country", country)

        requests_name = self._component_name + ".requests"

        # filter useless data
        has_duration = col("duration_ms").cast("long") != 0
        is_info_or_prometheus = (col("requested_url").startswith("GET /info")
                                 | col("requested_url").startswith("GET /prometheus"))
        mapped_stream = with_country(read_stream.where(has_duration & ~ is_info_or_prometheus))

        duration_dimensions = ["country", "host", "app", "app_version", "api_method"]
        average_duration = mapped_stream.aggregate(
            Avg(group_fields=duration_dimensions,
                aggregation_field="duration_ms",
                aggregation_name=self._component_name))
        count_by_status = mapped_stream.aggregate(
            Count(group_fields=duration_dimensions + ["status"],
                  aggregation_name=self._component_name))

        request_stream = with_country(read_stream.where(col("header_x-dev").isNotNull()))

        count_by_app = request_stream.aggregate(
            Count(group_fields=["country", "app"], aggregation_name=requests_name))

        # presumably statuses matching ^2\d\d become "successful" and the rest
        # "failure" - depends on custom_translate_regex, defined elsewhere.
        translated_status = custom_translate_regex(
            source_field=col("status"),
            mapping={r"^2\d\d": "successful"},
            default_value="failure")
        count_by_app_with_status = request_stream \
            .where(col("status").isNotNull()) \
            .withColumn("status", translated_status) \
            .aggregate(Count(group_fields=["country", "app", "status"],
                             aggregation_name=requests_name))

        count_stb_requests = request_stream.aggregate(
            Count(group_fields=["country", "header_x-dev"], aggregation_name=requests_name))

        return [
            average_duration,
            count_by_status,
            count_stb_requests,
            count_by_app,
            count_by_app_with_status,
        ]
Esempio n. 7
0
        def aggregate(aggregation_field, group):
            """
            Build the averaged stream for one metric of one group.

            ``metrics.<aggregation_field>`` is selected under the name
            ``aggregation_field``; only ``VirtualMachine`` rows of ``group``
            with a non-null metric are kept, dots in ``name`` are replaced by
            dashes and the metric is averaged by ``res_kind``, ``group`` and
            ``name``.

            :param aggregation_field: name of the metric inside ``metrics``
            :param group: value the ``group`` column has to equal
            :return: aggregated stream
            """
            metric_avg = Avg(group_fields=["res_kind", "group", "name"],
                             aggregation_field=aggregation_field,
                             aggregation_name=self._component_name)

            metric_column = col("metrics.{}".format(aggregation_field)).alias(aggregation_field)
            selected = read_stream.select("@timestamp", "group", "res_kind", "name", metric_column)

            wanted_rows = ((col("group") == group)
                           & (col("res_kind") == "VirtualMachine")
                           & (col(aggregation_field).isNotNull()))
            dashed_names = selected \
                .filter(wanted_rows) \
                .withColumn("name", regexp_replace("name", r"\.", "-"))

            return dashed_names.aggregate(metric_avg)
        def for_each_metric(metric_name):
            """
            Build the averaged stream for one metric of the ``cpu`` group.

            The ``metrics`` struct is flattened first and ``metric_name`` is
            selected from the result; only ``VirtualMachine`` rows of the
            ``cpu`` group with a non-null metric are kept, dots in ``name``
            are replaced by dashes and the metric is averaged by
            ``res_kind``, ``group`` and ``name``.

            :param metric_name: name of the metric to average
            :return: aggregated stream
            """
            metric_avg = Avg(group_fields=["res_kind", "group", "name"],
                             aggregation_field=metric_name,
                             aggregation_name=self._component_name)

            base_columns = ("@timestamp", "group", "res_kind", "name")
            flattened = read_stream.select(*(base_columns + ("metrics.*",)))
            selected = flattened.select(*(base_columns + (metric_name,)))

            wanted_rows = ((col("group") == "cpu")
                           & (col("res_kind") == "VirtualMachine")
                           & (col(metric_name).isNotNull()))
            dashed_names = selected \
                .filter(wanted_rows) \
                .withColumn("name", regexp_replace("name", r"\.", "-"))

            return dashed_names.aggregate(metric_avg)
    def _process_pipeline(self, json_stream):
        """
        Average graphics memory figures per known mapping.

        ``GraphicsMemoryUsage`` is flattened next to ``@timestamp``; the
        ``mapping`` column is replaced by a lower-case label for the known
        values and by "unclassified" otherwise, and unclassified rows are
        dropped. One averaged stream is built for each of ``totalKb``,
        ``peakKb`` and ``freeKb``.

        :param json_stream: stream to transform and aggregate
        :return: list of aggregated streams, one per field, in that order
        """
        known_mappings = (
            ("CRR (SECURE)", "crr_secure"),
            ("GFX", "gfx"),
            ("MAIN", "main"),
            ("PICBUF0", "picbuf0"),
            ("PICBUF1", "picbuf1"),
            ("SAGE (SECURE)", "sage_secure"),
        )

        # Chain the when() branches in the listed order.
        first_raw, first_label = known_mappings[0]
        mapping_label = when(col("mapping") == first_raw, first_label)
        for raw_value, label in known_mappings[1:]:
            mapping_label = mapping_label.when(col("mapping") == raw_value, label)
        mapping_label = mapping_label.otherwise("unclassified")

        classified = json_stream \
            .selectExpr("GraphicsMemoryUsage.*", "`@timestamp`") \
            .withColumn("mapping", mapping_label) \
            .where("mapping != 'unclassified'")

        averaged_streams = []
        for field in ("totalKb", "peakKb", "freeKb"):
            # NOTE(review): group_fields is a bare string here; confirm Avg
            # accepts that as well as a list of field names.
            field_avg = Avg(group_fields="mapping",
                            aggregation_field=field,
                            aggregation_name=self._component_name)
            averaged_streams.append(classified.aggregate(field_avg))
        return averaged_streams
Esempio n. 10
0
    def _process_pipeline(self, uxp_stream):
        """
        Return the list of streams with aggregated fields.

        Rows are kept when ``url`` is one of the processing urls. The url is
        then translated into an ``action`` through the url mapping (default
        "undefined") and ``status code`` is exposed as ``statusCode``. Two
        aggregations are built on the result: a count by ``action`` and
        ``statusCode`` and an average ``responseTime`` by ``action``.

        :param uxp_stream: input stream
        :return: list of processed streams (count first, average second)
        """
        action = custom_translate_like(col("url"), self.__url_mapping, lit("undefined")).alias("action")
        status_code = col("status code").alias("statusCode")

        relevant_requests = uxp_stream.where(uxp_stream.url.isin(self.__processing_urls))
        filtered_exp_stream = relevant_requests.select(
            action, status_code, col("responseTime"), col("@timestamp"))

        count_by_action_and_status = Count(group_fields=["action", "statusCode"],
                                           aggregation_name=self._component_name)
        avg_response_time_by_action = Avg(aggregation_field="responseTime",
                                          group_fields=["action"],
                                          aggregation_name=self._component_name)

        return [
            filtered_exp_stream.aggregate(count_by_action_and_status),
            filtered_exp_stream.aggregate(avg_response_time_by_action),
        ]
Esempio n. 11
0
    def _process_pipeline(self, read_stream):
        """
        Build counts and an average latency from the same input stream.

        :param read_stream: stream to aggregate
        :return: list of aggregated streams in the order: count by
                 ``payload_status``, count by ``status``, average ``latency``
                 by ``uri``, count by ``uri`` and ``status``
        """
        name = self._component_name

        aggregations = [
            Count(group_fields=["payload_status"], aggregation_name=name),
            Count(group_fields=["status"], aggregation_name=name),
            # The two uri-based aggregations are the only ones passing use_udf=True.
            Avg(group_fields=["uri"], aggregation_field="latency",
                aggregation_name=name, use_udf=True),
            Count(group_fields=["uri", "status"], aggregation_name=name, use_udf=True),
        ]

        return [read_stream.aggregate(aggregation) for aggregation in aggregations]
Esempio n. 12
0
 def __wireless_average_downstream_kbps(self, common_wifi_pipeline):
     """
     Average ``rxKbps`` over rows where it is not NULL.

     The aggregation name is the component name plus ``.downstream_kbps``.

     :param common_wifi_pipeline: stream to filter and aggregate
     :return: aggregated stream
     """
     with_rx_rate = common_wifi_pipeline.where("rxKbps is not NULL")
     downstream_avg = Avg(aggregation_field="rxKbps",
                          aggregation_name=self._component_name + ".downstream_kbps")
     return with_rx_rate.aggregate(downstream_avg)
 def __average_usage_low_priority_mode(self, common_vm_stat_pipeline,
                                       time_in_percents):
     """
     Average ``nicePct`` over the given stream.

     :param common_vm_stat_pipeline: stream to select from and aggregate
     :param time_in_percents: suffix appended to the component name to form
                              the aggregation name
     :return: aggregated stream
     """
     nice_pct = common_vm_stat_pipeline.select("@timestamp", col("nicePct"))
     nice_pct_avg = Avg(aggregation_field="nicePct",
                        aggregation_name=self._component_name + time_in_percents)
     return nice_pct.aggregate(nice_pct_avg)
 def __average_usage_cpu_in_wait(self, common_vm_stat_pipeline,
                                 time_in_percents):
     """
     Average ``iowaitPct`` over the given stream.

     :param common_vm_stat_pipeline: stream to select from and aggregate
     :param time_in_percents: suffix appended to the component name to form
                              the aggregation name
     :return: aggregated stream
     """
     iowait_pct = common_vm_stat_pipeline.select("@timestamp", col("iowaitPct"))
     iowait_pct_avg = Avg(aggregation_field="iowaitPct",
                          aggregation_name=self._component_name + time_in_percents)
     return iowait_pct.aggregate(iowait_pct_avg)
 def __average_usage_hardware_interrupt(self, common_vm_stat_pipeline,
                                        time_in_percents):
     """
     Average ``hwIrqPct`` over the given stream.

     :param common_vm_stat_pipeline: stream to select from and aggregate
     :param time_in_percents: suffix appended to the component name to form
                              the aggregation name
     :return: aggregated stream
     """
     hw_irq_pct = common_vm_stat_pipeline.select("@timestamp", col("hwIrqPct"))
     hw_irq_pct_avg = Avg(aggregation_field="hwIrqPct",
                          aggregation_name=self._component_name + time_in_percents)
     return hw_irq_pct.aggregate(hw_irq_pct_avg)
 def __average_uptime_across_stb(self, common_vm_stat_pipeline):
     """
     Average ``uptime``, exposed under the name ``uptime_sec``.

     :param common_vm_stat_pipeline: stream to select from and aggregate
     :return: aggregated stream
     """
     uptime_sec = col("uptime").alias("uptime_sec")
     uptime_rows = common_vm_stat_pipeline.select("@timestamp", uptime_sec)
     uptime_avg = Avg(aggregation_field="uptime_sec",
                      aggregation_name=self._component_name)
     return uptime_rows.aggregate(uptime_avg)
 def __average_user_active_mode(self, common_vm_stat_pipeline,
                                time_in_percents):
     """
     Average ``userPct`` over the given stream.

     :param common_vm_stat_pipeline: stream to select from and aggregate
     :param time_in_percents: suffix appended to the component name to form
                              the aggregation name
     :return: aggregated stream
     """
     user_pct = common_vm_stat_pipeline.select("@timestamp", col("userPct"))
     user_pct_avg = Avg(aggregation_field="userPct",
                        aggregation_name=self._component_name + time_in_percents)
     return user_pct.aggregate(user_pct_avg)
 def __avg_memory_free_kb(self, read_stream):
     """
     Average ``freeKb`` over rows where it is not NULL.

     :param read_stream: stream to filter and aggregate
     :return: aggregated stream
     """
     with_free_kb = read_stream.where("freeKb is not NULL")
     free_kb_avg = Avg(aggregation_field="freeKb",
                       aggregation_name=self._component_name)
     return with_free_kb.aggregate(free_kb_avg)
 def __avg_response_time_by_method(self, read_stream):
     """
     Average ``response_time`` per ``hostname`` and ``method``.

     Rows with a null ``method`` are dropped first.

     :param read_stream: stream to filter and aggregate
     :return: aggregated stream
     """
     with_method = read_stream.where("method is not null")
     response_time_avg = Avg(group_fields=["hostname", "method"],
                             aggregation_field="response_time",
                             aggregation_name=self._component_name)
     return with_method.aggregate(response_time_avg)
 def __avg_response_time(self, read_stream):
     """
     Average ``response_time`` per ``hostname``.

     :param read_stream: stream to aggregate
     :return: aggregated stream
     """
     response_time_avg = Avg(group_fields=["hostname"],
                             aggregation_field="response_time",
                             aggregation_name=self._component_name)
     return read_stream.aggregate(response_time_avg)
 def __avg_snr(self, read_stream):
     """
     Average ``SNR`` over rows where it is not NULL.

     :param read_stream: stream to filter and aggregate
     :return: aggregated stream
     """
     with_snr = read_stream.where("SNR is not NULL")
     snr_avg = Avg(aggregation_field="SNR",
                   aggregation_name=self._component_name)
     return with_snr.aggregate(snr_avg)
Esempio n. 22
0
 def average_temperature(self, common_temperature_pipeline):
     """
     Average ``temperature`` per ``name``, ignoring negative readings.

     :param common_temperature_pipeline: stream to filter and aggregate
     :return: aggregated stream
     """
     non_negative = common_temperature_pipeline.where(col("temperature") >= 0)
     temperature_avg = Avg(aggregation_field="temperature",
                           group_fields=["name"],
                           aggregation_name=self._component_name)
     return non_negative.aggregate(temperature_avg)
Esempio n. 23
0
 def __avg_duration(self, read_stream):
     """
     Average ``duration`` per ``hostname`` over rows whose level is INFO.

     :param read_stream: stream to filter and aggregate
     :return: aggregated stream
     """
     info_rows = read_stream.where("level == 'INFO'")
     duration_avg = Avg(group_fields=["hostname"],
                        aggregation_field="duration",
                        aggregation_name=self._component_name)
     return info_rows.aggregate(duration_avg)
 def __avg_signal_level_dbm(self, read_stream):
     """
     Average ``signalLevel`` over rows where it is not NULL.

     The aggregation name is the component name plus ``.dbm``.

     :param read_stream: stream to filter and aggregate
     :return: aggregated stream
     """
     with_signal_level = read_stream.where("signalLevel is not NULL")
     signal_level_avg = Avg(aggregation_field="signalLevel",
                            aggregation_name=self._component_name + ".dbm")
     return with_signal_level.aggregate(signal_level_avg)
 def __ethernet_average_downstream_kbps(self, read_stream):
     """
     Average ``rxKbps`` over rows where it is not NULL.

     The aggregation name is the component name plus ``.downstream_kbps``.

     :param read_stream: stream to filter and aggregate
     :return: aggregated stream
     """
     with_rx_rate = read_stream.where("rxKbps is not NULL")
     downstream_avg = Avg(aggregation_field="rxKbps",
                          aggregation_name=self._component_name + ".downstream_kbps")
     return with_rx_rate.aggregate(downstream_avg)