def process_stream(self, rdd):
        """
        Downsample one batch to per-second averages and append it to Postgres.

        An empty batch only prints a notice. Otherwise the RDD is turned into
        a DataFrame, ``ts`` is cast to a timestamp, ``val`` is averaged per
        ``id`` over 1-second windows (rounded to 2 decimals), and the rows
        (``id``, ``start_ts``, ``downsample_avg``), ordered by ``start_ts``,
        are appended to ``downsampled_table``. Any exception raised while
        connecting or writing is printed and not re-raised.

        Args rdd: rdd
        :rtype: None
        """
        # Guard clause: nothing to process for an empty batch.
        if rdd.isEmpty():
            print("RDD is empty")
            return

        raw_df = rdd.toDF()

        # downsample data: bucket readings into 1-second windows per id
        stamped_df = raw_df.withColumn("timestamp", raw_df.ts.cast("timestamp"))
        one_second_bucket = window("timestamp", "1 second").alias("ds_ts")
        rounded_mean = F.round(F.avg("val"), 2).alias('downsample_avg')
        bucketed_df = stamped_df.groupBy('id', one_second_bucket).agg(rounded_mean)

        # Keep only the start of each window as the row's timestamp.
        bucket_start = bucketed_df['ds_ts'].start.alias("start_ts")
        result_df = bucketed_df.select("id", bucket_start, "downsample_avg")
        result_df = result_df.orderBy('start_ts', ascending=True)

        # write to timescale
        try:
            sink = pgConnector.PostgresConnector(
                "ec2-3-94-71-208.compute-1.amazonaws.com", "datanodedb",
                "datanode", "password")
            sink.write(result_df, "downsampled_table", "append")
        except Exception as e:
            # Best effort: report the failure and carry on.
            print(e)
Esempio n. 2
0
    def process_stream(self, rdd):
        """
        Count 3-sigma outliers per sensor in one batch and append them to Postgres.

        An empty batch only prints a notice. Otherwise the RDD is turned into
        a DataFrame, a 3-row rolling average of ``val`` is computed per ``id``,
        and each id's readings are compared against a band of three standard
        deviations around that rolling average. One row per id (``id``,
        ``start_ts``, ``end_ts``, ``std_temp``, ``num_anomaly``, ``anomaly``)
        is appended to ``anomaly_window_tbl``. Any exception raised while
        connecting or writing is printed and not re-raised.

        Args rdd: rdd
        :rtype: None
        """
        def detect_anomaly(sensor_readings, running_avg, std_dev):
            """
            Count readings outside a 3-sigma band around the rolling average.

            Each reading is compared with the rolling average of the previous
            position; the first reading has no predecessor and is compared
            with the rolling average at its own position.

            Args:
                sensor_readings: List(float)
                running_avg: List(float), same order as sensor_readings
                std_dev: float, or None when Spark produced a null stddev
            :rtype: int
            """
            # A null stddev (e.g. too few readings in the group) gives no
            # usable band; previously this raised TypeError on `3 * None`.
            if std_dev is None:
                return 0

            band = 3 * std_dev
            num_anomalies = 0
            # zip() keeps the original "stop at the shorter list" behaviour.
            for i, (reading, _) in enumerate(zip(sensor_readings, running_avg)):
                # Clamp at 0 so the first reading is not compared against
                # running_avg[-1] (the *last* rolling average).
                baseline = running_avg[max(i - 1, 0)]
                if (reading > baseline + band) or (reading < baseline - band):
                    num_anomalies += 1
            return num_anomalies

        if rdd.isEmpty():
            print("RDD is empty")
            return

        df = rdd.toDF().cache()
        # Order rows by ts inside each id so that the +/-1 row frame really
        # covers time-adjacent readings; without an ordering the frame is
        # taken over rows in arbitrary order.
        w = (Window.partitionBy(col("id")).orderBy(col("ts"))
             .rowsBetween(-1, 1))
        df = df.withColumn('rolling_average', F.avg("val").over(w))
        agg_df = df.groupBy(['id']).agg(
            F.collect_list("val").alias("sensor_reading"),
            first("ts").cast('timestamp').alias("start_ts"),
            last("ts").cast('timestamp').alias("end_ts"),
            F.round(F.stddev("val"), 3).alias("std_temp"),
            F.collect_list("rolling_average").alias("rol_avg"))
        # Prints a sample of the aggregate to stdout (runs an extra Spark job).
        agg_df.show()
        anomaly_udf = udf(detect_anomaly, IntegerType())
        processed_df = agg_df.withColumn(
            "num_anomaly",
            anomaly_udf("sensor_reading", "rol_avg",
                        "std_temp")).sort(desc("num_anomaly"))
        # An id is flagged only when it has more than one outlying reading.
        final_df = processed_df.withColumn(
            "anomaly",
            F.when(F.col("num_anomaly") > 1, True).otherwise(False))
        final_df = final_df.select("id", "start_ts", "end_ts", "std_temp",
                                   "num_anomaly", "anomaly")
        # NOTE(review): connection settings (apparently including a password)
        # are hard-coded here; consider moving them to configuration.
        try:
            connector = pgConnector.PostgresConnector(
                "ec2-3-94-71-208.compute-1.amazonaws.com", "datanodedb",
                "datanode", "password")
            connector.write(final_df, "anomaly_window_tbl", "append")
        except Exception as e:
            # Best effort: a failed write must not stop the stream.
            print(e)
Esempio n. 3
0
    def process_df(self, df):
        """
        Count seasonal-ESD anomalies per id and append the counts to Postgres.

        Rows are grouped by ``id``; each group's ``downsample_avg`` values are
        collected into a list and passed to ``seasonal_esd``. One row per id
        (``id``, ``start_ts``, ``end_ts``, ``num_anomaly``) is appended to
        ``global_anomalies_table``. Any exception raised while connecting or
        writing is printed and not re-raised.

        Args df: DataFrame with columns ``id``, ``downsample_avg``,
            ``start_ts`` and ``end_ts``
        :rtype: None
        """
        def detect_anomaly(ts):
            """
            Args ts: List(float), the values collected for one id
            rtype: int
            """
            outliers_indices = seasonal_esd(
                ts, hybrid=True, max_anomalies=10)
            return len(outliers_indices)

        # NOTE(review): first()/last() depend on row order within each group;
        # confirm that the incoming df is ordered as intended.
        grouped_df = df.groupBy(["id"]).agg(
            F.collect_list("downsample_avg").alias("downsampled_ts"),
            first("start_ts").alias("start_ts"),
            last("end_ts").alias("end_ts"))
        anomaly_udf = udf(detect_anomaly, IntegerType())
        # The UDF must read the collected list, which is aliased
        # "downsampled_ts" above; "downsampled_avg" is not a column of
        # grouped_df and made Spark fail to resolve it.
        processed_df = grouped_df.withColumn(
            "num_anomaly",
            anomaly_udf("downsampled_ts")).sort(desc("num_anomaly"))
        final_df = processed_df.select(
            "id", "start_ts", "end_ts", "num_anomaly")
        try:
            connector = pgConnector.PostgresConnector(
                "ec2-3-94-71-208.compute-1.amazonaws.com", "datanodedb", "datanode", "password")
            connector.write(final_df, "global_anomalies_table", "append")
        except Exception as e:
            # Best effort: report the failure and carry on.
            print(e)
Esempio n. 4
0
def initDbConnection():
    """Construct a ``PostgresConnector`` with no arguments and return it."""
    return pgConnector.PostgresConnector()