Exemple #1
0
 def test_metric_equality(self):
     """Check that Metric equality and inequality behave as expected."""
     same_series_a = Metric(self.raw_metrics_list[0][0])
     same_series_b = Metric(self.raw_metrics_list[1][0])
     self.assertEqual(same_series_a, same_series_b, "incorrect inequality")

     different_series = Metric(self.raw_metrics_list[0][1])
     self.assertNotEqual(same_series_a, different_series, "incorrect equality")
Exemple #2
0
 def test_oldest_data_datetime_with_timedelta(self):
     """Start time after addition should honor a timedelta oldest_data_datetime."""
     base = Metric(self.raw_metrics_list[0][0])
     other = Metric(self.raw_metrics_list[1][0])
     # The expected start is the 5th sample; the timedelta spans from there
     # to the last sample of the second series.
     expected_start_time = base.metric_values.iloc[4, 0]
     time_delta = other.metric_values.iloc[-1, 0] - expected_start_time

     combined = Metric(self.raw_metrics_list[0][0],
                       oldest_data_datetime=time_delta) + Metric(
                           self.raw_metrics_list[1][0])
     self.assertEqual(expected_start_time, combined.start_time,
                      "Incorrect Start time after addition")
Exemple #3
0
    def train(self, metric_dict, oldest_data_datetime):
        """Train the Prophet model and log cross-validation performance.

        :param metric_dict: raw metric data used to build a Metric object.
        :param oldest_data_datetime: cutoff passed to Metric; older data
            is dropped before training.
        """
        # convert incoming metric to Metric Object
        metric = Metric(metric_dict, oldest_data_datetime)

        self._model = Prophet(daily_seasonality=True,
                              weekly_seasonality=True,
                              yearly_seasonality=True)

        _LOGGER.info("training data range: %s - %s", metric.start_time,
                     metric.end_time)
        _LOGGER.debug("begin training")

        self._model.fit(metric.metric_values)

        # Cross-validate the fitted model and report performance metrics.
        # (Previously wrapped in a dead `if True:` guard; the unused
        # `prediction_freq` and `df_fit` locals were also removed.)
        df_cv = cross_validation(self._model,
                                 horizon="1 day",
                                 period="8 hours",
                                 initial="4 days")
        df_p = performance_metrics(df_cv)
        _LOGGER.info("Performance data: %s %s", metric.metric_name, df_p)
Exemple #4
0
    def train(self, metric_data=None, prediction_duration=15):
        """Train the Prophet model and store the predictions in predicted_df."""
        prediction_freq = "1MIN"

        # Fold any new data into the tracked metric; the rolling data window
        # configured on the Metric keeps the dataframe from growing unbounded.
        if metric_data:
            self.metric += Metric(metric_data)

        # Prophet models are not retrainable, so storing the model is not
        # strictly needed — kept as an example for retrainable models.
        self.model = Prophet(daily_seasonality=True,
                             weekly_seasonality=True,
                             yearly_seasonality=True)

        _LOGGER.info("training data range: %s - %s", self.metric.start_time,
                     self.metric.end_time)
        _LOGGER.debug("begin training")

        self.model.fit(self.metric.metric_values)

        future = self.model.make_future_dataframe(
            periods=int(prediction_duration),
            freq=prediction_freq,
            include_history=False,
        )

        prediction = self.model.predict(future)
        prediction["timestamp"] = prediction["ds"]
        prediction = prediction[["timestamp", "yhat", "yhat_lower", "yhat_upper"]]
        self.predicted_df = prediction.set_index("timestamp")
        _LOGGER.debug(self.predicted_df)
Exemple #5
0
    def train(self,
              metric_data=None,
              prediction_duration=15,
              seasonality=None,
              deviations=3):
        """Train the Prophet model and store the predictions in predicted_df."""
        prediction_freq = "30s"
        if metric_data:
            self.metric += Metric(metric_data)

        # Enable only the seasonality component the caller asked for.
        self.model = Prophet(daily_seasonality=(seasonality == "daily"),
                             weekly_seasonality=(seasonality == "weekly"),
                             yearly_seasonality=(seasonality == "yearly"))

        _LOGGER.info("training data range: %s - %s", self.metric.start_time,
                     self.metric.end_time)
        _LOGGER.debug("begin training")

        self.model.fit(self.metric.metric_values)

        future_frame = self.model.make_future_dataframe(
            periods=int(prediction_duration),
            freq=prediction_freq,
            include_history=False,
        )

        prediction = self.model.predict(future_frame)
        prediction["timestamp"] = prediction["ds"]
        prediction = prediction[["timestamp", "yhat", "yhat_lower", "yhat_upper"]]
        self.predicted_df = prediction.set_index("timestamp")
        _LOGGER.debug(self.predicted_df)
Exemple #6
0
    async def get(self):
        """Fetch and publish metric values asynchronously."""
        # update metric value on every request and publish the metric
        for predictor_model in self.settings["model_list"]:
            # get the current metric value so that it can be compared with the
            # predicted values
            current_start_time = datetime.now(
            ) - Configuration.current_data_window_size
            current_end_time = datetime.now()

            # Anomaly score: 0 when the observed value lies inside the
            # prediction band, otherwise set below to the normalized overshoot.
            anomaly = 0

            prediction_data_size = 0
            metric_name = predictor_model.metric.metric_name
            prediction = predictor_model.predict_value(datetime.now())

            # Some predictors also report the size of the data behind the
            # prediction; publish it even when no current data is available.
            if "size" in prediction:
                prediction_data_size = prediction['size']

            current_metric_data = pc.get_metric_range_data(
                metric_name=predictor_model.metric.metric_name,
                label_config=predictor_model.metric.label_config,
                start_time=current_start_time,
                end_time=current_end_time,
            )

            # Check for all the columns available in the prediction
            # and publish the values for each of them
            for column_name in list(prediction.columns):
                GAUGE_DICT[metric_name].labels(
                    **predictor_model.metric.label_config,
                    value_type=column_name).set(prediction[column_name][0])

            if current_metric_data and hasattr(current_metric_data, "__len__"):
                current_metric_value = Metric(current_metric_data[0])
                # Width of the predicted uncertainty band, used to normalize
                # the anomaly score. NOTE(review): a zero-width band would
                # divide by zero — confirm yhat_upper > yhat_lower upstream.
                uncertainty_range = prediction["yhat_upper"][0] - prediction[
                    "yhat_lower"][0]
                # Most recent observed value: the "y" of the row whose "ds"
                # timestamp is largest.
                current_value = current_metric_value.metric_values.loc[
                    current_metric_value.metric_values.ds.idxmax(), "y"]

                # Signed distance outside the band, in units of band width.
                if (current_value > prediction["yhat_upper"][0]):
                    anomaly = (current_value -
                               prediction["yhat_upper"][0]) / uncertainty_range
                elif (current_value < prediction["yhat_lower"][0]):
                    anomaly = (current_value -
                               prediction["yhat_lower"][0]) / uncertainty_range

                # publish a time series with value_type=anomaly carrying the
                # normalized score (0 means no anomaly was detected)
                GAUGE_DICT[metric_name].labels(
                    **predictor_model.metric.label_config,
                    value_type="anomaly").set(anomaly)

            GAUGE_DICT[metric_name].labels(
                **predictor_model.metric.label_config,
                value_type="size").set(prediction_data_size)

        self.write(generate_latest(REGISTRY).decode("utf-8"))
        self.set_header("Content-Type", "text; charset=utf-8")
Exemple #7
0
    def test_oldest_data_datetime_with_datetime(self):
        """oldest_data_datetime must accept a datetime and reject bad types."""
        # A plain string such as "2d" is not a valid oldest_data_datetime.
        with self.assertRaises(TypeError,
                               msg="incorrect parameter type accepted"):
            _ = Metric(self.raw_metrics_list[0][0], oldest_data_datetime="2d")

        expected_start_time = Metric(
            self.raw_metrics_list[0][0]).metric_values.iloc[4, 0]
        combined = Metric(self.raw_metrics_list[0][0],
                          oldest_data_datetime=expected_start_time) + Metric(
                              self.raw_metrics_list[1][0])

        self.assertEqual(expected_start_time, combined.start_time,
                         "Incorrect Start time after addition")
        self.assertEqual(expected_start_time,
                         combined.metric_values.iloc[0, 0],
                         "Incorrect Start time after addition (in df)")
Exemple #8
0
    def test_metric_end_time(self):
        """Metric end time should fall within the expected one-minute window."""
        end_time = datetime.datetime(2019, 7, 28, 16, 00)
        end_time_minus_1m = datetime.datetime(2019, 7, 28, 15, 59)

        test_metric_object = Metric(self.raw_metrics_list[0][0])
        # assertGreater/assertLess report both operands on failure,
        # unlike assertTrue(a > b) which only reports "False is not true".
        self.assertGreater(test_metric_object.end_time, end_time_minus_1m,
                           "incorrect metric end time")
        self.assertLess(test_metric_object.end_time, end_time,
                        "incorrect metric end time")
Exemple #9
0
    def test_metric_start_time(self):
        """Metric start time should fall within the expected one-minute window."""
        start_time = datetime.datetime(2019, 7, 28, 10, 0)
        start_time_plus_1m = datetime.datetime(2019, 7, 28, 10, 1)

        test_metric_object = Metric(self.raw_metrics_list[0][0])
        # assertGreater/assertLess report both operands on failure,
        # unlike assertTrue(a > b) which only reports "False is not true".
        self.assertGreater(test_metric_object.start_time, start_time,
                           "incorrect metric start time")
        self.assertLess(test_metric_object.start_time, start_time_plus_1m,
                        "incorrect metric start time")
Exemple #10
0
    def __init__(self, metric, rolling_data_window_size="10d", number_of_feature=10, validation_ratio=0.2,
                 parameter_tuning=True):
        """Initialize the predictor with a Metric built from raw data.

        :param metric: raw metric data passed to the Metric constructor.
        :param rolling_data_window_size: window of history to retain (e.g. "10d").
        :param number_of_feature: number of lagged input features for the model.
        :param validation_ratio: fraction of data held out for validation
            during parameter tuning.
        :param parameter_tuning: when True, run a hyperparameter search on the
            first training call.
        """
        self.metric = Metric(metric, rolling_data_window_size)

        self.number_of_features = number_of_feature
        # NOTE(review): "scalar" is presumably a typo for "scaler"; kept
        # unchanged because other code reads this attribute by this name.
        self.scalar = MinMaxScaler(feature_range=(0, 1))
        self.parameter_tuning = parameter_tuning
        self.validation_ratio = validation_ratio
Exemple #11
0
    def train(self, metric_data=None, prediction_duration=15):
        """Train the Fourier model and store the predictions in pandas dataframe.

        :param metric_data: optional new raw metric data folded into self.metric.
        :param prediction_duration: number of future points to forecast.
        """
        prediction_range = prediction_duration
        # convert incoming metric to Metric Object
        if metric_data:
            # because the rolling_data_window_size is set, this df should not bloat
            self.metric += Metric(metric_data)

        data = self.metric.metric_values
        vals = np.array(data["y"].tolist())

        _LOGGER.debug("training data start time: %s", self.metric.start_time)
        _LOGGER.debug("training data end time: %s", self.metric.end_time)
        _LOGGER.debug("begin training")

        # Extrapolate with a single Fourier harmonic (the commented
        # alternative scales the harmonic count with the series length).
        forecast_values = self.fourier_extrapolation(vals, prediction_range,
                                                     1)  # int(len(vals)/3))
        dataframe_cols = {}
        dataframe_cols["yhat"] = np.array(forecast_values)

        # find most recent timestamp from original data and extrapolate new timestamps
        _LOGGER.debug("Creating Dummy Timestamps.....")
        maximum_time = max(data["ds"])
        dataframe_cols["timestamp"] = pd.date_range(
            maximum_time, periods=len(forecast_values), freq="min")

        # create dummy upper and lower bounds
        _LOGGER.debug("Computing Bounds .... ")

        # Bounds are a weighted running mean of the forecast-so-far (weights
        # ramp linearly toward the most recent points) +/- two standard
        # deviations. NOTE(review): for i in {0, 1} the weights sum to zero;
        # np.ma.average masks the result and index 0 is patched below —
        # confirm index 1 yields a sane value.
        upper_bound = np.array([(np.ma.average(
            forecast_values[:i],
            weights=np.linspace(0, 1, num=len(forecast_values[:i])),
        ) + (np.std(forecast_values[:i]) * 2))
                                for i in range(len(forecast_values))])
        upper_bound[0] = np.mean(
            forecast_values[0])  # to account for no std of a single value
        lower_bound = np.array([(np.ma.average(
            forecast_values[:i],
            weights=np.linspace(0, 1, num=len(forecast_values[:i])),
        ) - (np.std(forecast_values[:i]) * 2))
                                for i in range(len(forecast_values))])
        lower_bound[0] = np.mean(
            forecast_values[0])  # to account for no std of a single value
        dataframe_cols["yhat_upper"] = upper_bound
        dataframe_cols["yhat_lower"] = lower_bound

        # create series and index into predictions_dict
        _LOGGER.debug("Formatting Forecast to Pandas ..... ")

        forecast = pd.DataFrame(data=dataframe_cols)
        forecast = forecast.set_index("timestamp")

        self.predicted_df = forecast
        _LOGGER.debug(forecast)
Exemple #12
0
    def train(self,
              metric_data=None,
              prediction_duration=15,
              seasonality=None,
              deviations=3):
        """Train the Sarima model and store the predictions in predicted_df.

        :param metric_data: optional new raw metric data folded into self.metric.
        :param prediction_duration: number of forecast steps (30s apart).
        :param seasonality: one of "daily", "weekly", "yearly"; selects the
            seasonal period for SARIMAX (None → no mapping hit).
        :param deviations: band-width multiplier passed to ct.calculate_bounds.
        """
        if metric_data:
            self.metric += Metric(metric_data)

        data = self.metric.metric_values
        values = pd.Series(self.metric.metric_values.y.values,
                           index=data["ds"])
        # Seasonal period per seasonality name. NOTE(review): daily=2,
        # weekly=7, yearly=12 look like coarse approximations — confirm
        # against the actual sampling frequency of the data.
        days = {"daily": 2, "weekly": 7, "yearly": 12}
        self.model = SARIMAX(values,
                             order=(0, 0, 0),
                             seasonal_order=(1, 1, 1, days.get(seasonality)))

        _LOGGER.info("training data range: %s - %s", self.metric.start_time,
                     self.metric.end_time)
        _LOGGER.debug("begin training")
        results = self.model.fit(method='powell')
        forecast = results.forecast(prediction_duration)
        dataframe_cols = {}
        # NOTE(review): Series.get(-1) is a *label* lookup, not positional —
        # with a datetime index it likely returns None rather than the last
        # observation; confirm whether values.iloc[-1] was intended.
        dataframe_cols["yhat"] = np.append(values.get(-1), np.array(forecast))

        _LOGGER.debug("Creating Dummy Timestamps.....")
        maximum_time = max(data["ds"])
        dataframe_cols["timestamp"] = pd.date_range(maximum_time,
                                                    periods=len(forecast) + 1,
                                                    freq="30s")

        _LOGGER.debug("Computing Bounds .... ")

        lower_bound, upper_bound = ct.calculate_bounds(forecast, deviations)

        dataframe_cols["yhat_upper"] = np.append(values.get(-1), upper_bound)
        dataframe_cols["yhat_lower"] = np.append(values.get(-1), lower_bound)
        _LOGGER.debug("Formatting Forecast to Pandas ..... ")

        forecast = pd.DataFrame(data=dataframe_cols)
        forecast = forecast.set_index("timestamp")

        self.predicted_df = forecast
        _LOGGER.debug(forecast)
    def train(self,
              metric_data=None,
              prediction_duration=15,
              seasonality=None,
              deviations=3):
        """Train the Fourier model and store the predictions in pandas dataframe."""
        prediction_range = prediction_duration
        if metric_data:
            self.metric += Metric(metric_data)

        history = self.metric.metric_values
        observed = np.array(history["y"].tolist())

        _LOGGER.debug("training data start time: %s", self.metric.start_time)
        _LOGGER.debug("training data end time: %s", self.metric.end_time)
        _LOGGER.debug("begin training")

        # Extrapolate the series with a single Fourier harmonic.
        forecast_values = self.fourier_extrapolation(observed,
                                                     prediction_range, 1)

        _LOGGER.debug("Creating Dummy Timestamps.....")
        # Extrapolate timestamps forward from the newest observation.
        timestamps = pd.date_range(max(history["ds"]),
                                   periods=len(forecast_values),
                                   freq="30s")

        _LOGGER.debug("Calculating Bounds .... ")
        lower_bound, upper_bound = ct.calculate_bounds(forecast_values,
                                                       deviations)

        _LOGGER.debug("Formatting Forecast to Pandas ..... ")
        forecast = pd.DataFrame({
            "yhat": np.array(forecast_values),
            "timestamp": timestamps,
            "yhat_upper": upper_bound,
            "yhat_lower": lower_bound,
        }).set_index("timestamp")

        self.predicted_df = forecast
        _LOGGER.debug(forecast)
    async def get(self):
        """Fetch and publish metric values asynchronously."""
        # Refresh every tracked metric and publish predictions plus an
        # anomaly flag for each model on every request.
        for predictor_model in self.settings["model_list"]:
            # Current observed value, to compare against the prediction.
            current_metric_value = Metric(
                pc.get_current_metric_value(
                    metric_name=predictor_model.metric.metric_name,
                    label_config=predictor_model.metric.label_config,
                )[0]
            )

            metric_name = predictor_model.metric.metric_name
            prediction = predictor_model.predict_value(datetime.now())

            # Publish every column the prediction dataframe provides.
            for column_name in list(prediction.columns):
                GAUGE_DICT[metric_name].labels(
                    **predictor_model.metric.label_config, value_type=column_name
                ).set(prediction[column_name][0])

            # Anomaly if the observed value falls outside the predicted band
            # (model-specific detectors may differ).
            observed = current_metric_value.metric_values["y"][0]
            within_bounds = (
                prediction["yhat_lower"][0] < observed < prediction["yhat_upper"][0]
            )
            anomaly = 0 if within_bounds else 1

            # Publish a time series with value_type=anomaly: 1 if an anomaly
            # was found, 0 if not.
            GAUGE_DICT[metric_name].labels(
                **predictor_model.metric.label_config, value_type="anomaly"
            ).set(anomaly)

        self.write(generate_latest(REGISTRY).decode("utf-8"))
        self.set_header("Content-Type", "text; charset=utf-8")
Exemple #15
0
    async def get(self):
        """Fetch and publish metric values asynchronously."""
        for predictor_model in self.settings["model_list"]:
            current_metric_value = Metric(
                pc.get_current_metric_value(
                    metric_name=predictor_model.metric.metric_name,
                    label_config=predictor_model.metric.label_config,
                )[0])

            metric_name = predictor_model.metric.metric_name
            prediction = predictor_model.predict_value(datetime.now() -
                                                       timedelta(hours=2))

            for column_name in list(prediction.columns):
                GAUGE_DICT[metric_name].labels(
                    **predictor_model.metric.label_config,
                    value_type=column_name).set(prediction[column_name][0])

            # Band edges widened by the configured deviation allowance.
            observed = current_metric_value.metric_values["y"][0]
            upper = prediction["yhat_upper"][0] + Configuration.deviations
            lower = prediction["yhat_lower"][0] - Configuration.deviations

            # 0 = normal, 1 = anomalous; which border(s) count is selected by
            # Configuration.anomaly_border ("more", "less", or "both").
            anomaly_detector = {
                "more": 0 if observed < upper else 1,
                "less": 0 if observed > lower else 1,
                "both": 0 if upper > observed > lower else 1,
            }
            anomaly = anomaly_detector.get(Configuration.anomaly_border)

            GAUGE_DICT[metric_name].labels(
                **predictor_model.metric.label_config,
                value_type="anomaly").set(anomaly)

        self.write(generate_latest(REGISTRY).decode("utf-8"))
        self.set_header("Content-Type", "text; charset=utf-8")
Exemple #16
0
    def test_metric_addition(self):  # noqa D102
        # Adding two incompatible metrics must raise TypeError.
        with self.assertRaises(TypeError,
                               msg="incorrect addition of two metrics"):
            _ = Metric(self.raw_metrics_list[0][0]) + Metric(
                self.raw_metrics_list[0][1])

        first = Metric(self.raw_metrics_list[0][0])
        second = Metric(self.raw_metrics_list[1][0])
        sum_metric = Metric(self.raw_metrics_list[0][0]) + Metric(
            self.raw_metrics_list[1][0])

        self.assertIsInstance(sum_metric,
                              Metric,
                              msg="The sum is not a Metric")
        self.assertEqual(sum_metric.start_time, first.start_time,
                         "Incorrect Start time after addition")
        self.assertEqual(sum_metric.end_time, second.end_time,
                         "Incorrect End time after addition")
                     str(Configuration.rolling_data_window_size))
    mlflow.log_param("true_anomaly_threshold",
                     str(Configuration.true_anomaly_threshold))

    # initial run with just the train data
    model_mp.train(train_data[0], Configuration.retraining_interval_minutes)

    # store the predicted dataframe
    predicted_df = model_mp.predicted_df
    # track true_positives & ground truth anomalies
    num_true_positives = 0
    num_ground_truth_anomalies = 0

    for item in range(len(test_data_list) - 1):
        # the true values for this training period
        true_values = Metric(test_data_list[item + 1])
        true_values.metric_values = true_values.metric_values.set_index("ds")

        # for each item in the test_data list, update the model (append new data and train it)
        model_mp.train(test_data_list[item], len(true_values.metric_values))

        # store the prediction df for every interval
        predicted_df = predicted_df + model_mp.predicted_df

        true_values.metric_values["yhat"] = model_mp.predicted_df["yhat"]
        true_values.metric_values["yhat_upper"] = model_mp.predicted_df[
            "yhat_upper"]
        true_values.metric_values["yhat_lower"] = model_mp.predicted_df[
            "yhat_lower"]

        metric_timestamp = true_values.metric_values.index.values[int(
Exemple #18
0
 def __init__(self, metric, rolling_data_window_size="10d"):
     """Initialize the Metric object.

     :param metric: raw metric data passed to the Metric constructor.
     :param rolling_data_window_size: window of history to retain (e.g. "10d").
     """
     self.metric = Metric(metric, rolling_data_window_size)
def update_values(models_include=None):
    """Update db_values for every tracked time series (TS).

    If a Values record already exists for a TS, its metric is refreshed with
    the newest data and truncated to the current data window. If no record
    exists, one is created: the most recently trained matching model is
    selected as its predictor and the record is associated with its TS.

    Record layout, keyed by the TS hash::

        {
            "metric" (Metric): first item of MetricsList(get_metric_range_data()),
            "ts" (tsKey): key of db_ts,
            "model" (modelKey): key of db_models,
            "generation": counter identifying this update pass,
        }

    :param models_include: optional collection of model keys; when given,
        only those models are considered when picking a predictor.

    Raises:
        Exception: if the fetched range data maps to more than one metric.
    """
    logger.info("Updating Values")
    now = datetime.now()
    generation = next(values_generation)
    for (h, ts) in db_ts.items():
        logger.debug("Updating [TS:{h}], labels:{labels}".format(
            h=h, labels=ts["labels"]))
        if h in db_values:  # idiom: membership test, not .keys()
            # TS is already tracked by a Values record in db_values
            current_start_time = now - Configuration.current_data_window_size
            record = db_values[h]
            metric = record["metric"]
            metric_data = pc.get_metric_range_data(
                metric_name=metric.metric_name,
                label_config=metric.label_config,
                start_time=current_start_time,
                end_time=now)
            metrics = MetricsList(metric_data)
            if len(metrics) != 1:
                raise Exception("There can be only one")
            new_metric = metrics[0] + metric

            # Truncate the merged series to the current data window.
            trunk_metric = Metric(
                new_metric, current_start_time
            )  # This throws some exception really fast but this would have solved the problem.
            db_values[h]["metric"] = trunk_metric
            db_values[h]["generation"] = generation
            logger.debug(
                "Update and truncate [Metric:{h}] horizon:{current_start_time} metric_name:{metric_name}, label_config:{label_config}"
                .format(h=h,
                        metric_name=metric.metric_name,
                        label_config=metric.label_config,
                        current_start_time=current_start_time))
        else:
            current_start_time = now - Configuration.current_data_window_size
            metric_name = ts["labels"]["__name__"]
            # Copy labels without the __name__ pseudo-label.
            labels = dict(ts["labels"])
            del labels["__name__"]

            items = db_models.items()
            if models_include is not None:  # PEP 8: "is not", not "not ... is"
                items = filter(lambda item: item[0] in models_include, items)

            # Keep only the models whose label set hashes to this TS.
            models = list(
                filter(
                    lambda model: ts_hash(all_labels=model[1]["labels"]) == h,
                    items))
            if not models:
                logger.warning(
                    "No models matching labels for [Metric:{h}] metric_name:{metric_name}, label_config:{label_config}"
                    .format(h=h, metric_name=metric_name, label_config=labels))
                continue

            metric_data = pc.get_metric_range_data(
                metric_name=metric_name,
                label_config=labels,
                start_time=current_start_time,
                end_time=now)
            metrics = MetricsList(metric_data)
            if len(metrics) != 1:
                raise Exception("There can be only one")

            # pick the most recent model
            models.sort(key=lambda model: model[1].get(
                "timestamp", datetime.fromtimestamp(0)),
                        reverse=True)
            predictor = models[0][0]
            # predictor.build_prediction_df()
            record = {
                "metric": metrics[0],
                "ts": h,
                "model": predictor,
                "generation": generation
            }
            db_values.update({h: record})
            logger.debug(
                "Add [Metric:{h}] horizon:{current_start_time} metric_name:{metric_name}, label_config:{label_config}"
                .format(h=h,
                        metric_name=metric_name,
                        label_config=labels,
                        current_start_time=current_start_time))
Exemple #20
0
    def train(self, metric_data=None, prediction_duration=15):
        """Train the model.

        Scales the series to [0, 1], optionally grid-searches the LSTM/dense
        cell counts on the first call, fits the model, then rolls it forward
        to produce `prediction_duration` forecast values with dummy bounds.

        :param metric_data: optional new raw metric data folded into self.metric.
        :param prediction_duration: number of future minutes to forecast.
        """
        if metric_data:
            # because the rolling_data_window_size is set, this df should not bloat
            self.metric += Metric(metric_data)

        # normalising: scale column 1 (the "y" values) into [0, 1]
        metric_values_np = self.metric.metric_values.values
        scaled_np_arr = self.scalar.fit_transform(metric_values_np[:,
                                                                   1].reshape(
                                                                       -1, 1))
        metric_values_np[:, 1] = scaled_np_arr.flatten()

        if self.parameter_tuning:
            # One-off grid search over LSTM/dense layer sizes (32, 64, 128),
            # keeping the combination with the lowest validation loss.
            x, y = self.prepare_data(metric_values_np)
            lstm_cells = [2**i for i in range(5, 8)]
            dense_cells = [2**i for i in range(5, 8)]
            loss = np.inf
            lstm_cell_count = 0
            dense_cell_count = 0
            for lstm_cell_count_ in lstm_cells:
                for dense_cell_count_ in dense_cells:
                    model = self.get_model(lstm_cell_count_, dense_cell_count_)
                    model.compile(loss='mean_squared_error', optimizer='adam')
                    history = model.fit(x,
                                        y,
                                        epochs=50,
                                        batch_size=512,
                                        verbose=0,
                                        validation_split=self.validation_ratio)
                    val_loss = history.history['val_loss']
                    loss_ = min(val_loss)
                    if loss > loss_:
                        lstm_cell_count = lstm_cell_count_
                        dense_cell_count = dense_cell_count_
                        loss = loss_
            self.lstm_cell_count = lstm_cell_count
            self.dense_cell_count = dense_cell_count
            # Only tune once; subsequent calls reuse the chosen sizes.
            self.parameter_tuning = False

        model = self.get_model(self.lstm_cell_count, self.dense_cell_count)
        _LOGGER.info("training data range: %s - %s", self.metric.start_time,
                     self.metric.end_time)
        # _LOGGER.info("training data end time: %s", self.metric.end_time)
        _LOGGER.debug("begin training")
        data_x, data_y = self.prepare_data(metric_values_np)
        _LOGGER.debug(data_x.shape)
        model.compile(loss='mean_squared_error', optimizer='adam')
        model.fit(data_x, data_y, epochs=50, batch_size=512)
        # Roll the model forward: each prediction is a delta added to the
        # last (scaled) value, which is then pushed into the feature window.
        data_test = metric_values_np[-self.number_of_features:, 1]
        forecast_values = []
        prev_value = data_test[-1]  # NOTE(review): assigned but never read
        for i in range(int(prediction_duration)):
            prediction = model.predict(
                data_test.reshape(1, 1, self.number_of_features)).flatten()[0]
            curr_pred_value = data_test[-1] + prediction
            # Map the scaled prediction back to the original value range.
            scaled_final_value = self.scalar.inverse_transform(
                curr_pred_value.reshape(1, -1)).flatten()[0]
            forecast_values.append(scaled_final_value)
            data_test = np.roll(data_test, -1)
            data_test[-1] = curr_pred_value
            prev_value = data_test[-1]

        dataframe_cols = {"yhat": np.array(forecast_values)}

        # Dummy bounds: forecast value +/- two standard deviations of the
        # forecast-so-far.
        upper_bound = np.array([
            (forecast_values[i] + (np.std(forecast_values[:i]) * 2))
            for i in range(len(forecast_values))
        ])
        upper_bound[0] = np.mean(
            forecast_values[0])  # to account for no std of a single value
        lower_bound = np.array([
            (forecast_values[i] - (np.std(forecast_values[:i]) * 2))
            for i in range(len(forecast_values))
        ])
        lower_bound[0] = np.mean(
            forecast_values[0])  # to account for no std of a single value
        dataframe_cols["yhat_upper"] = upper_bound
        dataframe_cols["yhat_lower"] = lower_bound

        # Timestamps extrapolated minute-by-minute from the last observation.
        data = self.metric.metric_values
        maximum_time = max(data["ds"])
        dataframe_cols["timestamp"] = pd.date_range(
            maximum_time, periods=len(forecast_values), freq="min")

        forecast = pd.DataFrame(data=dataframe_cols)
        forecast = forecast.set_index("timestamp")

        self.predicted_df = forecast
        _LOGGER.debug(forecast)
Exemple #21
0
 def test_init(self):
     """The metric name should be parsed out of the raw metric data."""
     metric_object = Metric(self.raw_metrics_list[0][0])
     self.assertEqual("up", metric_object.metric_name,
                      "incorrect metric name")
Exemple #22
0
    # log parameters before run
    mlflow.log_param("retraining_interval_minutes",
                     str(Configuration.retraining_interval_minutes))
    mlflow.log_param("rolling_training_window_size",
                     str(Configuration.rolling_training_window_size))
    mlflow.log_param("true_anomaly_threshold",
                     str(Configuration.true_anomaly_threshold))

    # initial run with just the train data
    model_mp.train(
        prediction_duration=Configuration.retraining_interval_minutes)

    # store the predicted dataframe and the true dataframe
    predicted_df = model_mp.predicted_df
    true_df = Metric(test_data_list[0]).metric_values.set_index("ds")

    # Label True Anomalies
    true_df["anomaly"] = label_true_anomalies(
        true_df, Configuration.true_anomaly_threshold)

    # track true_positives & ground truth anomalies
    num_true_positives = 0
    num_ground_truth_anomalies = 0

    for item in range(len(test_data_list) - 1):
        # the true values for this training period
        true_values = Metric(test_data_list[item + 1])
        true_values.metric_values = true_values.metric_values.set_index("ds")
        true_df += true_values.metric_values
 def __init__(self, metric, rolling_data_window_size="10d"):
     """Initialize the predictor with a Metric built from raw data.

     :param metric: raw metric data passed to the Metric constructor.
     :param rolling_data_window_size: window of history to retain (e.g. "10d").
     """
     self.metric = Metric(metric, rolling_data_window_size)
Exemple #24
0
    disable_ssl=True,
)

_LOGGER.info("Metric List size: %s", len(METRICS_LIST))
for metric in METRICS_LIST:
    # Initialize a predictor for all metrics first
    _LOGGER.info("Metric List read: %s", metric)
    current_start_time = datetime.now(
    ) - Configuration.current_data_window_size
    # Seed each predictor with range data from the current window up to now.
    metric_init = pc.get_metric_range_data(metric_name=metric,
                                           start_time=current_start_time,
                                           end_time=datetime.now())
    # NOTE(review): "Mertic" is a typo for "Metric"; left unchanged here
    # because it is runtime log output, not a comment.
    _LOGGER.info("Mertic loop: %s", metric_init)

    # Wrap each raw series in a Metric bounded by the rolling training
    # window, and pair it with a freshly constructed predictor model
    # (starmap over repeat([]) builds one MetricPredictor() per metric).
    metric_list = map(
        lambda metric: Metric(metric, Configuration.
                              rolling_training_window_size), metric_init)
    PREDICTOR_MODEL_LIST.extend(
        zip(
            metric_list,
            itertools.starmap(Configuration.model_module.MetricPredictor,
                              itertools.repeat([]))))


def train_model():
    """Train the machine learning model.
    Traning interval rounds up to day starts (00h:00m:00s.00)
    """
    _LOGGER.info("Train function: %s", PREDICTOR_MODEL_LIST)
    for (metric_to_predict, predictor_model) in PREDICTOR_MODEL_LIST:
        today = datetime(*datetime.now().timetuple()[:3])
        data_start_time = today - Configuration.rolling_training_window_size
Exemple #25
0
    async def get(self):
        """Serve current predictions and anomaly flags as Prometheus metrics.

        For every (metric, predictor) pair in ``self.settings["model_list"]``:
        fetch the latest observed values, ask the predictor for a forecast,
        cross-check that forecast against the observations taken at the same
        clock time one and two weeks ago, then publish the forecast columns,
        an ``anomaly`` flag (1/0) and the prediction ``size`` through
        ``GAUGE_DICT``. The response body is the Prometheus text exposition
        of ``REGISTRY``.
        """
        # update metric value on every request and publish the metric
        for predictor_model in self.settings["model_list"]:
            # get the current metric value so that it can be compared with the
            # predicted values
            current_start_time = datetime.now(
            ) - Configuration.current_data_window_size
            current_end_time = datetime.now()

            # The same data window shifted back exactly one week...
            weekago_start_time = (datetime.now() - timedelta(days=7)
                                  ) - Configuration.current_data_window_size
            weekago_end_time = (datetime.now() - timedelta(days=7))

            # ...and exactly two weeks; both are used to sanity-check the
            # forecast band before trusting it for anomaly detection.
            twoweeksago_start_time = (datetime.now() - timedelta(
                days=14)) - Configuration.current_data_window_size
            twoweeksago_end_time = (datetime.now() - timedelta(days=14))

            # trust_prediction becomes 1 if at least one historical window
            # fell inside the forecast band; anomaly defaults to 1
            # (anomalous) and is cleared below where appropriate.
            trust_prediction = 0
            anomaly = 1

            # NOTE(review): "MatricName" is a typo in the log message text.
            _LOGGER.info(
                "MatricName = %s, label_config = %s, start_time = %s, end_time = %s",
                predictor_model.metric.metric_name,
                predictor_model.metric.label_config, current_start_time,
                current_end_time)

            prediction_data_size = 0
            metric_name = predictor_model.metric.metric_name
            # Forecast for "now" — used below as a frame indexed by column
            # with yhat_upper/yhat_lower bounds; presumably a DataFrame
            # (TODO confirm against the predictor implementation).
            prediction = predictor_model.predict_value(datetime.now())

            # Optional "size" reports how much data backed the forecast.
            if "size" in prediction:
                prediction_data_size = prediction['size']

            # Observed values for the week-ago window.
            weekago_metric_data = pc.get_metric_range_data(
                metric_name=predictor_model.metric.metric_name,
                label_config=predictor_model.metric.label_config,
                start_time=weekago_start_time,
                end_time=weekago_end_time,
            )

            # Trust the forecast if last week's newest observation (the row
            # with the max "ds" timestamp) lies strictly inside the band.
            if weekago_metric_data and hasattr(weekago_metric_data, "__len__"):
                weekago_metric_value = Metric(weekago_metric_data[0])
                if (weekago_metric_value.metric_values.loc[
                        weekago_metric_value.metric_values.ds.idxmax(), "y"] <
                        prediction["yhat_upper"][0]) and (
                            weekago_metric_value.metric_values.loc[
                                weekago_metric_value.metric_values.ds.idxmax(),
                                "y"] > prediction["yhat_lower"][0]):
                    trust_prediction = 1

            # Observed values for the two-weeks-ago window.
            twoweeksago_metric_data = pc.get_metric_range_data(
                metric_name=predictor_model.metric.metric_name,
                label_config=predictor_model.metric.label_config,
                start_time=twoweeksago_start_time,
                end_time=twoweeksago_end_time,
            )

            # Same in-band check as above, for the two-weeks-ago newest value.
            if twoweeksago_metric_data and hasattr(twoweeksago_metric_data,
                                                   "__len__"):
                twoweeksago_metric_value = Metric(twoweeksago_metric_data[0])
                if (twoweeksago_metric_value.metric_values.loc[
                        twoweeksago_metric_value.metric_values.ds.idxmax(),
                        "y"] < prediction["yhat_upper"][0]
                    ) and (twoweeksago_metric_value.metric_values.loc[
                        twoweeksago_metric_value.metric_values.ds.idxmax(),
                        "y"] > prediction["yhat_lower"][0]):
                    trust_prediction = 1

            # Current (most recent) observed values.
            current_metric_data = pc.get_metric_range_data(
                metric_name=predictor_model.metric.metric_name,
                label_config=predictor_model.metric.label_config,
                start_time=current_start_time,
                end_time=current_end_time,
            )

            # Check for all the columns available in the prediction
            # and publish the values for each of them
            for column_name in list(prediction.columns):
                GAUGE_DICT[metric_name].labels(
                    **predictor_model.metric.label_config,
                    value_type=column_name).set(prediction[column_name][0])

            # anomaly stays 1 only when the current value is OUTSIDE the
            # forecast band AND at least one historical check above
            # validated the forecast (trust_prediction == 1).
            if current_metric_data and hasattr(current_metric_data, "__len__"):
                current_metric_value = Metric(current_metric_data[0])
                if (current_metric_value.metric_values.loc[
                        current_metric_value.metric_values.ds.idxmax(), "y"] <
                        prediction["yhat_upper"][0]) and (
                            current_metric_value.metric_values.loc[
                                current_metric_value.metric_values.ds.idxmax(),
                                "y"] > prediction["yhat_lower"][0]):
                    anomaly = 0
                elif trust_prediction == 0:
                    # Out of band, but the forecast itself is not trusted:
                    # suppress the alarm rather than risk a false positive.
                    anomaly = 0

                # create a new time series that has value_type=anomaly
                # this value is 1 if an anomaly is found 0 if not
                GAUGE_DICT[metric_name].labels(
                    **predictor_model.metric.label_config,
                    value_type="anomaly").set(anomaly)

                _LOGGER.info(
                    "Got current values in Mainhandler = %s and newest value = %s, IDXMAX = %s",
                    current_metric_value.metric_values,
                    current_metric_value.metric_values.loc[
                        current_metric_value.metric_values.ds.idxmax(), 'y'],
                    current_metric_value.metric_values.ds.idxmax())

            # Publish how much data the prediction was based on.
            GAUGE_DICT[metric_name].labels(
                **predictor_model.metric.label_config,
                value_type="size").set(prediction_data_size)

        # Emit everything collected in REGISTRY as Prometheus text format.
        self.write(generate_latest(REGISTRY).decode("utf-8"))
        self.set_header("Content-Type", "text; charset=utf-8")
Exemple #26
0
 def get_metric_obj(metric_data):
     """Build and return a ``Metric`` wrapper around *metric_data*."""
     wrapped = Metric(metric_data)
     return wrapped