def test_metric_equality(self):
    """Metrics built from chunks of the same series compare equal; different series do not."""
    first_chunk = Metric(self.raw_metrics_list[0][0])
    second_chunk = Metric(self.raw_metrics_list[1][0])
    different_series = Metric(self.raw_metrics_list[0][1])
    self.assertEqual(first_chunk, second_chunk, "incorrect inequality")
    self.assertNotEqual(first_chunk, different_series, "incorrect equality")
def test_oldest_data_datetime_with_timedelta(self):
    """A timedelta oldest_data_datetime truncates merged data to the trailing window."""
    base = Metric(self.raw_metrics_list[0][0])
    later = Metric(self.raw_metrics_list[1][0])
    # Row 4 of the first chunk is the point the window should start at.
    expected_start_time = base.metric_values.iloc[4, 0]
    # Window length: from that row to the very last sample of the second chunk.
    time_delta = later.metric_values.iloc[-1, 0] - base.metric_values.iloc[4, 0]
    new_metric = Metric(self.raw_metrics_list[0][0],
                        oldest_data_datetime=time_delta) + Metric(
                            self.raw_metrics_list[1][0])
    self.assertEqual(expected_start_time, new_metric.start_time,
                     "Incorrect Start time after addition")
def train(self, metric_dict, oldest_data_datetime):
    """Train the Prophet model and store the predictions in predicted_df.

    Fits a fresh Prophet model (all seasonalities enabled) on the given raw
    metric data, then cross-validates the fit and logs its performance.

    :param metric_dict: raw metric data as returned by the Prometheus API
    :param oldest_data_datetime: lower bound passed to Metric to limit
        how far back the training window reaches
    """
    # convert incoming metric to Metric Object
    metric = Metric(metric_dict, oldest_data_datetime)
    self._model = Prophet(daily_seasonality=True,
                          weekly_seasonality=True,
                          yearly_seasonality=True)
    _LOGGER.info("training data range: %s - %s", metric.start_time,
                 metric.end_time)
    _LOGGER.debug("begin training")
    # fit() returns the model itself; nothing useful to capture here
    # (the original bound it to an unused df_fit local).
    self._model.fit(metric.metric_values)
    # Cross-validate the fitted model and log its performance metrics.
    # (The original wrapped this in a dead `if True:` guard.)
    df_cv = cross_validation(self._model,
                             horizon="1 day",
                             period="8 hours",
                             initial="4 days")
    df_p = performance_metrics(df_cv)
    _LOGGER.info("Performance data: %s %s", metric.metric_name, df_p)
def train(self, metric_data=None, prediction_duration=15):
    """Train the Prophet model and store the predictions in predicted_df."""
    prediction_freq = "1MIN"
    # Fold any fresh samples into the tracked metric; the configured
    # rolling_data_window_size keeps the dataframe from growing unbounded.
    if metric_data:
        self.metric += Metric(metric_data)
    # Prophet models are not retrainable, so storing the model is not strictly
    # needed — kept as an example for model types that can be retrained.
    self.model = Prophet(daily_seasonality=True,
                         weekly_seasonality=True,
                         yearly_seasonality=True)
    _LOGGER.info("training data range: %s - %s", self.metric.start_time,
                 self.metric.end_time)
    _LOGGER.debug("begin training")
    self.model.fit(self.metric.metric_values)
    future = self.model.make_future_dataframe(
        periods=int(prediction_duration),
        freq=prediction_freq,
        include_history=False,
    )
    predictions = self.model.predict(future)
    predictions["timestamp"] = predictions["ds"]
    # Keep only the forecast columns, indexed by timestamp.
    self.predicted_df = predictions[[
        "timestamp", "yhat", "yhat_lower", "yhat_upper"
    ]].set_index("timestamp")
    _LOGGER.debug(self.predicted_df)
def train(self, metric_data=None, prediction_duration=15, seasonality=None, deviations=3):
    """Train the Prophet model and store the predictions in predicted_df."""
    prediction_freq = "30s"
    if metric_data:
        self.metric += Metric(metric_data)
    # Enable exactly the seasonality that was requested ("daily"/"weekly"/"yearly");
    # anything else (including None) disables all three.
    self.model = Prophet(daily_seasonality=seasonality == "daily",
                         weekly_seasonality=seasonality == "weekly",
                         yearly_seasonality=seasonality == "yearly")
    _LOGGER.info("training data range: %s - %s", self.metric.start_time,
                 self.metric.end_time)
    _LOGGER.debug("begin training")
    self.model.fit(self.metric.metric_values)
    future = self.model.make_future_dataframe(
        periods=int(prediction_duration),
        freq=prediction_freq,
        include_history=False,
    )
    predictions = self.model.predict(future)
    predictions["timestamp"] = predictions["ds"]
    # Keep only the forecast columns, indexed by timestamp.
    # NOTE(review): the `deviations` parameter is accepted but unused here —
    # kept for signature compatibility with the other model trainers.
    self.predicted_df = predictions[[
        "timestamp", "yhat", "yhat_lower", "yhat_upper"
    ]].set_index("timestamp")
    _LOGGER.debug(self.predicted_df)
async def get(self):
    """Fetch and publish metric values asynchronously.

    For every predictor in the app's model list: publish each predicted
    column as a gauge sample, then publish a signed "anomaly" score (how far
    the latest observed value lies outside the prediction band, in units of
    the band width; 0 when inside the band) and a "size" sample.
    """
    # update metric value on every request and publish the metric
    for predictor_model in self.settings["model_list"]:
        # get the current metric value so that it can be compared with the
        # predicted values
        current_start_time = datetime.now(
        ) - Configuration.current_data_window_size
        current_end_time = datetime.now()
        anomaly = 0
        prediction_data_size = 0
        metric_name = predictor_model.metric.metric_name
        prediction = predictor_model.predict_value(datetime.now())
        # "size" is an optional extra column some predictors include.
        if "size" in prediction:
            prediction_data_size = prediction['size']
        current_metric_data = pc.get_metric_range_data(
            metric_name=predictor_model.metric.metric_name,
            label_config=predictor_model.metric.label_config,
            start_time=current_start_time,
            end_time=current_end_time,
        )
        # Check for all the columns available in the prediction
        # and publish the values for each of them
        for column_name in list(prediction.columns):
            GAUGE_DICT[metric_name].labels(
                **predictor_model.metric.label_config,
                value_type=column_name).set(prediction[column_name][0])
        if current_metric_data and hasattr(current_metric_data, "__len__"):
            current_metric_value = Metric(current_metric_data[0])
            # Width of the prediction band; used to normalize the anomaly score.
            uncertainty_range = prediction["yhat_upper"][0] - prediction[
                "yhat_lower"][0]
            # Most recent observed value (row with the latest "ds" timestamp).
            current_value = current_metric_value.metric_values.loc[
                current_metric_value.metric_values.ds.idxmax(), "y"]
            # Signed distance outside the band, scaled by the band width:
            # positive above yhat_upper, negative below yhat_lower.
            if (current_value > prediction["yhat_upper"][0]):
                anomaly = (current_value -
                           prediction["yhat_upper"][0]) / uncertainty_range
            elif (current_value < prediction["yhat_lower"][0]):
                anomaly = (current_value -
                           prediction["yhat_lower"][0]) / uncertainty_range
        # create a new time series that has value_type=anomaly
        # this value is 1 if an anomaly is found 0 if not
        GAUGE_DICT[metric_name].labels(
            **predictor_model.metric.label_config,
            value_type="anomaly").set(anomaly)
        GAUGE_DICT[metric_name].labels(
            **predictor_model.metric.label_config,
            value_type="size").set(prediction_data_size)
    self.write(generate_latest(REGISTRY).decode("utf-8"))
    self.set_header("Content-Type", "text; charset=utf-8")
def test_oldest_data_datetime_with_datetime(self):
    """A string oldest_data_datetime raises; a datetime truncates the data to it."""
    # Unsupported type for oldest_data_datetime must be rejected.
    with self.assertRaises(TypeError, msg="incorrect parameter type accepted"):
        _ = Metric(self.raw_metrics_list[0][0], oldest_data_datetime="2d")
    reference = Metric(self.raw_metrics_list[0][0])
    expected_start_time = reference.metric_values.iloc[4, 0]
    new_metric = Metric(self.raw_metrics_list[0][0],
                        oldest_data_datetime=expected_start_time) + Metric(
                            self.raw_metrics_list[1][0])
    self.assertEqual(expected_start_time, new_metric.start_time,
                     "Incorrect Start time after addition")
    self.assertEqual(
        expected_start_time,
        new_metric.metric_values.iloc[0, 0],
        "Incorrect Start time after addition (in df)",
    )
def test_metric_end_time(self):
    """Metric end time must fall inside the expected one-minute window."""
    window_close = datetime.datetime(2019, 7, 28, 16, 00)
    window_open = datetime.datetime(2019, 7, 28, 15, 59)
    metric = Metric(self.raw_metrics_list[0][0])
    # end_time must lie strictly between 15:59 and 16:00.
    self.assertTrue(window_open < metric.end_time,
                    "incorrect metric end time")
    self.assertTrue(metric.end_time < window_close,
                    "incorrect metric end time")
def test_metric_start_time(self):
    """Metric start time must fall inside the expected one-minute window."""
    window_open = datetime.datetime(2019, 7, 28, 10, 0)
    window_close = datetime.datetime(2019, 7, 28, 10, 1)
    metric = Metric(self.raw_metrics_list[0][0])
    # start_time must lie strictly between 10:00 and 10:01.
    self.assertTrue(window_open < metric.start_time,
                    "incorrect metric start time")
    self.assertTrue(metric.start_time < window_close,
                    "incorrect metric start time")
def __init__(self,
             metric,
             rolling_data_window_size="10d",
             number_of_feature=10,
             validation_ratio=0.2,
             parameter_tuning=True):
    """Initialize the predictor with its metric and training hyper-parameters."""
    self.metric = Metric(metric, rolling_data_window_size)
    self.number_of_features = number_of_feature
    self.validation_ratio = validation_ratio
    self.parameter_tuning = parameter_tuning
    # NOTE: attribute name `scalar` (sic) is kept — other code reads it.
    self.scalar = MinMaxScaler(feature_range=(0, 1))
def train(self, metric_data=None, prediction_duration=15):
    """Train the Fourier model and store the predictions in pandas dataframe.

    Extrapolates the series with fourier_extrapolation, then attaches dummy
    per-minute timestamps and expanding-window uncertainty bounds, storing the
    result in self.predicted_df (indexed by timestamp).
    """
    prediction_range = prediction_duration
    # convert incoming metric to Metric Object
    if metric_data:
        # because the rolling_data_window_size is set, this df should not bloat
        self.metric += Metric(metric_data)
    data = self.metric.metric_values
    vals = np.array(data["y"].tolist())
    _LOGGER.debug("training data start time: %s", self.metric.start_time)
    _LOGGER.debug("training data end time: %s", self.metric.end_time)
    _LOGGER.debug("begin training")
    # Third argument is the number of harmonics used for the extrapolation.
    forecast_values = self.fourier_extrapolation(vals, prediction_range,
                                                 1)  # int(len(vals)/3))
    dataframe_cols = {}
    dataframe_cols["yhat"] = np.array(forecast_values)
    # find most recent timestamp from original data and extrapolate new timestamps
    _LOGGER.debug("Creating Dummy Timestamps.....")
    maximum_time = max(data["ds"])
    dataframe_cols["timestamp"] = pd.date_range(
        maximum_time, periods=len(forecast_values), freq="min")
    # create dummy upper and lower bounds
    # Each bound i is a linearly-weighted moving average of all forecast values
    # before i, +/- two standard deviations of that same prefix.
    # NOTE(review): for i == 0 the prefix is empty; np.ma.average is fed empty
    # arrays and element 0 is overwritten below — confirm this does not raise
    # on the numpy version in use.
    _LOGGER.debug("Computing Bounds .... ")
    upper_bound = np.array([(np.ma.average(
        forecast_values[:i],
        weights=np.linspace(0, 1, num=len(forecast_values[:i])),
    ) + (np.std(forecast_values[:i]) * 2)) for i in range(len(forecast_values))])
    upper_bound[0] = np.mean(
        forecast_values[0])  # to account for no std of a single value
    lower_bound = np.array([(np.ma.average(
        forecast_values[:i],
        weights=np.linspace(0, 1, num=len(forecast_values[:i])),
    ) - (np.std(forecast_values[:i]) * 2)) for i in range(len(forecast_values))])
    lower_bound[0] = np.mean(
        forecast_values[0])  # to account for no std of a single value
    dataframe_cols["yhat_upper"] = upper_bound
    dataframe_cols["yhat_lower"] = lower_bound
    # create series and index into predictions_dict
    _LOGGER.debug("Formatting Forecast to Pandas ..... ")
    forecast = pd.DataFrame(data=dataframe_cols)
    forecast = forecast.set_index("timestamp")
    self.predicted_df = forecast
    _LOGGER.debug(forecast)
def train(self, metric_data=None, prediction_duration=15, seasonality=None, deviations=3):
    """Train the Sarima model and store the predictions in predicted_df."""
    if metric_data:
        self.metric += Metric(metric_data)
    data = self.metric.metric_values
    values = pd.Series(self.metric.metric_values.y.values, index=data["ds"])
    # Seasonal period length by seasonality name (None -> no matching period).
    seasonal_periods = {"daily": 2, "weekly": 7, "yearly": 12}
    self.model = SARIMAX(values,
                         order=(0, 0, 0),
                         seasonal_order=(1, 1, 1,
                                         seasonal_periods.get(seasonality)))
    _LOGGER.info("training data range: %s - %s", self.metric.start_time,
                 self.metric.end_time)
    _LOGGER.debug("begin training")
    results = self.model.fit(method='powell')
    forecast = results.forecast(prediction_duration)
    # Prepend the last observed value so the forecast joins the history.
    frame = {"yhat": np.append(values.get(-1), np.array(forecast))}
    _LOGGER.debug("Creating Dummy Timestamps.....")
    maximum_time = max(data["ds"])
    frame["timestamp"] = pd.date_range(maximum_time,
                                       periods=len(forecast) + 1,
                                       freq="30s")
    _LOGGER.debug("Computing Bounds .... ")
    lower_bound, upper_bound = ct.calculate_bounds(forecast, deviations)
    frame["yhat_upper"] = np.append(values.get(-1), upper_bound)
    frame["yhat_lower"] = np.append(values.get(-1), lower_bound)
    _LOGGER.debug("Formatting Forecast to Pandas ..... ")
    self.predicted_df = pd.DataFrame(data=frame).set_index("timestamp")
    _LOGGER.debug(self.predicted_df)
def train(self, metric_data=None, prediction_duration=15, seasonality=None, deviations=3):
    """Train the Fourier model and store the predictions in pandas dataframe."""
    if metric_data:
        self.metric += Metric(metric_data)
    data = self.metric.metric_values
    series_values = np.array(data["y"].tolist())
    _LOGGER.debug("training data start time: %s", self.metric.start_time)
    _LOGGER.debug("training data end time: %s", self.metric.end_time)
    _LOGGER.debug("begin training")
    # One harmonic is used for the extrapolation.
    forecast_values = self.fourier_extrapolation(series_values,
                                                 prediction_duration, 1)
    frame = {"yhat": np.array(forecast_values)}
    # Dummy timestamps extrapolated from the newest observed sample.
    _LOGGER.debug("Creating Dummy Timestamps.....")
    frame["timestamp"] = pd.date_range(max(data["ds"]),
                                       periods=len(forecast_values),
                                       freq="30s")
    _LOGGER.debug("Calculating Bounds .... ")
    lower_bound, upper_bound = ct.calculate_bounds(forecast_values, deviations)
    frame["yhat_upper"] = upper_bound
    frame["yhat_lower"] = lower_bound
    _LOGGER.debug("Formatting Forecast to Pandas ..... ")
    self.predicted_df = pd.DataFrame(data=frame).set_index("timestamp")
    _LOGGER.debug(self.predicted_df)
async def get(self):
    """Fetch and publish metric values asynchronously."""
    # Refresh and publish every tracked metric on each scrape.
    for predictor_model in self.settings["model_list"]:
        # Latest observed sample, to compare against the prediction.
        current_metric_value = Metric(
            pc.get_current_metric_value(
                metric_name=predictor_model.metric.metric_name,
                label_config=predictor_model.metric.label_config,
            )[0]
        )
        metric_name = predictor_model.metric.metric_name
        prediction = predictor_model.predict_value(datetime.now())
        # Publish every predicted column as its own gauge sample.
        for column_name in list(prediction.columns):
            GAUGE_DICT[metric_name].labels(
                **predictor_model.metric.label_config, value_type=column_name
            ).set(prediction[column_name][0])
        # Anomaly calculation (can be different for different models):
        # 0 when the observed value sits strictly inside the prediction band.
        observed = current_metric_value.metric_values["y"][0]
        inside_band = (observed < prediction["yhat_upper"][0]
                       and observed > prediction["yhat_lower"][0])
        anomaly = 0 if inside_band else 1
        # Publish the anomaly flag as value_type=anomaly.
        GAUGE_DICT[metric_name].labels(
            **predictor_model.metric.label_config, value_type="anomaly"
        ).set(anomaly)
    self.write(generate_latest(REGISTRY).decode("utf-8"))
    self.set_header("Content-Type", "text; charset=utf-8")
async def get(self):
    """Fetch and publish metric values asynchronously."""
    for predictor_model in self.settings["model_list"]:
        # Latest observed sample, to compare against the prediction.
        current_metric_value = Metric(
            pc.get_current_metric_value(
                metric_name=predictor_model.metric.metric_name,
                label_config=predictor_model.metric.label_config,
            )[0])
        metric_name = predictor_model.metric.metric_name
        # Prediction is evaluated two hours in the past.
        prediction = predictor_model.predict_value(datetime.now() -
                                                   timedelta(hours=2))
        # Publish every predicted column as its own gauge sample.
        for column_name in list(prediction.columns):
            GAUGE_DICT[metric_name].labels(
                **predictor_model.metric.label_config,
                value_type=column_name).set(prediction[column_name][0])
        observed = current_metric_value.metric_values["y"][0]
        upper_limit = prediction["yhat_upper"][0] + Configuration.deviations
        lower_limit = prediction["yhat_lower"][0] - Configuration.deviations
        # Anomaly flag per configured border mode: "more" checks only the
        # upper limit, "less" only the lower, "both" checks the full band.
        border_checks = {
            "more": 0 if observed < upper_limit else 1,
            "less": 0 if observed > lower_limit else 1,
            "both": 0 if upper_limit > observed > lower_limit else 1,
        }
        anomaly = border_checks.get(Configuration.anomaly_border)
        GAUGE_DICT[metric_name].labels(
            **predictor_model.metric.label_config,
            value_type="anomaly").set(anomaly)
    self.write(generate_latest(REGISTRY).decode("utf-8"))
    self.set_header("Content-Type", "text; charset=utf-8")
def test_metric_addition(self):
    """Adding metrics of different series raises; same-series chunks concatenate."""
    # Different label sets must not be addable.
    with self.assertRaises(TypeError, msg="incorrect addition of two metrics"):
        _ = Metric(self.raw_metrics_list[0][0]) + Metric(
            self.raw_metrics_list[0][1])
    sum_metric = Metric(self.raw_metrics_list[0][0]) + Metric(
        self.raw_metrics_list[1][0])
    self.assertIsInstance(sum_metric, Metric, msg="The sum is not a Metric")
    # The sum spans from the first chunk's start to the second chunk's end.
    self.assertEqual(sum_metric.start_time,
                     Metric(self.raw_metrics_list[0][0]).start_time,
                     "Incorrect Start time after addition")
    self.assertEqual(sum_metric.end_time,
                     Metric(self.raw_metrics_list[1][0]).end_time,
                     "Incorrect End time after addition")
str(Configuration.rolling_data_window_size)) mlflow.log_param("true_anomaly_threshold", str(Configuration.true_anomaly_threshold)) # initial run with just the train data model_mp.train(train_data[0], Configuration.retraining_interval_minutes) # store the predicted dataframe predicted_df = model_mp.predicted_df # track true_positives & ground truth anomalies num_true_positives = 0 num_ground_truth_anomalies = 0 for item in range(len(test_data_list) - 1): # the true values for this training period true_values = Metric(test_data_list[item + 1]) true_values.metric_values = true_values.metric_values.set_index("ds") # for each item in the test_data list, update the model (append new data and train it) model_mp.train(test_data_list[item], len(true_values.metric_values)) # store the prediction df for every interval predicted_df = predicted_df + model_mp.predicted_df true_values.metric_values["yhat"] = model_mp.predicted_df["yhat"] true_values.metric_values["yhat_upper"] = model_mp.predicted_df[ "yhat_upper"] true_values.metric_values["yhat_lower"] = model_mp.predicted_df[ "yhat_lower"] metric_timestamp = true_values.metric_values.index.values[int(
def __init__(self, metric, rolling_data_window_size="10d"):
    """Wrap the raw metric in a Metric object bounded by the rolling data window."""
    self.metric = Metric(metric, rolling_data_window_size)
def update_values(models_include=None):
    """Update db_values for every tracked time series.

    For each TS in db_ts: if a Values record already exists in db_values, its
    metric is refreshed with the latest data and truncated to the current
    window; otherwise a new record is created and bound to the most recently
    trained matching model.

    Record shape, keyed by the TS hash::

        index (hash): {
            "metric" (Metric): first item of MetricsList(get_metric_range_data()),
            "ts" (tsKey): key of db_ts,
            "model" (modelKey): key of db_models,
            "generation": counter value for this update pass,
        }

    :param models_include: optional collection of model keys; when given,
        only those models are considered when binding a new record.
    :raises Exception: when a range query resolves to more than one series.
    """
    logger.info("Updating Values")
    now = datetime.now()
    # Monotonic pass counter; stamped on every record touched in this pass.
    generation = next(values_generation)
    for (h, ts) in db_ts.items():
        logger.debug("Updating [TS:{h}], labels:{labels}".format(
            h=h, labels=ts["labels"]))
        if h in db_values.keys():
            # TS is already tracked by a Values record in db_values
            current_start_time = now - Configuration.current_data_window_size
            record = db_values[h]
            metric = record["metric"]
            # Fetch only the current window and merge it into the stored metric.
            metric_data = pc.get_metric_range_data(
                metric_name=metric.metric_name,
                label_config=metric.label_config,
                start_time=current_start_time,
                end_time=now)
            metrics = MetricsList(metric_data)
            if len(metrics) != 1:
                raise Exception("There can be only one")
            new_metric = metrics[0] + metric
            # Re-wrap to truncate the merged metric to the current window.
            trunk_metric = Metric(
                new_metric, current_start_time
            )  # This throws some exception really fast but this would have solved the problem.
            db_values[h]["metric"] = trunk_metric
            db_values[h]["generation"] = generation
            logger.debug(
                "Update and truncate [Metric:{h}] horizon:{current_start_time} metric_name:{metric_name}, label_config:{label_config}"
                .format(h=h,
                        metric_name=metric.metric_name,
                        label_config=metric.label_config,
                        current_start_time=current_start_time))
        else:
            # No record yet: build one and bind it to a matching model.
            current_start_time = now - Configuration.current_data_window_size
            metric_name = ts["labels"]["__name__"]
            # Label config is the TS labels without the reserved __name__ key.
            labels = dict()
            labels.update(ts["labels"])
            del labels["__name__"]
            items = db_models.items()
            if not models_include is None:
                items = filter(lambda item: item[0] in models_include, items)
            # Keep only models whose label hash matches this TS.
            models = list(
                filter(
                    lambda model: ts_hash(all_labels=model[1]["labels"]) == h,
                    items))
            if len(models) == 0:
                logger.warning(
                    "No models matching labels for [Metric:{h}] metric_name:{metric_name}, label_config:{label_config}"
                    .format(h=h, metric_name=metric_name, label_config=labels))
                continue
            metric_data = pc.get_metric_range_data(
                metric_name=metric_name,
                label_config=labels,
                start_time=current_start_time,
                end_time=now)
            metrics = MetricsList(metric_data)
            if len(metrics) != 1:
                raise Exception("There can be only one")
            # pick the most recent model (missing timestamps sort oldest)
            models.sort(key=lambda model: model[1].get(
                "timestamp", datetime.fromtimestamp(0)),
                        reverse=True)
            predictor = models[0][0]
            # predictor.build_prediction_df()
            record = {
                "metric": metrics[0],
                "ts": h,
                "model": predictor,
                "generation": generation
            }
            db_values.update({h: record})
            logger.debug(
                "Add [Metric:{h}] horizon:{current_start_time} metric_name:{metric_name}, label_config:{label_config}"
                .format(h=h,
                        metric_name=metric_name,
                        label_config=labels,
                        current_start_time=current_start_time))
def train(self, metric_data=None, prediction_duration=15):
    """Train the model.

    Optionally merges new metric data, min-max scales the values, performs a
    one-off grid search over LSTM/dense layer sizes (when parameter_tuning is
    set), trains the network, then rolls the last window forward one step at a
    time to produce `prediction_duration` forecast values with +/- 2 std-dev
    bounds in self.predicted_df.
    """
    if metric_data:
        # because the rolling_data_window_size is set, this df should not bloat
        self.metric += Metric(metric_data)
    # normalising: scale the value column (index 1) into [0, 1] in place
    metric_values_np = self.metric.metric_values.values
    scaled_np_arr = self.scalar.fit_transform(metric_values_np[:, 1].reshape(
        -1, 1))
    metric_values_np[:, 1] = scaled_np_arr.flatten()
    if self.parameter_tuning:
        # Grid-search LSTM/dense cell counts (32, 64, 128) by validation loss.
        x, y = self.prepare_data(metric_values_np)
        lstm_cells = [2**i for i in range(5, 8)]
        dense_cells = [2**i for i in range(5, 8)]
        loss = np.inf
        lstm_cell_count = 0
        dense_cell_count = 0
        for lstm_cell_count_ in lstm_cells:
            for dense_cell_count_ in dense_cells:
                model = self.get_model(lstm_cell_count_, dense_cell_count_)
                model.compile(loss='mean_squared_error', optimizer='adam')
                history = model.fit(x,
                                    y,
                                    epochs=50,
                                    batch_size=512,
                                    verbose=0,
                                    validation_split=self.validation_ratio)
                val_loss = history.history['val_loss']
                loss_ = min(val_loss)
                # Keep the configuration with the lowest validation loss.
                if loss > loss_:
                    lstm_cell_count = lstm_cell_count_
                    dense_cell_count = dense_cell_count_
                    loss = loss_
        self.lstm_cell_count = lstm_cell_count
        self.dense_cell_count = dense_cell_count
        # Tune only once; later calls reuse the chosen sizes.
        self.parameter_tuning = False
    model = self.get_model(self.lstm_cell_count, self.dense_cell_count)
    _LOGGER.info("training data range: %s - %s", self.metric.start_time,
                 self.metric.end_time)
    # _LOGGER.info("training data end time: %s", self.metric.end_time)
    _LOGGER.debug("begin training")
    data_x, data_y = self.prepare_data(metric_values_np)
    _LOGGER.debug(data_x.shape)
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(data_x, data_y, epochs=50, batch_size=512)
    # Roll the last feature window forward: the model predicts a scaled delta,
    # which is added to the window's last value to get the next point.
    data_test = metric_values_np[-self.number_of_features:, 1]
    forecast_values = []
    # NOTE(review): prev_value is assigned but never read — dead local.
    prev_value = data_test[-1]
    for i in range(int(prediction_duration)):
        prediction = model.predict(
            data_test.reshape(1, 1, self.number_of_features)).flatten()[0]
        curr_pred_value = data_test[-1] + prediction
        # Undo the min-max scaling before storing the forecast value.
        scaled_final_value = self.scalar.inverse_transform(
            curr_pred_value.reshape(1, -1)).flatten()[0]
        forecast_values.append(scaled_final_value)
        data_test = np.roll(data_test, -1)
        data_test[-1] = curr_pred_value
        prev_value = data_test[-1]
    dataframe_cols = {"yhat": np.array(forecast_values)}
    # Bounds: each point +/- two std-devs of the forecast prefix before it.
    upper_bound = np.array([
        (forecast_values[i] + (np.std(forecast_values[:i]) * 2))
        for i in range(len(forecast_values))
    ])
    upper_bound[0] = np.mean(
        forecast_values[0])  # to account for no std of a single value
    lower_bound = np.array([
        (forecast_values[i] - (np.std(forecast_values[:i]) * 2))
        for i in range(len(forecast_values))
    ])
    lower_bound[0] = np.mean(
        forecast_values[0])  # to account for no std of a single value
    dataframe_cols["yhat_upper"] = upper_bound
    dataframe_cols["yhat_lower"] = lower_bound
    # Dummy per-minute timestamps extrapolated from the newest observed sample.
    data = self.metric.metric_values
    maximum_time = max(data["ds"])
    dataframe_cols["timestamp"] = pd.date_range(
        maximum_time, periods=len(forecast_values), freq="min")
    forecast = pd.DataFrame(data=dataframe_cols)
    forecast = forecast.set_index("timestamp")
    self.predicted_df = forecast
    _LOGGER.debug(forecast)
def test_init(self):
    """Metric name should be parsed out of the raw metric data."""
    metric = Metric(self.raw_metrics_list[0][0])
    self.assertEqual("up", metric.metric_name, "incorrect metric name")
# log parameters before run mlflow.log_param("retraining_interval_minutes", str(Configuration.retraining_interval_minutes)) mlflow.log_param("rolling_training_window_size", str(Configuration.rolling_training_window_size)) mlflow.log_param("true_anomaly_threshold", str(Configuration.true_anomaly_threshold)) # initial run with just the train data model_mp.train( prediction_duration=Configuration.retraining_interval_minutes) # store the predicted dataframe and the true dataframe predicted_df = model_mp.predicted_df true_df = Metric(test_data_list[0]).metric_values.set_index("ds") # Label True Anomalies true_df["anomaly"] = label_true_anomalies( true_df, Configuration.true_anomaly_threshold) # track true_positives & ground truth anomalies num_true_positives = 0 num_ground_truth_anomalies = 0 for item in range(len(test_data_list) - 1): # the true values for this training period true_values = Metric(test_data_list[item + 1]) true_values.metric_values = true_values.metric_values.set_index("ds") true_df += true_values.metric_values
def __init__(self, metric, rolling_data_window_size="10d"):
    """Wrap the raw metric in a Metric object bounded by the rolling data window."""
    self.metric = Metric(metric, rolling_data_window_size)
disable_ssl=True, ) _LOGGER.info("Metric List size: %s", len(METRICS_LIST)) for metric in METRICS_LIST: # Initialize a predictor for all metrics first _LOGGER.info("Metric List read: %s", metric) current_start_time = datetime.now( ) - Configuration.current_data_window_size metric_init = pc.get_metric_range_data(metric_name=metric, start_time=current_start_time, end_time=datetime.now()) _LOGGER.info("Mertic loop: %s", metric_init) metric_list = map( lambda metric: Metric(metric, Configuration. rolling_training_window_size), metric_init) PREDICTOR_MODEL_LIST.extend( zip( metric_list, itertools.starmap(Configuration.model_module.MetricPredictor, itertools.repeat([])))) def train_model(): """Train the machine learning model. Traning interval rounds up to day starts (00h:00m:00s.00) """ _LOGGER.info("Train function: %s", PREDICTOR_MODEL_LIST) for (metric_to_predict, predictor_model) in PREDICTOR_MODEL_LIST: today = datetime(*datetime.now().timetuple()[:3]) data_start_time = today - Configuration.rolling_training_window_size
async def get(self):
    """Fetch and publish metric values asynchronously.

    For each predictor: publish the predicted columns, then decide the
    anomaly flag for the latest observed value. The prediction is only
    "trusted" if the values from one or two weeks ago also fell inside the
    prediction band; an untrusted prediction suppresses the anomaly flag.
    """
    # update metric value on every request and publish the metric
    for predictor_model in self.settings["model_list"]:
        # get the current metric value so that it can be compared with the
        # predicted values
        current_start_time = datetime.now(
        ) - Configuration.current_data_window_size
        current_end_time = datetime.now()
        # Same window shifted back one and two weeks, used as a sanity check
        # on the prediction band.
        weekago_start_time = (datetime.now() - timedelta(days=7)
                              ) - Configuration.current_data_window_size
        weekago_end_time = (datetime.now() - timedelta(days=7))
        twoweeksago_start_time = (datetime.now() - timedelta(
            days=14)) - Configuration.current_data_window_size
        twoweeksago_end_time = (datetime.now() - timedelta(days=14))
        trust_prediction = 0
        anomaly = 1
        _LOGGER.info(
            "MatricName = %s, label_config = %s, start_time = %s, end_time = %s",
            predictor_model.metric.metric_name,
            predictor_model.metric.label_config, current_start_time,
            current_end_time)
        prediction_data_size = 0
        metric_name = predictor_model.metric.metric_name
        prediction = predictor_model.predict_value(datetime.now())
        # "size" is an optional extra column some predictors include.
        if "size" in prediction:
            prediction_data_size = prediction['size']
        weekago_metric_data = pc.get_metric_range_data(
            metric_name=predictor_model.metric.metric_name,
            label_config=predictor_model.metric.label_config,
            start_time=weekago_start_time,
            end_time=weekago_end_time,
        )
        # Trust the prediction if last week's newest value fits in the band.
        if weekago_metric_data and hasattr(weekago_metric_data, "__len__"):
            weekago_metric_value = Metric(weekago_metric_data[0])
            if (weekago_metric_value.metric_values.loc[
                    weekago_metric_value.metric_values.ds.idxmax(),
                    "y"] < prediction["yhat_upper"][0]) and (
                        weekago_metric_value.metric_values.loc[
                            weekago_metric_value.metric_values.ds.idxmax(),
                            "y"] > prediction["yhat_lower"][0]):
                trust_prediction = 1
        twoweeksago_metric_data = pc.get_metric_range_data(
            metric_name=predictor_model.metric.metric_name,
            label_config=predictor_model.metric.label_config,
            start_time=twoweeksago_start_time,
            end_time=twoweeksago_end_time,
        )
        # Also trust it if the two-weeks-ago value fits in the band.
        if twoweeksago_metric_data and hasattr(twoweeksago_metric_data,
                                               "__len__"):
            twoweeksago_metric_value = Metric(twoweeksago_metric_data[0])
            if (twoweeksago_metric_value.metric_values.loc[
                    twoweeksago_metric_value.metric_values.ds.idxmax(),
                    "y"] < prediction["yhat_upper"][0]
                ) and (twoweeksago_metric_value.metric_values.loc[
                    twoweeksago_metric_value.metric_values.ds.idxmax(),
                    "y"] > prediction["yhat_lower"][0]):
                trust_prediction = 1
        current_metric_data = pc.get_metric_range_data(
            metric_name=predictor_model.metric.metric_name,
            label_config=predictor_model.metric.label_config,
            start_time=current_start_time,
            end_time=current_end_time,
        )
        # Check for all the columns available in the prediction
        # and publish the values for each of them
        for column_name in list(prediction.columns):
            GAUGE_DICT[metric_name].labels(
                **predictor_model.metric.label_config,
                value_type=column_name).set(prediction[column_name][0])
        if current_metric_data and hasattr(current_metric_data, "__len__"):
            current_metric_value = Metric(current_metric_data[0])
            # Inside the band -> no anomaly; outside the band but prediction
            # untrusted -> anomaly suppressed as well.
            if (current_metric_value.metric_values.loc[
                    current_metric_value.metric_values.ds.idxmax(),
                    "y"] < prediction["yhat_upper"][0]) and (
                        current_metric_value.metric_values.loc[
                            current_metric_value.metric_values.ds.idxmax(),
                            "y"] > prediction["yhat_lower"][0]):
                anomaly = 0
            elif trust_prediction == 0:
                anomaly = 0
            # create a new time series that has value_type=anomaly
            # this value is 1 if an anomaly is found 0 if not
            GAUGE_DICT[metric_name].labels(
                **predictor_model.metric.label_config,
                value_type="anomaly").set(anomaly)
            _LOGGER.info(
                "Got current values in Mainhandler = %s and newest value = %s, IDXMAX = %s",
                current_metric_value.metric_values,
                current_metric_value.metric_values.loc[
                    current_metric_value.metric_values.ds.idxmax(), 'y'],
                current_metric_value.metric_values.ds.idxmax())
        GAUGE_DICT[metric_name].labels(
            **predictor_model.metric.label_config,
            value_type="size").set(prediction_data_size)
    self.write(generate_latest(REGISTRY).decode("utf-8"))
    self.set_header("Content-Type", "text; charset=utf-8")
def get_metric_obj(metric_data):
    """Wrap raw Prometheus query output in a Metric object."""
    return Metric(metric_data)