def run_model(
    datapath,
    freq,
    min_date="01-01-2018",
    plot=True,
    model_name="stl",
    min_value=9,
    min_samples_for_category=100,
):
    """Run a single anomaly-detection model over the labeled dataset at *datapath*.

    Parameters
    ----------
    datapath : str
        Path to the labeled CSV file read by ``read_data``.
    freq : str
        Time-series frequency (pandas offset alias, e.g. ``"3H"``).
    min_date : str
        Earliest date to keep when loading the data.
    plot : bool
        When True, plot each category with the ground-truth labels overlaid.
    model_name : str
        One of ``"twitter"``, ``"ma_seasonal"``, ``"stl"``, ``"azure"``, ``"lstm"``.
    min_value : int
        Minimum series value passed to the detector.
    min_samples_for_category : int
        Unused here; kept for interface compatibility.

    Returns
    -------
    The prediction DataFrame produced by the selected model.

    Raises
    ------
    ValueError
        If *model_name* is not a supported name (previously this surfaced as
        a confusing ``NameError`` on the undefined ``model`` variable).
    """
    print(
        "Loading file {0}, with frequency {1}. Model name = {2}".format(
            datapath, freq, model_name
        )
    )
    dataset = read_data(datapath, min_date=min_date)
    dataset = dataset.rename(columns={"is_anomaly": "label"})
    is_multicategory = ("category" in dataset) or ("category" in dataset.index)

    # Dispatch on the requested model. BUG FIX: the original used independent
    # `if` statements; an unrecognized model_name left `model` unbound.
    if model_name == "twitter":
        model = TwitterAnomalyTrendinessDetector(
            is_multicategory=is_multicategory,
            freq=freq,
            min_value=min_value,
            threshold=None,
            max_anoms=0.49,
            seasonality_freq=7,
        )
    elif model_name == "ma_seasonal":
        model = MovingAverageSeasonalTrendinessDetector(
            is_multicategory=is_multicategory,
            freq=freq,
            min_value=min_value,
            anomaly_type="or",
            num_of_std=3,
        )
    elif model_name == "stl":
        model = STLTrendinessDetector(
            is_multicategory=is_multicategory,
            freq=freq,
            min_value=min_value,
            anomaly_type="or",
            num_of_std=4,
            lo_frac=0.5,
        )
    elif model_name == "azure":
        # The Azure detector needs a subscription key read from local config.
        dirname = os.path.dirname(__file__)
        filename = os.path.join(dirname, "config/config.json")
        subscription_key = get_azure_subscription_key(filename)
        model = AzureAnomalyTrendinessDetector(
            is_multicategory=is_multicategory,
            freq=freq,
            min_value=min_value,
            subscription_key=subscription_key,
        )
    elif model_name == "lstm":
        model = LSTMTrendinessDetector(freq=freq, is_multicategory=is_multicategory)
    else:
        raise ValueError("Unknown model name: {0}".format(model_name))

    prediction = model.predict(dataset, verbose=True)
    raw_metrics = get_metrics_for_all_categories(
        dataset[["value"]],
        prediction[["prediction"]],
        dataset[["label"]],
        window_size_for_metrics=5,
    )
    metrics = get_final_metrics(raw_metrics, summarized=False)
    print(metrics)

    # Plot each category against the ground-truth labels.
    if plot:
        _, file = os.path.split(datapath)
        print("Plotting...")
        model.plot(labels=dataset["label"], postfix=file)
    return prediction
def run_model(datapath, freq, min_date='01-01-2018', plot=True,
              model_name='stl', min_value=9, min_samples_for_category=100):
    """Load a labeled dataset and run the requested anomaly detector on it.

    NOTE(review): this duplicates an earlier ``run_model`` definition in this
    file; the later definition wins at import time — consider removing one.

    Parameters mirror the earlier copy: *datapath* is the labeled CSV file,
    *freq* a pandas offset alias, *model_name* one of ``'twitter'``,
    ``'ma_seasonal'``, ``'stl'``, ``'azure'``, ``'lstm'``.
    *min_samples_for_category* is unused; kept for interface compatibility.

    Returns the model's prediction DataFrame.

    Raises ``ValueError`` for an unknown *model_name* (previously this
    surfaced as a ``NameError`` on the unbound ``model`` variable).
    """
    print("Loading file {0}, with frequency {1}. Model name = {2}".format(
        datapath, freq, model_name))
    dataset = read_data(datapath, min_date=min_date)
    dataset = dataset.rename(columns={'is_anomaly': 'label'})
    is_multicategory = ('category' in dataset) or ('category' in dataset.index)

    # BUG FIX: use mutually exclusive branches and fail fast on unknown names
    # instead of falling through with `model` undefined.
    if model_name == 'twitter':
        model = TwitterAnomalyTrendinessDetector(
            is_multicategory=is_multicategory, freq=freq, min_value=min_value,
            threshold=None, max_anoms=0.49, seasonality_freq=7)
    elif model_name == 'ma_seasonal':
        model = MovingAverageSeasonalTrendinessDetector(
            is_multicategory=is_multicategory, freq=freq, min_value=min_value,
            anomaly_type='or', num_of_std=3)
    elif model_name == 'stl':
        model = STLTrendinessDetector(is_multicategory=is_multicategory,
                                      freq=freq, min_value=min_value,
                                      anomaly_type='or', num_of_std=4,
                                      lo_frac=0.5)
    elif model_name == 'azure':
        # The Azure detector needs a subscription key read from local config.
        dirname = os.path.dirname(__file__)
        filename = os.path.join(dirname, 'config/config.json')
        subscription_key = get_azure_subscription_key(filename)
        model = AzureAnomalyTrendinessDetector(
            is_multicategory=is_multicategory, freq=freq, min_value=min_value,
            subscription_key=subscription_key)
    elif model_name == 'lstm':
        model = LSTMTrendinessDetector(freq=freq,
                                       is_multicategory=is_multicategory)
    else:
        raise ValueError("Unknown model name: {0}".format(model_name))

    prediction = model.predict(dataset, verbose=True)
    raw_metrics = get_metrics_for_all_categories(dataset[['value']],
                                                 prediction[['prediction']],
                                                 dataset[['label']],
                                                 window_size_for_metrics=5)
    metrics = get_final_metrics(raw_metrics, summarized=False)
    print(metrics)

    # Plot each category against the ground-truth labels.
    if plot:
        _, file = os.path.split(datapath)
        print("Plotting...")
        model.plot(labels=dataset['label'], postfix=file)
    return prediction
def evaluate_all_models(
    datapath="SF3H_labeled.csv",
    min_date="01-01-2018",
    freq="3H",
    use_comet=False,
    models_to_run=None,
    window_size_for_metrics=1,
):
    """Evaluate every supported anomaly detector over a labeled dataset.

    Runs a small hyper-parameter sweep per model family (LSTM, moving-average
    seasonal, STL, Twitter, Azure), printing the resulting metrics and
    optionally logging each run to comet.ml.

    Parameters
    ----------
    datapath : str
        Path to the labeled CSV file read by ``read_data``.
    min_date : str
        Earliest date to keep when loading the data.
    freq : str
        Time-series frequency (pandas offset alias, e.g. ``"3H"``).
    use_comet : bool
        When True, log parameters/metrics of each run to comet.ml.
    models_to_run : list[str] | None
        Model families to evaluate. ``None`` or an empty list runs every
        family except LSTM, which must be requested explicitly.
    window_size_for_metrics : int
        Tolerance window (in samples) used when matching predicted anomalies
        to labeled ones.
    """
    # BUG FIX: the default was None, so every `x in models_to_run` membership
    # test below raised TypeError on a default call. An empty list means
    # "run all families" (LSTM excepted), matching the existing len()==0 checks.
    if models_to_run is None:
        models_to_run = []

    try:
        dataset = read_data(datapath, min_date=min_date)
    except Exception:
        print("File not found or failed to read")
        return

    # Drop duplicated timestamps, keeping the first occurrence.
    dataset = dataset[~dataset.index.duplicated(keep="first")]
    dataset = dataset.rename(columns={"is_anomaly": "label"})
    X = dataset[["value"]]
    y = dataset[["label"]]

    # Hourly-resolution data gets a slightly higher minimum-value threshold.
    if "H" in freq:
        min_value = 10
    else:
        min_value = 8
    print("min value for prediction = " + str(min_value))

    # Imported lazily so comet.ml is only required when this function runs.
    # NOTE(review): `Experiment` is never referenced directly here; presumably
    # the import is needed for its side effects before log_experiment — verify.
    from comet_ml import Experiment

    # LSTM model (opt-in only; not part of the "run everything" default).
    if "lstm" in models_to_run:
        print("Evaluating LSTM model")
        num_std = 3
        model = LSTMTrendinessDetector(
            is_multicategory=True, num_of_std=num_std, freq=freq, min_value=min_value
        )
        result = eval_models(
            X,
            y,
            [model],
            label_col_name="label",
            train_percent=20,
            window_size_for_metrics=window_size_for_metrics,
        )
        print_lstm_model(datapath, min_value, model, result)
        if use_comet:
            params = {
                "num_std": num_std,
                "window_size_for_metrics": window_size_for_metrics,
            }
            metrics = summarize_metrics(result[model.__name__])
            log_experiment(datapath, dataset, model, parameters=params, metrics=metrics)

    # Moving-average seasonal model: sweep anomaly type and std threshold.
    if "ma_seasonal" in models_to_run or len(models_to_run) == 0:
        print("Evaluating MA model")
        anomaly_types = ["residual", "trend", "and", "or"]
        for num_std in [2.5, 3, 3.5, 4]:
            for anomaly_type in anomaly_types:
                model = MovingAverageSeasonalTrendinessDetector(
                    is_multicategory=True,
                    freq=freq,
                    min_value=min_value,
                    anomaly_type=anomaly_type,
                    num_of_std=num_std,
                )
                result = eval_models_CV(
                    X,
                    y,
                    [model],
                    label_col_name="label",
                    n_splits=5,
                    window_size_for_metrics=window_size_for_metrics,
                )
                print_ma_result(
                    anomaly_type, datapath, min_value, model, num_std, result
                )
                if use_comet:
                    params = {
                        "anomaly_type": anomaly_type,
                        "num_std": num_std,
                        "window_size_for_metrics": window_size_for_metrics,
                        "min_value": min_value,
                    }
                    metrics = summarize_metrics(result[model.__name__])
                    log_experiment(
                        datapath, dataset, model, parameters=params, metrics=metrics
                    )

    # STL model: additionally sweep the loess fraction.
    if "stl" in models_to_run or len(models_to_run) == 0:
        print("Evaluating STL model")
        anomaly_types = ["residual", "trend", "and", "or"]
        for num_std in [2.5, 3, 3.5, 4]:
            for anomaly_type in anomaly_types:
                for lo_frac in [0.1, 0.5, 1, 1.5]:
                    model = STLTrendinessDetector(
                        is_multicategory=True,
                        freq=freq,
                        min_value=min_value,
                        anomaly_type=anomaly_type,
                        num_of_std=num_std,
                        lo_frac=lo_frac,
                    )
                    result = eval_models(
                        X,
                        y,
                        [model],
                        label_col_name="label",
                        train_percent=20,
                        window_size_for_metrics=window_size_for_metrics,
                    )
                    print_stl_result(
                        anomaly_type, datapath, min_value, model, num_std, result
                    )
                    if use_comet:
                        params = {
                            "anomaly_type": anomaly_type,
                            "num_std": num_std,
                            "window_size_for_metrics": window_size_for_metrics,
                            "min_value": min_value,
                            "lo_frac": lo_frac,
                        }
                        metrics = summarize_metrics(result[model.__name__])
                        log_experiment(
                            datapath, dataset, model, parameters=params, metrics=metrics
                        )

    # Twitter model: sweep max_anoms, threshold and alpha.
    if "twitter" in models_to_run or len(models_to_run) == 0:
        print("Evaluating Twitter model")
        max_anoms_list = [0.05, 0.1]
        for max_anoms in max_anoms_list:
            for threshold in [None, "med_max", "p95", "p99"]:
                for alpha in [0.05, 0.1, 0.15]:
                    model = TwitterAnomalyTrendinessDetector(
                        is_multicategory=True,
                        freq=freq,
                        min_value=min_value,
                        threshold=threshold,
                        max_anoms=max_anoms,
                        longterm=False,
                        alpha=alpha,
                        seasonality_freq=7,
                    )
                    result = eval_models(
                        X,
                        y,
                        [model],
                        label_col_name="label",
                        train_percent=20,
                        window_size_for_metrics=window_size_for_metrics,
                    )
                    print_twitter_result(
                        alpha, datapath, min_value, model, result, threshold
                    )
                    if use_comet:
                        params = {
                            "max_anoms": max_anoms,
                            "threshold": threshold,
                            "alpha": alpha,
                            "window_size_for_metrics": window_size_for_metrics,
                            "min_value": min_value,
                        }
                        metrics = summarize_metrics(result[model.__name__])
                        log_experiment(
                            datapath, dataset, model, parameters=params, metrics=metrics
                        )

    # Azure model: one API call, then tune sensitivity locally.
    if "azure" in models_to_run or len(models_to_run) == 0:
        print("Evaluating Azure model")
        # Get the Azure subscription id for the Azure Anomaly Detector.
        dirname = os.path.dirname(__file__)
        filename = os.path.join(dirname, "../config/config.json")
        subscription_key = get_azure_subscription_key(filename)
        model = AzureAnomalyTrendinessDetector(
            is_multicategory=True,
            freq=freq,
            min_value=min_value,
            subscription_key=subscription_key,
            sensitivity=None,
        )
        # Call the Azure Anomaly Detector API once; re-tune the cached
        # prediction per sensitivity value instead of re-calling the service.
        prediction = model.predict(dataset, verbose=True)
        sensitivity = [None, 0.3, 0.5, 1, 1.5, 2]
        for sens in sensitivity:
            print("sensitivity = {}".format(sens))
            prediction = model.tune_prediction(prediction, sens)
            raw_metrics = get_metrics_for_all_categories(
                X[["value"]],
                prediction[["prediction"]],
                y[["label"]],
                # BUG FIX: was hard-coded to 5, silently ignoring the
                # window_size_for_metrics parameter used by every other family.
                window_size_for_metrics=window_size_for_metrics,
            )
            metrics = get_final_metrics(raw_metrics)
            result = {}
            result[model.__name__] = metrics
            print_azure_model(datapath, min_value, model, result, sens)
            # model.plot(labels = y.reset_index().set_index('date'))
            if use_comet:
                params = {
                    "sensitivity": sens,
                    "window_size_for_metrics": window_size_for_metrics,
                }
                metrics = summarize_metrics(result[model.__name__])
                log_experiment(
                    datapath, dataset, model, parameters=params, metrics=metrics
                )
def evaluate_all_models(datapath="SF3H_labeled.csv", min_date='01-01-2018',
                        freq='3H', use_comet=False, models_to_run=None,
                        window_size_for_metrics=1):
    """Evaluate all supported anomaly detectors over a labeled dataset.

    NOTE(review): this duplicates an earlier ``evaluate_all_models``
    definition in this file; the later definition wins at import time —
    consider removing one.

    ``models_to_run`` selects model families; ``None``/empty runs every
    family except LSTM (opt-in only). ``window_size_for_metrics`` is the
    tolerance window when matching predicted anomalies to labels. Results
    are printed and, when ``use_comet`` is True, logged to comet.ml.
    """
    # BUG FIX: the default was a mutable list literal ([]), shared across
    # calls; use the None-sentinel idiom instead.
    if models_to_run is None:
        models_to_run = []
    try:
        dataset = read_data(datapath, min_date=min_date)
    # BUG FIX: was a bare `except:`, which also swallowed SystemExit and
    # KeyboardInterrupt.
    except Exception:
        print("File not found or failed to read")
        return
    # Drop duplicated timestamps, keeping the first occurrence.
    dataset = dataset[~dataset.index.duplicated(keep='first')]
    dataset = dataset.rename(columns={'is_anomaly': 'label'})
    X = dataset[['value']]
    y = dataset[['label']]
    # Hourly-resolution data gets a slightly higher minimum-value threshold.
    if 'H' in freq:
        min_value = 10
    else:
        min_value = 8
    print("min value for prediction = " + str(min_value))
    # Imported lazily so comet.ml is only required when this function runs.
    # NOTE(review): `Experiment` is never referenced directly here; presumably
    # needed for import side effects before log_experiment — verify.
    from comet_ml import Experiment

    # LSTM model (opt-in only; not part of the "run everything" default).
    if 'lstm' in models_to_run:
        print("Evaluating LSTM model")
        num_std = 3
        model = LSTMTrendinessDetector(is_multicategory=True,
                                       num_of_std=num_std, freq=freq,
                                       min_value=min_value)
        result = eval_models(X, y, [model], label_col_name='label',
                             train_percent=20,
                             window_size_for_metrics=window_size_for_metrics)
        print_lstm_model(datapath, min_value, model, result)
        if use_comet:
            params = {
                'num_std': num_std,
                'window_size_for_metrics': window_size_for_metrics
            }
            metrics = summarize_metrics(result[model.__name__])
            log_experiment(datapath, dataset, model, parameters=params,
                           metrics=metrics)

    # Moving-average seasonal model: sweep anomaly type and std threshold.
    if 'ma_seasonal' in models_to_run or len(models_to_run) == 0:
        print("Evaluating MA model")
        anomaly_types = ['residual', 'trend', 'and', 'or']
        for num_std in [2.5, 3, 3.5, 4]:
            for anomaly_type in anomaly_types:
                model = MovingAverageSeasonalTrendinessDetector(
                    is_multicategory=True, freq=freq, min_value=min_value,
                    anomaly_type=anomaly_type, num_of_std=num_std)
                result = eval_models_CV(
                    X, y, [model], label_col_name='label', n_splits=5,
                    window_size_for_metrics=window_size_for_metrics)
                print_ma_result(anomaly_type, datapath, min_value, model,
                                num_std, result)
                if use_comet:
                    params = {
                        'anomaly_type': anomaly_type,
                        'num_std': num_std,
                        'window_size_for_metrics': window_size_for_metrics,
                        'min_value': min_value
                    }
                    metrics = summarize_metrics(result[model.__name__])
                    log_experiment(datapath, dataset, model, parameters=params,
                                   metrics=metrics)

    # STL model: additionally sweep the loess fraction.
    if 'stl' in models_to_run or len(models_to_run) == 0:
        print("Evaluating STL model")
        # BUG FIX: `anomaly_types` was only defined inside the MA branch, so
        # running STL without MA raised NameError; define it here as well.
        anomaly_types = ['residual', 'trend', 'and', 'or']
        for num_std in [2.5, 3, 3.5, 4]:
            for anomaly_type in anomaly_types:
                for lo_frac in [0.1, 0.5, 1, 1.5]:
                    model = STLTrendinessDetector(is_multicategory=True,
                                                  freq=freq,
                                                  min_value=min_value,
                                                  anomaly_type=anomaly_type,
                                                  num_of_std=num_std,
                                                  lo_frac=lo_frac)
                    result = eval_models(
                        X, y, [model], label_col_name='label',
                        train_percent=20,
                        window_size_for_metrics=window_size_for_metrics)
                    print_stl_result(anomaly_type, datapath, min_value, model,
                                     num_std, result)
                    if use_comet:
                        params = {
                            'anomaly_type': anomaly_type,
                            'num_std': num_std,
                            'window_size_for_metrics': window_size_for_metrics,
                            'min_value': min_value,
                            'lo_frac': lo_frac
                        }
                        metrics = summarize_metrics(result[model.__name__])
                        log_experiment(datapath, dataset, model,
                                       parameters=params, metrics=metrics)

    # Twitter model: sweep max_anoms, threshold and alpha.
    if 'twitter' in models_to_run or len(models_to_run) == 0:
        print("Evaluating Twitter model")
        max_anoms_list = [0.05, 0.1]
        for max_anoms in max_anoms_list:
            for threshold in [None, 'med_max', 'p95', 'p99']:
                for alpha in [0.05, 0.1, 0.15]:
                    model = TwitterAnomalyTrendinessDetector(
                        is_multicategory=True, freq=freq, min_value=min_value,
                        threshold=threshold, max_anoms=max_anoms,
                        longterm=False, alpha=alpha, seasonality_freq=7)
                    result = eval_models(
                        X, y, [model], label_col_name='label',
                        train_percent=20,
                        window_size_for_metrics=window_size_for_metrics)
                    print_twitter_result(alpha, datapath, min_value, model,
                                         result, threshold)
                    if use_comet:
                        params = {
                            'max_anoms': max_anoms,
                            'threshold': threshold,
                            'alpha': alpha,
                            'window_size_for_metrics': window_size_for_metrics,
                            'min_value': min_value
                        }
                        metrics = summarize_metrics(result[model.__name__])
                        log_experiment(datapath, dataset, model,
                                       parameters=params, metrics=metrics)

    # Azure model: one API call, then tune sensitivity locally.
    if 'azure' in models_to_run or len(models_to_run) == 0:
        print("Evaluating Azure model")
        # Get the Azure subscription id for the Azure Anomaly Detector.
        dirname = os.path.dirname(__file__)
        filename = os.path.join(dirname, '../config/config.json')
        subscription_key = get_azure_subscription_key(filename)
        model = AzureAnomalyTrendinessDetector(
            is_multicategory=True, freq=freq, min_value=min_value,
            subscription_key=subscription_key, sensitivity=None)
        # Call the Azure Anomaly Detector API once; re-tune the cached
        # prediction per sensitivity value instead of re-calling the service.
        prediction = model.predict(dataset, verbose=True)
        sensitivity = [None, 0.3, 0.5, 1, 1.5, 2]
        for sens in sensitivity:
            print("sensitivity = {}".format(sens))
            prediction = model.tune_prediction(prediction, sens)
            raw_metrics = get_metrics_for_all_categories(
                X[['value']], prediction[['prediction']], y[['label']],
                # BUG FIX: was hard-coded to 5, silently ignoring the
                # window_size_for_metrics parameter used elsewhere.
                window_size_for_metrics=window_size_for_metrics)
            metrics = get_final_metrics(raw_metrics)
            result = {}
            result[model.__name__] = metrics
            print_azure_model(datapath, min_value, model, result, sens)
            # model.plot(labels = y.reset_index().set_index('date'))
            if use_comet:
                params = {
                    'sensitivity': sens,
                    'window_size_for_metrics': window_size_for_metrics
                }
                metrics = summarize_metrics(result[model.__name__])
                log_experiment(datapath, dataset, model, parameters=params,
                               metrics=metrics)