def work(train_files: Tuple[str, str], test_files: Tuple[str, str],
         hyperparam: dict, fault_list: List[dict]):
    """Train (or load) a Bagel model on the study-group KPI and score its test set.

    Args:
        train_files: ``(study_train_file, control_train_file)`` CSV paths.
        test_files: ``(study_test_file, control_test_file)`` CSV paths.
        hyperparam: nested dict; only the ``hyperparam["bagel"]`` section
            (``window_size``, ``time_feature``, ``epochs``) is read here.
        fault_list: fault descriptors (unused in this variant).
    """
    # bagel hyperparams
    bagel_window_size = hyperparam["bagel"]["window_size"]
    time_feature = hyperparam["bagel"]["time_feature"]
    epochs = hyperparam["bagel"]["epochs"]

    study_train_file, control_train_file = train_files
    study_test_file, control_test_file = test_files

    model = bagel.Bagel(window_size=bagel_window_size,
                        time_feature=time_feature)
    # study group
    study_train_kpi = bagel.utils.load_kpi(study_train_file)
    study_test_kpi = bagel.utils.load_kpi(study_test_file)

    # Checkpoint name derived from the path components below the data root;
    # backslashes normalized so the same sign works on Windows and POSIX.
    study_sign = "_".join(study_train_file.split(os.path.sep)[2:]).replace(
        "\\", "/")
    study_model_save_path = os.path.join(PROJECT_PATH, "variables",
                                         study_sign).replace("\\", "/")
    # A "<sign>.index" file marks an existing saved checkpoint: load it
    # instead of retraining; otherwise fit and save for next time.
    if os.path.exists(
            os.path.join(PROJECT_PATH, "variables",
                         study_sign + ".index").replace("\\", "/")):
        model.load(study_model_save_path)
    else:
        model.fit(study_train_kpi, epochs=epochs, verbose=0)
        model.save(study_model_save_path)
    try:
        _, _, _, pred_data = model.predict_one(study_test_kpi)
    except Exception:
        # Best-effort: prediction may fail on KPIs with missing data.
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer silently swallowed.
        pass
# Esempio n. 2
def main():
    """Train and evaluate a Bagel model for every KPI file under ``input_path``.

    For each KPI: complete the timestamp index, split 49/21/30 into
    train/valid/test, standardize with the training statistics, fit a Bagel
    model, and write per-KPI metrics to ``output_path``. Relies on the
    module-level ``input_path``, ``output_path`` and ``epochs``.
    """
    bagel.utils.mkdirs(output_path)

    for file in bagel.utils.file_list(input_path):
        kpi = bagel.utils.load_kpi(file)
        print(f'KPI: {kpi.name}')
        kpi.complete_timestamp()

        # Valid/test reuse the training mean/std to avoid data leakage.
        train_kpi, valid_kpi, test_kpi = kpi.split((0.49, 0.21, 0.3))
        train_kpi, mean, std = train_kpi.standardize()
        valid_kpi, _, _ = valid_kpi.standardize(mean=mean, std=std)
        test_kpi, _, _ = test_kpi.standardize(mean=mean, std=std)

        model = bagel.Bagel()
        model.fit(kpi=train_kpi.use_labels(0.),
                  validation_kpi=valid_kpi,
                  epochs=epochs,
                  verbose=1)
        anomaly_scores = model.predict(test_kpi)

        results = bagel.testing.get_test_results(labels=test_kpi.labels,
                                                 scores=anomaly_scores,
                                                 missing=test_kpi.missing,
                                                 window_size=120)
        stats = bagel.testing.get_kpi_stats(kpi, test_kpi)

        print('Metrics')
        print(f'precision: {results.get("precision"):.3f} - '
              f'recall: {results.get("recall"):.3f} - '
              f'f1score: {results.get("f1score"):.3f}\n')

        # Assemble the report once, then write it in a single call.
        report = [
            f'kpi_name={kpi.name}\n\n',
            '[result]\n',
            f'threshold={results.get("threshold")}\n',
            f'precision={results.get("precision"):.3f}\n',
            f'recall={results.get("recall"):.3f}\n',
            f'f1_score={results.get("f1score"):.3f}\n\n',
            '[overall]\n',
            f'num_points={stats[0].num_points}\n',
            f'num_missing_points={stats[0].num_missing}\n',
            f'missing_rate={stats[0].missing_rate:.6f}\n',
            f'num_anomaly_points={stats[0].num_anomaly}\n',
            f'anomaly_rate={stats[0].anomaly_rate:.6f}\n\n',
            '[test]\n',
            f'num_points={stats[1].num_points}\n',
            f'num_missing_points={stats[1].num_missing}\n',
            f'missing_rate={stats[1].missing_rate:.6f}\n',
            f'num_anomaly_points={stats[1].num_anomaly}\n',
            f'anomaly_rate={stats[1].anomaly_rate:.6f}\n',
        ]
        with open(output_path.joinpath(f'{kpi.name}.txt'), 'w') as output:
            output.write(''.join(report))
    def call(self, body: dict):
        """`API call` interface.

        Args:
            body: the ``data`` field of the request, i.e. ``request_body.data``:
                {
                    "data": {{ body }}
                }
        """
        # Required test series.
        test_value = body["test_value"]
        test_ts = body["test_ts"]

        # Optional training series; a train KPI is built only when both
        # the values and the timestamps are supplied.
        train_value = body.get("train_value")
        train_ts = body.get("train_ts")

        # Optional parameters with their defaults.
        window_size: int = body.get("bagel_window_size", 30)
        feature: str = body.get("time_feature", "MH")
        self.bagel_epochs: int = body.get("bagel_epochs", 50)
        self.mad_window_size: int = body.get("mad_window_size", 5)
        self.spot_init_num: int = body.get("spot_init_num", 1000)

        self.model = bagel.Bagel(window_size=window_size,
                                 time_feature=feature)

        if train_value is None or train_ts is None:
            self.train_kpi = None
        else:
            self.train_kpi = KPI(train_ts, train_value)

        self.test_kpi = KPI(test_ts, test_value)
# Esempio n. 4
def work(train_files: Tuple[str, str], test_files: Tuple[str, str],
         hyperparam: dict, fault_list: List[dict]):
    """Run the full anomaly-detection pipeline for one study/control KPI pair.

    Trains (or loads a cached) Bagel model on the study-group training KPI,
    scores the study test KPI, applies a median-absolute-difference filter,
    and thresholds the scores with SPOT. When ``PLOT_FLAG`` is set, the
    result is plotted with the first fault falling in the test window marked.

    Args:
        train_files: ``(study_train_file, control_train_file)`` paths.
        test_files: ``(study_test_file, control_test_file)`` paths.
        hyperparam: nested dict with "bagel", "mad" and "spot" sections.
        fault_list: fault descriptors; each needs a "start" timestamp.
    """
    # bagel hyperparams
    bagel_window_size = hyperparam["bagel"]["window_size"]
    time_feature = hyperparam["bagel"]["time_feature"]
    epochs = hyperparam["bagel"]["epochs"]

    # mad hyperparams
    mad_window_size = hyperparam["mad"]["window_size"]

    # spot hyperparams
    spot_init_num = hyperparam["spot"]["init_num"]

    study_train_file, control_train_file = train_files
    study_test_file, control_test_file = test_files

    name = os.path.splitext(os.path.basename(study_train_file))[0]
    svc = os.path.basename(os.path.dirname(study_train_file))

    model = bagel.Bagel(window_size=bagel_window_size,
                        time_feature=time_feature)

    # study group
    study_train_kpi = bagel.utils.load_kpi(study_train_file)
    study_test_kpi = bagel.utils.load_kpi(study_test_file)

    # Checkpoint name derived from the path components below the data root.
    # A "<sign>.index" file marks an existing checkpoint: load instead of
    # retraining; otherwise fit and save for next time.
    study_sign = "_".join(study_train_file.split(os.path.sep)[2:])
    study_model_save_path = os.path.join(PROJECT_PATH, "variables", study_sign)
    if os.path.exists(
            os.path.join(PROJECT_PATH, "variables", study_sign + ".index")):
        model.load(study_model_save_path)
    else:
        model.fit(study_train_kpi, epochs=epochs, verbose=0)
        model.save(study_model_save_path)
    try:
        anomaly_scores, x_mean, x_std = model.predict(study_test_kpi,
                                                      verbose=0)
        # Fixed local typo: was `train_data_anoamly_sc`.
        train_anomaly_scores, _, _ = model.predict(study_train_kpi, verbose=0)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer swallowed. Message means "missing data...".
        print("\033[36m 数据缺失... \033[0m")
        return

    # control group
    control_train_kpi = bagel.utils.load_kpi(control_train_file)
    control_test_kpi = bagel.utils.load_kpi(control_test_file)

    # Drop the first window_size - 1 points: they have no complete window
    # and therefore no score.
    _, study_test_kpi = study_test_kpi.split_by_indices(bagel_window_size - 1)
    _, control_test_kpi = control_test_kpi.split_by_indices(bagel_window_size -
                                                            1)

    # mad: median-absolute-difference filter on the raw study test values.
    mad_filter = mad(study_test_kpi.raw_values, name, mad_window_size)

    # spot: initialize with the tail of the training scores, then threshold
    # the test scores.
    pred_label, spot_threshold = run_spot(
        train_anomaly_scores[-spot_init_num:], anomaly_scores, mad_filter)

    # plot
    if PLOT_FLAG:
        try:
            # Build the image path from the last path components of the test
            # file, dropping the "data"/"219"/"220" directory levels.
            post_sub_path = study_test_file.split(os.path.sep)[-5:]
            post_sub_path.remove("data")
            if "219" in post_sub_path: post_sub_path.remove("219")
            if "220" in post_sub_path: post_sub_path.remove("220")
            save_path = os.path.join(PROJECT_PATH, "img",
                                     os.path.sep.join(post_sub_path)).replace(
                                         "csv", "png")
            # Fold the service directory into the file name instead of
            # keeping it as a directory level (overwrites the outer `svc`).
            split_save_path = save_path.split(os.path.sep)
            svc = split_save_path.pop(-2)
            save_path = os.path.sep.join(split_save_path)
            save_path = save_path.replace(".png", f"_{svc}.png")
            # Mark the first fault whose start falls inside the test window.
            study_test_ts = study_test_kpi.timestamps
            change_ts = None
            for fault in fault_list:
                if fault["start"] >= np.min(study_test_ts) and fault[
                        "start"] <= np.max(study_test_ts):
                    change_ts = fault["start"]
                    break
            integrate_plot(study_test_kpi,
                           control_test_kpi,
                           anomaly_scores,
                           x_mean,
                           x_std,
                           pred_label,
                           spot_threshold,
                           name,
                           svc,
                           save_path=save_path,
                           change_ts=change_ts)
        # Skip plotting when data is missing (narrowed from bare `except:`).
        except Exception:
            print("\033[36m 数据缺失... \033[0m")
            return
    def run(self):
        """
        CLI interface: parse command-line options, load the data, and
        configure the detector's model and parameters on ``self``.
        ====>
        ```
        anomaly_detection = AnomalyDetection()
        anomaly_detection.run()
        ```
        """
        import argparse
        parser = argparse.ArgumentParser()
        parser.add_argument("-d",
                            "--data_dir",
                            metavar="data_dir",
                            dest="data_dir",
                            type=str,
                            required=True,
                            help="The input data.")
        parser.add_argument("-p",
                            "--param_dir",
                            metavar="param_dir",
                            dest="param_dir",
                            type=str,
                            required=False,
                            default=None,
                            help="The directory where checkpoint is located.")
        # bagel
        parser.add_argument(
            "-bw",
            "--bagel_window_size",
            type=int,
            required=False,
            metavar="bagel_window_size",
            dest="bagel_window_size",
            default=30,
            help="The window size of bagel's time series. default: `30`")
        parser.add_argument(
            "-e",
            "--bagel_epochs",
            type=int,
            required=False,
            default=50,
            metavar="bagel_epochs",
            dest="bagel_epochs",
            help="The number of epochs during the training step. default: `50`"
        )
        parser.add_argument(
            "-t",
            "--time_feature",
            type=str,
            required=False,
            default="MH",
            metavar="time_feature",
            dest="time_feature",
            help=
            "The dimension of time feature during the time encoding stage. `a|A|w` week; `H` one day; `I` half of the day; `M` minute; `S` second; default: `MH`"
        )

        # median diff
        parser.add_argument(
            "-mw",
            "--mad_window_size",
            type=int,
            required=False,
            default=5,
            metavar="mad_window_size",
            dest="mad_window_size",
            help=
            "The window size used for comparing the difference of median. default: `5`"
        )

        # spot
        parser.add_argument(
            "-n",
            "--spot_init_num",
            type=int,
            required=False,
            default=1000,
            metavar="spot_init_num",
            dest="spot_init_num",
            help=
            "The number of points used for initializing SPOT. default `1000`")

        config = vars(parser.parse_args())

        # load data
        # NOTE(review): self.load_data appears to return
        # (train_value, train_ts, test_value, test_ts), where the train
        # parts may be None — confirm against its definition.
        train_value, train_ts, test_value, test_ts = self.load_data(
            config["data_dir"])

        # Stash the non-bagel parameters on the instance for later stages.
        self.bagel_epochs = config["bagel_epochs"]
        self.mad_window_size = config["mad_window_size"]
        self.spot_init_num = config["spot_init_num"]

        # A training KPI is built only when both values and timestamps exist.
        if train_value is not None and train_ts is not None:
            self.train_kpi = KPI(train_ts, train_value)
        else:
            self.train_kpi = None
        self.test_kpi = KPI(test_ts, test_value)

        self.model = bagel.Bagel(window_size=config["bagel_window_size"],
                                 time_feature=config["time_feature"])