Example #1
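# NOTE: each example below is an excerpt of a main() entry point from the
# OmniAnomaly code base. All of them assume module-level context that the
# excerpts do not show: standard imports (logging, os, pickle, time,
# numpy as np, tensorflow as tf, pprint), the project's own helpers
# (OmniAnomaly, Trainer, Predictor, VariableSaver, get_variables_as_dict,
# get_data / load_dataset, save_z, bf_search, pot_eval) and a module-level
# `config` (plus, in Examples #1 and #2, a `results` object). This list is
# read off the code itself, not a verified import block.
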
def main(dataset, subdataset):
    logging.basicConfig(
        level="INFO",
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")

    # prepare the data
    (x_train, _), (x_test, y_test) = load_dataset(dataset, subdataset)

    tf.reset_default_graph()
    # construct the model under `variable_scope` named 'model'
    with tf.variable_scope("model") as model_vs:
        model = OmniAnomaly(config=config, name="model")

        # construct the trainer
        trainer = Trainer(
            model=model,
            model_vs=model_vs,
            max_epoch=config.max_epoch,
            batch_size=config.batch_size,
            valid_batch_size=config.test_batch_size,
            initial_lr=config.initial_lr,
            lr_anneal_epochs=config.lr_anneal_epoch_freq,
            lr_anneal_factor=config.lr_anneal_factor,
            grad_clip_norm=config.gradient_clip_norm,
            valid_step_freq=config.valid_step_freq,
        )

        # construct the predictor
        predictor = Predictor(
            model,
            batch_size=config.batch_size,
            n_z=config.test_n_z,
            last_point_only=True,
        )

        with tf.Session().as_default():

            if config.restore_dir is not None:
                # Restore variables from `restore_dir`.
                saver = VariableSaver(get_variables_as_dict(model_vs),
                                      config.restore_dir)
                saver.restore()

            if config.max_epoch > 0:
                # train the model
                train_start = time.time()
                best_valid_metrics = trainer.fit(x_train)
                train_time = time.time() - train_start
                best_valid_metrics.update({"train_time": train_time})
            else:
                best_valid_metrics = {}

            # get score of train set for POT algorithm
            train_score, train_z, train_pred_speed = predictor.get_score(
                x_train)
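            # (pot_eval below calibrates its tail model on these training
            # scores before thresholding the test scores)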
            if config.train_score_filename is not None:
                with open(
                        os.path.join(config.result_dir,
                                     config.train_score_filename),
                        "wb") as file:
                    pickle.dump(train_score, file)
            if config.save_z:
                save_z(train_z, "train_z")

            if x_test is not None:
                # get score of test set
                test_start = time.time()
                test_score, test_z, pred_speed = predictor.get_score(x_test)
                test_time = time.time() - test_start
                if config.save_z:
                    save_z(test_z, "test_z")
                best_valid_metrics.update({
                    "pred_time": pred_speed,
                    "pred_total_time": test_time
                })
                if config.test_score_filename is not None:
                    with open(
                            os.path.join(config.result_dir,
                                         config.test_score_filename),
                            "wb",
                    ) as file:
                        pickle.dump(test_score, file)

                if y_test is not None and len(y_test) >= len(test_score):
                    if config.get_score_on_dim:
                        # get the joint score
                        test_score = np.sum(test_score, axis=-1)
                        train_score = np.sum(train_score, axis=-1)

                    # get best f1
                    t, th = bf_search(
                        test_score,
                        y_test[-len(test_score):],
                        start=config.bf_search_min,
                        end=config.bf_search_max,
                        step_num=int(
                            abs(config.bf_search_max - config.bf_search_min) /
                            config.bf_search_step_size),
                        display_freq=50,
                    )
                    # get pot results
                    pot_result = pot_eval(
                        train_score,
                        test_score,
                        y_test[-len(test_score):],
                        level=config.level,
                    )

                    # output the results
                    best_valid_metrics.update({
                        "best-f1": t[0],
                        "precision": t[1],
                        "recall": t[2],
                        "TP": t[3],
                        "TN": t[4],
                        "FP": t[5],
                        "FN": t[6],
                        "latency": t[-1],
                        "threshold": th,
                        "test_score": test_score,
                        "labels": y_test[-len(test_score):],
                    })
                    best_valid_metrics.update(pot_result)
                results.update_metrics(best_valid_metrics)

            if config.save_dir is not None:
                # save the variables
                var_dict = get_variables_as_dict(model_vs)
                saver = VariableSaver(var_dict, config.save_dir)
                saver.save()
            print("=" * 30 + "result" + "=" * 30)
            pprint(best_valid_metrics)

            return best_valid_metrics
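
A minimal, hypothetical driver for this entry point. The dataset and subdataset names below are placeholders (the "SMD"/"machine-1-1" pair follows the Server Machine Dataset naming convention, but whatever `load_dataset` actually accepts is an assumption here):

# Hypothetical CLI wrapper around the main() above; argument names and
# defaults are illustrative, not part of the original script.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run OmniAnomaly on one entity")
    parser.add_argument("--dataset", default="SMD")
    parser.add_argument("--subdataset", default="machine-1-1")
    args = parser.parse_args()
    metrics = main(args.dataset, args.subdataset)
    print("best-f1:", metrics.get("best-f1"))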
Example #2
def main():
    logging.basicConfig(
        level='INFO',
        format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')

    # prepare the data
    (x_train, _), (x_test, y_test) = get_data(
        config.dataset,
        config.max_train_size,
        config.max_test_size,
        train_start=config.train_start,
        test_start=config.test_start)

    # construct the model under `variable_scope` named 'model'
    with tf.variable_scope('model') as model_vs:
        model = OmniAnomaly(config=config, name="model")

        # construct the trainer
        trainer = Trainer(model=model,
                          model_vs=model_vs,
                          max_epoch=config.max_epoch,
                          batch_size=config.batch_size,
                          valid_batch_size=config.test_batch_size,
                          initial_lr=config.initial_lr,
                          lr_anneal_epochs=config.lr_anneal_epoch_freq,
                          lr_anneal_factor=config.lr_anneal_factor,
                          grad_clip_norm=config.gradient_clip_norm,
                          valid_step_freq=config.valid_step_freq)

        # construct the predictor
        predictor = Predictor(model,
                              batch_size=config.batch_size,
                              n_z=config.test_n_z,
                              last_point_only=True)

        with tf.Session().as_default():

            if config.restore_dir is not None:
                # Restore variables from `restore_dir`.
                saver = VariableSaver(get_variables_as_dict(model_vs),
                                      config.restore_dir)
                saver.restore()

            if config.max_epoch > 0:
                # train the model
                train_start = time.time()
                best_valid_metrics = trainer.fit(x_train)
                # record the average wall-clock time per training epoch
                train_time = (time.time() - train_start) / config.max_epoch
                best_valid_metrics.update({'train_time': train_time})
            else:
                best_valid_metrics = {}

            # get score of train set for POT algorithm
            train_score, train_z, train_pred_speed = predictor.get_score(
                x_train)
            if config.train_score_filename is not None:
                with open(
                        os.path.join(config.result_dir,
                                     config.train_score_filename),
                        'wb') as file:
                    pickle.dump(train_score, file)
            if config.save_z:
                save_z(train_z, 'train_z')

            if x_test is not None:
                # get score of test set
                test_start = time.time()
                test_score, test_z, pred_speed = predictor.get_score(x_test)
                test_time = time.time() - test_start
                if config.save_z:
                    save_z(test_z, 'test_z')
                best_valid_metrics.update({
                    'pred_time': pred_speed,
                    'pred_total_time': test_time
                })
                if config.test_score_filename is not None:
                    with open(
                            os.path.join(config.result_dir,
                                         config.test_score_filename),
                            'wb') as file:
                        pickle.dump(test_score, file)

                if y_test is not None and len(y_test) >= len(test_score):
                    if config.get_score_on_dim:
                        # get the joint score
                        test_score = np.sum(test_score, axis=-1)
                        train_score = np.sum(train_score, axis=-1)

                    # get best f1
                    t, th = bf_search(
                        test_score,
                        y_test[-len(test_score):],
                        start=config.bf_search_min,
                        end=config.bf_search_max,
                        step_num=int(
                            abs(config.bf_search_max - config.bf_search_min) /
                            config.bf_search_step_size),
                        display_freq=50)
                    # get pot results
                    pot_result = pot_eval(train_score,
                                          test_score,
                                          y_test[-len(test_score):],
                                          level=config.level)

                    # output the results
                    best_valid_metrics.update({
                        'best-f1': t[0],
                        'precision': t[1],
                        'recall': t[2],
                        'TP': t[3],
                        'TN': t[4],
                        'FP': t[5],
                        'FN': t[6],
                        'latency': t[-1],
                        'threshold': th
                    })
                    best_valid_metrics.update(pot_result)
                results.update_metrics(best_valid_metrics)

            if config.save_dir is not None:
                # save the variables
                var_dict = get_variables_as_dict(model_vs)
                saver = VariableSaver(var_dict, config.save_dir)
                saver.save()
            print('=' * 30 + 'result' + '=' * 30)
            pprint(best_valid_metrics)
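
All three examples pick their threshold with `bf_search`, a brute-force scan for the F1-maximizing value between `bf_search_min` and `bf_search_max`. Below is a self-contained sketch of that idea, assuming a higher score means more anomalous; OmniAnomaly's real `bf_search` additionally applies point-adjust/latency handling and may flip the comparison for likelihood-based scores:

import numpy as np

def best_f1_threshold(score, label, start, end, step_num):
    # Scan step_num evenly spaced thresholds; keep the one with the best F1.
    best_f1, best_th = -1.0, start
    for th in np.linspace(start, end, step_num):
        pred = score > th                     # higher score = more anomalous
        tp = np.sum(pred & (label == 1))
        fp = np.sum(pred & (label == 0))
        fn = np.sum(~pred & (label == 1))
        precision = tp / max(tp + fp, 1)
        recall = tp / max(tp + fn, 1)
        f1 = 2 * precision * recall / max(precision + recall, 1e-9)
        if f1 > best_f1:
            best_f1, best_th = f1, float(th)
    return best_f1, best_th

The np.linspace grid mirrors how the examples derive step_num from bf_search_min, bf_search_max and bf_search_step_size.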
Example #3
def main():
    if config.GPU_device_number != "-1":
        os.environ["CUDA_VISIBLE_DEVICES"] = config.GPU_device_number
    logging.basicConfig(
        level='INFO',
        format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')
    save_z_flag = int(config.save_z)
    get_score_flag = int(config.get_score_for_each_machine_flag)
    # comma-separated config strings become lists; by default each dataset
    # gets an equal sampling weight during training
    if config.untrainable_variables_keyvalues is not None:
        config.untrainable_variables_keyvalues = \
            config.untrainable_variables_keyvalues.replace(" ", '').split(',')
    dataset_list = config.dataset.replace(" ", '').split(',')
    if config.sample_ratio is None:
        config.sample_ratio = 1.0 / len(dataset_list)
    config.x_dim = get_data_dim(dataset_list)

    # prepare the data
    (x_train_list, _), (x_test_list, y_test_list) = get_data(
        dataset_list, config.max_train_size, config.max_test_size,
        train_start=config.train_start, test_start=config.test_start)

    # construct the model under a `variable_scope` named after `save_dir`
    with tf.variable_scope(config.save_dir) as model_vs:
        model = OmniAnomaly(config=config, name=config.save_dir)
        # construct the trainer
        trainer = Trainer(model=model,
                          model_vs=model_vs,
                          max_epoch=config.max_epoch,
                          batch_size=config.batch_size,
                          valid_batch_size=config.test_batch_size,
                          initial_lr=config.initial_lr,
                          lr_anneal_epochs=config.lr_anneal_epoch_freq,
                          lr_anneal_factor=config.lr_anneal_factor,
                          grad_clip_norm=config.gradient_clip_norm,
                          valid_step_freq=config.valid_step_freq,
                          untrainable_variables_keyvalues=(
                              config.untrainable_variables_keyvalues))

        # construct the predictor
        predictor = Predictor(model,
                              batch_size=config.batch_size,
                              n_z=config.test_n_z,
                              last_point_only=True)

        with tf.Session().as_default():

            if config.restore_dir is not None:
                # Restore variables from `restore_dir`.
                saver = VariableSaver(get_variables_as_dict(model_vs),
                                      config.restore_dir)
                saver.restore()

            if config.max_epoch > 0:
                # train the model
                train_start = time.time()
                best_valid_metrics = trainer.fit(
                    x_train_list, sample_ratio=config.sample_ratio)
                train_time = (time.time() - train_start) / config.max_epoch
                best_valid_metrics.update({'train_time': train_time})
            else:
                best_valid_metrics = {}

            # get score of train set for POT algorithm
            if get_score_flag:
                for ds, x_train, x_test, y_test in zip(dataset_list,
                                                       x_train_list,
                                                       x_test_list,
                                                       y_test_list):
                    train_score, train_z, train_pred_speed = predictor.get_score(
                        x_train)
                    if config.train_score_filename is not None:
                        with open(
                                os.path.join(
                                    config.result_dir,
                                    f'{ds}-{config.train_score_filename}'),
                                'wb') as file:
                            pickle.dump(train_score, file)
                    if save_z_flag:
                        save_z(
                            train_z,
                            os.path.join(config.result_dir, f'{ds}-train_z'))

                    test_start = time.time()
                    test_score, test_z, pred_speed = predictor.get_score(
                        x_test)
                    test_time = time.time() - test_start
                    if config.test_score_filename is not None:
                        with open(
                                os.path.join(
                                    config.result_dir,
                                    f'{ds}-{config.test_score_filename}'),
                                'wb') as file:
                            pickle.dump(test_score, file)
                    if save_z_flag:
                        save_z(test_z,
                               os.path.join(config.result_dir, f'{ds}-test_z'))

                    if y_test is not None and len(y_test) >= len(test_score):
                        if config.get_score_on_dim:
                            # get the joint score
                            test_score = np.sum(test_score, axis=-1)
                            train_score = np.sum(train_score, axis=-1)

                        # get best f1
                        t, th = bf_search(test_score,
                                          y_test[-len(test_score):],
                                          start=config.bf_search_min,
                                          end=config.bf_search_max,
                                          step_num=int(
                                              abs(config.bf_search_max -
                                                  config.bf_search_min) /
                                              config.bf_search_step_size),
                                          display_freq=50)
                        # get pot results
                        pot_result = pot_eval(train_score,
                                              test_score,
                                              y_test[-len(test_score):],
                                              level=config.level)
                        result_dict = {
                            'pred_time': pred_speed,
                            'pred_total_time': test_time,
                            'best-f1': t[0],
                            'precision': t[1],
                            'recall': t[2],
                            'TP': t[3],
                            'TN': t[4],
                            'FP': t[5],
                            'FN': t[6],
                            'latency': t[-1],
                            'threshold': th
                        }
                        result_dict.update(pot_result)
                        # write the per-dataset report as real JSON (needs a
                        # module-level `import json`); numpy scalars are
                        # coerced to plain floats via default=float
                        with open(
                                os.path.join(config.result_dir,
                                             f'{ds}-result.json'),
                                'w') as file:
                            json.dump(result_dict, file, default=float)
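            # In this multi-dataset variant the per-entity evaluation results
            # live in the f'{ds}-result.json' files written above;
            # best_valid_metrics (printed below) only carries the training /
            # validation metrics from trainer.fit plus train_time.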

            if config.save_dir is not None:
                # save the variables
                var_dict = get_variables_as_dict(model_vs)
                saver = VariableSaver(var_dict, config.save_dir)
                saver.save()
            print('=' * 30 + 'result' + '=' * 30)
            pprint(best_valid_metrics)
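
The label-free side of the evaluation is `pot_eval`, which sets a threshold by peaks-over-threshold: fit a Generalized Pareto Distribution to the tail of the training scores and read the threshold off an extreme quantile (the SPOT method of Siffer et al.). A rough SciPy sketch of that idea follows, again assuming a higher score means more anomalous; the actual `pot_eval` differs in detail, including how it interprets `config.level`:

import numpy as np
from scipy.stats import genpareto

def pot_threshold(train_score, q=1e-4, init_level=0.98):
    # Initial high threshold: the init_level-quantile of the training scores.
    init_th = np.quantile(train_score, init_level)
    excesses = train_score[train_score > init_th] - init_th
    # Fit a GPD (shape c, scale sigma) to the excesses, location pinned at 0.
    c, _, scale = genpareto.fit(excesses, floc=0)
    n, n_t = len(train_score), len(excesses)
    if abs(c) < 1e-9:                  # exponential-tail limit as c -> 0
        return init_th + scale * np.log(n_t / (q * n))
    # SPOT threshold: z_q = t + (sigma / c) * ((q * n / n_t) ** (-c) - 1)
    return init_th + scale / c * ((q * n / n_t) ** (-c) - 1)

Here q plays the role of the target risk (expected fraction of points flagged); in the examples that role is filled by whatever pot_eval derives from config.level.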