Example #1
def test_detector():
    # read in the data
    file_path = data_file_path  # data_file_path is assumed to be defined elsewhere in the test module
    data = CSVDataset(file_path, header=1, timestamp=0, values=1,
                      test_size=0).get_data()[0]
    data = list(
        zip([
            datetime.datetime.strptime(x, '%Y-%m-%d %H:%M:%S').timestamp()
            for x in data["timestamp"]
        ], data["values"]))

    # find the min and max of the values
    min_value = 10e10
    max_value = -10e10
    for record in data:
        if min_value > record[1]:
            min_value = record[1]
        if max_value < record[1]:
            max_value = record[1]

    # initialize the detector, assume a docker service is already running
    detector = HTMAnomalyDetector("timestamp", "value")
    # set the probation_number to be 10% of the length of the original data set
    detector.initialize(docker_path=docker_path,
                        probation_number=int(len(data) * 0.10),
                        lower_data_limit=min_value,
                        upper_data_limit=max_value)

    # train with data
    result = detector.train(data)
    result_anomaly_score = []
    for r in result:
        result_anomaly_score.append(r["anomalyScore"])
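
Example #1 only collects the anomaly scores; a minimal, hedged continuation of the function body (assuming matplotlib.pyplot is available as plt, as in Example #3) could visualize them:

    # hypothetical follow-up: plot the collected anomaly scores over record index
    plt.plot(range(len(result_anomaly_score)), result_anomaly_score)
    plt.xlabel('Index')
    plt.ylabel('Anomaly score')
    plt.show()
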
Example #2
def test_detector():
    # read in the data
    file_path = project_path + "/../data/NAB_data/data/realAWSCloudwatch/ec2_cpu_utilization_5f5533.csv"
    data = CSVDataset(file_path, header=1, values=1, test_size=0).get_data()[0]["values"]

    # find the min and max of the values
    min_value = min(data)
    max_value = max(data)

    # initialize the detector
    detector = ContextOSEDetector()

    # set the probationary period to be 150 for testing
    detector.initialize(min_value=min_value, max_value=max_value, probationary_period=150)

    # handle all the records
    all_result = detector.handle_record_sequence(data)
    draw_array(all_result)
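
draw_array is a project helper that is not shown in these examples; a minimal stand-in, assuming it simply line-plots a one-dimensional array of scores with matplotlib, might look like this:

import matplotlib.pyplot as plt
import numpy as np

def draw_array(values):
    # hypothetical stand-in for the project's draw_array helper, assuming it
    # just line-plots a 1-D sequence of values
    values = np.asarray(values)
    plt.plot(np.arange(len(values)), values)
    plt.xlabel('Index')
    plt.ylabel('Value')
    plt.show()
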
Example #3
def ttest_detector_on_file():
    detector = AutoEncoderDetectorForest()

    window_size = 4

    # tell the detector that each input record has length window_size (4 here);
    # the second argument is presumably the number of autoencoders in the forest
    detector.initialize(window_size, 3)

    file_path = project_path + "/../data/NAB_data/data/realAWSCloudwatch/ec2_cpu_utilization_5f5533.csv"
    data = CSVDataset(file_path, header=1, values=1,
                      test_size=0).get_data()[0]["values"]

    windowed_data = windowed_list(data, window_size=window_size)

    # randomly sample 15% of the windows for training
    # (note: random.choices samples with replacement)
    training_data = random.choices(windowed_data,
                                   k=int(len(windowed_data) * 0.15))

    training_data = torch.from_numpy(np.array(training_data, dtype="float32"))

    loss_list = detector.train(training_data, num_epochs=4000, verbose=True)

    result = detector.handle_record_sequence(windowed_data)

    plots = len(loss_list) + 2
    for i in range(1, plots - 1):
        plt.subplot(plots * 100 + 10 + i)
        plt.plot(np.arange(len(loss_list[i - 1])), loss_list[i - 1])
        plt.xlabel('Epoch')
        plt.ylabel('Loss')

    plt.subplot(plots * 100 + 10 + plots - 1)
    plt.plot(np.arange(len(data)), data)
    # plt.title('Original Data')
    plt.xlabel('Index')
    plt.ylabel('Value')

    plt.subplot(plots * 100 + 10 + plots)
    plt.plot(np.arange(len(result)), result)
    # plt.title('Anomaly score')
    plt.xlabel('Index')
    plt.ylabel('Score')
    plt.show()
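
windowed_list is another project helper not shown here; a plausible sketch, under the assumption that it produces all consecutive (overlapping) windows of length window_size, is:

def windowed_list(data, window_size):
    # hypothetical sketch of the project's windowed_list helper, assuming it
    # returns every consecutive window of length window_size as a list
    return [list(data[i:i + window_size])
            for i in range(len(data) - window_size + 1)]
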
Example #4
def test_detector():
    # read in the data
    file_path = project_path + "/../data/NAB_data/data/realAWSCloudwatch/ec2_cpu_utilization_5f5533.csv"
    data = CSVDataset(file_path, header=1, values=1,
                      test_size=0).get_data()[0]["values"]

    # find the min and max of the values
    min_value = min(data)
    max_value = max(data)

    # initialize the detector
    detector = RelativeEntropyDetector()
    # set the window_size to be 52 and n_bins to be 5 for testing a normal case
    detector.initialize(input_min=min_value,
                        input_max=max_value,
                        window_size=52,
                        n_bins=5)

    # handle all the records
    result = detector.handle_record_sequence(data)
    draw_array(result)
Example #5
def main():
    file_list = _get_file_list()

    for filename in file_list:
        if args.dataset == "credit":
            dataset = CSVDataset(filename,
                                 header=1,
                                 timestamp=0,
                                 values=tuple(range(1, 30)),
                                 label=30,
                                 test_size=args.test_size,
                                 shuffle=args.shuffle)
            x_train, y_train, x_test, y_test = _organize_data(dataset)
        else:
            raise ValueError("dataset %s cannot be recognized" % args.dataset)

        normalizer = Normalizer(zero_mean=True)
        x_train_norm = normalizer.process_training_data(x_train)
        x_test_norm = normalizer.process_testing_data(x_test)
        # convert to torch tensor
        x_train_torch = torch.from_numpy(x_train_norm)
        x_test_torch = torch.from_numpy(x_test_norm)

        model = AutoEncoderDetector()
        # use "not" rather than bitwise "~" so use_gpu is a proper bool
        model.initialize(x_train.shape[1], use_gpu=not args.no_gpu)

        # train the model
        model.train(x_train_torch,
                    num_epochs=args.epochs,
                    verbose=args.verbose)

        # predict the test data
        x_pred_torch = model.predict(x_test_torch)
        x_pred = x_pred_torch.detach().numpy()

        # visualization
        model.visualize(x_test_norm, x_pred, y_test)
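
Normalizer, used in Examples #5 and #6, is also project code; a rough sketch, assuming it standardizes the data with statistics fitted on the training set and reused on the test set, could be:

import numpy as np

class Normalizer:
    # hypothetical sketch of the project's Normalizer: fit mean/std on the
    # training data, reuse them for the test data, and allow de-normalization
    def __init__(self, zero_mean=True):
        self.zero_mean = zero_mean
        self.mean = 0.0
        self.std = 1.0

    def process_training_data(self, x):
        x = np.asarray(x, dtype="float32")
        if self.zero_mean:
            self.mean = x.mean(axis=0)
        self.std = x.std(axis=0) + 1e-8
        return (x - self.mean) / self.std

    def process_testing_data(self, x):
        x = np.asarray(x, dtype="float32")
        return (x - self.mean) / self.std

    def recover_data(self, x_norm):
        return x_norm * self.std + self.mean
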
Example #6
def main():
    # reproducibility
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    random.seed(args.seed)

    val_score_list = []
    test_score_list = []
    y_val_list = []
    y_test_list = []
    file_list = _get_file_list()

    for filename in file_list:
        if args.dataset == "synth":
            dataset = SynthDataset()
            x_train, y_train, x_test, y_test = dataset.get_data()
        elif args.dataset == 'yahoo':
            dataset = CSVDataset(filename,
                                 header=1,
                                 values=1,
                                 label=2,
                                 timestamp=0,
                                 test_size=args.test_size)
            x_train, y_train, x_test, y_test = _organize_data(dataset)
        elif args.dataset == 'nab':
            dataset = CSVDataset(filename,
                                 timestamp=0,
                                 values=1,
                                 label=2,
                                 test_size=args.test_size)
            x_train, y_train, x_test, y_test = _organize_data(dataset)
        else:
            raise ValueError("dataset %s not recognized" % args.dataset)

        normalizer = Normalizer(zero_mean=True)
        x_train_norm = normalizer.process_training_data(x_train)
        x_test_norm = normalizer.process_testing_data(x_test)
        # train, val split
        if args.validate:
            x_train_norm, x_val_norm, y_train, y_val = train_test_split(
                x_train_norm, y_train, test_size=args.val_size, shuffle=False)
            x_val_torch = torch.from_numpy(x_val_norm)
        # convert to torch tensor
        x_train_torch = torch.from_numpy(x_train_norm)
        x_test_torch = torch.from_numpy(x_test_norm)

        output_size = 1
        model = _initialize_model(output_size)

        # train the model
        model.train(x_train_torch.view((1, -1, output_size)),
                    num_epoches=args.epoches,
                    verbose=args.verbose)

        # run prediction over the whole sequence (train [+ val] + test)
        if args.validate:
            x_total = torch.cat((x_train_torch, x_val_torch, x_test_torch), 0)
        else:
            x_total = torch.cat((x_train_torch, x_test_torch), 0)
        # predict
        x_pred_norm = _predict(model, x_total, output_size,
                               x_train_torch.shape[0])
        # calculate score
        test_score = anomaly_score(x_pred_norm[-len(x_test_torch):],
                                   x_test_norm)
        if args.validate:
            val_score = anomaly_score(
                x_pred_norm[len(x_train_torch):len(x_train_torch) +
                            len(x_val_torch)], x_val_norm)

            val_score_list.append(val_score)
            test_score_list.append(test_score)
            y_val_list.append(y_val)
            y_test_list.append(y_test)
        else:
            # de-normalize
            x_pred = normalizer.recover_data(x_pred_norm)
            # visualization
            model.visualize(np.concatenate((x_train, x_test), 0), x_pred,
                            test_score, np.concatenate((y_train, y_test), 0),
                            len(x_train))

    if args.validate:
        _save_results(val_score_list, test_score_list, y_val_list, y_test_list)
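
anomaly_score in Example #6 is likewise not defined in the snippet; one plausible, clearly hypothetical reading is a per-point prediction-error score:

import numpy as np

def anomaly_score(x_pred, x_true):
    # hypothetical stand-in for the project's anomaly_score helper, assuming
    # the score is simply the per-point absolute prediction error
    x_pred = np.asarray(x_pred).reshape(-1)
    x_true = np.asarray(x_true).reshape(-1)
    return np.abs(x_pred - x_true)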