예제 #1
0
    def test_simple_index_encoding_parsing(self):
        """Feed simple-index encoded train/test data through the parser.

        The test makes no assertions: it passes as long as parsing the
        training dataset, the targets and the testing dataset raises nothing.
        """
        simple_index_value = ValueEncodings.SIMPLE_INDEX.value
        encoding = create_test_encoding(value_encoding=simple_index_value, prefix_length=3, padding=True)
        parser = self._get_parser(encoding=simple_index_value)

        encoded_train = simple_index(self.train_log, self.labelling, encoding)
        encoded_test = simple_index(self.test_log, self.labelling, encoding)

        train_features, train_targets = self._drop_columns_and_split(encoded_train)
        test_features, _ = self._drop_columns_and_split(encoded_test)
        # Overwrite the first test cell with a fixed literal; presumably this
        # exercises a value the parser has not seen in training -- TODO confirm.
        test_features.iloc[0, 0] = 'test123'

        parser.parse_training_dataset(train_features)
        parser.parse_targets(train_targets)
        parser.parse_testing_dataset(test_features)
    def test_header(self):
        """Check that the encoded frame contains the expected column names."""
        encoded = simple_index(self.log, self.labelling, self.encoding)

        for column in ("trace_id", "label", "elapsed_time", "prefix_1"):
            self.assertIn(column, encoded.columns.values)
    def test_prefix1_no_label(self):
        """Encode with a NO_LABEL labelling and verify the shape and both rows."""
        no_label = create_test_labelling(label_type=LabelTypes.NO_LABEL.value)
        encoded = simple_index(self.log, no_label, self.encoding)

        self.assertEqual(encoded.shape, (2, 2))

        # Each expected row starts with the trace id used to look it up.
        expected_rows = (
            ['5', 'register request'],
            ['4', 'register request'],
        )
        for expected in expected_rows:
            row = encoded[encoded.trace_id == expected[0]].iloc[0]
            self.assertListEqual(expected, row.values.tolist())
    def test_prefix1(self):
        """Encode with the fixture labelling/encoding and verify shape and rows."""
        encoded = simple_index(self.log, self.labelling, self.encoding)

        self.assertEqual(encoded.shape, (2, 4))

        # Each expected row starts with the trace id used to look it up.
        expected_rows = (
            ['5', 'register request', 0.0, 1576440.0],
            ['4', 'register request', 0.0, 520920.0],
        )
        for expected in expected_rows:
            row = encoded[encoded.trace_id == expected[0]].iloc[0]
            self.assertListEqual(expected, row.values.tolist())
예제 #5
0
    def test_nn_classifier_simple_index_multiclass_no_exceptions(self):
        """Fit and query an NNClassifier on next-activity labelled data.

        The test makes no assertions: it passes when fit, predict and
        predict_proba all complete without raising.
        """
        encoding = create_test_encoding(
            value_encoding=ValueEncodings.SIMPLE_INDEX.value,
            prefix_length=2,
            padding=True)
        labelling = create_test_labelling(label_type=LabelTypes.NEXT_ACTIVITY.value)

        encoded_train = simple_index(self.train_log, labelling, encoding)
        encoded_test = simple_index(self.test_log, labelling, encoding)

        train_features, train_targets = self._drop_columns_and_split(encoded_train)
        # The classifier is given the targets as a flattened array.
        flat_targets = train_targets.values.ravel()

        test_features, _ = self._drop_columns_and_split(encoded_test)

        classifier = NNClassifier(**self._get_nn_default_config(binary=False))

        with HidePrints():
            classifier.fit(train_features, flat_targets)
            classifier.predict(test_features)
            classifier.predict_proba(test_features)
    def test_prefix2(self):
        """Encode with prefix length 2 and verify the shape and both rows."""
        encoding_options = {
            'value_encoding': ValueEncodings.FREQUENCY.value,
            'add_elapsed_time': True,
            'task_generation_type': TaskGenerationTypes.ONLY_THIS.value,
            'prefix_length': 2,
        }
        encoded = simple_index(self.log, self.labelling, create_test_encoding(**encoding_options))

        self.assertEqual(encoded.shape, (2, 5))

        # Each expected row starts with the trace id used to look it up.
        expected_rows = (
            ['5', 'register request', 'examine casually', 90840.0, 1485600.0],
            ['4', 'register request', 'check ticket', 75840.0, 445080.0],
        )
        for expected in expected_rows:
            row = encoded[encoded.trace_id == expected[0]].iloc[0]
            self.assertListEqual(expected, row.values.tolist())
    def test_rnn_time_series_predictor_simple_index_no_exceptions(self):
        """Fit and query an RNNTimeSeriesPredictor on simple-index data.

        The test makes no assertions: it passes when fit and predict both
        complete without raising.
        """
        encoding_options = {
            'value_encoding': ValueEncodings.SIMPLE_INDEX.value,
            'prefix_length': 5,
            'padding': True,
        }
        encoding = create_test_encoding(**encoding_options)
        labelling = create_test_labelling(
            label_type=LabelTypes.DURATION.value,
            threshold_type=ThresholdTypes.THRESHOLD_MEAN.value)

        encoded_train = simple_index(self.train_log, labelling, encoding)
        encoded_test = simple_index(self.test_log, labelling, encoding)

        # Only the feature halves are used; the predictor is fitted without targets.
        train_features, _ = self._drop_columns_and_split(encoded_train)
        test_features, _ = self._drop_columns_and_split(encoded_test)

        predictor = RNNTimeSeriesPredictor(**self._get_rnn_default_config())

        # NOTE(review): a `with HidePrints():` wrapper was commented out here,
        # so output from fit/predict is not suppressed -- confirm this is intended.
        predictor.fit(train_features)
        predictor.predict(test_features)
    def test_nn_regressor_simple_index_no_exceptions(self):
        """Fit and query an NNRegressor on remaining-time labelled data.

        The test makes no assertions: it passes when fit and predict both
        complete without raising.
        """
        encoding_options = {
            'value_encoding': ValueEncodings.SIMPLE_INDEX.value,
            'prefix_length': 2,
            'padding': True,
        }
        encoding = create_test_encoding(**encoding_options)
        labelling = create_test_labelling(label_type=LabelTypes.REMAINING_TIME.value)

        encoded_train = simple_index(self.train_log, labelling, encoding)
        encoded_test = simple_index(self.test_log, labelling, encoding)

        train_features, train_targets = self._drop_columns_and_split(encoded_train)
        # The regressor is given the targets as a flattened array.
        flat_targets = train_targets.values.ravel()

        test_features, _ = self._drop_columns_and_split(encoded_test)

        regressor = NNRegressor(**self._get_nn_default_config())

        # NOTE(review): a `with HidePrints():` wrapper was commented out here,
        # so output from fit/predict is not suppressed -- confirm this is intended.
        regressor.fit(train_features, flat_targets)
        regressor.predict(test_features)
    def test_prefix10_padding(self):
        """Encode with prefix length 10 and padding; verify trace '4' and no nulls."""
        encoding = create_test_encoding(
            value_encoding=ValueEncodings.FREQUENCY.value,
            add_elapsed_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=10,
            padding=True)
        encoded = simple_index(self.log, self.labelling, encoding)

        self.assertEqual(encoded.shape, (2, 13))

        # Five activities, then five 0 entries filling the prefix up to 10.
        expected_activities = ['register request', 'check ticket', 'examine thoroughly', 'decide',
                               'reject request']
        expected_row = ['4'] + expected_activities + [0] * 5 + [520920.0, 0.0]

        trace_4 = encoded[encoded.trace_id == '4'].iloc[0]
        self.assertListEqual(expected_row, trace_4.values.tolist())
        self.assertFalse(encoded.isnull().values.any())
    def test_prefix10(self):
        """Encode with prefix length 10 (no padding) and verify the single row."""
        encoding = create_test_encoding(
            value_encoding=ValueEncodings.FREQUENCY.value,
            add_elapsed_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=10)
        encoded = simple_index(self.log, self.labelling, encoding)

        self.assertEqual(encoded.shape, (1, 13))

        expected_row = [
            '5',
            'register request', 'examine casually', 'check ticket', 'decide', 'reinitiate request',
            'check ticket', 'examine casually', 'decide', 'reinitiate request', 'examine casually',
            1296240.0, 280200.0,
        ]
        trace_5 = encoded[encoded.trace_id == '5'].iloc[0]
        self.assertListEqual(expected_row, trace_5.values.tolist())
    def test_prefix1_no_elapsed_time(self):
        """Encode without requesting add_elapsed_time and verify shape and rows."""
        remaining_time_label = create_test_labelling(label_type=LabelTypes.REMAINING_TIME.value)
        encoding_options = {
            'value_encoding': ValueEncodings.FREQUENCY.value,
            'task_generation_type': TaskGenerationTypes.ONLY_THIS.value,
            'prefix_length': 1,
        }
        encoding = create_test_encoding(**encoding_options)
        encoded = simple_index(self.log, remaining_time_label, encoding)

        self.assertEqual(encoded.shape, (2, 3))

        # Each expected row starts with the trace id used to look it up.
        expected_rows = (
            ['5', 'register request', 1576440.0],
            ['4', 'register request', 520920.0],
        )
        for expected in expected_rows:
            row = encoded[encoded.trace_id == expected[0]].iloc[0]
            self.assertListEqual(expected, row.values.tolist())
    def test_prefix10_all_in_one(self):
        """Encode with ALL_IN_ONE task generation; verify shape, one row, no nulls."""
        encoding_options = {
            'value_encoding': ValueEncodings.FREQUENCY.value,
            'add_elapsed_time': True,
            'task_generation_type': TaskGenerationTypes.ALL_IN_ONE.value,
            'prefix_length': 10,
        }
        encoded = simple_index(self.log, self.labelling, create_test_encoding(**encoding_options))

        self.assertEqual(encoded.shape, (10, 13))

        expected_row = [
            '5',
            'register request', 'examine casually', 'check ticket', 'decide', 'reinitiate request',
            'check ticket', 'examine casually', 'decide', 'reinitiate request', 'examine casually',
            1296240.0, 280200.0,
        ]
        # The row checked is the tenth one (position 9) among trace '5' rows.
        tenth_row = encoded[encoded.trace_id == '5'].iloc[9]
        self.assertListEqual(expected_row, tenth_row.values.tolist())
        self.assertFalse(encoded.isnull().values.any())
    def test_eval(self):
        """Encode the general example log with padding; verify shape, one row, no nulls."""
        encoding = create_test_encoding(
            value_encoding=ValueEncodings.FREQUENCY.value,
            task_generation_type=TaskGenerationTypes.ALL_IN_ONE.value,
            add_elapsed_time=True,
            prefix_length=12,
            padding=True)
        example_log = get_log(create_test_log(log_path=general_example_filepath, log_name=general_example_filename))
        remaining_time_label = create_test_labelling(label_type=LabelTypes.REMAINING_TIME.value)
        encoded = simple_index(example_log, remaining_time_label, encoding)

        self.assertEqual(encoded.shape, (41, 15))

        # Five activities, then seven 0 entries filling the prefix up to 12.
        expected_activities = ['register request', 'check ticket', 'examine thoroughly', 'decide',
                               'reject request']
        expected_row = ['4'] + expected_activities + [0] * 7 + [520920.0, 0.0]

        # The row checked is the fifth one (position 4) among trace '4' rows.
        fifth_row = encoded[encoded.trace_id == '4'].iloc[4]
        self.assertListEqual(expected_row, fifth_row.values.tolist())
        self.assertFalse(encoded.isnull().values.any())