def test_simple_index_encoding_parsing(self):
    """Feed simple-index encoded frames through the parser.

    The test contains no assertions: it passes when parsing the training
    frame, the targets and the (modified) testing frame raises nothing.
    """
    simple_index_value = ValueEncodings.SIMPLE_INDEX.value
    encoding = create_test_encoding(
        value_encoding=simple_index_value,
        prefix_length=3,
        padding=True,
    )
    parser = self._get_parser(encoding=simple_index_value)

    encoded_train = simple_index(self.train_log, self.labelling, encoding)
    encoded_test = simple_index(self.test_log, self.labelling, encoding)

    train_features, train_targets = self._drop_columns_and_split(encoded_train)
    test_features, _ = self._drop_columns_and_split(encoded_test)

    # Overwrite the first cell of the testing frame with an arbitrary string
    # before parsing (presumably a value the parser has not seen in training).
    test_features.iloc[0, 0] = 'test123'

    parser.parse_training_dataset(train_features)
    parser.parse_targets(train_targets)
    parser.parse_testing_dataset(test_features)
def test_header(self):
    """Check that the encoded frame contains the expected column names."""
    df = simple_index(self.log, self.labelling, self.encoding)

    for column_name in ("trace_id", "label", "elapsed_time", "prefix_1"):
        self.assertIn(column_name, df.columns.values)
def test_prefix1_no_label(self):
    """Encode with a NO_LABEL labelling and check the shape and both rows."""
    no_label = create_test_labelling(label_type=LabelTypes.NO_LABEL.value)
    df = simple_index(self.log, no_label, self.encoding)

    self.assertEqual(df.shape, (2, 2))

    # Each expected row starts with the trace id used to select it.
    expected_rows = (
        ['5', 'register request'],
        ['4', 'register request'],
    )
    for expected in expected_rows:
        row = df[df.trace_id == expected[0]].iloc[0]
        self.assertListEqual(expected, row.values.tolist())
def test_prefix1(self):
    """Encode with the fixture labelling/encoding and check shape and rows."""
    df = simple_index(self.log, self.labelling, self.encoding)

    self.assertEqual(df.shape, (2, 4))

    # Each expected row starts with the trace id used to select it.
    expected_rows = (
        ['5', 'register request', 0.0, 1576440.0],
        ['4', 'register request', 0.0, 520920.0],
    )
    for expected in expected_rows:
        row = df[df.trace_id == expected[0]].iloc[0]
        self.assertListEqual(expected, row.values.tolist())
def test_nn_classifier_simple_index_multiclass_no_exceptions(self):
    """Fit and query an NNClassifier on next-activity labelled data.

    The test contains no assertions: it passes when fit, predict and
    predict_proba complete without raising.
    """
    encoding = create_test_encoding(
        value_encoding=ValueEncodings.SIMPLE_INDEX.value,
        prefix_length=2,
        padding=True,
    )
    labelling = create_test_labelling(label_type=LabelTypes.NEXT_ACTIVITY.value)

    encoded_train = simple_index(self.train_log, labelling, encoding)
    encoded_test = simple_index(self.test_log, labelling, encoding)

    train_features, train_targets = self._drop_columns_and_split(encoded_train)
    # Flatten the targets frame into a 1-D array before fitting.
    train_targets = train_targets.values.ravel()
    test_features, _ = self._drop_columns_and_split(encoded_test)

    nn_classifier = NNClassifier(**self._get_nn_default_config(binary=False))

    with HidePrints():
        nn_classifier.fit(train_features, train_targets)
        nn_classifier.predict(test_features)
        nn_classifier.predict_proba(test_features)
def test_prefix2(self):
    """Encode with prefix_length=2 and check the shape and both rows."""
    encoding = create_test_encoding(
        value_encoding=ValueEncodings.FREQUENCY.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=2,
    )
    df = simple_index(self.log, self.labelling, encoding)

    self.assertEqual(df.shape, (2, 5))

    # Each expected row starts with the trace id used to select it.
    expected_rows = (
        ['5', 'register request', 'examine casually', 90840.0, 1485600.0],
        ['4', 'register request', 'check ticket', 75840.0, 445080.0],
    )
    for expected in expected_rows:
        row = df[df.trace_id == expected[0]].iloc[0]
        self.assertListEqual(expected, row.values.tolist())
def test_rnn_time_series_predictor_simple_index_no_exceptions(self):
    """Fit and query an RNNTimeSeriesPredictor on simple-index encoded data.

    The test contains no assertions: it passes when fit and predict
    complete without raising.
    """
    encoding = create_test_encoding(
        value_encoding=ValueEncodings.SIMPLE_INDEX.value,
        prefix_length=5,
        padding=True,
    )
    labelling = create_test_labelling(
        label_type=LabelTypes.DURATION.value,
        threshold_type=ThresholdTypes.THRESHOLD_MEAN.value,
    )

    encoded_train = simple_index(self.train_log, labelling, encoding)
    encoded_test = simple_index(self.test_log, labelling, encoding)

    # The targets half of the split is not used by this predictor's fit().
    train_features, _ = self._drop_columns_and_split(encoded_train)
    test_features, _ = self._drop_columns_and_split(encoded_test)

    rnn_time_series_predictor = RNNTimeSeriesPredictor(
        **self._get_rnn_default_config())

    # NOTE: fit/predict are deliberately not wrapped in HidePrints() here;
    # the wrapper was commented out in this test.
    rnn_time_series_predictor.fit(train_features)
    rnn_time_series_predictor.predict(test_features)
def test_nn_regressor_simple_index_no_exceptions(self):
    """Fit and query an NNRegressor on remaining-time labelled data.

    The test contains no assertions: it passes when fit and predict
    complete without raising.
    """
    encoding = create_test_encoding(
        value_encoding=ValueEncodings.SIMPLE_INDEX.value,
        prefix_length=2,
        padding=True,
    )
    labelling = create_test_labelling(
        label_type=LabelTypes.REMAINING_TIME.value)

    encoded_train = simple_index(self.train_log, labelling, encoding)
    encoded_test = simple_index(self.test_log, labelling, encoding)

    train_features, train_targets = self._drop_columns_and_split(encoded_train)
    # Flatten the targets frame into a 1-D array before fitting.
    train_targets = train_targets.values.ravel()
    test_features, _ = self._drop_columns_and_split(encoded_test)

    nn_regressor = NNRegressor(**self._get_nn_default_config())

    # NOTE: fit/predict are deliberately not wrapped in HidePrints() here;
    # the wrapper was commented out in this test.
    nn_regressor.fit(train_features, train_targets)
    nn_regressor.predict(test_features)
def test_prefix10_padding(self):
    """Encode with prefix_length=10 and padding; check shape, one row, no nulls."""
    encoding = create_test_encoding(
        value_encoding=ValueEncodings.FREQUENCY.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=10,
        padding=True,
    )
    df = simple_index(self.log, self.labelling, encoding)

    self.assertEqual(df.shape, (2, 13))

    trace_4 = df[df.trace_id == '4'].iloc[0]
    # Five activity names followed by five zero-filled prefix slots.
    expected = (
        ['4', 'register request', 'check ticket', 'examine thoroughly',
         'decide', 'reject request']
        + [0] * 5
        + [520920.0, 0.0]
    )
    self.assertListEqual(expected, trace_4.values.tolist())

    self.assertFalse(df.isnull().values.any())
def test_prefix10(self):
    """Encode with prefix_length=10 (no padding); check shape and the single row."""
    encoding = create_test_encoding(
        value_encoding=ValueEncodings.FREQUENCY.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=10,
    )
    df = simple_index(self.log, self.labelling, encoding)

    self.assertEqual(df.shape, (1, 13))

    trace_5 = df[df.trace_id == '5'].iloc[0]
    expected = [
        '5',
        'register request', 'examine casually', 'check ticket', 'decide',
        'reinitiate request', 'check ticket', 'examine casually', 'decide',
        'reinitiate request', 'examine casually',
        1296240.0, 280200.0,
    ]
    self.assertListEqual(expected, trace_5.values.tolist())
def test_prefix1_no_elapsed_time(self):
    """Encode without add_elapsed_time; check the shape and both rows."""
    remaining_time_label = create_test_labelling(
        label_type=LabelTypes.REMAINING_TIME.value)
    encoding = create_test_encoding(
        value_encoding=ValueEncodings.FREQUENCY.value,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=1,
    )
    df = simple_index(self.log, remaining_time_label, encoding)

    self.assertEqual(df.shape, (2, 3))

    # Each expected row starts with the trace id used to select it.
    expected_rows = (
        ['5', 'register request', 1576440.0],
        ['4', 'register request', 520920.0],
    )
    for expected in expected_rows:
        row = df[df.trace_id == expected[0]].iloc[0]
        self.assertListEqual(expected, row.values.tolist())
def test_prefix10_all_in_one(self):
    """Encode with ALL_IN_ONE task generation; check shape, one row, no nulls."""
    all_in_one_encoding = create_test_encoding(
        value_encoding=ValueEncodings.FREQUENCY.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ALL_IN_ONE.value,
        prefix_length=10,
    )
    df = simple_index(self.log, self.labelling, all_in_one_encoding)

    self.assertEqual(df.shape, (10, 13))

    # Tenth row (position 9) among the rows belonging to trace '5'.
    trace_5_last = df[df.trace_id == '5'].iloc[9]
    expected = [
        '5',
        'register request', 'examine casually', 'check ticket', 'decide',
        'reinitiate request', 'check ticket', 'examine casually', 'decide',
        'reinitiate request', 'examine casually',
        1296240.0, 280200.0,
    ]
    self.assertListEqual(expected, trace_5_last.values.tolist())

    self.assertFalse(df.isnull().values.any())
def test_eval(self):
    """Encode the general example log with padding; check shape, one row, no nulls."""
    encoding = create_test_encoding(
        value_encoding=ValueEncodings.FREQUENCY.value,
        task_generation_type=TaskGenerationTypes.ALL_IN_ONE.value,
        add_elapsed_time=True,
        prefix_length=12,
        padding=True,
    )
    log = get_log(create_test_log(log_path=general_example_filepath,
                                  log_name=general_example_filename))
    labelling = create_test_labelling(
        label_type=LabelTypes.REMAINING_TIME.value)
    df = simple_index(log, labelling, encoding)

    self.assertEqual(df.shape, (41, 15))

    # Fifth row (position 4) among the rows belonging to trace '4'.
    trace_4_row = df[df.trace_id == '4'].iloc[4]
    # Five activity names followed by seven zero-filled prefix slots.
    expected = (
        ['4', 'register request', 'check ticket', 'examine thoroughly',
         'decide', 'reject request']
        + [0] * 7
        + [520920.0, 0.0]
    )
    self.assertListEqual(expected, trace_4_row.values.tolist())

    self.assertFalse(df.isnull().values.any())