def test_frequency_encoding_parsing(self):
    """Run the parser over frequency-encoded train, target and test frames.

    The test passes when none of the encoding, splitting or parsing calls
    raises; it makes no explicit assertions.
    """
    frequency_value = ValueEncodings.FREQUENCY.value
    encoding = create_test_encoding(
        value_encoding=frequency_value,
        prefix_length=2,
        padding=True,
    )
    parser = self._get_parser(encoding=frequency_value)

    # Encode both logs with the same encoding before splitting them.
    encoded_train = frequency(
        self.train_log, self.train_event_names, self.labelling, encoding)
    encoded_test = frequency(
        self.test_log, self.test_event_names, self.labelling, encoding)

    train_features, train_targets = self._drop_columns_and_split(encoded_train)
    # Only the first element of the test split is fed to the parser.
    test_features = self._drop_columns_and_split(encoded_test)[0]

    parser.parse_training_dataset(train_features)
    parser.parse_targets(train_targets)
    parser.parse_testing_dataset(test_features)
def _eventlog_to_dataframe(log: EventLog, encoding: Encoding, labelling: Labelling, additional_columns=None, cols=None):
    """Encode ``log`` with the encoder selected by ``encoding.value_encoding``.

    :param log: event log handed to the selected encoder
    :param encoding: encoding settings; ``prefix_length`` is validated here and
        ``value_encoding`` selects the encoder
    :param labelling: labelling settings forwarded to the encoder
    :param additional_columns: forwarded to the complex, last-payload and
        declare encoders; ignored by the others
    :param cols: column names forwarded to the boolean, frequency and declare
        encoders. When ``None``, boolean and frequency use
        ``unique_events(log)`` and declare reports the columns of the frame it
        produced.
    :return: tuple ``(run_df, cols)`` of the encoded frame and the columns
        used (``cols`` is returned unchanged for simple-index, complex and
        last-payload encodings)
    :raises ValueError: if ``encoding.prefix_length`` is below 1 or
        ``encoding.value_encoding`` is not one of the handled encodings
    """
    if encoding.prefix_length < 1:
        # The guard accepts a prefix length of exactly 1, so the message
        # must say "at least 1" rather than "greater than 1".
        raise ValueError("Prefix length must be at least 1")

    value_encoding = encoding.value_encoding

    if value_encoding == ValueEncodings.SIMPLE_INDEX.value:
        run_df = simple_index(log, labelling, encoding)
    elif value_encoding == ValueEncodings.BOOLEAN.value:
        if cols is None:
            cols = unique_events(log)
        run_df = boolean(log, cols, labelling, encoding)
    elif value_encoding == ValueEncodings.FREQUENCY.value:
        if cols is None:
            cols = unique_events(log)
        run_df = frequency(log, cols, labelling, encoding)
    elif value_encoding == ValueEncodings.COMPLEX.value:
        run_df = complex(log, labelling, encoding, additional_columns)
    elif value_encoding == ValueEncodings.LAST_PAYLOAD.value:
        run_df = last_payload(log, labelling, encoding, additional_columns)
    # TODO(JONAS): ValueEncodings.SEQUENCES is not dispatched yet; the planned
    # call was sequences(log, labelling, encoding, additional_columns).
    elif value_encoding == ValueEncodings.DECLARE.value:
        run_df = declare_encoding(log, labelling, encoding, additional_columns, cols=cols)
        if cols is None:
            # Report the columns of the produced frame back to the caller.
            cols = list(run_df.columns)
    else:
        raise ValueError("Unknown value encoding method {}".format(
            value_encoding))

    return run_df, cols
def test_prefix1_no_elapsed_time(self):
    """Prefix length 1 without ``add_elapsed_time`` yields no elapsed_time column."""
    settings = dict(
        value_encoding=ValueEncodings.FREQUENCY.value,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=1,
    )
    encoding = create_test_encoding(**settings)

    frame = frequency(self.log, self.event_names, self.labelling, encoding)

    self.assertEqual(frame.shape, (2, 9))
    column_names = frame.columns.values.tolist()
    self.assertNotIn('elapsed_time', column_names)
def test_header(self):
    """Every expected event, id, label and elapsed-time column is present."""
    frame = frequency(self.log, self.event_names, self.labelling, self.encoding)
    expected_columns = (
        'register request',
        'examine casually',
        'check ticket',
        'decide',
        'reinitiate request',
        'examine thoroughly',
        'reject request',
        'trace_id',
        'label',
        'elapsed_time',
    )
    # The frame is not modified in the loop, so the header is read once.
    actual_columns = frame.columns.values.tolist()
    for column in expected_columns:
        self.assertIn(column, actual_columns)
def test_prefix1(self):
    """Check shape, two event columns and the label for traces '5' and '4'.

    Uses the fixture encoding in ``self.encoding`` (presumably prefix
    length 1, judging by the test name -- confirm against ``setUp``).
    """
    frame = frequency(self.log, self.event_names, self.labelling, self.encoding)
    self.assertEqual(frame.shape, (2, 10))

    # (trace id, expected label); both traces share the same column checks.
    expected_labels = (('5', 1576440.0), ('4', 520920.0))
    for trace_id, expected_label in expected_labels:
        row = frame[frame.trace_id == trace_id].iloc[0]
        self.assertTrue(row['register request'])
        self.assertFalse(row['examine casually'])
        self.assertEqual(expected_label, row.label)
def test_prefix10(self):
    """Prefix length 10 leaves one row (trace '5') with exact counts and no nulls."""
    settings = dict(
        value_encoding=ValueEncodings.FREQUENCY.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=10,
    )
    encoding = create_test_encoding(**settings)

    frame = frequency(self.log, self.event_names, self.labelling, encoding)
    self.assertEqual(frame.shape, (1, 10))

    expected_row = ['5', 1, 3, 2, 2, 2, 0, 0, 1296240.0, 280200.0]
    observed_row = frame[frame.trace_id == '5'].iloc[0].values.tolist()
    self.assertListEqual(expected_row, observed_row)

    self.assertFalse(frame.isnull().values.any())
def test_prefix5(self):
    """Prefix length 5 yields two rows; trace '5' matches the expected row."""
    settings = dict(
        value_encoding=ValueEncodings.FREQUENCY.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=5,
    )
    encoding = create_test_encoding(**settings)

    frame = frequency(self.log, self.event_names, self.labelling, encoding)
    self.assertEqual(frame.shape, (2, 10))

    observed_row = frame[frame.trace_id == '5'].iloc[0].values.tolist()
    # Values of 1 and 0 compare equal to True and False respectively.
    expected_row = [
        '5',
        True, True, True, True, True,
        False, False,
        458160.0, 1118280.0,
    ]
    self.assertListEqual(expected_row, observed_row)
def test_prefix2(self):
    """Prefix length 2: check event columns and labels of traces '5' and '4'."""
    settings = dict(
        value_encoding=ValueEncodings.FREQUENCY.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=2,
    )
    encoding = create_test_encoding(**settings)

    frame = frequency(self.log, self.event_names, self.labelling, encoding)
    self.assertEqual(frame.shape, (2, 10))

    # Trace '5': both 'register request' and 'examine casually' are set.
    trace_five = frame[frame.trace_id == '5'].iloc[0]
    self.assertTrue(trace_five['register request'])
    self.assertTrue(trace_five['examine casually'])
    self.assertEqual(1485600.0, trace_five.label)

    # Trace '4': 'check ticket' is set instead of 'examine casually'.
    trace_four = frame[frame.trace_id == '4'].iloc[0]
    self.assertTrue(trace_four['register request'])
    self.assertFalse(trace_four['examine casually'])
    self.assertTrue(trace_four['check ticket'])
    self.assertEqual(445080.0, trace_four.label)
def test_prefix1_no_label(self):
    """With NO_LABEL labelling the frame has shape (2, 8) and no 'label' column."""
    unlabelled = create_test_labelling(label_type=LabelTypes.NO_LABEL.value)

    frame = frequency(self.log, self.event_names, unlabelled, self.encoding)

    self.assertEqual(frame.shape, (2, 8))
    column_names = frame.columns.values.tolist()
    self.assertNotIn('label', column_names)