def _eventlog_to_dataframe(log: EventLog, encoding: Encoding, labelling: Labelling, additional_columns=None,
                           cols=None):
    """Encode ``log`` with the encoder selected by ``encoding.value_encoding``.

    :param log: event log handed to the selected encoder
    :param encoding: encoding settings; ``prefix_length`` and ``value_encoding`` are read here
    :param labelling: labelling settings, forwarded unchanged to the encoder
    :param additional_columns: forwarded to the COMPLEX, LAST_PAYLOAD and DECLARE encoders only
    :param cols: columns to encode against. When ``None`` it is derived for BOOLEAN and
        FREQUENCY (via ``unique_events(log)``) and for DECLARE (from the columns of the
        encoded dataframe); the other encoders neither use nor populate it.
    :return: tuple ``(run_df, cols)`` of the encoder's result and the (possibly derived) columns
    :raises ValueError: if ``encoding.prefix_length`` is below 1 or the value encoding is unknown
    """
    if encoding.prefix_length < 1:
        # The guard rejects only values below 1, so the message must not claim 1 is invalid.
        raise ValueError("Prefix length must be at least 1")

    value_encoding = encoding.value_encoding
    if value_encoding == ValueEncodings.SIMPLE_INDEX.value:
        run_df = simple_index(log, labelling, encoding)
    elif value_encoding == ValueEncodings.BOOLEAN.value:
        if cols is None:
            cols = unique_events(log)
        run_df = boolean(log, cols, labelling, encoding)
    elif value_encoding == ValueEncodings.FREQUENCY.value:
        if cols is None:
            cols = unique_events(log)
        run_df = frequency(log, cols, labelling, encoding)
    elif value_encoding == ValueEncodings.COMPLEX.value:
        run_df = complex(log, labelling, encoding, additional_columns)
    elif value_encoding == ValueEncodings.LAST_PAYLOAD.value:
        run_df = last_payload(log, labelling, encoding, additional_columns)
    # TODO(JONAS): the SEQUENCES value encoding is not wired up yet; it currently
    # falls through to the "unknown" error below.
    elif value_encoding == ValueEncodings.DECLARE.value:
        run_df = declare_encoding(log, labelling, encoding, additional_columns, cols=cols)
        if cols is None:
            # Hand the produced columns back to the caller when none were supplied.
            cols = list(run_df.columns)
    else:
        raise ValueError("Unknown value encoding method {}".format(value_encoding))
    return run_df, cols
def test_prefix1(self):
    # First encoded row of traces '5' and '4' under the fixture encoding and labelling
    # (self.encoding / self.labelling are configured outside this view).
    encoded = last_payload(self.log, self.labelling, self.encoding, self.add_col)
    expected_by_trace = (
        ('5', ["5", "register request", "register request", "50", "Ellen", "Ellen", 0.0, 1576440.0]),
        ('4', ["4", "register request", "register request", "50", "Pete", "Pete", 0.0, 520920.0]),
    )
    for trace_id, expected_row in expected_by_trace:
        first_row = encoded[encoded.trace_id == trace_id].iloc[0].tolist()
        self.assertListEqual(first_row, expected_row)
def test_prefix5(self):
    # Prefix length 5, ONLY_THIS task generation, elapsed time enabled:
    # the result must have shape (2, 12) and contain no null cells.
    prefix5_encoding = create_test_encoding(
        value_encoding=ValueEncodings.LAST_PAYLOAD.value,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        add_elapsed_time=True,
        prefix_length=5,
    )
    encoded = last_payload(self.log, self.labelling, prefix5_encoding, self.add_col)

    self.assertEqual(encoded.shape, (2, 12))
    contains_null = encoded.isnull().values.any()
    self.assertFalse(contains_null)
def test_prefix1_no_label(self):
    # With NO_LABEL labelling the first rows of traces '5' and '4' hold six values each.
    no_label = create_test_labelling(label_type=LabelTypes.NO_LABEL.value)
    encoded = last_payload(self.log, no_label, self.encoding, self.add_col)

    expected_trace_5 = ["5", "register request", "register request", "50", "Ellen", "Ellen"]
    actual_trace_5 = encoded[encoded.trace_id == '5'].iloc[0].tolist()
    self.assertListEqual(actual_trace_5, expected_trace_5)

    expected_trace_4 = ["4", "register request", "register request", "50", "Pete", "Pete"]
    actual_trace_4 = encoded[encoded.trace_id == '4'].iloc[0].tolist()
    self.assertListEqual(actual_trace_4, expected_trace_4)
def test_prefix10_zero_padding_all_in_one(self):
    # Prefix length 10 with padding and ALL_IN_ONE task generation:
    # the result must have shape (15, 17) and contain no null cells.
    encoding_options = {
        'value_encoding': ValueEncodings.LAST_PAYLOAD.value,
        'task_generation_type': TaskGenerationTypes.ALL_IN_ONE.value,
        'add_elapsed_time': True,
        'prefix_length': 10,
        'padding': True,
    }
    padded_encoding = create_test_encoding(**encoding_options)
    encoded = last_payload(self.log, self.labelling, padded_encoding, self.add_col)

    self.assertEqual(encoded.shape, (15, 17))
    self.assertFalse(encoded.isnull().values.any())
def test_shape(self):
    # Prefix length 2 with elapsed time: checks both the dataframe shape and the
    # exact column order.
    expected_columns = [
        'trace_id',
        'prefix_1',
        'prefix_2',
        'Activity_2',
        'Costs_2',
        'Resource_2',
        'org:resource_2',
        'elapsed_time',
        'label',
    ]
    prefix2_encoding = create_test_encoding(
        value_encoding=ValueEncodings.LAST_PAYLOAD.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=2,
    )
    encoded = last_payload(self.log, self.labelling, prefix2_encoding, self.add_col)

    self.assertEqual((2, 9), encoded.shape)
    actual_columns = encoded.columns.values.tolist()
    self.assertListEqual(expected_columns, actual_columns)
def test_prefix1_no_elapsed_time(self):
    # Prefix length 1 without add_elapsed_time and with REMAINING_TIME labelling:
    # each expected row ends in a single trailing numeric value.
    plain_encoding = create_test_encoding(
        value_encoding=ValueEncodings.LAST_PAYLOAD.value,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=1,
    )
    remaining_time = create_test_labelling(label_type=LabelTypes.REMAINING_TIME.value)
    encoded = last_payload(self.log, remaining_time, plain_encoding, self.add_col)

    expectations = [
        ('5', ["5", "register request", "register request", "50", "Ellen", "Ellen", 1576440.0]),
        ('4', ["4", "register request", "register request", "50", "Pete", "Pete", 520920.0]),
    ]
    for trace_id, expected_row in expectations:
        matching = encoded[encoded.trace_id == trace_id]
        self.assertListEqual(matching.iloc[0].tolist(), expected_row)
def test_prefix2(self):
    # Prefix length 2 with elapsed time: checks the first encoded row of
    # traces '5' and '4'.
    prefix2_encoding = create_test_encoding(
        value_encoding=ValueEncodings.LAST_PAYLOAD.value,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        add_elapsed_time=True,
        prefix_length=2,
    )
    encoded = last_payload(self.log, self.labelling, prefix2_encoding, self.add_col)

    trace_5_rows = encoded[encoded.trace_id == '5']
    self.assertListEqual(
        trace_5_rows.iloc[0].tolist(),
        ["5", "register request", "examine casually", "examine casually", "400", "Mike", "Mike",
         90840.0, 1485600.0],
    )

    trace_4_rows = encoded[encoded.trace_id == '4']
    self.assertListEqual(
        trace_4_rows.iloc[0].tolist(),
        ["4", "register request", "check ticket", "check ticket", "100", "Mike", "Mike",
         75840.0, 445080.0],
    )