def _eventlog_to_dataframe(log: EventLog, encoding: Encoding, labelling: Labelling, additional_columns=None, cols=None):
    """Encode ``log`` with the encoder selected by ``encoding.value_encoding``.

    :param log: event log handed to the selected encoder
    :param encoding: configuration object; ``prefix_length`` and
        ``value_encoding`` are read here, the object is forwarded to the encoder
    :param labelling: labelling configuration forwarded to the encoder
    :param additional_columns: forwarded to the complex, last-payload and
        declare encoders; ignored by the others
    :param cols: column names to encode against. When ``None`` they are taken
        from ``unique_events(log)`` (boolean and frequency encodings) or from
        the columns of the encoded result (declare encoding); the remaining
        encoders return the value unchanged.
    :return: tuple ``(run_df, cols)`` with the encoder output and the columns
    :raises ValueError: if ``encoding.prefix_length`` is below 1 or the value
        encoding is not one of the handled methods
    """
    if encoding.prefix_length < 1:
        # The guard accepts a prefix length of exactly 1, so the message must
        # state the real lower bound.
        raise ValueError("Prefix length must be at least 1")

    method = encoding.value_encoding
    if method == ValueEncodings.SIMPLE_INDEX.value:
        run_df = simple_index(log, labelling, encoding)
    elif method == ValueEncodings.BOOLEAN.value:
        if cols is None:
            cols = unique_events(log)
        run_df = boolean(log, cols, labelling, encoding)
    elif method == ValueEncodings.FREQUENCY.value:
        if cols is None:
            cols = unique_events(log)
        run_df = frequency(log, cols, labelling, encoding)
    elif method == ValueEncodings.COMPLEX.value:
        run_df = complex(log, labelling, encoding, additional_columns)
    elif method == ValueEncodings.LAST_PAYLOAD.value:
        run_df = last_payload(log, labelling, encoding, additional_columns)
    # TODO(JONAS): handle ValueEncodings.SEQUENCES via
    # sequences(log, labelling, encoding, additional_columns)
    elif method == ValueEncodings.DECLARE.value:
        run_df = declare_encoding(log, labelling, encoding, additional_columns, cols=cols)
        # With no columns supplied, report the ones the encoder produced.
        if cols is None:
            cols = list(run_df.columns)
    else:
        raise ValueError("Unknown value encoding method {}".format(method))
    return run_df, cols
def setUp(self):
    """Load the general-example train and test logs plus their derived fixtures.

    For each log the unique event names and the additional columns are
    computed and stored next to the log itself.
    """
    train_source = create_test_log(
        log_name=general_example_train_filename,
        log_path=general_example_train_filepath,
    )
    self.train_log = get_log(train_source)
    self.train_event_names = unique_events(self.train_log)
    self.train_add_col = get_additional_columns(self.train_log)

    test_source = create_test_log(
        log_name=general_example_test_filename,
        log_path=general_example_test_filepath,
    )
    self.test_log = get_log(test_source)
    self.test_event_names = unique_events(self.test_log)
    self.test_add_col = get_additional_columns(self.test_log)
def method_self(self, encoding):
    """Encode ``self.log`` for regression with ``encoding`` and print the elapsed time."""
    started = time.time()
    names = unique_events(self.log)
    encode_label_log(
        self.log,
        encoding,
        PredictiveModels.REGRESSION.value,
        self.label,
        event_names=names,
        additional_columns=self.add_col,
    )
    elapsed = time.time() - started
    print("Total for %s %s seconds" % (encoding, elapsed))
def setUp(self):
    """Prepare the general-example test log with a last-payload encoding fixture.

    Stores the log, its unique event names, a remaining-time labelling, the
    additional columns, and an encoding with prefix length 1.
    """
    source = create_test_log(
        log_name=general_example_test_filename,
        log_path=general_example_test_filepath,
    )
    self.log = get_log(source)
    self.event_names = unique_events(self.log)
    self.labelling = create_test_labelling(label_type=LabelTypes.REMAINING_TIME.value)
    self.add_col = get_additional_columns(self.log)
    self.encoding = create_test_encoding(
        value_encoding=ValueEncodings.LAST_PAYLOAD.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=1,
    )
def do_test(self, encoding, log):
    """Encode ``log`` for regression and print the result shape and timing.

    ``encoding`` is wrapped in an ``EncodingContainer`` with prefix length 20
    and zero padding before the log is encoded.
    """
    started = time.time()
    extra_columns = get_additional_columns(log)
    names = unique_events(log)
    container = EncodingContainer(encoding, prefix_length=20, padding=ZERO_PADDING)
    encoded = encode_label_log(
        log,
        container,
        PredictiveModels.REGRESSION.value,
        self.label,
        event_names=names,
        additional_columns=extra_columns,
    )
    print(encoded.shape)
    print("Total for %s %s seconds" % (container.method, time.time() - started))
def do_test(encoding):
    """Encode the Sepsis Cases log for regression and print shape and timing.

    ``encoding`` is wrapped in an ``EncodingContainer`` (prefix length 185,
    ``ALL_IN_ONE`` generation, zero padding). The timing line formats that
    container object, not the original ``encoding`` argument.
    """
    started = time.time()
    sepsis_log = get_log("cache/log_cache/Sepsis Cases - Event Log.xes")
    label = LabelContainer(LabelTypes.REMAINING_TIME.value, add_elapsed_time=True)
    container = EncodingContainer(
        encoding,
        prefix_length=185,
        generation_type=ALL_IN_ONE,
        padding=ZERO_PADDING,
    )
    names = unique_events(sepsis_log)
    encoded = encode_label_log(
        sepsis_log,
        container,
        PredictiveModels.REGRESSION.value,
        label,
        event_names=names,
    )
    print(encoded.shape)
    print("Total for %s %s seconds" % (container, time.time() - started))
def test_unique_events(self):
    """The fixture log is expected to yield exactly 7 unique events."""
    self.assertEqual(7, len(unique_events(self.log)))