def test_rnn_time_series_predictor_complex_no_exceptions(self):
    """Smoke test: fit and predict with RNNTimeSeriesPredictor on complex-encoded logs.

    The test makes no assertions; it passes when encoding, fitting and
    predicting all complete without raising.
    """
    complex_encoding = create_test_encoding(
        value_encoding=ValueEncodings.COMPLEX.value,
        prefix_length=5,
        padding=True,
    )
    duration_labelling = create_test_labelling(
        label_type=LabelTypes.DURATION.value,
        threshold_type=ThresholdTypes.THRESHOLD_MEAN.value,
    )

    # Encode both logs first, then strip/split them, in the same order as before.
    encoded_train = complex(
        self.train_log, duration_labelling, complex_encoding, self.train_add_col)
    encoded_test = complex(
        self.test_log, duration_labelling, complex_encoding, self.test_add_col)

    # The second element of each split (the targets) is not needed here:
    # the time-series predictor is fitted on the features alone.
    train_features, _ = self._drop_columns_and_split(encoded_train)
    test_features, _ = self._drop_columns_and_split(encoded_test)

    predictor = RNNTimeSeriesPredictor(
        **self._get_rnn_default_config(encoding=ValueEncodings.COMPLEX.value))

    # Output is not suppressed: the HidePrints() wrapper is disabled.
    predictor.fit(train_features)
    predictor.predict(test_features)
def test_nn_regressor_complex_no_exceptions(self):
    """Smoke test: fit and predict with NNRegressor on complex-encoded logs.

    The test makes no assertions; it passes when encoding, fitting and
    predicting all complete without raising.
    """
    complex_encoding = create_test_encoding(
        value_encoding=ValueEncodings.COMPLEX.value,
        prefix_length=2,
        padding=True,
    )
    remaining_time_labelling = create_test_labelling(
        label_type=LabelTypes.REMAINING_TIME.value)

    # Encode both logs first, then strip/split them, in the same order as before.
    encoded_train = complex(
        self.train_log, remaining_time_labelling, complex_encoding, self.train_add_col)
    encoded_test = complex(
        self.test_log, remaining_time_labelling, complex_encoding, self.test_add_col)

    train_features, train_targets = self._drop_columns_and_split(encoded_train)
    # Flatten the targets to a 1-D array before handing them to fit().
    flat_targets = train_targets.values.ravel()
    test_features, _ = self._drop_columns_and_split(encoded_test)

    regressor = NNRegressor(
        **self._get_nn_default_config(encoding=ValueEncodings.COMPLEX.value))

    # Output is not suppressed: the HidePrints() wrapper is disabled.
    regressor.fit(train_features, flat_targets)
    regressor.predict(test_features)
def test_complex_encoding_parsing(self):
    """Smoke test: run the parser over complex-encoded training data, targets and test data.

    The test makes no assertions; it passes when every parsing step
    completes without raising.
    """
    complex_encoding = create_test_encoding(
        prefix_length=2,
        padding=True,
        value_encoding=ValueEncodings.COMPLEX.value,
    )
    data_parser = self._get_parser(encoding=ValueEncodings.COMPLEX.value)

    encoded_train = complex(
        self.train_log, self.labelling, complex_encoding, self.train_add_col)
    # NOTE(review): the test log is encoded with self.train_add_col as well;
    # confirm this is intended rather than a test-log-specific column set.
    encoded_test = complex(
        self.test_log, self.labelling, complex_encoding, self.train_add_col)

    train_features, train_targets = self._drop_columns_and_split(encoded_train)
    test_features, _ = self._drop_columns_and_split(encoded_test)

    # Training data is parsed before the targets and the testing data.
    data_parser.parse_training_dataset(train_features)
    data_parser.parse_targets(train_targets)
    data_parser.parse_testing_dataset(test_features)
def _eventlog_to_dataframe(log: EventLog, encoding: Encoding, labelling: Labelling, additional_columns=None, cols=None):
    """Encode an event log as a dataframe with the configured value encoding.

    Dispatches on ``encoding.value_encoding`` to the matching encoder
    (simple index, boolean, frequency, complex, last payload or declare).

    Args:
        log: Event log to encode.
        encoding: Encoding configuration; ``prefix_length`` and
            ``value_encoding`` are read here, and the object is forwarded to
            the selected encoder.
        labelling: Labelling configuration, forwarded to the selected encoder.
        additional_columns: Forwarded to the complex, last-payload and
            declare encoders; ignored by the others.
        cols: Columns to reuse. When ``None``, the boolean and frequency
            encodings derive them from ``unique_events(log)`` and the declare
            encoding takes them from the columns of the resulting dataframe.

    Returns:
        A ``(run_df, cols)`` tuple: the encoded dataframe and the columns
        that were used (``cols`` is returned unchanged for encodings that do
        not use it).

    Raises:
        ValueError: If ``encoding.prefix_length`` is smaller than 1, or if
            ``encoding.value_encoding`` is not one of the handled encodings.
    """
    if encoding.prefix_length < 1:
        # The guard accepts 1, so the message must not claim otherwise.
        raise ValueError("Prefix length must be at least 1")

    value_encoding = encoding.value_encoding

    if value_encoding == ValueEncodings.SIMPLE_INDEX.value:
        run_df = simple_index(log, labelling, encoding)
    elif value_encoding == ValueEncodings.BOOLEAN.value:
        if cols is None:
            cols = unique_events(log)
        run_df = boolean(log, cols, labelling, encoding)
    elif value_encoding == ValueEncodings.FREQUENCY.value:
        if cols is None:
            cols = unique_events(log)
        run_df = frequency(log, cols, labelling, encoding)
    elif value_encoding == ValueEncodings.COMPLEX.value:
        run_df = complex(log, labelling, encoding, additional_columns)
    elif value_encoding == ValueEncodings.LAST_PAYLOAD.value:
        run_df = last_payload(log, labelling, encoding, additional_columns)
    # TODO JONAS: ValueEncodings.SEQUENCES is not dispatched here; its
    # `sequences(log, labelling, encoding, additional_columns)` branch is
    # disabled.
    elif value_encoding == ValueEncodings.DECLARE.value:
        run_df = declare_encoding(log, labelling, encoding, additional_columns, cols=cols)
        if cols is None:
            # No columns were supplied: report the ones the encoder produced.
            cols = list(run_df.columns)
    else:
        raise ValueError("Unknown value encoding method {}".format(value_encoding))

    return run_df, cols
def test_prefix10_all_in_one(self):
    """Complex encoding with prefix length 10 and ALL_IN_ONE task generation.

    Expects a (10, 55) dataframe that contains no null values.
    """
    all_in_one_encoding = create_test_encoding(
        value_encoding=ValueEncodings.COMPLEX.value,
        task_generation_type=TaskGenerationTypes.ALL_IN_ONE.value,
        add_elapsed_time=True,
        prefix_length=10,
    )

    encoded = complex(self.log, self.labelling, all_in_one_encoding, self.add_col)

    self.assertEqual(encoded.shape, (10, 55))
    contains_nulls = encoded.isnull().values.any()
    self.assertFalse(contains_nulls)
def test_prefix5(self):
    """Complex encoding with prefix length 5 and ONLY_THIS task generation.

    Expects a (2, 30) dataframe that contains no null values.
    """
    only_this_encoding = create_test_encoding(
        value_encoding=ValueEncodings.COMPLEX.value,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        add_elapsed_time=True,
        prefix_length=5,
    )

    encoded = complex(self.log, self.labelling, only_this_encoding, self.add_col)

    self.assertEqual(encoded.shape, (2, 30))
    contains_nulls = encoded.isnull().values.any()
    self.assertFalse(contains_nulls)
def test_prefix1(self):
    """Check the first encoded row of traces '5' and '4' using the fixture encoding."""
    encoded = complex(self.log, self.labelling, self.encoding, self.add_col)

    # (trace id, expected first row) pairs, checked in this order.
    expected_rows = (
        ('5', [
            '5', '300', 'Fluxicon Nitro', 'register request',
            'register request', '50', 'Ellen', 'Ellen', 0.0, 1576440.0
        ]),
        ('4', [
            '4', '100', 'Fluxicon Nitro', 'register request',
            'register request', '50', 'Pete', 'Pete', 0.0, 520920.0
        ]),
    )

    for trace_id, expected in expected_rows:
        first_row = encoded[encoded.trace_id == trace_id].iloc[0].tolist()
        self.assertListEqual(first_row, expected)
def test_prefix1_no_label(self):
    """Check the first encoded row of traces '5' and '4' with NO_LABEL labelling.

    The expected rows are two values shorter than in ``test_prefix1``.
    """
    no_label = create_test_labelling(label_type=LabelTypes.NO_LABEL.value)
    encoded = complex(self.log, no_label, self.encoding, self.add_col)

    # (trace id, expected first row) pairs, checked in this order.
    expected_rows = (
        ('5', [
            '5', '300', 'Fluxicon Nitro', 'register request',
            'register request', '50', 'Ellen', 'Ellen'
        ]),
        ('4', [
            '4', '100', 'Fluxicon Nitro', 'register request',
            'register request', '50', 'Pete', 'Pete'
        ]),
    )

    for trace_id, expected in expected_rows:
        first_row = encoded[encoded.trace_id == trace_id].iloc[0].tolist()
        self.assertListEqual(first_row, expected)
def test_shape(self):
    """Complex encoding with prefix length 2: check the shape and the exact column order."""
    prefix2_encoding = create_test_encoding(
        value_encoding=ValueEncodings.COMPLEX.value,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        add_elapsed_time=True,
        prefix_length=2,
    )

    encoded = complex(self.log, self.labelling, prefix2_encoding, self.add_col)

    self.assertEqual((2, 15), encoded.shape)

    expected_headers = [
        'trace_id', 'AMOUNT', 'creator',
        'prefix_1', 'Activity_1', 'Costs_1', 'Resource_1', 'org:resource_1',
        'prefix_2', 'Activity_2', 'Costs_2', 'Resource_2', 'org:resource_2',
        'elapsed_time', 'label'
    ]
    actual_headers = encoded.columns.values.tolist()
    self.assertListEqual(expected_headers, actual_headers)
def test_prefix1_no_elapsed_time(self):
    """Check the first encoded row of traces '5' and '4' without ``add_elapsed_time``.

    The encoding is built without the ``add_elapsed_time`` argument and the
    labelling is a default-constructed ``LabelContainer``.
    """
    prefix1_encoding = create_test_encoding(
        value_encoding=ValueEncodings.COMPLEX.value,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=1,
    )
    encoded = complex(self.log, LabelContainer(), prefix1_encoding, self.add_col)

    # (trace id, expected first row) pairs, checked in this order.
    expected_rows = (
        ('5', [
            '5', '300', 'Fluxicon Nitro', 'register request',
            'register request', '50', 'Ellen', 'Ellen', 1576440.0
        ]),
        ('4', [
            '4', '100', 'Fluxicon Nitro', 'register request',
            'register request', '50', 'Pete', 'Pete', 520920.0
        ]),
    )

    for trace_id, expected in expected_rows:
        first_row = encoded[encoded.trace_id == trace_id].iloc[0].tolist()
        self.assertListEqual(first_row, expected)
def test_prefix2(self):
    """Check the first encoded row of traces '5' and '4' for prefix length 2 with elapsed time."""
    prefix2_encoding = create_test_encoding(
        value_encoding=ValueEncodings.COMPLEX.value,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        add_elapsed_time=True,
        prefix_length=2,
    )
    encoded = complex(self.log, self.labelling, prefix2_encoding, self.add_col)

    # (trace id, expected first row) pairs, checked in this order.
    expected_rows = (
        ('5', [
            '5', '300', 'Fluxicon Nitro',
            'register request', 'register request', '50', 'Ellen', 'Ellen',
            'examine casually', 'examine casually', '400', 'Mike', 'Mike',
            90840.0, 1485600.0
        ]),
        ('4', [
            '4', '100', 'Fluxicon Nitro',
            'register request', 'register request', '50', 'Pete', 'Pete',
            'check ticket', 'check ticket', '100', 'Mike', 'Mike',
            75840.0, 445080.0
        ]),
    )

    for trace_id, expected in expected_rows:
        first_row = encoded[encoded.trace_id == trace_id].iloc[0].tolist()
        self.assertListEqual(first_row, expected)