def test_rnn_time_series_predictor_complex_no_exceptions(self):
        # Smoke test: fit and predict with an RNN time-series predictor on
        # complex-encoded logs. There are no assertions; the test passes as
        # long as nothing raises.
        complex_encoding = create_test_encoding(
            value_encoding=ValueEncodings.COMPLEX.value,
            prefix_length=5,
            padding=True)
        duration_labelling = create_test_labelling(
            label_type=LabelTypes.DURATION.value,
            threshold_type=ThresholdTypes.THRESHOLD_MEAN.value)

        encoded_train = complex(self.train_log, duration_labelling,
                                complex_encoding, self.train_add_col)
        encoded_test = complex(self.test_log, duration_labelling,
                               complex_encoding, self.test_add_col)

        # Only the first element of each split is used: fit() below is
        # called without targets.
        train_features, _ = self._drop_columns_and_split(encoded_train)
        test_features, _ = self._drop_columns_and_split(encoded_test)

        predictor_config = self._get_rnn_default_config(
            encoding=ValueEncodings.COMPLEX.value)
        predictor = RNNTimeSeriesPredictor(**predictor_config)

        # (the HidePrints() wrapper was left disabled here)
        predictor.fit(train_features)
        predictor.predict(test_features)
    def test_nn_regressor_complex_no_exceptions(self):
        # Smoke test: fit and predict with an NN regressor on complex-encoded
        # logs. There are no assertions; the test passes as long as nothing
        # raises.
        complex_encoding = create_test_encoding(
            value_encoding=ValueEncodings.COMPLEX.value,
            prefix_length=2,
            padding=True)
        remaining_time_labelling = create_test_labelling(
            label_type=LabelTypes.REMAINING_TIME.value)

        encoded_train = complex(self.train_log, remaining_time_labelling,
                                complex_encoding, self.train_add_col)
        encoded_test = complex(self.test_log, remaining_time_labelling,
                               complex_encoding, self.test_add_col)

        train_features, train_targets = self._drop_columns_and_split(
            encoded_train)
        # Flatten the targets to a 1-D array before handing them to fit().
        flat_targets = train_targets.values.ravel()

        test_features, _ = self._drop_columns_and_split(encoded_test)

        regressor_config = self._get_nn_default_config(
            encoding=ValueEncodings.COMPLEX.value)
        regressor = NNRegressor(**regressor_config)

        # (the HidePrints() wrapper was left disabled here)
        regressor.fit(train_features, flat_targets)
        regressor.predict(test_features)
Beispiel #3
0
    def test_complex_encoding_parsing(self):
        # Smoke test: the parser must accept complex-encoded training data,
        # targets and testing data. There are no assertions; the test passes
        # as long as nothing raises.
        complex_encoding = create_test_encoding(
            value_encoding=ValueEncodings.COMPLEX.value,
            prefix_length=2,
            padding=True)

        data_parser = self._get_parser(encoding=ValueEncodings.COMPLEX.value)
        encoded_train = complex(self.train_log, self.labelling,
                                complex_encoding, self.train_add_col)
        # NOTE(review): the test log is encoded with the *training*
        # additional columns; other tests in this file pass
        # self.test_add_col for the test log -- confirm this is intended.
        encoded_test = complex(self.test_log, self.labelling,
                               complex_encoding, self.train_add_col)

        train_features, train_targets = self._drop_columns_and_split(
            encoded_train)
        test_features, _ = self._drop_columns_and_split(encoded_test)

        data_parser.parse_training_dataset(train_features)
        data_parser.parse_targets(train_targets)
        data_parser.parse_testing_dataset(test_features)
Beispiel #4
0
def _eventlog_to_dataframe(log: EventLog,
                           encoding: Encoding,
                           labelling: Labelling,
                           additional_columns=None,
                           cols=None):
    """Encode an event log with the encoder selected by ``encoding``.

    The encoder is chosen by comparing ``encoding.value_encoding`` against
    the ``ValueEncodings`` members handled below.

    Args:
        log: Event log to encode.
        encoding: Encoding configuration; ``prefix_length`` and
            ``value_encoding`` are read here, and the object is forwarded
            to the selected encoder.
        labelling: Labelling configuration forwarded to the encoder.
        additional_columns: Forwarded to the complex, last-payload and
            declare encoders; ignored by the others.
        cols: Columns forwarded to the boolean, frequency and declare
            encoders. When ``None``, boolean/frequency use
            ``unique_events(log)``; for declare it is passed through as
            ``None`` and afterwards set to the resulting dataframe's
            columns.

    Returns:
        A ``(run_df, cols)`` tuple: the encoded dataframe and the columns
        that were used (``cols`` stays as passed in for encoders that do
        not use it).

    Raises:
        ValueError: If ``encoding.prefix_length`` is below 1, or if
            ``encoding.value_encoding`` is not one of the handled values.
    """
    if encoding.prefix_length < 1:
        # A prefix length of exactly 1 is valid; the previous message
        # ("greater than 1") contradicted this check.
        raise ValueError("Prefix length must be at least 1")
    if encoding.value_encoding == ValueEncodings.SIMPLE_INDEX.value:
        run_df = simple_index(log, labelling, encoding)
    elif encoding.value_encoding == ValueEncodings.BOOLEAN.value:
        if cols is None:
            cols = unique_events(log)
        run_df = boolean(log, cols, labelling, encoding)
    elif encoding.value_encoding == ValueEncodings.FREQUENCY.value:
        if cols is None:
            cols = unique_events(log)
        run_df = frequency(log, cols, labelling, encoding)
    elif encoding.value_encoding == ValueEncodings.COMPLEX.value:
        run_df = complex(log, labelling, encoding, additional_columns)
    elif encoding.value_encoding == ValueEncodings.LAST_PAYLOAD.value:
        run_df = last_payload(log, labelling, encoding, additional_columns)
    # elif encoding.value_encoding == ValueEncodings.SEQUENCES.value: #TODO JONAS
    #     run_df = sequences(log, labelling, encoding, additional_columns)
    elif encoding.value_encoding == ValueEncodings.DECLARE.value:
        run_df = declare_encoding(log,
                                  labelling,
                                  encoding,
                                  additional_columns,
                                  cols=cols)
        # Report the columns the declare encoder produced when the caller
        # did not supply any.
        if cols is None:
            cols = list(run_df.columns)
    else:
        raise ValueError("Unknown value encoding method {}".format(
            encoding.value_encoding))
    return run_df, cols
    def test_prefix10_all_in_one(self):
        # Complex encoding, ALL_IN_ONE task generation, prefix length 10:
        # checks the resulting frame's shape and that no cell is null.
        all_in_one_encoding = create_test_encoding(
            value_encoding=ValueEncodings.COMPLEX.value,
            add_elapsed_time=True,
            task_generation_type=TaskGenerationTypes.ALL_IN_ONE.value,
            prefix_length=10)
        encoded = complex(self.log, self.labelling, all_in_one_encoding,
                          self.add_col)

        self.assertEqual(encoded.shape, (10, 55))
        contains_null = encoded.isnull().values.any()
        self.assertFalse(contains_null)
    def test_prefix5(self):
        # Complex encoding, ONLY_THIS task generation, prefix length 5:
        # checks the resulting frame's shape and that no cell is null.
        prefix5_encoding = create_test_encoding(
            value_encoding=ValueEncodings.COMPLEX.value,
            add_elapsed_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=5)
        encoded = complex(self.log, self.labelling, prefix5_encoding,
                          self.add_col)

        self.assertEqual(encoded.shape, (2, 30))
        contains_null = encoded.isnull().values.any()
        self.assertFalse(contains_null)
    def test_prefix1(self):
        # Encodes the log with the fixture's encoding and labelling, then
        # compares the first row of traces '5' and '4' with literal values.
        encoded = complex(self.log, self.labelling, self.encoding,
                          self.add_col)

        expected_by_trace = (
            ('5', [
                '5', '300', 'Fluxicon Nitro', 'register request',
                'register request', '50', 'Ellen', 'Ellen', 0.0, 1576440.0
            ]),
            ('4', [
                '4', '100', 'Fluxicon Nitro', 'register request',
                'register request', '50', 'Pete', 'Pete', 0.0, 520920.0
            ]),
        )
        for trace_id, expected_row in expected_by_trace:
            trace_rows = encoded[encoded.trace_id == trace_id]
            first_row = trace_rows.iloc[0].tolist()
            self.assertListEqual(first_row, expected_row)
    def test_prefix1_no_label(self):
        # Same check as a labelled prefix test, but with NO_LABEL labelling:
        # the expected rows end at the last event attribute value.
        no_label = create_test_labelling(label_type=LabelTypes.NO_LABEL.value)
        encoded = complex(self.log, no_label, self.encoding, self.add_col)

        expected_by_trace = (
            ('5', [
                '5', '300', 'Fluxicon Nitro', 'register request',
                'register request', '50', 'Ellen', 'Ellen'
            ]),
            ('4', [
                '4', '100', 'Fluxicon Nitro', 'register request',
                'register request', '50', 'Pete', 'Pete'
            ]),
        )
        for trace_id, expected_row in expected_by_trace:
            trace_rows = encoded[encoded.trace_id == trace_id]
            first_row = trace_rows.iloc[0].tolist()
            self.assertListEqual(first_row, expected_row)
    def test_shape(self):
        # Complex encoding, ONLY_THIS task generation, prefix length 2:
        # checks the frame's shape and its exact column order.
        prefix2_encoding = create_test_encoding(
            value_encoding=ValueEncodings.COMPLEX.value,
            add_elapsed_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=2)
        encoded = complex(self.log, self.labelling, prefix2_encoding,
                          self.add_col)

        expected_columns = [
            'trace_id', 'AMOUNT', 'creator',
            'prefix_1', 'Activity_1', 'Costs_1', 'Resource_1',
            'org:resource_1',
            'prefix_2', 'Activity_2', 'Costs_2', 'Resource_2',
            'org:resource_2',
            'elapsed_time', 'label',
        ]

        self.assertEqual((2, 15), encoded.shape)
        actual_columns = encoded.columns.values.tolist()
        self.assertListEqual(expected_columns, actual_columns)
    def test_prefix1_no_elapsed_time(self):
        # Complex encoding with prefix length 1, created without passing
        # add_elapsed_time, and a default-constructed LabelContainer.
        # Compares the first row of traces '5' and '4' with literal values.
        prefix1_encoding = create_test_encoding(
            value_encoding=ValueEncodings.COMPLEX.value,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=1)
        encoded = complex(self.log, LabelContainer(), prefix1_encoding,
                          self.add_col)

        expected_by_trace = (
            ('5', [
                '5', '300', 'Fluxicon Nitro', 'register request',
                'register request', '50', 'Ellen', 'Ellen', 1576440.0
            ]),
            ('4', [
                '4', '100', 'Fluxicon Nitro', 'register request',
                'register request', '50', 'Pete', 'Pete', 520920.0
            ]),
        )
        for trace_id, expected_row in expected_by_trace:
            trace_rows = encoded[encoded.trace_id == trace_id]
            first_row = trace_rows.iloc[0].tolist()
            self.assertListEqual(first_row, expected_row)
    def test_prefix2(self):
        # Complex encoding, ONLY_THIS task generation, prefix length 2:
        # compares the first row of traces '5' and '4' with literal values.
        prefix2_encoding = create_test_encoding(
            value_encoding=ValueEncodings.COMPLEX.value,
            add_elapsed_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=2)
        encoded = complex(self.log, self.labelling, prefix2_encoding,
                          self.add_col)

        expected_by_trace = (
            ('5', [
                '5', '300', 'Fluxicon Nitro', 'register request',
                'register request', '50', 'Ellen', 'Ellen',
                'examine casually', 'examine casually', '400', 'Mike',
                'Mike', 90840.0, 1485600.0
            ]),
            ('4', [
                '4', '100', 'Fluxicon Nitro', 'register request',
                'register request', '50', 'Pete', 'Pete', 'check ticket',
                'check ticket', '100', 'Mike', 'Mike', 75840.0, 445080.0
            ]),
        )
        for trace_id, expected_row in expected_by_trace:
            trace_rows = encoded[encoded.trace_id == trace_id]
            first_row = trace_rows.iloc[0].tolist()
            self.assertListEqual(first_row, expected_row)