예제 #1
0
    def test_boolean_encoding_parsing(self):
        """Run the parser over boolean-encoded train and test logs.

        There are no explicit assertions: the test passes when encoding,
        splitting and all three parse calls complete without raising.
        """
        boolean_encoding = create_test_encoding(
            value_encoding=ValueEncodings.BOOLEAN.value,
            prefix_length=2,
            padding=True,
        )
        parser = self._get_parser(encoding=ValueEncodings.BOOLEAN.value)

        encoded_train = boolean(
            self.train_log, self.train_event_names, self.labelling, boolean_encoding)
        encoded_test = boolean(
            self.test_log, self.test_event_names, self.labelling, boolean_encoding)

        train_features, train_targets = self._drop_columns_and_split(encoded_train)
        # Only the first element of the test split is parsed; the second is discarded.
        test_features, _ = self._drop_columns_and_split(encoded_test)

        parser.parse_training_dataset(train_features)
        parser.parse_targets(train_targets)
        parser.parse_testing_dataset(test_features)
예제 #2
0
def _eventlog_to_dataframe(log: EventLog,
                           encoding: Encoding,
                           labelling: Labelling,
                           additional_columns=None,
                           cols=None):
    """Encode ``log`` with the value encoding selected by ``encoding``.

    Dispatches on ``encoding.value_encoding`` to the matching encoder
    (simple index, boolean, frequency, complex, last payload or declare).

    Args:
        log: Event log to encode.
        encoding: Encoding settings; ``prefix_length`` and ``value_encoding``
            are read here, the object is also forwarded to the encoder.
        labelling: Labelling settings forwarded to the encoder.
        additional_columns: Forwarded to the complex, last-payload and
            declare encoders; unused by the others.
        cols: Columns to encode against. When ``None``, the boolean and
            frequency encoders use ``unique_events(log)``; the declare
            encoder receives ``None`` and ``cols`` is then taken from the
            columns of the dataframe it produced.

    Returns:
        A ``(run_df, cols)`` tuple: the encoded dataframe and the columns
        used. For the simple-index, complex and last-payload encodings
        ``cols`` is returned exactly as passed in (possibly ``None``).

    Raises:
        ValueError: If ``encoding.prefix_length`` is below 1 or the value
            encoding is not one of the supported methods.
    """
    if encoding.prefix_length < 1:
        # The guard rejects values below 1, so 1 itself is a valid prefix length.
        raise ValueError("Prefix length must be at least 1")
    if encoding.value_encoding == ValueEncodings.SIMPLE_INDEX.value:
        run_df = simple_index(log, labelling, encoding)
    elif encoding.value_encoding == ValueEncodings.BOOLEAN.value:
        if cols is None:
            cols = unique_events(log)
        run_df = boolean(log, cols, labelling, encoding)
    elif encoding.value_encoding == ValueEncodings.FREQUENCY.value:
        if cols is None:
            cols = unique_events(log)
        run_df = frequency(log, cols, labelling, encoding)
    elif encoding.value_encoding == ValueEncodings.COMPLEX.value:
        run_df = complex(log, labelling, encoding, additional_columns)
    elif encoding.value_encoding == ValueEncodings.LAST_PAYLOAD.value:
        run_df = last_payload(log, labelling, encoding, additional_columns)
    # TODO(JONAS): add a ValueEncodings.SEQUENCES branch calling
    # sequences(log, labelling, encoding, additional_columns).
    elif encoding.value_encoding == ValueEncodings.DECLARE.value:
        run_df = declare_encoding(log,
                                  labelling,
                                  encoding,
                                  additional_columns,
                                  cols=cols)
        # When no columns were supplied, report the ones the encoder produced.
        if cols is None:
            cols = list(run_df.columns)
    else:
        raise ValueError("Unknown value encoding method {}".format(
            encoding.value_encoding))
    return run_df, cols
예제 #3
0
 def test_header(self):
     """Check that the boolean-encoded frame contains every expected column name."""
     encoded = boolean(self.log, self.event_names, self.labelling, self.encoding)
     header = encoded.columns.values.tolist()
     expected_columns = (
         'register request',
         'examine casually',
         'check ticket',
         'decide',
         'reinitiate request',
         'examine thoroughly',
         'reject request',
         'trace_id',
         'label',
         'elapsed_time',
     )
     for column in expected_columns:
         self.assertIn(column, header)
    def test_prefix1_no_elapsed_time(self):
        """Encode at prefix length 1 without requesting ``add_elapsed_time``.

        The resulting frame must have shape (2, 9) and no 'elapsed_time' column.
        """
        encoding_without_elapsed = create_test_encoding(
            value_encoding=ValueEncodings.BOOLEAN.value,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=1,
        )
        encoded = boolean(
            self.log, self.event_names, self.labelling, encoding_without_elapsed)

        self.assertEqual(encoded.shape, (2, 9))
        header = encoded.columns.values.tolist()
        self.assertNotIn('elapsed_time', header)
예제 #5
0
    def test_prefix1(self):
        """Check shape, event flags and labels for the fixture's default encoding."""
        encoded = boolean(self.log, self.event_names, self.labelling, self.encoding)
        self.assertEqual(encoded.shape, (2, 10))

        # Both traces share the same event flags and differ only in the label.
        expected_labels = (('5', 1576440.0), ('4', 520920.0))
        for trace_id, expected_label in expected_labels:
            first_row = encoded[encoded.trace_id == trace_id].iloc[0]
            self.assertTrue(first_row['register request'])
            self.assertFalse(first_row['examine casually'])
            self.assertEqual(expected_label, first_row.label)
예제 #6
0
    def test_prefix10_padding_all_in_one(self):
        """Check the padded ALL_IN_ONE boolean encoding at prefix length 10.

        The frame must have shape (15, 10), the fifth row of trace '4' must
        match the expected values, and no cell may be null.
        """
        all_in_one_encoding = create_test_encoding(
            value_encoding=ValueEncodings.BOOLEAN.value,
            prefix_length=10,
            add_elapsed_time=True,
            padding=True,
            task_generation_type=ALL_IN_ONE,
        )
        encoded = boolean(self.log, self.event_names, self.labelling, all_in_one_encoding)
        self.assertEqual(encoded.shape, (15, 10))

        trace_4_rows = encoded[encoded.trace_id == '4']
        fifth_row = trace_4_rows.iloc[4]
        expected_values = ['4', True, False, True, True, False, True, True, 520920.0, 0.0]
        self.assertListEqual(expected_values, fifth_row.values.tolist())

        self.assertFalse(encoded.isnull().values.any())
예제 #7
0
    def test_prefix10_padding(self):
        """Check the padded ONLY_THIS boolean encoding at prefix length 10.

        The frame must have shape (2, 10) and the first row of trace '4' must
        match the expected values.
        """
        padded_encoding = create_test_encoding(
            value_encoding=ValueEncodings.BOOLEAN.value,
            add_elapsed_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=10,
            padding=True,
        )
        encoded = boolean(self.log, self.event_names, self.labelling, padded_encoding)
        self.assertEqual(encoded.shape, (2, 10))

        trace_4_rows = encoded[encoded.trace_id == '4']
        first_row = trace_4_rows.iloc[0]
        expected_values = ['4', True, False, True, True, False, True, True, 520920.0, 0.0]
        self.assertListEqual(expected_values, first_row.values.tolist())
예제 #8
0
    def test_prefix2(self):
        """Check shape, event flags and labels of the boolean encoding at prefix length 2."""
        prefix2_encoding = create_test_encoding(
            value_encoding=ValueEncodings.BOOLEAN.value,
            add_elapsed_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=2,
        )
        encoded = boolean(self.log, self.event_names, self.labelling, prefix2_encoding)
        self.assertEqual(encoded.shape, (2, 10))

        # Trace '5': both 'register request' and 'examine casually' are set.
        trace_5 = encoded[encoded.trace_id == '5'].iloc[0]
        self.assertTrue(trace_5['register request'])
        self.assertTrue(trace_5['examine casually'])
        self.assertEqual(1485600.0, trace_5.label)

        # Trace '4': 'check ticket' is set instead of 'examine casually'.
        trace_4 = encoded[encoded.trace_id == '4'].iloc[0]
        self.assertTrue(trace_4['register request'])
        self.assertFalse(trace_4['examine casually'])
        self.assertTrue(trace_4['check ticket'])
        self.assertEqual(445080.0, trace_4.label)
예제 #9
0
    def test_prefix1_no_label(self):
        """Encode with a NO_LABEL labelling and check the 'label' column is absent.

        The resulting frame must have shape (2, 8).
        """
        unlabelled = create_test_labelling(label_type=LabelTypes.NO_LABEL.value)
        encoded = boolean(self.log, self.event_names, unlabelled, self.encoding)

        self.assertEqual(encoded.shape, (2, 8))
        header = encoded.columns.values.tolist()
        self.assertNotIn('label', header)