예제 #1
0
def _eventlog_to_dataframe(log: EventLog,
                           encoding: Encoding,
                           labelling: Labelling,
                           additional_columns=None,
                           cols=None):
    """Encode ``log`` with the encoder selected by ``encoding.value_encoding``.

    Args:
        log: event log handed to the selected encoder.
        encoding: encoding configuration. ``prefix_length`` and
            ``value_encoding`` are read here; the object itself is forwarded
            to the encoder.
        labelling: labelling configuration, forwarded to the encoder.
        additional_columns: forwarded to the complex, last-payload and
            declare encoders; not used by the other encoders.
        cols: columns used by the boolean, frequency and declare encoders.
            When ``None``, boolean/frequency fall back to
            ``unique_events(log)`` and declare falls back to the columns of
            the frame it produced.

    Returns:
        A ``(run_df, cols)`` tuple: the encoder's result and the column list
        that was used (still ``None`` if it was not supplied and the selected
        encoder does not derive one).

    Raises:
        ValueError: if ``encoding.prefix_length`` is smaller than 1, or if
            ``encoding.value_encoding`` matches none of the handled encodings.
    """
    if encoding.prefix_length < 1:
        # The guard accepts 1, so the message must say so.
        raise ValueError("Prefix length must be at least 1")

    value_encoding = encoding.value_encoding
    if value_encoding == ValueEncodings.SIMPLE_INDEX.value:
        run_df = simple_index(log, labelling, encoding)
    elif value_encoding in (ValueEncodings.BOOLEAN.value,
                            ValueEncodings.FREQUENCY.value):
        # Both encoders need the event columns; derive them once if absent.
        if cols is None:
            cols = unique_events(log)
        if value_encoding == ValueEncodings.BOOLEAN.value:
            run_df = boolean(log, cols, labelling, encoding)
        else:
            run_df = frequency(log, cols, labelling, encoding)
    elif value_encoding == ValueEncodings.COMPLEX.value:
        run_df = complex(log, labelling, encoding, additional_columns)
    elif value_encoding == ValueEncodings.LAST_PAYLOAD.value:
        run_df = last_payload(log, labelling, encoding, additional_columns)
    # TODO JONAS: ValueEncodings.SEQUENCES is not wired in yet
    #     (intended call: sequences(log, labelling, encoding, additional_columns))
    elif value_encoding == ValueEncodings.DECLARE.value:
        run_df = declare_encoding(log,
                                  labelling,
                                  encoding,
                                  additional_columns,
                                  cols=cols)
        # Report the columns the declare encoder produced when none were given.
        if cols is None:
            cols = list(run_df.columns)
    else:
        raise ValueError("Unknown value encoding method {}".format(
            encoding.value_encoding))
    return run_df, cols
예제 #2
0
    def test_prefix1(self):
        # Encode with the fixture's encoding/labelling (set up outside this
        # method) and compare the first encoded row of traces '5' and '4'.
        encoded = last_payload(self.log, self.labelling, self.encoding, self.add_col)

        expectations = (
            ('5', ["5", 'register request', "register request", "50", 'Ellen', "Ellen", 0.0, 1576440.0]),
            ('4', ["4", 'register request', "register request", "50", 'Pete', "Pete", 0.0, 520920.0]),
        )
        for trace_id, expected_row in expectations:
            first_row = encoded[(encoded.trace_id == trace_id)].iloc[0].tolist()
            self.assertListEqual(first_row, expected_row)
예제 #3
0
    def test_prefix5(self):
        # Last-payload encoding, ONLY_THIS task generation, elapsed time on,
        # prefix length 5: expect a 2x12 frame without any null cell.
        encoding_options = {
            'value_encoding': ValueEncodings.LAST_PAYLOAD.value,
            'task_generation_type': TaskGenerationTypes.ONLY_THIS.value,
            'add_elapsed_time': True,
            'prefix_length': 5,
        }
        encoding = create_test_encoding(**encoding_options)
        encoded = last_payload(self.log, self.labelling, encoding, self.add_col)

        self.assertEqual(encoded.shape, (2, 12))
        contains_null = encoded.isnull().values.any()
        self.assertFalse(contains_null)
예제 #4
0
    def test_prefix1_no_label(self):
        # Same fixture encoding as the labelled variant, but with a NO_LABEL
        # labelling; the expected first rows carry six values each.
        unlabelled = create_test_labelling(label_type=LabelTypes.NO_LABEL.value)
        encoded = last_payload(self.log, unlabelled, self.encoding, self.add_col)

        expectations = (
            ('5', ["5", 'register request', "register request", "50", 'Ellen', "Ellen"]),
            ('4', ["4", 'register request', "register request", "50", 'Pete', "Pete"]),
        )
        for trace_id, expected_row in expectations:
            first_row = encoded[(encoded.trace_id == trace_id)].iloc[0].tolist()
            self.assertListEqual(first_row, expected_row)
예제 #5
0
    def test_prefix10_zero_padding_all_in_one(self):
        # Prefix length 10 with padding and ALL_IN_ONE task generation:
        # expect a 15x17 frame without any null cell.
        encoding_options = {
            'value_encoding': ValueEncodings.LAST_PAYLOAD.value,
            'task_generation_type': TaskGenerationTypes.ALL_IN_ONE.value,
            'add_elapsed_time': True,
            'prefix_length': 10,
            'padding': True,
        }
        encoding = create_test_encoding(**encoding_options)
        encoded = last_payload(self.log, self.labelling, encoding, self.add_col)

        self.assertEqual(encoded.shape, (15, 17))
        contains_null = encoded.isnull().values.any()
        self.assertFalse(contains_null)
예제 #6
0
    def test_shape(self):
        # Prefix length 2, ONLY_THIS task generation, elapsed time on:
        # check the frame dimensions and the exact column order.
        encoding_options = {
            'value_encoding': ValueEncodings.LAST_PAYLOAD.value,
            'add_elapsed_time': True,
            'task_generation_type': TaskGenerationTypes.ONLY_THIS.value,
            'prefix_length': 2,
        }
        encoding = create_test_encoding(**encoding_options)
        encoded = last_payload(self.log, self.labelling, encoding, self.add_col)

        self.assertEqual((2, 9), encoded.shape)
        expected_columns = [
            'trace_id',
            'prefix_1',
            'prefix_2',
            'Activity_2',
            'Costs_2',
            'Resource_2',
            'org:resource_2',
            'elapsed_time',
            'label',
        ]
        actual_columns = encoded.columns.values.tolist()
        self.assertListEqual(expected_columns, actual_columns)
예제 #7
0
    def test_prefix1_no_elapsed_time(self):
        # Prefix length 1 without requesting elapsed time, labelled with
        # REMAINING_TIME; the expected first rows carry seven values each.
        encoding_options = {
            'value_encoding': ValueEncodings.LAST_PAYLOAD.value,
            'task_generation_type': TaskGenerationTypes.ONLY_THIS.value,
            'prefix_length': 1,
        }
        encoding = create_test_encoding(**encoding_options)
        remaining_time = create_test_labelling(label_type=LabelTypes.REMAINING_TIME.value)
        encoded = last_payload(self.log, remaining_time, encoding, self.add_col)

        expectations = (
            ('5', ["5", 'register request', "register request", "50", 'Ellen', "Ellen", 1576440.0]),
            ('4', ["4", 'register request', "register request", "50", 'Pete', "Pete", 520920.0]),
        )
        for trace_id, expected_row in expectations:
            first_row = encoded[(encoded.trace_id == trace_id)].iloc[0].tolist()
            self.assertListEqual(first_row, expected_row)
예제 #8
0
    def test_prefix2(self):
        # Prefix length 2, ONLY_THIS task generation, elapsed time on:
        # compare the first encoded row of traces '5' and '4'.
        encoding_options = {
            'value_encoding': ValueEncodings.LAST_PAYLOAD.value,
            'task_generation_type': TaskGenerationTypes.ONLY_THIS.value,
            'add_elapsed_time': True,
            'prefix_length': 2,
        }
        encoding = create_test_encoding(**encoding_options)
        encoded = last_payload(self.log, self.labelling, encoding, self.add_col)

        expectations = (
            ('5', ["5", 'register request', 'examine casually', "examine casually", "400", "Mike", "Mike",
                   90840.0, 1485600.0]),
            ('4', ["4", 'register request', "check ticket", "check ticket", "100", "Mike", "Mike", 75840.0,
                   445080.0]),
        )
        for trace_id, expected_row in expectations:
            first_row = encoded[(encoded.trace_id == trace_id)].iloc[0].tolist()
            self.assertListEqual(first_row, expected_row)