Beispiel #1
0
def prepare_logs(split: Split):
    """Load the training and test event logs described by *split*.

    For a single-log split the original log is divided in two; for a
    double split the two stored logs are loaded directly.

    :param split: Split model describing where the logs come from
    :return: (training_log, test_log, additional_columns) triple
    :raises TypeError: when the resulting training log holds no traces
    """
    if split.type == SplitTypes.SPLIT_SINGLE.value:
        source_log = get_log(split.original_log)
        additional_columns = get_additional_columns(source_log)
        training_log, test_log = _split_single_log(split)
        logger.info("\t\tLoaded single log from {}".format(
            split.original_log.path))
    else:
        # Have to use sklearn to convert some internal data types
        training_log = get_log(split.train_log)
        additional_columns = get_additional_columns(training_log)
        if split.additional_columns is None:
            # Derive and persist a default additional-columns file name.
            split.additional_columns = split.train_log.name + split.test_log.name + '_ac.xes'
            split.save()
        # test_size=0 keeps every trace; the call is only for its conversion.
        training_log, _ = train_test_split(
            training_log, test_size=0, shuffle=False)
        test_log, _ = train_test_split(
            get_log(split.test_log), test_size=0, shuffle=False)
        logger.info("\t\tLoaded double logs from {} and {}.".format(
            split.train_log.path, split.test_log.path))
    if len(training_log) == 0:
        raise TypeError(
            "Training log is empty. Create a new Split with better parameters")
    return training_log, test_log, additional_columns
Beispiel #2
0
    def setUp(self):
        """Load the example train/test logs plus their event names and extra columns."""
        train_model = create_test_log(log_name=general_example_train_filename,
                                      log_path=general_example_train_filepath)
        self.train_log = get_log(train_model)
        self.train_event_names = unique_events(self.train_log)
        self.train_add_col = get_additional_columns(self.train_log)

        test_model = create_test_log(log_name=general_example_test_filename,
                                     log_path=general_example_test_filepath)
        self.test_log = get_log(test_model)
        self.test_event_names = unique_events(self.test_log)
        self.test_add_col = get_additional_columns(self.test_log)
Beispiel #3
0
def get_train_test_log(split: Split):
    """Return the training log, test log and additional columns for *split*.

    A deterministic (non-random) single split is materialised once as a
    double split and reused on later calls; random single splits are
    recomputed every time. Double splits load their two stored logs.

    :param split: Split model describing where the logs come from
    :return: (training_log, test_log, additional_columns) triple
    :raises TypeError: when the resulting training log holds no traces
    """
    # Build the equivalent-double-split queryset once instead of three times.
    matching_doubles = Split.objects.filter(
        type=SplitTypes.SPLIT_DOUBLE.value,
        original_log=split.original_log,
        test_size=split.test_size,
        splitting_method=split.splitting_method
    )
    is_random = split.splitting_method == SplitOrderingMethods.SPLIT_RANDOM.value

    if split.type == SplitTypes.SPLIT_SINGLE.value and matching_doubles.exists() and not is_random:
        # A cached deterministic double split already exists: delegate to it.
        return get_train_test_log(matching_doubles[0])
    elif split.original_log is not None and (not matching_doubles.exists() or is_random):
        training_log, test_log = _split_single_log(split)
        additional_columns = get_additional_columns(get_log(split.original_log))

        if not is_random:
            # Cache the deterministic split as a double split for reuse.
            _ = Split.objects.get_or_create(
                type=SplitTypes.SPLIT_DOUBLE.value,
                original_log=split.original_log,
                test_size=split.test_size,
                splitting_method=split.splitting_method,
                train_log=create_log(EventLog(training_log), '0-' + str(100 - int(split.test_size * 100)) + '.xes'),
                test_log=create_log(EventLog(test_log), str(100 - int(split.test_size * 100)) + '-100.xes'),
                additional_columns=split.additional_columns
            )[0]

        logger.info("\t\tLoaded single log from {}".format(split.original_log.path))
    else:
        # Have to use sklearn to convert some internal data types
        training_log = get_log(split.train_log)
        additional_columns = get_additional_columns(training_log)
        if split.additional_columns is None:
            split.additional_columns = split.train_log.name + split.test_log.name + '_ac.xes'
            split.save()
        # test_size=0 keeps every trace; the discarded halves were previously
        # bound to never-used locals (train_log_to_append/test_log_to_append).
        training_log, _ = train_test_split(training_log, test_size=0, shuffle=False)
        test_log, _ = train_test_split(get_log(split.test_log), test_size=0, shuffle=False)
        logger.info("\t\tLoaded double logs from {} and {}.".format(split.train_log.path, split.test_log.path))
    if len(training_log) == 0:
        raise TypeError("Training log is empty. Create a new Split with better parameters")
    return training_log, test_log, additional_columns
Beispiel #4
0
 def test_global_event_attributes(self):
     """The example XES log should expose the expected additional event attributes."""
     example_log = get_log(create_test_log(
         log_name=general_example_test_filename,
         log_path=general_example_test_filepath_xes))
     extra_columns = get_additional_columns(example_log)
     self.assertListEqual(extra_columns['event_attributes'],
                          ['Activity', 'Costs', 'Resource', 'org:resource'])
Beispiel #5
0
 def setUp(self):
     """Build shared fixtures: example log, remaining-time labelling, LAST_PAYLOAD encoding."""
     self.log = get_log(create_test_log(
         log_name=general_example_test_filename,
         log_path=general_example_test_filepath))
     self.labelling = create_test_labelling(label_type=LabelTypes.REMAINING_TIME.value)
     self.event_names = unique_events(self.log)
     self.add_col = get_additional_columns(self.log)
     self.encoding = create_test_encoding(
         value_encoding=ValueEncodings.LAST_PAYLOAD.value,
         add_elapsed_time=True,
         task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
         prefix_length=1)
 def do_test(self, encoding, log):
     """Encode *log* with *encoding* and print the frame shape and wall-clock time."""
     started_at = time.time()
     extra_columns = get_additional_columns(log)
     names = unique_events(log)
     container = EncodingContainer(encoding, prefix_length=20, padding=ZERO_PADDING)
     encoded = encode_label_log(log, container, PredictiveModels.REGRESSION.value, self.label,
                                event_names=names,
                                additional_columns=extra_columns)
     print(encoded.shape)
     print("Total for %s %s seconds" % (container.method, time.time() - started_at))
    def test_no_label(self):
        """With NO_LABEL the encoded frame should keep only the base columns."""
        no_label = create_test_labelling(label_type=LabelTypes.NO_LABEL.value)
        classifier = create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value)
        job = create_test_job(
            encoding=self.encoding,
            labelling=no_label,
            predictive_model=classifier)

        _, df = encode_label_logs(
            self.train_log, self.test_log, job,
            get_additional_columns(self.train_log))
        self.assertEqual((2, 12), df.shape)
    def test_next_activity(self):
        """NEXT_ACTIVITY labelling adds label columns on top of the base frame."""
        next_activity = create_test_labelling(
            label_type=LabelTypes.NEXT_ACTIVITY.value)
        classifier = create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value)
        job = create_test_job(
            encoding=self.encoding,
            labelling=next_activity,
            predictive_model=classifier)

        _, df = encode_label_logs(
            self.train_log, self.test_log, job,
            get_additional_columns(self.train_log))
        self.assertEqual(df.shape, (2, 14))
    def test_remaining_time(self):
        """REMAINING_TIME labelling yields the same frame width as next-activity."""
        remaining_time = create_test_labelling(
            label_type=LabelTypes.REMAINING_TIME.value)
        classifier = create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value)
        job = create_test_job(
            encoding=self.encoding,
            labelling=remaining_time,
            predictive_model=classifier)

        _, df = encode_label_logs(
            self.train_log, self.test_log, job,
            get_additional_columns(self.train_log))
        self.assertEqual(df.shape, (2, 14))
Beispiel #10
0
def replay_prediction_calculate(job: Job, log) -> dict:
    """Calculate the prediction for a log coming from the replayers.

    :param job: job dictionary
    :param log: log model
    :return: runtime results
    """
    extra_columns = get_additional_columns(log)
    # test_size=0 keeps every trace; the split call is used for conversion only.
    frame, _ = train_test_split(log, test_size=0, shuffle=False)
    frame, _ = encode_label_logs(frame, EventLog(), job, extra_columns)
    predict = MODEL[job.predictive_model.predictive_model][ModelActions.PREDICT.value]
    results = predict(job, frame)
    logger.info("End {} job {}, {} . Results {}".format(
        'runtime', job.predictive_model.predictive_model, get_run(job), results))
    return results
    def test_attribute_number(self):
        """ATTRIBUTE_NUMBER labelling on 'AMOUNT' adds one extra label column."""
        amount_label = create_test_labelling(
            label_type=LabelTypes.ATTRIBUTE_NUMBER.value,
            attribute_name='AMOUNT')
        classifier = create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value)
        job = create_test_job(
            encoding=self.encoding,
            labelling=amount_label,
            predictive_model=classifier)

        # The test log is deliberately used for both sides here.
        _, df = encode_label_logs(
            self.test_log, self.test_log, job,
            get_additional_columns(self.test_log))
        self.assertEqual(df.shape, (2, 15))
 def setUp(self):
     """Fixtures for complex-encoding tests: logs, extra columns and two encodings."""
     self.train_log = get_log(create_test_log(
         log_name=general_example_train_filename,
         log_path=general_example_train_filepath))
     self.test_log = get_log(create_test_log(
         log_name=general_example_test_filename,
         log_path=general_example_test_filepath))
     self.add_col = get_additional_columns(self.train_log)
     # Plain complex encoding with prefix length 2.
     self.encoding = create_test_encoding(
         value_encoding=ValueEncodings.COMPLEX.value,
         add_elapsed_time=True,
         task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
         prefix_length=2)
     # Same encoding padded out to prefix length 10.
     self.encodingPadding = create_test_encoding(
         value_encoding=ValueEncodings.COMPLEX.value,
         add_elapsed_time=True,
         task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
         prefix_length=10,
         padding=True)
    def test_no_label_zero_padding(self):
        """With NO_LABEL the add_* flags should have no effect on the frame."""
        no_label = create_test_labelling(label_type=LabelTypes.NO_LABEL.value)
        padded_encoding = create_test_encoding(
            value_encoding=ValueEncodings.COMPLEX.value,
            add_elapsed_time=True,
            add_remaining_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=10,
            padding=True)
        job = create_test_job(
            encoding=padded_encoding,
            labelling=no_label,
            predictive_model=create_test_predictive_model(
                predictive_model=PredictiveModels.CLASSIFICATION.value))

        _, df = encode_label_logs(
            self.train_log, self.test_log, job,
            get_additional_columns(self.train_log))
        self.assertEqual(df.shape, (2, 52))
    def test_add_new_traces(self):
        """add_new_traces should append a 'new_traces' column of zero counts."""
        remaining_time = create_test_labelling(
            label_type=LabelTypes.REMAINING_TIME.value)
        traces_encoding = create_test_encoding(
            value_encoding=ValueEncodings.COMPLEX.value,
            prefix_length=2,
            add_new_traces=True,
            add_elapsed_time=True)
        job = create_test_job(
            encoding=traces_encoding,
            labelling=remaining_time,
            predictive_model=create_test_predictive_model(
                predictive_model=PredictiveModels.CLASSIFICATION.value))

        _, df = encode_label_logs(
            self.train_log, self.test_log, job,
            get_additional_columns(self.train_log))
        self.assertEqual(df.shape, (2, 15))
        self.assertTrue('new_traces' in df.columns.values.tolist())
        self.assertListEqual(df['new_traces'].tolist(), [0, 0])
    def test_next_activity_zero_padding_elapsed_time(self):
        """Padded next-activity encoding should include an 'elapsed_time' column."""
        next_activity = create_test_labelling(
            label_type=LabelTypes.NEXT_ACTIVITY.value)
        padded_encoding = create_test_encoding(
            value_encoding=ValueEncodings.COMPLEX.value,
            add_elapsed_time=True,
            add_remaining_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=10,
            padding=True)
        job = create_test_job(
            encoding=padded_encoding,
            labelling=next_activity,
            predictive_model=create_test_predictive_model(
                predictive_model=PredictiveModels.CLASSIFICATION.value))

        _, df = encode_label_logs(
            self.train_log, self.test_log, job,
            get_additional_columns(self.train_log))
        self.assertEqual(df.shape, (2, 55))
        self.assertTrue('elapsed_time' in df.columns.values.tolist())
    def test_label_remaining_time_with_elapsed_time_custom_threshold(self):
        """Remaining-time labelling with a custom threshold keeps the padded width."""
        thresholded_label = create_test_labelling(
            label_type=LabelTypes.REMAINING_TIME.value,
            threshold_type=ThresholdTypes.THRESHOLD_CUSTOM.value,
            threshold=40000)
        padded_encoding = create_test_encoding(
            value_encoding=ValueEncodings.COMPLEX.value,
            add_elapsed_time=True,
            add_remaining_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=10,
            padding=True)
        job = create_test_job(
            encoding=padded_encoding,
            labelling=thresholded_label,
            predictive_model=create_test_predictive_model(
                predictive_model=PredictiveModels.CLASSIFICATION.value))

        # NOTE(review): additional columns come from the test log here,
        # unlike the sibling tests that use the train log — confirm intentional.
        _, df = encode_label_logs(
            self.train_log, self.test_log, job,
            get_additional_columns(self.test_log))
        self.assertEqual(df.shape, (2, 55))
 def setUp(self):
     """Load the repair-example log plus a NO_LABEL label and its extra columns."""
     self.log = get_log("cache/log_cache/repairExample.xes")
     self.add_col = get_additional_columns(self.log)
     self.label = LabelContainer(LabelTypes.NO_LABEL.value)