Exemple #1
0
 def setUp(self):
     """Create the three jobs used as fixtures by the tests.

     In order: a default test job, a test job whose job type is ``'asdf'``,
     and a prediction job built directly through ``Job.objects.create`` with
     a fresh test split and neither encoding nor labelling.
     """
     create_test_job()
     create_test_job(job_type='asdf')

     bare_split = create_test_split()
     Job.objects.create(
         type=JobTypes.PREDICTION.value,
         split=bare_split,
         encoding=None,
         labelling=None,
     )
 def test_split_avoid_duplication(self):
     """Two jobs built from the same split must end up with the same split id.

     Each job is passed through ``get_encoded_logs`` before its split id is
     read, so the comparison is made after the logs have been encoded.
     """
     shared_split = create_test_split(
         split_type=SplitTypes.SPLIT_SINGLE.value,
         split_ordering_method=SplitOrderingMethods.SPLIT_SEQUENTIAL.value,
         test_size=0.2,
         original_log=self.log)

     seen_split_ids = []
     for _ in range(2):
         job = create_test_job(
             split=shared_split,
             encoding=self.encoding,
             labelling=self.labelling,
             clustering=None,
             create_models=False,
             predictive_model=self.predictive_model,
             job_type=JobTypes.PREDICTION.value,
             hyperparameter_optimizer=None,
             incremental_train=None)
         # The encoded frames are not inspected; only the job's split matters.
         _train_df, _test_df = get_encoded_logs(job)
         seen_split_ids.append(job.split.id)

     first_id, second_id = seen_split_ids
     self.assertEqual(first_id, second_id)
    def test_attribute_number(self):
        """Boolean encoding (prefix length 2) with an ATTRIBUTE_NUMBER label.

        Encodes ``self.test_log`` against itself, labelling on the 'AMOUNT'
        attribute, then checks the shape of the second returned frame and the
        full rows for traces '5' and '4'.
        """
        job = create_test_job(
            encoding=create_test_encoding(
                value_encoding=ValueEncodings.BOOLEAN.value,
                prefix_length=2,
                task_generation_type=TaskGenerationTypes.ONLY_THIS.value),
            labelling=create_test_labelling(
                label_type=LabelTypes.ATTRIBUTE_NUMBER.value,
                attribute_name='AMOUNT'),
            predictive_model=create_test_predictive_model(
                predictive_model=PredictiveModels.CLASSIFICATION.value))

        _, encoded = encode_label_logs(self.test_log, self.test_log, job)

        self.assertEqual(encoded.shape, (2, 9))

        expected_rows = (
            ('5', ['5', True, True, False, False, False, False, False, False]),
            ('4', ['4', True, False, True, False, False, False, False, True]),
        )
        for trace_id, expected in expected_rows:
            matching = encoded[encoded.trace_id == trace_id]
            self.assertListEqual(matching.iloc[0].values.tolist(), expected)
    def test_next_activity_zero_padding_elapsed_time(self):
        """Boolean encoding (prefix length 3) with elapsed time, next-activity label.

        Checks the shape of the second frame returned by
        ``encode_label_logs``, that it has an ``elapsed_time`` column, and the
        full rows for traces '5' and '4'.
        """
        job = create_test_job(
            labelling=create_test_labelling(
                label_type=LabelTypes.NEXT_ACTIVITY.value),
            encoding=create_test_encoding(
                value_encoding=ValueEncodings.BOOLEAN.value,
                add_elapsed_time=True,
                task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
                prefix_length=3),
            predictive_model=create_test_predictive_model(
                predictive_model=PredictiveModels.CLASSIFICATION.value))

        _, encoded = encode_label_logs(self.test_log, self.test_log, job)

        self.assertEqual(encoded.shape, (2, 10))
        self.assertIn('elapsed_time', encoded.columns.values.tolist())

        expected_rows = (
            ('5', ['5', True, True, True, False, False, False, False,
                   181200.0, 'decide']),
            ('4', ['4', True, False, True, False, False, False, True,
                   171660.0, 'decide']),
        )
        for trace_id, expected in expected_rows:
            actual = encoded[encoded.trace_id == trace_id].iloc[0]
            self.assertListEqual(actual.values.tolist(), expected)
 def test_next_activity_kmeans(self):
     """Check the metrics of a KMEANS-clustered random forest on next activity.

     Builds a classification job on the ``repair_example`` split with a
     padded prefix length of 8, runs ``calculate`` and compares every
     returned metric except ``elapsed_time`` against fixed expected values.
     """
     # unittest reads ``maxDiff`` (camelCase); the previous ``max_diff``
     # assignment was a no-op and left failure diffs truncated.
     self.maxDiff = None
     job = create_test_job(
         clustering=create_test_clustering(
             clustering_type=ClusteringMethods.KMEANS.value),
         split=repair_example(),
         encoding=create_test_encoding(prefix_length=8, padding=True),
         labelling=create_test_labelling(
             label_type=LabelTypes.NEXT_ACTIVITY.value),
         predictive_model=create_test_predictive_model(
             predictive_model=PredictiveModels.CLASSIFICATION.value,
             prediction_method=ClassificationMethods.RANDOM_FOREST.value))
     result, _ = calculate(job)
     # elapsed_time is excluded from the comparison below.
     del result['elapsed_time']
     self.assertDictEqual(
         result, {
             'f1score': 0.54239884582595577,
             'acc': 0.80995475113122173,
             'true_positive': '--',
             'true_negative': '--',
             'false_negative': '--',
             'false_positive': '--',
             'precision': 0.62344720496894401,
             'recall': 0.5224945442336747,
             'auc': 0.4730604801339352
         })
 def test_class_no_cluster(self):
     """Check the metrics of an unclustered random forest classification job.

     Builds a job on the ``repair_example`` split with a padded prefix length
     of 5 and elapsed time enabled, runs ``calculate`` and compares every
     returned metric except ``elapsed_time`` against fixed expected values.
     """
     # unittest reads ``maxDiff`` (camelCase); the previous ``max_diff``
     # assignment was a no-op and left failure diffs truncated.
     self.maxDiff = None
     job = create_test_job(
         clustering=create_test_clustering(
             clustering_type=ClusteringMethods.NO_CLUSTER.value),
         split=repair_example(),
         encoding=create_test_encoding(prefix_length=5,
                                       padding=True,
                                       add_elapsed_time=True),
         predictive_model=create_test_predictive_model(
             predictive_model=PredictiveModels.CLASSIFICATION.value,
             prediction_method=ClassificationMethods.RANDOM_FOREST.value))
     result, _ = calculate(job)
     # elapsed_time is excluded from the comparison below.
     del result['elapsed_time']
     self.assertDictEqual(
         result, {
             'f1score': 1.0,
             'acc': 1.0,
             'true_positive': '--',
             'true_negative': '--',
             'false_negative': '--',
             'false_positive': '--',
             'precision': 1.0,
             'recall': 1.0,
             'auc': 0.0
         })
Exemple #7
0
    def test_prediction_task(self):
        """``prediction_task`` on a default job marks it completed with an evaluation.

        The job is reloaded after the task runs; its status must be
        ``'completed'`` and its evaluation must not equal an empty dict.
        """
        default_job = create_test_job()
        prediction_task(default_job.id)

        # Reload so the assertions look at the stored state of the job.
        default_job.refresh_from_db()

        self.assertEqual('completed', default_job.status)
        self.assertNotEqual({}, default_job.evaluation)
    def test_no_exceptions(self):
        """Run ``calculate`` for every encoding/padding/method/label combination.

        The combinations are the Cartesian product of all ``ValueEncodings``
        values, both padding settings, all ``ClassificationMethods`` values
        and all ``LabelTypes`` values. There are no assertions: the test
        passes as long as no combination raises.
        """
        value_encodings = [member.value for member in ValueEncodings]
        padding_options = [True, False]
        classification_methods = [
            member.value for member in ClassificationMethods
        ]
        label_types = [member.value for member in LabelTypes]

        combinations = itertools.product(
            value_encodings, padding_options, classification_methods,
            label_types)

        for encoding, padding, method, label in combinations:
            print(encoding, padding, method, label)

            # NOTE(review): this branch does nothing, so the 'nn' combinations
            # it singles out are still executed below. `continue` may have
            # been intended -- confirm before changing.
            unpadded_or_string_label = (
                not padding or label == LabelTypes.ATTRIBUTE_STRING.value)
            if method == 'nn' and unpadded_or_string_label:
                pass

            job = create_test_job(
                predictive_model=create_test_predictive_model(
                    prediction_method=method),
                encoding=create_test_encoding(value_encoding=encoding,
                                              padding=padding),
                labelling=create_test_labelling(label_type=label))
            # with HidePrints():
            calculate(job)
Exemple #9
0
    def test_next_activity_zero_padding_elapsed_time(self):
        """Simple-index encoding (prefix length 10, padded) with elapsed time.

        Encodes ``self.test_log`` against itself with a next-activity label,
        then checks the shape of the first returned frame, the presence of an
        ``elapsed_time`` column and the full rows for traces '5' and '4'.
        """
        labelling = create_test_labelling(
            label_type=LabelTypes.NEXT_ACTIVITY.value)
        encoding = create_test_encoding(
            value_encoding=ValueEncodings.SIMPLE_INDEX.value,
            add_elapsed_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=10,
            padding=True)

        df, _ = encode_label_logs(
            self.test_log, self.test_log,
            create_test_job(
                encoding=encoding,
                labelling=labelling,
                predictive_model=create_test_predictive_model(
                    predictive_model=PredictiveModels.CLASSIFICATION.value)))
        self.assertEqual(df.shape, (2, 13))
        self.assertIn('elapsed_time', df.columns.values.tolist())
        trace_5 = df[df.trace_id == '5'].iloc[0].values.tolist()
        # The frame has 13 columns, so every row has 13 values. The expected
        # list previously had only 12 entries (nine prefix values instead of
        # ten) and could never match.
        # NOTE(review): the missing entry is presumed to be a padding zero,
        # mirroring the five trailing zeros of trace '4' -- confirm against
        # the encoder's actual output.
        self.assertListEqual(
            trace_5,
            ['5', 1, 3, 2, 2, 2, 0, 0, 0, 0, 0, 1296240.0, 2])
        trace_4 = df[df.trace_id == '4'].iloc[0].values.tolist()
        self.assertListEqual(trace_4, [
            '4', 52903968, 32171502, 17803069, 1149821, 72523760, 0, 0, 0, 0,
            0, 520920.0, 0
        ])
Exemple #10
0
    def test_label_remaining_time_with_elapsed_time_custom_threshold(self):
        """Simple-index encoding with a remaining-time label and custom threshold.

        Uses prefix length 2 with elapsed and remaining time enabled and a
        custom threshold of 40000, then checks the shape, column names and
        the rows for traces '5' and '4' of the first returned frame.
        """
        job = create_test_job(
            encoding=create_test_encoding(
                value_encoding=ValueEncodings.SIMPLE_INDEX.value,
                add_elapsed_time=True,
                add_remaining_time=True,
                task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
                prefix_length=2),
            labelling=create_test_labelling(
                label_type=LabelTypes.REMAINING_TIME.value,
                threshold_type=ThresholdTypes.THRESHOLD_CUSTOM.value,
                threshold=40000),
            predictive_model=create_test_predictive_model(
                predictive_model=PredictiveModels.CLASSIFICATION.value))

        encoded, _ = encode_label_logs(self.test_log, self.test_log, job)

        self.assertEqual(encoded.shape, (2, 5))
        self.assertListEqual(
            encoded.columns.values.tolist(),
            ['trace_id', 'prefix_1', 'prefix_2', 'elapsed_time', 'label'])

        expected_rows = (
            ('5', ['5', 1, 2, 0, 0]),
            ('4', ['4', 1, 1, 0, 0]),
        )
        for trace_id, expected in expected_rows:
            actual = encoded[encoded.trace_id == trace_id].iloc[0]
            self.assertListEqual(actual.values.tolist(), expected)
    def test_get_encoded_logs_Loaded_cache(self):
        """``get_encoded_logs`` returns the same frames after cache pickles are removed.

        The logs are first fetched with the cache flag set. The train pickle
        and then the test pickle of the first ``LoadedLog`` for the job's
        split are deleted from ``cache/loaded_log_cache/``; after each
        deletion the logs are fetched again and compared with the first
        result.
        """
        job = create_test_job()

        reference = get_encoded_logs(job, True)

        loaded_log = LoadedLog.objects.filter(split=job.split)[0]
        pickled_paths = (loaded_log.train_log_path, loaded_log.test_log_path)

        for log_path in pickled_paths:
            os.remove(
                'cache/loaded_log_cache/' + get_digested(log_path) + '.pickle')

            refetched = get_encoded_logs(job, True)

            for index in (0, 1):
                assert_frame_equal(reference[index], refetched[index])
Exemple #12
0
    def test_default(self):
        """A freshly created test job has status 'created', both dates set and no evaluation."""
        fresh_job = create_test_job()

        self.assertEqual('created', fresh_job.status)
        for timestamp in (fresh_job.created_date, fresh_job.modified_date):
            self.assertIsNotNone(timestamp)
        self.assertIsNone(fresh_job.evaluation)
    def test_label_remaining_time_with_elapsed_time_custom_threshold(self):
        """Boolean encoding with a remaining-time label and custom threshold.

        Uses prefix length 3 with elapsed and remaining time enabled and a
        custom threshold of 40000, then checks the shape and the rows for
        traces '5' and '4' of the second returned frame.
        """
        job = create_test_job(
            labelling=create_test_labelling(
                label_type=LabelTypes.REMAINING_TIME.value,
                threshold_type=ThresholdTypes.THRESHOLD_CUSTOM.value,
                threshold=40000),
            encoding=create_test_encoding(
                value_encoding=ValueEncodings.BOOLEAN.value,
                prefix_length=3,
                add_elapsed_time=True,
                add_remaining_time=True,
                task_generation_type=TaskGenerationTypes.ONLY_THIS.value),
            predictive_model=create_test_predictive_model(
                predictive_model=PredictiveModels.CLASSIFICATION.value))

        _, encoded = encode_label_logs(self.test_log, self.test_log, job)

        self.assertEqual(encoded.shape, (2, 10))

        expected_rows = (
            ('5', ['5', True, True, True, False, False, False, False,
                   181200.0, False]),
            ('4', ['4', True, False, True, False, False, False, True,
                   171660.0, False]),
        )
        for trace_id, expected in expected_rows:
            actual = encoded[encoded.trace_id == trace_id].iloc[0]
            self.assertListEqual(actual.values.tolist(), expected)
    def test_get_labelled_logs(self):
        """``get_labelled_logs`` returns the same two frames as ``get_encoded_logs``."""
        job = create_test_job()

        encoded = get_encoded_logs(job)
        labelled = get_labelled_logs(job)

        # Compare the first and second frame of each result.
        for index in (0, 1):
            assert_frame_equal(encoded[index], labelled[index])
 def test_update_nb(self):
     """Incrementally training a Hoeffding tree job yields the expected metrics.

     A first job is created with ``create_models=True`` and evaluated. A
     second job reuses its encoding and clustering and names the first job
     as ``incremental_train``. Both results, with ``elapsed_time`` removed,
     must equal the same fixed metrics.
     """
     base_job = create_test_job(
         predictive_model=create_test_predictive_model(
             prediction_method=ClassificationMethods.HOEFFDING_TREE.value),
         labelling=create_test_labelling(
             label_type=LabelTypes.ATTRIBUTE_STRING.value,
             attribute_name='concept:name'),
         clustering=create_test_clustering(
             clustering_type=ClusteringMethods.NO_CLUSTER.value),
         create_models=True)
     base_result, _ = calculate(base_job)

     incremental_job = create_test_job(
         predictive_model=create_test_predictive_model(
             prediction_method=ClassificationMethods.HOEFFDING_TREE.value),
         encoding=base_job.encoding,
         labelling=create_test_labelling(
             label_type=LabelTypes.ATTRIBUTE_STRING.value,
             attribute_name='concept:name'),
         clustering=base_job.clustering,
         incremental_train=base_job)
     incremental_result, _ = calculate(incremental_job)

     # elapsed_time is excluded from both comparisons.
     del base_result['elapsed_time']
     del incremental_result['elapsed_time']

     expected_metrics = {
         'f1score': 0.0,
         'acc': 0.0,
         'precision': 0.0,
         'recall': 0.0,
         'true_positive': 0,
         'true_negative': 0,
         'false_negative': 2,
         'false_positive': 0,
         'auc': 0.0
     }
     self.assertDictEqual(base_result, expected_metrics)
     self.assertDictEqual(incremental_result, expected_metrics)
Exemple #16
0
    def test_explain(self):
        """``explain`` and ``shap_temporal_stability`` both return plain dicts.

        Trains a decision tree on the explainability train/test logs, gets or
        creates a SHAP ``Explanation`` for the job, and runs both explanation
        functions for target '2_101'. The encoded logs are fetched again
        between the two calls.
        """
        train_log = create_test_log(
            log_name='train_explainability.xes',
            log_path='cache/log_cache/test_logs/train_explainability.xes')
        test_log = create_test_log(
            log_name='test_explainability.xes',
            log_path='cache/log_cache/test_logs/test_explainability.xes')
        split = create_test_split(
            split_type=SplitTypes.SPLIT_DOUBLE.value,
            split_ordering_method=SplitOrderingMethods.SPLIT_SEQUENTIAL.value,
            test_size=0.2,
            original_log=None,
            train_log=train_log,
            test_log=test_log)

        predictive_model = create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value,
            prediction_method=ClassificationMethods.DECISION_TREE.value)

        encoding = create_test_encoding(
            prefix_length=4,
            padding=True,
            value_encoding=ValueEncodings.SIMPLE_INDEX.value)
        labelling = create_test_labelling(
            label_type=LabelTypes.ATTRIBUTE_STRING.value,
            attribute_name='label')
        job = create_test_job(
            split=split,
            encoding=encoding,
            labelling=labelling,
            clustering=None,
            create_models=True,
            predictive_model=predictive_model,
            job_type=JobTypes.PREDICTION.value,
            hyperparameter_optimizer=None,
            incremental_train=None)

        prediction_task(job.id, do_publish_result=False)
        job.refresh_from_db()

        # get_or_create returns an (object, created) pair; only the object is used.
        exp, _ = Explanation.objects.get_or_create(
            type=ExplanationTypes.SHAP.value,
            split=split,
            predictive_model=predictive_model,
            job=job,
            results={})

        explanation_target = '2_101'
        prefix_target = 'prefix_1'

        training_df, test_df = get_encoded_logs(job)
        explanation = explain(
            exp, training_df, test_df, explanation_target, prefix_target)

        training_df, test_df = get_encoded_logs(job)
        explanation_temp = shap_temporal_stability(
            exp, training_df, test_df, explanation_target)

        # Exact type check: the result must be a dict itself, not a subclass.
        self.assertIs(type(explanation), dict)
        self.assertIs(type(explanation_temp), dict)
    def test_str(self):
        """The string form of a random forest job has the same length as a reference string.

        Only the lengths are compared, not the contents.
        """
        reference = "{created_date: 2019-10-01 09:38:35.245361+00:00, modified_date: 2019-10-01 09:38:35.245655+00:00, error: , status: created, type: prediction, create_models: False, split: {'id': 1, 'type': 'single', 'test_size': 0.2, 'splitting_method': 'sequential', 'original_log_path': 'cache/log_cache/test_logs/general_example.xes'}, encoding: {'data_encoding': 'label_encoder', 'value_encoding': 'simpleIndex', 'add_elapsed_time': False, 'add_remaining_time': False, 'add_executed_events': False, 'add_resources_used': False, 'add_new_traces': False, 'features': {}, 'prefix_length': 1, 'padding': False, 'task_generation_type': 'only'}, labelling: {'type': 'next_activity', 'attribute_name': None, 'threshold_type': 'threshold_mean', 'threshold': 0.0, 'results': {}}, clustering: {'clustering_method': 'noCluster'}, predictive_model: {'n_estimators': 10, 'max_depth': None, 'max_features': 'auto'}, evaluation: [None], hyperparameter_optimizer: [None], incremental_train: [None]}"

        forest_model = create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value,
            prediction_method=ClassificationMethods.RANDOM_FOREST.value)
        job = create_test_job(predictive_model=forest_model)

        self.assertEqual(len(str(job)), len(reference))
Exemple #18
0
    def test_create_models_config_missing(self):
        """``prediction_task`` still completes after ``create_models`` is deleted from the job.

        The attribute is removed from the job instance and the job is saved
        before the task runs; the reloaded job must be 'completed' with an
        evaluation that is not an empty dict.
        """
        stripped_job = create_test_job()
        del stripped_job.create_models  # TODO fixme should we add this field?
        stripped_job.save()

        prediction_task(stripped_job.id)
        stripped_job.refresh_from_db()

        self.assertEqual('completed', stripped_job.status)
        self.assertNotEqual({}, stripped_job.evaluation)
 def test_shape_training(self):
     """The encoded training frame is (4, 4) and the encoded test frame is (2, 4)."""
     classifier = create_test_predictive_model(
         predictive_model=PredictiveModels.CLASSIFICATION.value)
     job = create_test_job(
         encoding=self.encoding,
         labelling=self.labelling,
         predictive_model=classifier)

     training_df, test_df = encode_label_logs(
         self.training_log, self.test_log, job)

     self.assert_shape(training_df, (4, 4))
     self.assert_shape(test_df, (2, 4))
 def test_prediction_task_save_model_clustering(self):
     """A KMEANS job with ``create_models=True`` ends up with both model paths set.

     After ``prediction_task`` runs, the reloaded job must be 'completed'
     and both its predictive model and its clustering must have a
     ``model_path`` that is not None.
     """
     kmeans = create_test_clustering(
         clustering_type=ClusteringMethods.KMEANS.value)
     job = create_test_job(create_models=True, clustering=kmeans)

     prediction_task(job.id)
     job.refresh_from_db()

     self.assertEqual('completed', job.status)
     self.assertIsNotNone(job.predictive_model.model_path)
     self.assertIsNotNone(job.clustering.model_path)
Exemple #21
0
 def test_next_activity_DecisionTree(self):
     """An unclustered decision tree on next activity yields ``self.results3()``."""
     tree_model = create_test_predictive_model(
         prediction_method=ClassificationMethods.DECISION_TREE.value)
     next_activity = create_test_labelling(
         label_type=LabelTypes.NEXT_ACTIVITY.value)
     no_cluster = create_test_clustering(
         clustering_type=ClusteringMethods.NO_CLUSTER.value)
     job = create_test_job(
         predictive_model=tree_model,
         labelling=next_activity,
         clustering=no_cluster)

     metrics, _ = calculate(job)

     self.assertDictEqual(metrics, self.results3())
Exemple #22
0
    def get_classification_job(predictive_model: str, prediction_method: str, metric: HyperOptLosses = HyperOptLosses.ACC.value):
        """Build a test job for the given model with a hyperparameter optimizer attached.

        :param predictive_model: forwarded to ``create_test_predictive_model``
        :param prediction_method: forwarded to ``create_test_predictive_model``
        :param metric: performance metric handed to the hyperparameter
            optimizer; defaults to ``HyperOptLosses.ACC.value``
        :return: the job returned by ``create_test_job``, using an encoding
            with prefix length 8 and padding enabled
        """
        padded_encoding = create_test_encoding(prefix_length=8, padding=True)
        model = create_test_predictive_model(
            predictive_model=predictive_model,
            prediction_method=prediction_method)
        optimizer = create_test_hyperparameter_optimizer(
            performance_metric=metric)

        return create_test_job(
            predictive_model=model,
            encoding=padded_encoding,
            hyperparameter_optimizer=optimizer)
    def test_replay_prediction(self):
        """``replay_prediction_task`` runs on a trained job and the runtime log.

        There are no assertions; the test passes if nothing raises.
        """
        trained_job = create_test_job(create_models=True)
        runtime_log = create_test_log(
            log_name='runtime_example.xes',
            log_path='cache/log_cache/test_logs/runtime_test.xes')
        # The log is loaded before the prediction task is run.
        parsed_log = get_log(runtime_log)

        prediction_task(trained_job.id)
        trained_job.refresh_from_db()

        # The same job is passed for both job arguments.
        replay_prediction_task(trained_job, trained_job, parsed_log)
Exemple #24
0
 def test_class_randomForest(self):
     """An unclustered random forest labelled on 'label' yields ``self.results2()``."""
     forest_model = create_test_predictive_model(
         prediction_method=ClassificationMethods.RANDOM_FOREST.value)
     string_label = create_test_labelling(
         label_type=LabelTypes.ATTRIBUTE_STRING.value,
         attribute_name='label')
     no_cluster = create_test_clustering(
         clustering_type=ClusteringMethods.NO_CLUSTER.value)
     job = create_test_job(
         predictive_model=forest_model,
         labelling=string_label,
         clustering=no_cluster)

     metrics, _ = calculate(job)

     self.assertDictEqual(metrics, self.results2())
    def test_create_runtime(self):
        """POSTing a job id and split id to ``/runtime/prediction/`` answers 201."""
        job = create_test_job()
        split = create_test_split()
        payload = {
            'jobId': job.id,
            'splitId': split.id,
        }

        response = APIClient().post(
            '/runtime/prediction/', payload, format='json')

        self.assertEqual(201, response.status_code)
    def test_no_label(self):
        """With the NO_LABEL label type the second encoded frame has shape (2, 9)."""
        job = create_test_job(
            encoding=self.encoding,
            labelling=create_test_labelling(
                label_type=LabelTypes.NO_LABEL.value),
            predictive_model=create_test_predictive_model(
                predictive_model=PredictiveModels.CLASSIFICATION.value))

        _, encoded = encode_label_logs(self.train_log, self.test_log, job)

        self.assertEqual(encoded.shape, (2, 9))
    def test_row_test(self):
        """The encoded test row for trace '4' has prefix_1 == 1, elapsed_time == 0 and label == 0."""
        classifier = create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value)
        job = create_test_job(
            encoding=self.encoding,
            labelling=self.labelling,
            predictive_model=classifier)

        # Only the test frame is inspected here.
        _training_df, test_df = encode_label_logs(
            self.training_log, self.test_log, job)

        is_trace_4 = test_df.trace_id == '4'
        row = test_df[is_trace_4].iloc[0]

        self.assertEqual(1, row.prefix_1)
        self.assertEqual(0, row.elapsed_time)
        self.assertEqual(0, row.label)
    def test_remaining_time(self):
        """With the REMAINING_TIME label type the second encoded frame has shape (2, 11)."""
        job = create_test_job(
            encoding=self.encoding,
            labelling=create_test_labelling(
                label_type=LabelTypes.REMAINING_TIME.value),
            predictive_model=create_test_predictive_model(
                predictive_model=PredictiveModels.CLASSIFICATION.value))

        _, encoded = encode_label_logs(self.train_log, self.test_log, job)

        self.assertEqual(encoded.shape, (2, 11))
    def test_get_encoded_logs_cache(self):
        """``get_encoded_logs`` returns equal frames whether the cache flag is True or False.

        The logs are fetched with the flag set, then unset, then set again;
        the second and third results are each compared with the first.
        """
        job = create_test_job()

        reference = get_encoded_logs(job, True)

        for use_cache in (False, True):
            candidate = get_encoded_logs(job, use_cache)
            for index in (0, 1):
                assert_frame_equal(reference[index], candidate[index])
Exemple #30
0
    def get_regression_job(predictive_model: str, prediction_method: str,
                           metric: HyperOptLosses = HyperOptLosses.ACC.value):
        """Build a remaining-time test job with a hyperparameter optimizer attached.

        :param predictive_model: forwarded to ``create_test_predictive_model``
        :param prediction_method: forwarded to ``create_test_predictive_model``
        :param metric: performance metric handed to the hyperparameter
            optimizer; defaults to ``HyperOptLosses.ACC.value``
        :return: the job returned by ``create_test_job``, using an encoding
            with prefix length 8 and padding enabled, and a REMAINING_TIME
            labelling with threshold type ``ThresholdTypes.NONE``
        """
        padded_encoding = create_test_encoding(prefix_length=8, padding=True)
        model = create_test_predictive_model(
            predictive_model=predictive_model,
            prediction_method=prediction_method)
        optimizer = create_test_hyperparameter_optimizer(
            performance_metric=metric)

        return create_test_job(
            predictive_model=model,
            encoding=padded_encoding,
            labelling=create_test_labelling(
                label_type=LabelTypes.REMAINING_TIME.value,
                threshold_type=ThresholdTypes.NONE.value),
            hyperparameter_optimizer=optimizer)