Example #1
def split_double():
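    # Double split: separate train and test logs built from the general example files.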
    return create_test_split(
        split_type=SplitTypes.SPLIT_DOUBLE.value,
        train_log=create_test_log(log_name=general_example_train_filename,
                                  log_path=general_example_train_filepath),
        test_log=create_test_log(log_name=general_example_test_filename,
                                 log_path=general_example_test_filepath))
Example #2
    def test_explain(self):
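        # Train a decision-tree classifier on a dedicated train/test split, then check that
        # the SHAP explanation for one trace prefix and its temporal-stability variant are dicts.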
        split = create_test_split(
            split_type=SplitTypes.SPLIT_DOUBLE.value,
            split_ordering_method=SplitOrderingMethods.SPLIT_SEQUENTIAL.value,
            test_size=0.2,
            original_log=None,
            train_log=create_test_log(
                log_name='train_explainability.xes',
                log_path='cache/log_cache/test_logs/train_explainability.xes'
            ),
            test_log=create_test_log(
                log_name='test_explainability.xes',
                log_path='cache/log_cache/test_logs/test_explainability.xes'
            )
        )

        predictive_model = create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value,
            prediction_method=ClassificationMethods.DECISION_TREE.value
        )

        job = create_test_job(
            split=split,
            encoding=create_test_encoding(
                prefix_length=4,
                padding=True,
                value_encoding=ValueEncodings.SIMPLE_INDEX.value
            ),
            labelling=create_test_labelling(label_type=LabelTypes.ATTRIBUTE_STRING.value, attribute_name='label'),
            clustering=None,
            create_models=True,
            predictive_model=predictive_model,
            job_type=JobTypes.PREDICTION.value,
            hyperparameter_optimizer=None,
            incremental_train=None
        )

        prediction_task(job.id, do_publish_result=False)
        job.refresh_from_db()

        exp = Explanation.objects.get_or_create(
            type=ExplanationTypes.SHAP.value,
            split=split,
            predictive_model=predictive_model,
            job=job,
            results={}
        )[0]
        training_df_old, test_df_old = get_encoded_logs(job)

        explanation_target = '2_101'
        prefix_target = 'prefix_1'

        explanation = explain(exp, training_df_old, test_df_old, explanation_target, prefix_target)
        training_df_old, test_df_old = get_encoded_logs(job)
        explanation_temp = shap_temporal_stability(exp, training_df_old, test_df_old, explanation_target)

        self.assertTrue(type(explanation) is dict)
        self.assertTrue(type(explanation_temp) is dict)
Example #3
 def test_multiple_unique_events(self):
     test_log = get_log(
         create_test_log(log_name=general_example_test_filename,
                         log_path=general_example_test_filepath_xes))
     training_log = get_log(
         create_test_log(log_path=general_example_train_filepath,
                         log_name=general_example_train_filename))
     events = unique_events2(training_log, test_log)
     self.assertEqual(8, len(events))
Example #4
 def setUp(self):
     self.train_log = get_log(create_test_log(log_name=general_example_train_filename,
                                              log_path=general_example_train_filepath))
     self.test_log = get_log(create_test_log(log_name=general_example_test_filename,
                                             log_path=general_example_test_filepath))
     self.encoding = create_test_encoding(
         value_encoding=ValueEncodings.SIMPLE_INDEX.value,
         add_elapsed_time=True,
         task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
         prefix_length=1)
Example #5
    def setUp(self):
        self.train_log = get_log(create_test_log(log_name=general_example_train_filename,
                                                 log_path=general_example_train_filepath))
        self.train_event_names = unique_events(self.train_log)
        self.train_add_col = get_additional_columns(self.train_log)

        self.test_log = get_log(create_test_log(log_name=general_example_test_filename,
                                                log_path=general_example_test_filepath))
        self.test_event_names = unique_events(self.test_log)
        self.test_add_col = get_additional_columns(self.test_log)
Example #6
 def setUp(self):
     test_log = get_log(
         create_test_log(log_name=general_example_test_filename,
                         log_path=general_example_test_filepath))
     training_log = get_log(
         create_test_log(log_name=general_example_train_filename,
                         log_path=general_example_train_filepath))
     self.training_df, self.test_df = encode_label_logs(
         training_log, test_log,
         create_test_job(
             encoding=create_test_encoding(
                 value_encoding=ValueEncodings.BOOLEAN.value,
                 add_elapsed_time=True),
             predictive_model=create_test_predictive_model(
                 predictive_model=PredictiveModels.CLASSIFICATION.value)))
Example #7
 def test_global_event_attributes(self):
     log = get_log(
         create_test_log(log_name=general_example_test_filename,
                         log_path=general_example_test_filepath_xes))
     attributes = get_additional_columns(log)
     self.assertListEqual(attributes['event_attributes'],
                          ['Activity', 'Costs', 'Resource', 'org:resource'])
Example #8
 def test_hyperopt(self):
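     # Run a full prediction job with Hyperopt hyperparameter optimisation (accuracy metric,
     # 2 evaluations) and check that the stored random-forest config differs from the defaults.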
     job = Job.objects.create(
         split=create_test_split(split_type=SplitTypes.SPLIT_SINGLE.value,
                                 original_log=create_test_log(
                                     log_name=general_example_filename,
                                     log_path=general_example_filepath)),
         encoding=create_test_encoding(
             value_encoding=ValueEncodings.SIMPLE_INDEX.value,
             prefix_length=3,
             padding=False),
         labelling=create_test_labelling(
             label_type=LabelTypes.REMAINING_TIME.value),
         clustering=create_test_clustering(
             clustering_type=ClusteringMethods.KMEANS.value),
         predictive_model=create_test_predictive_model(
             predictive_model=PredictiveModels.CLASSIFICATION.value,
             prediction_method=ClassificationMethods.RANDOM_FOREST.value),
         hyperparameter_optimizer=create_test_hyperparameter_optimizer(
             hyperoptim_type=HyperparameterOptimizationMethods.HYPEROPT.value,
             performance_metric=HyperOptLosses.ACC.value,
             max_evals=2))
     prediction_task(job.pk)
     job = Job.objects.get(pk=job.pk)
     self.assertNotEqual(
         classification_random_forest(),
         job.predictive_model.classification.__getattribute__(
             ClassificationMethods.RANDOM_FOREST.value.lower()).to_dict())
Example #9
 def test_event_attributes(self):
     log = get_log(
         create_test_log(log_name=general_example_test_filename,
                         log_path=general_example_test_filepath))
     attributes = get_event_attributes(log)
     self.assertListEqual(attributes,
                          ['Activity', 'Costs', 'Resource', 'org:resource'])
Example #10
 def test_trace_attributes(self):
     self.log = get_log(create_test_log(log_name=financial_log_filename,
                                        log_path=financial_log_filepath))
     result = trace_attributes(self.log)
     self.assertEqual(2, len(result))
     self.assertDictEqual({'name': 'AMOUNT_REQ', 'type': 'number', 'example': '20000'},
                          result[0])
     self.assertDictEqual({'name': 'REG_DATE', 'type': 'string', 'example': '2011-10-01 00:38:44.546000+02:00'},
                          result[1])
Example #11
    def setUp(self):
        test_log = get_log(
            create_test_log(log_name=general_example_test_filename,
                            log_path=general_example_test_filepath_xes))
        training_log = get_log(
            create_test_log(log_name=general_example_train_filename,
                            log_path=general_example_train_filepath))
        self.encoding = create_test_encoding(
            value_encoding=ValueEncodings.FREQUENCY.value,
            add_elapsed_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=1)
        self.labelling = create_test_labelling(
            label_type=LabelTypes.REMAINING_TIME.value)

        self.training_df, self.test_df = encode_label_logs(
            training_log, test_log,
            create_test_job(encoding=self.encoding, labelling=self.labelling))
Example #12
def split_single(
        split_ordering: str = SplitOrderingMethods.SPLIT_SEQUENTIAL.value,
        test_size: float = 0.2):
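    # Single split over the general example log; ordering method and test size are configurable.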
    return create_test_split(split_type=SplitTypes.SPLIT_SINGLE.value,
                             split_ordering_method=split_ordering,
                             test_size=test_size,
                             original_log=create_test_log(
                                 log_name=general_example_filename,
                                 log_path=general_example_filepath))
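A fixture like this is normally fed straight into the job factory and executed, as the later examples do. Below is a minimal sketch of that pattern, assuming the same test helpers (create_test_job, create_test_encoding, create_test_labelling, create_test_predictive_model, prediction_task) that the examples in this listing import; the composition itself is illustrative and not taken from any single example.

# Illustrative sketch: combines the helpers the same way Examples #2, #8 and #30 do.
job = create_test_job(
    split=split_single(test_size=0.3),
    encoding=create_test_encoding(
        value_encoding=ValueEncodings.SIMPLE_INDEX.value,
        prefix_length=3,
        padding=True),
    labelling=create_test_labelling(label_type=LabelTypes.REMAINING_TIME.value),
    predictive_model=create_test_predictive_model(
        predictive_model=PredictiveModels.CLASSIFICATION.value,
        prediction_method=ClassificationMethods.RANDOM_FOREST.value),
    create_models=True)

prediction_task(job.id)   # trains and evaluates the model, as in the test cases above
job.refresh_from_db()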
Example #13
 def setUp(self):
     self.log = get_log(create_test_log(log_name=general_example_test_filename,
                                        log_path=general_example_test_filepath_xes))
     self.labelling = create_test_labelling(label_type=LabelTypes.REMAINING_TIME.value)
     self.encoding = create_test_encoding(
         value_encoding=ValueEncodings.SIMPLE_INDEX.value,
         task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
         add_elapsed_time=True,
         prefix_length=1)
Example #14
    def test_replay_prediction(self):
        job = create_test_job(create_models=True)
        runtime_log = create_test_log(
            log_name='runtime_example.xes',
            log_path='cache/log_cache/test_logs/runtime_test.xes')
        log = get_log(runtime_log)
        prediction_task(job.id)
        job.refresh_from_db()

        replay_prediction_task(job, job, log)
Example #15
 def setUp(self):
     self.train_log = get_log(
         create_test_log(log_name=general_example_train_filename,
                         log_path=general_example_train_filepath))
     self.test_log = get_log(
         create_test_log(log_name=general_example_test_filename,
                         log_path=general_example_test_filepath))
     self.add_col = get_additional_columns(self.train_log)
     self.encoding = create_test_encoding(
         value_encoding=ValueEncodings.COMPLEX.value,
         add_elapsed_time=True,
         task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
         prefix_length=2)
     self.encodingPadding = create_test_encoding(
         value_encoding=ValueEncodings.COMPLEX.value,
         add_elapsed_time=True,
         task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
         prefix_length=10,
         padding=True)
Example #16
 def setUp(self):
     self.log = get_log(create_test_log(log_name=general_example_test_filename,
                                        log_path=general_example_test_filepath))
     self.event_names = unique_events(self.log)
     self.labelling = create_test_labelling(label_type=LabelTypes.REMAINING_TIME.value)
     self.add_col = get_additional_columns(self.log)
     self.encoding = create_test_encoding(
         value_encoding=ValueEncodings.LAST_PAYLOAD.value,
         add_elapsed_time=True,
         task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
         prefix_length=1)
Example #17
 def test_post_split_create_split(self):
     log = create_test_log()
     client = APIClient()
     response = client.post(
         '/splits/', {
             'original_log': log.id,
             'splitting_method': 'sequential',
             'test_size': .2
         })
     self.assertEqual(log.id, response.data['original_log'])
     self.assertEqual('sequential', response.data['splitting_method'])
     self.assertEqual(.2, response.data['test_size'])
Example #18
    def test_runtime(self):
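        # Train a default job, then point its split at the runtime log and execute the runtime task.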
        job = create_test_job(create_models=True)
        runtime_log = create_test_log(
            log_name='runtime_example.xes',
            log_path='cache/log_cache/test_logs/runtime_test.xes')

        prediction_task(job.id)
        job.refresh_from_db()
        job.split = create_test_split(
            split_type=SplitTypes.SPLIT_DOUBLE.value,
            split_ordering_method=SplitOrderingMethods.SPLIT_SEQUENTIAL.value,
            train_log=runtime_log,
            test_log=runtime_log)

        runtime_task(job)
Example #19
    def test_replay(self):
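        # Duplicate the job, attach a double split over the runtime log, and replay it against the original job.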

        job = create_test_job()
        runtime_job = duplicate_orm_row(job)

        runtime_log = create_test_log(
            log_name='runtime_example.xes',
            log_path='cache/log_cache/test_logs/runtime_test.xes')
        runtime_job.split = create_test_split(
            split_type=SplitTypes.SPLIT_DOUBLE.value,
            split_ordering_method=SplitOrderingMethods.SPLIT_SEQUENTIAL.value,
            train_log=runtime_log,
            test_log=runtime_log)

        requests = replay_task(runtime_job, job)
        self.assertEqual(len(requests), 2)
Example #20
    def setUp(self) -> None:
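        # Shared fixtures: general example log, padded simple-index encoding (prefix 4),
        # next-activity labelling and a decision-tree classifier.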
        self.log = create_test_log(
            log_name='general_example.xes',
            log_path='cache/log_cache/test_logs/general_example.xes')

        self.encoding = create_test_encoding(
            prefix_length=4,
            padding=True,
            value_encoding=ValueEncodings.SIMPLE_INDEX.value)

        self.labelling = create_test_labelling(
            label_type=LabelTypes.NEXT_ACTIVITY.value)

        self.predictive_model = create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value,
            prediction_method=ClassificationMethods.DECISION_TREE.value)
Example #21
    def test_eval(self):
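        # Encode the general example log with simple_index (prefix length 12, padded, all-in-one
        # task generation) and verify the dataframe shape, one encoded row and the absence of NaNs.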
        encoding = create_test_encoding(
            value_encoding=ValueEncodings.FREQUENCY.value,
            task_generation_type=TaskGenerationTypes.ALL_IN_ONE.value,
            add_elapsed_time=True,
            prefix_length=12,
            padding=True)
        df = simple_index(
            get_log(create_test_log(log_path=general_example_filepath, log_name=general_example_filename)),
            create_test_labelling(label_type=LabelTypes.REMAINING_TIME.value), encoding)

        self.assertEqual(df.shape, (41, 15))
        row1 = df[df.trace_id == '4'].iloc[4]
        self.assertListEqual(
            ['4', 'register request', 'check ticket', 'examine thoroughly', 'decide', 'reject request', 0, 0, 0,
             0, 0, 0, 0, 520920.0, 0.0], row1.values.tolist())
        self.assertFalse(df.isnull().values.any())
Example #22
    def test_create_multiple(self):
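        # POST a multi-method classification config to /jobs/multiple and expect one job per method.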
        split = create_test_split(original_log=create_test_log())

        client = APIClient()
        response = client.post('/jobs/multiple', {
            'type': 'classification',
            'split_id': split.id,
            'config': {
                'clusterings': ['noCluster'],
                'encodings': ['simpleIndex'],
                'encoding': {
                    'padding': False,
                    'prefix_length': 1,
                    'generation_type': 'only',
                    'add_remaining_time': False,
                    'add_elapsed_time': False,
                    'add_executed_events': False,
                    'add_resources_used': False,
                    'add_new_traces': False,
                    'features': [],
                },
                'create_models': False,
                'methods': ['randomForest', 'decisionTree', 'rnn'],
                'kmeans': {},
                'incremental_train': {
                    'base_model': None,
                },
                'hyperparameter_optimizer': {
                    'algorithm_type': 'tpe',
                    'max_evaluations': 10,
                    'performance_metric': 'rmse',
                    'type': 'none',
                },
                'labelling': {
                    'type': 'next_activity',
                    'attribute_name': '',
                    'threshold_type': 'threshold_mean',
                    'threshold': 0,
                }
            }}, format='json')

        self.assertEqual(3, len(response.data))
Example #23
    def test_replay(self):
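        # Same replay scenario as Example #19, but the runtime job is copied with an explicit
        # Job.objects.create instead of duplicate_orm_row.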

        job = create_test_job()
        # runtime_job = duplicate_orm_row(job)  #todo: replace with simple CREATE
        runtime_job = Job.objects.create(
            created_date=job.created_date,
            modified_date=job.modified_date,
            error=job.error,
            status=job.status,
            type=job.type,
            create_models=job.create_models,
            case_id=job.case_id,
            event_number=job.event_number,
            gold_value=job.gold_value,
            results=job.results,
            parent_job=job.parent_job,
            split=job.split,
            encoding=job.encoding,
            labelling=job.labelling,
            clustering=job.clustering,
            predictive_model=job.predictive_model,
            evaluation=job.evaluation,
            hyperparameter_optimizer=job.hyperparameter_optimizer,
            incremental_train=job.incremental_train)

        runtime_log = create_test_log(
            log_name='runtime_example.xes',
            log_path='cache/log_cache/test_logs/runtime_test.xes')
        runtime_job.split = create_test_split(
            split_type=SplitTypes.SPLIT_DOUBLE.value,
            split_ordering_method=SplitOrderingMethods.SPLIT_SEQUENTIAL.value,
            train_log=runtime_log,
            test_log=runtime_log)

        requests = replay_task(runtime_job, job)
        self.assertEqual(len(requests), 6)
Example #24
 def test_labelling_job_creation(self):
     client = APIClient()
     response = client.post(
         '/jobs/multiple',
         self.job_label(create_test_split(original_log=create_test_log()).id),
         format='json')
     self.assertEqual(status.HTTP_201_CREATED, response.status_code)
     self.assertEqual(len(response.data), 1)
     self.assertEqual(response.data[0]['type'], 'labelling')
     self.assertEqual(response.data[0]['config']['encoding']['value_encoding'], 'simpleIndex')
     self.assertEqual(response.data[0]['config']['encoding']['prefix_length'], 3)
     self.assertEqual(response.data[0]['config']['labelling'],
                      {'type': 'remaining_time', 'attribute_name': None,
                       'threshold_type': ThresholdTypes.THRESHOLD_MEAN.value,
                       'threshold': 0, 'results': {}})
     self.assertEqual(response.data[0]['config']['encoding']['padding'], True)
     self.assertEqual(response.data[0]['status'], 'created')
Example #25
def repair_example():
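    # Single split over the repair example log.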
    return create_test_split(split_type=SplitTypes.SPLIT_SINGLE.value,
                             original_log=create_test_log(
                                 log_name='repair_example.xes',
                                 log_path=repair_example_filepath))
Example #26
def split_single():
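    # Single split over the general example log with default split settings.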
    return create_test_split(split_type=SplitTypes.SPLIT_SINGLE.value,
                             original_log=create_test_log(
                                 log_name=general_example_filename,
                                 log_path=general_example_filepath))
Example #27
 def setUp(self):
     log = create_test_log(log_name=general_example_filename,
                           log_path=general_example_filepath)
     create_test_split(split_type=SplitTypes.SPLIT_SINGLE.value,
                       original_log=log)
Example #28
    def test_reg_job_creation(self):

        client = APIClient()
        response = client.post(
            '/jobs/multiple',
            self.job_obj2(create_test_split(original_log=create_test_log()).id),
            format='json')

        self.assertEqual(status.HTTP_201_CREATED, response.status_code)
        self.assertEqual(1, len(response.data))
        self.assertEqual(response.data[0]['type'], 'prediction')
        self.assertEqual(response.data[0]['config']['predictive_model']['predictive_model'], 'regression')
        self.assertEqual(ValueEncodings.SIMPLE_INDEX.value, response.data[0]['config']['encoding']['value_encoding'])
        self.assertEqual(ClusteringMethods.NO_CLUSTER.value, response.data[0]['config']['clustering']['clustering_method'])
        self.assertEqual(RegressionMethods.LINEAR.value,
                         response.data[0]['config']['predictive_model']['prediction_method'])
        self.assertEqual(2, response.data[0]['config']['encoding']['prefix_length'])
        self.assertEqual(False, response.data[0]['config']['encoding']['padding'])
        self.assertEqual(JobStatuses.CREATED.value, response.data[0]['status'])
Example #29
 def setUp(self):
     self.log = get_log(create_test_log(log_name=general_example_filename,
                                        log_path=general_example_filepath))
Example #30
    def test_explain(self):
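        # Same training setup as Example #2, but requests an ICE explanation for prefix_2 and
        # compares it against the expected value/label/count entries.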
        split = create_test_split(
            split_type=SplitTypes.SPLIT_DOUBLE.value,
            split_ordering_method=SplitOrderingMethods.SPLIT_SEQUENTIAL.value,
            test_size=0.2,
            original_log=None,
            train_log=create_test_log(
                log_name='train_explainability.xes',
                log_path='cache/log_cache/test_logs/train_explainability.xes'),
            test_log=create_test_log(
                log_name='test_explainability.xes',
                log_path='cache/log_cache/test_logs/test_explainability.xes'))

        predictive_model = create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value,
            prediction_method=ClassificationMethods.DECISION_TREE.value)

        job = create_test_job(
            split=split,
            encoding=create_test_encoding(
                prefix_length=4,
                padding=True,
                value_encoding=ValueEncodings.SIMPLE_INDEX.value),
            labelling=create_test_labelling(
                label_type=LabelTypes.ATTRIBUTE_STRING.value,
                attribute_name='label'),
            clustering=None,
            create_models=True,
            predictive_model=predictive_model,
            job_type=JobTypes.PREDICTION.value,
            hyperparameter_optimizer=None,
            incremental_train=None)

        prediction_task(job.id, do_publish_result=False)
        job.refresh_from_db()

        exp = Explanation.objects.get_or_create(
            type=ExplanationTypes.ICE.value,
            split=split,
            predictive_model=predictive_model,
            job=job,
            results={})[0]
        training_df_old, test_df_old = get_encoded_logs(job)

        explanation_target = 'prefix_2'

        explanation = explain(exp,
                              training_df_old,
                              test_df_old,
                              explanation_target,
                              prefix_target=None)

        expected = [{
            'value': 'Contact Hospital',
            'label': 1.2962962962962963,
            'count': 351
        }, {
            'value': 'Create Questionnaire',
            'label': 1.5526992287917738,
            'count': 1167
        }, {
            'value': 'High Insurance Check',
            'label': 1.2667660208643816,
            'count': 671
        }]

        self.assertEqual(expected, explanation)