def split_double():
    """Build a double split backed by the general-example train/test logs."""
    train = create_test_log(log_name=general_example_train_filename,
                            log_path=general_example_train_filepath)
    test = create_test_log(log_name=general_example_test_filename,
                           log_path=general_example_test_filepath)
    return create_test_split(split_type=SplitTypes.SPLIT_DOUBLE.value,
                             train_log=train,
                             test_log=test)
def test_explain(self):
    """End-to-end SHAP explainability: train a decision tree, then explain
    one trace and check temporal stability; both results must be dicts."""
    train_log = create_test_log(
        log_name='train_explainability.xes',
        log_path='cache/log_cache/test_logs/train_explainability.xes')
    test_log = create_test_log(
        log_name='test_explainability.xes',
        log_path='cache/log_cache/test_logs/test_explainability.xes')
    split = create_test_split(
        split_type=SplitTypes.SPLIT_DOUBLE.value,
        split_ordering_method=SplitOrderingMethods.SPLIT_SEQUENTIAL.value,
        test_size=0.2,
        original_log=None,
        train_log=train_log,
        test_log=test_log)
    model = create_test_predictive_model(
        predictive_model=PredictiveModels.CLASSIFICATION.value,
        prediction_method=ClassificationMethods.DECISION_TREE.value)
    job = create_test_job(
        split=split,
        encoding=create_test_encoding(
            prefix_length=4,
            padding=True,
            value_encoding=ValueEncodings.SIMPLE_INDEX.value),
        labelling=create_test_labelling(
            label_type=LabelTypes.ATTRIBUTE_STRING.value,
            attribute_name='label'),
        clustering=None,
        create_models=True,
        predictive_model=model,
        job_type=JobTypes.PREDICTION.value,
        hyperparameter_optimizer=None,
        incremental_train=None)
    prediction_task(job.id, do_publish_result=False)
    job.refresh_from_db()
    exp, _ = Explanation.objects.get_or_create(
        type=ExplanationTypes.SHAP.value,
        split=split,
        predictive_model=model,
        job=job,
        results={})
    explanation_target = '2_101'
    prefix_target = 'prefix_1'
    train_df, test_df = get_encoded_logs(job)
    explanation = explain(exp, train_df, test_df, explanation_target,
                          prefix_target)
    # Re-fetch the encoded logs before the temporal-stability pass, exactly
    # as the original test does.
    train_df, test_df = get_encoded_logs(job)
    explanation_temp = shap_temporal_stability(exp, train_df, test_df,
                                               explanation_target)
    self.assertTrue(type(explanation) is dict)
    self.assertTrue(type(explanation_temp) is dict)
def test_multiple_unique_events(self):
    """unique_events2 over the train+test logs yields 8 distinct events."""
    testing = get_log(create_test_log(
        log_name=general_example_test_filename,
        log_path=general_example_test_filepath_xes))
    training = get_log(create_test_log(
        log_path=general_example_train_filepath,
        log_name=general_example_train_filename))
    self.assertEqual(8, len(unique_events2(training, testing)))
def setUp(self):
    """Fixture: general-example train/test logs plus a simple-index encoding."""
    self.train_log = get_log(create_test_log(
        log_name=general_example_train_filename,
        log_path=general_example_train_filepath))
    self.test_log = get_log(create_test_log(
        log_name=general_example_test_filename,
        log_path=general_example_test_filepath))
    self.encoding = create_test_encoding(
        value_encoding=ValueEncodings.SIMPLE_INDEX.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=1)
def setUp(self):
    """Fixture: train/test logs with their event names and extra columns."""
    self.train_log = get_log(create_test_log(
        log_name=general_example_train_filename,
        log_path=general_example_train_filepath))
    self.train_event_names = unique_events(self.train_log)
    self.train_add_col = get_additional_columns(self.train_log)
    self.test_log = get_log(create_test_log(
        log_name=general_example_test_filename,
        log_path=general_example_test_filepath))
    self.test_event_names = unique_events(self.test_log)
    self.test_add_col = get_additional_columns(self.test_log)
def setUp(self):
    """Fixture: boolean-encoded train/test dataframes for classification."""
    testing = get_log(create_test_log(
        log_name=general_example_test_filename,
        log_path=general_example_test_filepath))
    training = get_log(create_test_log(
        log_name=general_example_train_filename,
        log_path=general_example_train_filepath))
    job = create_test_job(
        encoding=create_test_encoding(
            value_encoding=ValueEncodings.BOOLEAN.value,
            add_elapsed_time=True),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    self.training_df, self.test_df = encode_label_logs(training, testing, job)
def test_global_event_attributes(self):
    """Event attributes of the XES test log match the expected list."""
    log = get_log(create_test_log(
        log_name=general_example_test_filename,
        log_path=general_example_test_filepath_xes))
    self.assertListEqual(
        get_additional_columns(log)['event_attributes'],
        ['Activity', 'Costs', 'Resource', 'org:resource'])
def test_hyperopt(self):
    """Hyperopt tuning must move the random-forest config away from the
    library defaults produced by ``classification_random_forest()``."""
    job = Job.objects.create(
        split=create_test_split(
            split_type=SplitTypes.SPLIT_SINGLE.value,
            original_log=create_test_log(
                log_name=general_example_filename,
                log_path=general_example_filepath)),
        encoding=create_test_encoding(
            value_encoding=ValueEncodings.SIMPLE_INDEX.value,
            prefix_length=3,
            padding=False),
        labelling=create_test_labelling(
            label_type=LabelTypes.REMAINING_TIME.value),
        clustering=create_test_clustering(
            clustering_type=ClusteringMethods.KMEANS.value),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value,
            prediction_method=ClassificationMethods.RANDOM_FOREST.value),
        hyperparameter_optimizer=create_test_hyperparameter_optimizer(
            hyperoptim_type=HyperparameterOptimizationMethods.HYPEROPT.value,
            performance_metric=HyperOptLosses.ACC.value,
            max_evals=2))
    prediction_task(job.pk)
    # Bug fix: reload by the created job's own pk. The original hard-coded
    # pk=1, which breaks as soon as any other test inserts a Job row first.
    job = Job.objects.get(pk=job.pk)
    tuned_config = job.predictive_model.classification.__getattribute__(
        ClassificationMethods.RANDOM_FOREST.value.lower()).to_dict()
    self.assertNotEqual(classification_random_forest(), tuned_config)
def test_event_attributes(self):
    """get_event_attributes on the test log returns the expected names."""
    log = get_log(create_test_log(
        log_name=general_example_test_filename,
        log_path=general_example_test_filepath))
    self.assertListEqual(get_event_attributes(log),
                         ['Activity', 'Costs', 'Resource', 'org:resource'])
def test_trace_attributes(self):
    """Trace attributes of the financial log: one number and one string."""
    self.log = get_log(create_test_log(log_name=financial_log_filename,
                                       log_path=financial_log_filepath))
    result = trace_attributes(self.log)
    self.assertEqual(2, len(result))
    self.assertDictEqual(
        {'name': 'AMOUNT_REQ', 'type': 'number', 'example': '20000'},
        result[0])
    self.assertDictEqual(
        {'name': 'REG_DATE', 'type': 'string',
         'example': '2011-10-01 00:38:44.546000+02:00'},
        result[1])
def setUp(self):
    """Fixture: frequency-encoded dataframes with a remaining-time label."""
    testing = get_log(create_test_log(
        log_name=general_example_test_filename,
        log_path=general_example_test_filepath_xes))
    training = get_log(create_test_log(
        log_name=general_example_train_filename,
        log_path=general_example_train_filepath))
    self.encoding = create_test_encoding(
        value_encoding=ValueEncodings.FREQUENCY.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=1)
    self.labelling = create_test_labelling(
        label_type=LabelTypes.REMAINING_TIME.value)
    self.training_df, self.test_df = encode_label_logs(
        training, testing,
        create_test_job(encoding=self.encoding, labelling=self.labelling))
def split_single(split_ordering: str = SplitOrderingMethods.SPLIT_SEQUENTIAL.value,
                 test_size: float = 0.2):
    """Build a single split over the general-example log.

    :param split_ordering: trace-ordering method used before splitting.
    :param test_size: fraction of traces reserved for the test set.
    """
    original = create_test_log(log_name=general_example_filename,
                               log_path=general_example_filepath)
    return create_test_split(split_type=SplitTypes.SPLIT_SINGLE.value,
                             split_ordering_method=split_ordering,
                             test_size=test_size,
                             original_log=original)
def setUp(self):
    """Fixture: XES test log with remaining-time label and simple-index encoding."""
    self.log = get_log(create_test_log(
        log_name=general_example_test_filename,
        log_path=general_example_test_filepath_xes))
    self.labelling = create_test_labelling(
        label_type=LabelTypes.REMAINING_TIME.value)
    self.encoding = create_test_encoding(
        value_encoding=ValueEncodings.SIMPLE_INDEX.value,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        add_elapsed_time=True,
        prefix_length=1)
def test_replay_prediction(self):
    """Replay-prediction completes without error on the runtime log."""
    job = create_test_job(create_models=True)
    runtime_log = create_test_log(
        log_name='runtime_example.xes',
        log_path='cache/log_cache/test_logs/runtime_test.xes')
    log = get_log(runtime_log)
    prediction_task(job.id)
    job.refresh_from_db()
    # NOTE(review): the same job is passed as both replay job and training
    # job — confirm this is intentional.
    replay_prediction_task(job, job, log)
def setUp(self):
    """Fixture: train/test logs, extra columns, and two complex encodings
    (short prefix without padding, long prefix with padding)."""
    self.train_log = get_log(create_test_log(
        log_name=general_example_train_filename,
        log_path=general_example_train_filepath))
    self.test_log = get_log(create_test_log(
        log_name=general_example_test_filename,
        log_path=general_example_test_filepath))
    self.add_col = get_additional_columns(self.train_log)
    self.encoding = create_test_encoding(
        value_encoding=ValueEncodings.COMPLEX.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=2)
    self.encodingPadding = create_test_encoding(
        value_encoding=ValueEncodings.COMPLEX.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=10,
        padding=True)
def setUp(self):
    """Fixture: test log, its event names/extra columns, and a last-payload encoding."""
    self.log = get_log(create_test_log(
        log_name=general_example_test_filename,
        log_path=general_example_test_filepath))
    self.event_names = unique_events(self.log)
    self.labelling = create_test_labelling(
        label_type=LabelTypes.REMAINING_TIME.value)
    self.add_col = get_additional_columns(self.log)
    self.encoding = create_test_encoding(
        value_encoding=ValueEncodings.LAST_PAYLOAD.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=1)
def test_post_split_create_split(self):
    """POST /splits/ echoes back the submitted split configuration."""
    log = create_test_log()
    payload = {
        'original_log': log.id,
        'splitting_method': 'sequential',
        'test_size': .2,
    }
    response = APIClient().post('/splits/', payload)
    self.assertEqual(log.id, response.data['original_log'])
    self.assertEqual('sequential', response.data['splitting_method'])
    self.assertEqual(.2, response.data['test_size'])
def test_runtime(self):
    """Runtime task executes on a trained job re-pointed at the runtime log."""
    job = create_test_job(create_models=True)
    runtime_log = create_test_log(
        log_name='runtime_example.xes',
        log_path='cache/log_cache/test_logs/runtime_test.xes')
    prediction_task(job.id)
    job.refresh_from_db()
    # Point the job at a double split whose train and test halves are both
    # the runtime log, then run it.
    job.split = create_test_split(
        split_type=SplitTypes.SPLIT_DOUBLE.value,
        split_ordering_method=SplitOrderingMethods.SPLIT_SEQUENTIAL.value,
        train_log=runtime_log,
        test_log=runtime_log)
    runtime_task(job)
def test_replay(self):
    """Replaying a duplicated job against the runtime log yields 2 requests."""
    job = create_test_job()
    runtime_job = duplicate_orm_row(job)
    runtime_log = create_test_log(
        log_name='runtime_example.xes',
        log_path='cache/log_cache/test_logs/runtime_test.xes')
    runtime_job.split = create_test_split(
        split_type=SplitTypes.SPLIT_DOUBLE.value,
        split_ordering_method=SplitOrderingMethods.SPLIT_SEQUENTIAL.value,
        train_log=runtime_log,
        test_log=runtime_log)
    self.assertEqual(len(replay_task(runtime_job, job)), 2)
def setUp(self) -> None:
    """Fixture: general-example log plus encoding/labelling/model configs."""
    self.log = create_test_log(
        log_name='general_example.xes',
        log_path='cache/log_cache/test_logs/general_example.xes')
    self.encoding = create_test_encoding(
        prefix_length=4,
        padding=True,
        value_encoding=ValueEncodings.SIMPLE_INDEX.value)
    self.labelling = create_test_labelling(
        label_type=LabelTypes.NEXT_ACTIVITY.value)
    self.predictive_model = create_test_predictive_model(
        predictive_model=PredictiveModels.CLASSIFICATION.value,
        prediction_method=ClassificationMethods.DECISION_TREE.value)
def test_eval(self):
    """simple_index over the full general-example log has the expected
    shape, row contents, and no NaNs."""
    # NOTE(review): the encoding is built with FREQUENCY value_encoding but
    # fed to simple_index — confirm this mismatch is intentional.
    encoding = create_test_encoding(
        value_encoding=ValueEncodings.FREQUENCY.value,
        task_generation_type=TaskGenerationTypes.ALL_IN_ONE.value,
        add_elapsed_time=True,
        prefix_length=12,
        padding=True)
    log = get_log(create_test_log(log_path=general_example_filepath,
                                  log_name=general_example_filename))
    labelling = create_test_labelling(label_type=LabelTypes.REMAINING_TIME.value)
    df = simple_index(log, labelling, encoding)
    self.assertEqual(df.shape, (41, 15))
    row1 = df[df.trace_id == '4'].iloc[4]
    self.assertListEqual(
        ['4', 'register request', 'check ticket', 'examine thoroughly',
         'decide', 'reject request', 0, 0, 0, 0, 0, 0, 0, 520920.0, 0.0],
        row1.values.tolist())
    self.assertFalse(df.isnull().values.any())
def test_create_multiple(self):
    """POST /jobs/multiple with three methods creates three jobs."""
    split = create_test_split(original_log=create_test_log())
    client = APIClient()
    response = client.post('/jobs/multiple', {
        'type': 'classification',
        # Bug fix: use the freshly created split's real id. The original
        # hard-coded 1, which breaks whenever another test has already
        # created a split row.
        'split_id': split.id,
        'config': {
            'clusterings': ['noCluster'],
            'encodings': ['simpleIndex'],
            'encoding': {
                'padding': False,
                'prefix_length': 1,
                'generation_type': 'only',
                'add_remaining_time': False,
                'add_elapsed_time': False,
                'add_executed_events': False,
                'add_resources_used': False,
                'add_new_traces': False,
                'features': [],
            },
            'create_models': False,
            'methods': ['randomForest', 'decisionTree', 'rnn'],
            'kmeans': {},
            'incremental_train': {
                'base_model': None,
            },
            'hyperparameter_optimizer': {
                'algorithm_type': 'tpe',
                'max_evaluations': 10,
                'performance_metric': 'rmse',
                'type': 'none',
            },
            'labelling': {
                'type': 'next_activity',
                'attribute_name': '',
                'threshold_type': 'threshold_mean',
                'threshold': 0,
            }
        }}, format='json')
    self.assertEqual(3, len(response.data))
def test_replay(self):
    """Replaying a field-by-field copy of a job yields 6 requests."""
    job = create_test_job()
    # Explicit copy of every Job field (replaces duplicate_orm_row).
    runtime_job = Job.objects.create(
        created_date=job.created_date,
        modified_date=job.modified_date,
        error=job.error,
        status=job.status,
        type=job.type,
        create_models=job.create_models,
        case_id=job.case_id,
        event_number=job.event_number,
        gold_value=job.gold_value,
        results=job.results,
        parent_job=job.parent_job,
        split=job.split,
        encoding=job.encoding,
        labelling=job.labelling,
        clustering=job.clustering,
        predictive_model=job.predictive_model,
        evaluation=job.evaluation,
        hyperparameter_optimizer=job.hyperparameter_optimizer,
        incremental_train=job.incremental_train)
    runtime_log = create_test_log(
        log_name='runtime_example.xes',
        log_path='cache/log_cache/test_logs/runtime_test.xes')
    runtime_job.split = create_test_split(
        split_type=SplitTypes.SPLIT_DOUBLE.value,
        split_ordering_method=SplitOrderingMethods.SPLIT_SEQUENTIAL.value,
        train_log=runtime_log,
        test_log=runtime_log)
    self.assertEqual(len(replay_task(runtime_job, job)), 6)
def test_labelling_job_creation(self):
    """A labelling request yields exactly one correctly configured job."""
    client = APIClient()
    split_id = create_test_split(original_log=create_test_log()).id
    response = client.post('/jobs/multiple', self.job_label(split_id),
                           format='json')
    self.assertEqual(status.HTTP_201_CREATED, response.status_code)
    self.assertEqual(len(response.data), 1)
    job_data = response.data[0]
    self.assertEqual(job_data['type'], 'labelling')
    self.assertEqual(job_data['config']['encoding']['value_encoding'],
                     'simpleIndex')
    self.assertEqual(job_data['config']['encoding']['prefix_length'], 3)
    self.assertEqual(job_data['config']['labelling'],
                     {'type': 'remaining_time',
                      'attribute_name': None,
                      'threshold_type': ThresholdTypes.THRESHOLD_MEAN.value,
                      'threshold': 0,
                      'results': {}})
    self.assertEqual(job_data['config']['encoding']['padding'], True)
    self.assertEqual(job_data['status'], 'created')
def repair_example():
    """Build a single split backed by the repair-example log."""
    log = create_test_log(log_name='repair_example.xes',
                          log_path=repair_example_filepath)
    return create_test_split(split_type=SplitTypes.SPLIT_SINGLE.value,
                             original_log=log)
def split_single():
    """Build a single split backed by the general-example log."""
    log = create_test_log(log_name=general_example_filename,
                          log_path=general_example_filepath)
    return create_test_split(split_type=SplitTypes.SPLIT_SINGLE.value,
                             original_log=log)
def setUp(self):
    """Fixture: a single split over the general-example log."""
    # Bug fix: log_name was mistakenly set to general_example_filepath (the
    # file *path*); use the filename constant, matching every other fixture
    # in this file.
    log = create_test_log(log_name=general_example_filename,
                          log_path=general_example_filepath)
    create_test_split(split_type=SplitTypes.SPLIT_SINGLE.value,
                      original_log=log)
def test_reg_job_creation(self):
    """A regression request yields one job with the expected defaults."""
    client = APIClient()
    split_id = create_test_split(original_log=create_test_log()).id
    response = client.post('/jobs/multiple', self.job_obj2(split_id),
                           format='json')
    self.assertEqual(status.HTTP_201_CREATED, response.status_code)
    self.assertEqual(1, len(response.data))
    job_data = response.data[0]
    self.assertEqual(job_data['type'], 'prediction')
    self.assertEqual(job_data['config']['predictive_model']['predictive_model'],
                     'regression')
    self.assertEqual(ValueEncodings.SIMPLE_INDEX.value,
                     job_data['config']['encoding']['value_encoding'])
    self.assertEqual(ClusteringMethods.NO_CLUSTER.value,
                     job_data['config']['clustering']['clustering_method'])
    self.assertEqual(RegressionMethods.LINEAR.value,
                     job_data['config']['predictive_model']['prediction_method'])
    self.assertEqual(2, job_data['config']['encoding']['prefix_length'])
    self.assertEqual(False, job_data['config']['encoding']['padding'])
    self.assertEqual(JobStatuses.CREATED.value, job_data['status'])
def setUp(self):
    """Fixture: load the general-example log once for this test case."""
    self.log = get_log(create_test_log(
        log_name=general_example_filename,
        log_path=general_example_filepath))
def test_explain(self):
    """End-to-end ICE explanation on the explainability logs: the explain
    call must return the exact expected value/label/count triples."""
    split = create_test_split(
        split_type=SplitTypes.SPLIT_DOUBLE.value,
        split_ordering_method=SplitOrderingMethods.SPLIT_SEQUENTIAL.value,
        test_size=0.2,
        original_log=None,
        train_log=create_test_log(
            log_name='train_explainability.xes',
            log_path='cache/log_cache/test_logs/train_explainability.xes'),
        test_log=create_test_log(
            log_name='test_explainability.xes',
            log_path='cache/log_cache/test_logs/test_explainability.xes'))
    model = create_test_predictive_model(
        predictive_model=PredictiveModels.CLASSIFICATION.value,
        prediction_method=ClassificationMethods.DECISION_TREE.value)
    job = create_test_job(
        split=split,
        encoding=create_test_encoding(
            prefix_length=4,
            padding=True,
            value_encoding=ValueEncodings.SIMPLE_INDEX.value),
        labelling=create_test_labelling(
            label_type=LabelTypes.ATTRIBUTE_STRING.value,
            attribute_name='label'),
        clustering=None,
        create_models=True,
        predictive_model=model,
        job_type=JobTypes.PREDICTION.value,
        hyperparameter_optimizer=None,
        incremental_train=None)
    prediction_task(job.id, do_publish_result=False)
    job.refresh_from_db()
    exp, _ = Explanation.objects.get_or_create(
        type=ExplanationTypes.ICE.value,
        split=split,
        predictive_model=model,
        job=job,
        results={})
    train_df, test_df = get_encoded_logs(job)
    explanation = explain(exp, train_df, test_df, 'prefix_2',
                          prefix_target=None)
    expected = [
        {'value': 'Contact Hospital',
         'label': 1.2962962962962963,
         'count': 351},
        {'value': 'Create Questionnaire',
         'label': 1.5526992287917738,
         'count': 1167},
        {'value': 'High Insurance Check',
         'label': 1.2667660208643816,
         'count': 671},
    ]
    self.assertEqual(expected, explanation)