def test_no_exceptions(self):
    """Smoke-test ``calculate`` across encoding/padding/method/label combinations.

    Iterates over the cartesian product of every ``ValueEncodings`` value,
    both padding settings, every ``ClassificationMethods`` value and every
    ``LabelTypes`` value, building and running one job per combination.
    There are no assertions: the test fails only if a call raises.
    """
    filtered_labels = [enum.value for enum in LabelTypes]
    filtered_classification_methods = [
        enum.value for enum in ClassificationMethods
    ]
    filtered_encoding_methods = [enum.value for enum in ValueEncodings]
    filtered_padding = [True, False]

    # The product is only iterated once, so it does not need to be a list.
    job_combinations = itertools.product(
        filtered_encoding_methods,
        filtered_padding,
        filtered_classification_methods,
        filtered_labels,
    )

    for encoding, padding, method, label in job_combinations:
        print(encoding, padding, method, label)

        # Skip the combinations this guard singles out. With a bare ``pass``
        # the guard is a no-op and the job below is run regardless.
        # NOTE(review): confirm that 'nn' is really meant to be excluded when
        # padding is off or the label type is ATTRIBUTE_STRING.
        if method == 'nn' and (
                not padding or label == LabelTypes.ATTRIBUTE_STRING.value):
            continue

        job = create_test_job(
            predictive_model=create_test_predictive_model(
                prediction_method=method),
            encoding=create_test_encoding(
                value_encoding=encoding, padding=padding),
            labelling=create_test_labelling(label_type=label))
        calculate(job)
def test_duration(self):
    """Duration labelling gives the same result after re-encoding the job.

    The job is calculated once, then its encoding is replaced by a padded
    encoding with prefix length 22 and it is calculated again; both results
    must be equal (trace attribute, zero padding: prefix length has no
    effect).
    """
    job = self.get_job()
    job.labelling = create_test_labelling(
        label_type=LabelTypes.DURATION.value)
    job.save()
    baseline, _ = calculate(job)

    job.encoding = create_test_encoding(padding=True, prefix_length=22)
    job.save()
    after_reencoding, _ = calculate(job)

    self.assertEqual(baseline, after_reencoding)
def prediction_task(job_id):
    """Run the prediction job with the given id and publish its final state.

    The job is executed only when its status is CREATED; a job of type UPDATE
    additionally requires ``job.incremental_train`` to have status COMPLETED.
    When a hyperparameter optimizer with a method other than NONE is
    configured, ``hyperopt_task`` is used, otherwise ``calculate``. On success
    the result is stored, models are saved if ``job.create_models`` is set,
    and the status becomes COMPLETED.

    Args:
        job_id: id used to look up the ``Job``.

    Raises:
        Exception: any exception raised while running the job is recorded on
            the job (status ERROR, ``job.error`` set to its repr) and then
            re-raised. The job is saved and published in every case.
    """
    logger.info("Start prediction task ID {}".format(job_id))
    job = Job.objects.get(id=job_id)
    try:
        # ``incremental_train`` is only consulted for UPDATE jobs thanks to
        # short-circuit evaluation.
        ready_to_run = job.status == JobStatuses.CREATED.value and (
            job.type != JobTypes.UPDATE.value
            or job.incremental_train.status == JobStatuses.COMPLETED.value)
        if ready_to_run:
            job.status = JobStatuses.RUNNING.value
            job.save()
            start_time = time.time()
            optimizer = job.hyperparameter_optimizer
            if optimizer is not None and \
                    optimizer.optimization_method != HyperparameterOptimizationMethods.NONE.value:
                result, model_split = hyperopt_task(job)
            else:
                result, model_split = calculate(job)
            elapsed_time = time.time() - start_time
            logger.info('\tJob took: {} in HH:MM:ss'.format(
                time.strftime("%H:%M:%S", time.gmtime(elapsed_time))))
            if job.create_models:
                save_models(model_split, job)
            job.result = result
            job.status = JobStatuses.COMPLETED.value
    except Exception as e:
        # logger.exception keeps the traceback in the log, not just the message.
        logger.exception(e)
        job.status = JobStatuses.ERROR.value
        job.error = repr(e)
        # Bare raise re-raises the active exception with its traceback intact.
        raise
    finally:
        job.save()
        publish(job)
def prediction_task(job_id):
    """Run the prediction job with the given id and publish its final state.

    Only a job whose status is CREATED is executed: ``hyperopt_task`` when a
    hyperparameter optimizer is set, ``calculate`` otherwise. On success the
    result is stored, models are saved if ``job.create_models`` is set and the
    status becomes COMPLETED. On failure the status becomes ERROR, the
    exception's repr is stored in ``job.error`` and the exception is
    re-raised. The job is saved and published in every case.
    """
    print("Start prediction task ID {}".format(job_id))
    job = Job.objects.get(id=job_id)
    try:
        if job.status != JobStatuses.CREATED.value:
            # Nothing to run; the finally clause still saves and publishes.
            return

        job.status = JobStatuses.RUNNING.value
        job.save()

        started_at = time.time()
        runner = calculate if job.hyperparameter_optimizer is None else hyperopt_task
        result, model_split = runner(job)
        duration = time.time() - started_at
        formatted_duration = time.strftime("%H:%M:%S", time.gmtime(duration))
        print('\tJob took: {} in HH:MM:ss'.format(formatted_duration))

        if job.create_models:
            save_models(model_split, job)
        job.result = result
        job.status = JobStatuses.COMPLETED.value
    except Exception as error:
        job.status = JobStatuses.ERROR.value
        job.error = repr(error)
        raise error
    finally:
        job.save()
        publish(job)
def test_atr_string(self):
    """Attribute-string labelling on ``description`` yields the expected result."""
    job = self.get_job()
    job.labelling = create_test_labelling(
        label_type=LabelTypes.ATTRIBUTE_STRING.value,
        attribute_name='description',
    )
    job.save()

    outcome, _ = calculate(job)

    expected = {'Simulated process instance': 883}
    self.assertEqual(outcome, expected)
def test_remaining_time(self):
    """Remaining-time labelling yields the expected true/false result."""
    job = self.get_job()
    job.labelling = create_test_labelling(
        label_type=LabelTypes.REMAINING_TIME.value)
    job.save()

    outcome, _ = calculate(job)

    expected = {'true': 529, 'false': 354}
    self.assertEqual(outcome, expected)
def test_next_activity_kmeans(self):
    """Random-forest classification of next activity with k-means clustering.

    Uses the ``repair_example()`` split and a padded encoding with prefix
    length 8, then compares every metric except ``elapsed_time`` with the
    expected values.
    """
    # unittest reads ``maxDiff`` (camelCase); None removes the diff size limit
    # so a failing assertDictEqual shows the complete difference.
    self.maxDiff = None
    job = create_test_job(
        clustering=create_test_clustering(
            clustering_type=ClusteringMethods.KMEANS.value),
        split=repair_example(),
        encoding=create_test_encoding(prefix_length=8, padding=True),
        labelling=create_test_labelling(
            label_type=LabelTypes.NEXT_ACTIVITY.value),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value,
            prediction_method=ClassificationMethods.RANDOM_FOREST.value))
    result, _ = calculate(job)
    # The run time varies between executions, so it is excluded.
    del result['elapsed_time']
    self.assertDictEqual(
        result, {
            'f1score': 0.54239884582595577,
            'acc': 0.80995475113122173,
            'true_positive': '--',
            'true_negative': '--',
            'false_negative': '--',
            'false_positive': '--',
            'precision': 0.62344720496894401,
            'recall': 0.5224945442336747,
            'auc': 0.4730604801339352
        })
def test_class_no_cluster(self):
    """Random-forest classification without clustering on the repair example.

    Uses the ``repair_example()`` split and a padded encoding with prefix
    length 5 and elapsed time added, then compares every metric except
    ``elapsed_time`` with the expected values.
    """
    # unittest reads ``maxDiff`` (camelCase); None removes the diff size limit
    # so a failing assertDictEqual shows the complete difference.
    self.maxDiff = None
    job = create_test_job(
        clustering=create_test_clustering(
            clustering_type=ClusteringMethods.NO_CLUSTER.value),
        split=repair_example(),
        encoding=create_test_encoding(
            prefix_length=5, padding=True, add_elapsed_time=True),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value,
            prediction_method=ClassificationMethods.RANDOM_FOREST.value))
    result, _ = calculate(job)
    # The run time varies between executions, so it is excluded.
    del result['elapsed_time']
    self.assertDictEqual(
        result, {
            'f1score': 1.0,
            'acc': 1.0,
            'true_positive': '--',
            'true_negative': '--',
            'false_negative': '--',
            'false_positive': '--',
            'precision': 1.0,
            'recall': 1.0,
            'auc': 0.0
        })
def test_next_activity(self):
    """Next-activity labelling yields the expected per-activity result."""
    job = self.get_job()
    job.labelling = create_test_labelling(
        label_type=LabelTypes.NEXT_ACTIVITY.value)
    job.save()

    outcome, _ = calculate(job)

    expected = {
        '0': 2,
        'Repair (Complex)': 306,
        'Test Repair': 432,
        'Inform User': 5,
        'Repair (Simple)': 138,
    }
    self.assertEqual(outcome, expected)
def test_update_nb(self):
    """Hoeffding-tree job followed by an incremental job built on top of it.

    The first job is created with ``create_models=True``; the second reuses
    its encoding and clustering and passes it as ``incremental_train``. Both
    results (without ``elapsed_time``) must match the same expected metrics.
    """
    base_job = create_test_job(
        predictive_model=create_test_predictive_model(
            prediction_method=ClassificationMethods.HOEFFDING_TREE.value),
        labelling=create_test_labelling(
            label_type=LabelTypes.ATTRIBUTE_STRING.value,
            attribute_name='concept:name'),
        clustering=create_test_clustering(
            clustering_type=ClusteringMethods.NO_CLUSTER.value),
        create_models=True)
    base_result, _ = calculate(base_job)

    update_job = create_test_job(
        predictive_model=create_test_predictive_model(
            prediction_method=ClassificationMethods.HOEFFDING_TREE.value),
        encoding=base_job.encoding,
        labelling=create_test_labelling(
            label_type=LabelTypes.ATTRIBUTE_STRING.value,
            attribute_name='concept:name'),
        clustering=base_job.clustering,
        incremental_train=base_job)
    update_result, _ = calculate(update_job)

    # Run time differs between executions, so drop it before comparing.
    del base_result['elapsed_time']
    del update_result['elapsed_time']

    expected_metrics = {
        'f1score': 0.0,
        'acc': 0.0,
        'precision': 0.0,
        'recall': 0.0,
        'true_positive': 0,
        'true_negative': 0,
        'false_negative': 2,
        'false_positive': 0,
        'auc': 0.0
    }
    self.assertDictEqual(base_result, expected_metrics)
    self.assertDictEqual(update_result, expected_metrics)
def test_remaining_custom_threshold(self):
    """Remaining-time labelling with a custom threshold of 1600."""
    job = self.get_job()
    job.labelling = create_test_labelling(
        label_type=LabelTypes.REMAINING_TIME.value,
        threshold_type=ThresholdTypes.THRESHOLD_CUSTOM.value,
        threshold=1600,
    )
    job.save()

    outcome, _ = calculate(job)

    expected = {'true': 444, 'false': 439}
    self.assertEqual(outcome, expected)
def test_next_activity_DecisionTree(self):
    """Decision-tree next-activity job must match ``self.results3()``."""
    model = create_test_predictive_model(
        prediction_method=ClassificationMethods.DECISION_TREE.value)
    labelling = create_test_labelling(
        label_type=LabelTypes.NEXT_ACTIVITY.value)
    clustering = create_test_clustering(
        clustering_type=ClusteringMethods.NO_CLUSTER.value)
    job = create_test_job(
        predictive_model=model,
        labelling=labelling,
        clustering=clustering,
    )

    outcome, _ = calculate(job)

    expected = self.results3()
    self.assertDictEqual(outcome, expected)
def test_class_randomForest(self):
    """Random-forest job labelled on attribute ``label`` must match ``self.results2()``."""
    model = create_test_predictive_model(
        prediction_method=ClassificationMethods.RANDOM_FOREST.value)
    labelling = create_test_labelling(
        label_type=LabelTypes.ATTRIBUTE_STRING.value,
        attribute_name='label')
    clustering = create_test_clustering(
        clustering_type=ClusteringMethods.NO_CLUSTER.value)
    job = create_test_job(
        predictive_model=model,
        labelling=labelling,
        clustering=clustering,
    )

    outcome, _ = calculate(job)

    expected = self.results2()
    self.assertDictEqual(outcome, expected)
def test_tsp_gru(self):
    """Time-series prediction with an RNN configured as ``gru``.

    Runs without clustering on a padded encoding with prefix length 2 and
    compares the result (without ``elapsed_time``) with the expected metric.
    """
    model = create_test_predictive_model(
        predictive_model=PredictiveModels.TIME_SERIES_PREDICTION.value,
        prediction_method=TimeSeriesPredictionMethods.RNN.value,
        configuration={'rnn_type': 'gru'})
    labelling = create_test_labelling()
    encoding = create_test_encoding(prefix_length=2, padding=True)
    clustering = create_test_clustering(
        clustering_type=ClusteringMethods.NO_CLUSTER.value)
    job = create_test_job(
        predictive_model=model,
        labelling=labelling,
        encoding=encoding,
        clustering=clustering,
    )

    outcome, _ = calculate(job)
    del outcome['elapsed_time']

    expected = {'nlevenshtein': 0.6}
    self.assertDictEqual(outcome, expected)
def test_regression_nn(self):
    """NN regression on remaining time: check ``mae`` and ``mape``."""
    model = create_test_predictive_model(
        predictive_model=PredictiveModels.REGRESSION.value,
        prediction_method=RegressionMethods.NN.value)
    labelling = create_test_labelling(
        label_type=LabelTypes.REMAINING_TIME.value)
    clustering = create_test_clustering(
        clustering_type=ClusteringMethods.NO_CLUSTER.value)
    job = create_test_job(
        predictive_model=model,
        labelling=labelling,
        clustering=clustering,
    )

    metrics, _ = calculate(job)
    del metrics['elapsed_time']
    print(metrics)

    self.assertAlmostEqual(metrics['mae'], 0.0001388888888888889)
    self.assertAlmostEqual(metrics['mape'], -1)
def test_regression_no_cluster(self):
    """Random-forest regression on duration labels without clustering.

    Uses the ``repair_example()`` split and a padded encoding with prefix
    length 5, then checks ``rmse``, ``mae``, ``rscore`` and ``mape``.
    """
    # unittest reads ``maxDiff`` (camelCase); ``max_diff`` would only create
    # an unused attribute on the test instance.
    self.maxDiff = None
    job = create_test_job(
        clustering=create_test_clustering(
            clustering_type=ClusteringMethods.NO_CLUSTER.value),
        split=repair_example(),
        encoding=create_test_encoding(prefix_length=5, padding=True),
        labelling=create_test_labelling(
            label_type=LabelTypes.DURATION.value),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.REGRESSION.value,
            prediction_method=RegressionMethods.RANDOM_FOREST.value)
    )
    result, _ = calculate(job)
    self.assertAlmostEqual(result['rmse'], 0.4868515876868242)
    self.assertAlmostEqual(result['mae'], 0.44340838774645464)
    self.assertAlmostEqual(result['rscore'], 0.02142755175443678)
    self.assertAlmostEqual(result['mape'], -1)
def test_regression_kmeans(self):
    """Random-forest regression on duration labels with k-means clustering.

    Uses the ``repair_example()`` split and a padded encoding with prefix
    length 5, then checks ``rmse``, ``mae``, ``rscore`` and ``mape``.
    """
    # unittest reads ``maxDiff`` (camelCase); ``max_diff`` would only create
    # an unused attribute on the test instance.
    self.maxDiff = None
    job = create_test_job(
        clustering=create_test_clustering(
            clustering_type=ClusteringMethods.KMEANS.value),
        split=repair_example(),
        encoding=create_test_encoding(prefix_length=5, padding=True),
        labelling=create_test_labelling(
            label_type=LabelTypes.DURATION.value),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.REGRESSION.value,
            prediction_method=RegressionMethods.RANDOM_FOREST.value)
    )
    result, _ = calculate(job)
    self.assertAlmostEqual(result['rmse'], 0.48841552839653984)
    self.assertAlmostEqual(result['mae'], 0.44282462605873457)
    self.assertAlmostEqual(result['rscore'], 0.015130407121517586)
    self.assertAlmostEqual(result['mape'], -1)
def test_regression_kmeans(self):
    """Random-forest regression with k-means clustering on the repair example.

    Uses the ``repair_example()`` split and a padded encoding with prefix
    length 5, then checks ``rmse``, ``mae``, ``rscore`` and ``mape`` (the
    latter is expected to be infinite).
    """
    # unittest reads ``maxDiff`` (camelCase); ``max_diff`` would only create
    # an unused attribute on the test instance.
    self.maxDiff = None
    job = create_test_job(
        clustering=create_test_clustering(
            clustering_type=ClusteringMethods.KMEANS.value),
        split=repair_example(),
        encoding=create_test_encoding(prefix_length=5, padding=True),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.REGRESSION.value,
            prediction_method=RegressionMethods.RANDOM_FOREST.value))
    result, _ = calculate(job)
    self.assertAlmostEqual(result['rmse'], 0.0325738)
    self.assertAlmostEqual(result['mae'], 0.00014269)
    self.assertAlmostEqual(result['rscore'], -0.11336870)
    self.assertAlmostEqual(result['mape'], float('inf'))
def test_regression_no_cluster(self):
    """Random-forest regression without clustering on the repair example.

    Uses the ``repair_example()`` split and a padded encoding with prefix
    length 5, then checks ``rmse``, ``mae``, ``rscore`` and ``mape`` (the
    latter is expected to be infinite).
    """
    # unittest reads ``maxDiff`` (camelCase); ``max_diff`` would only create
    # an unused attribute on the test instance.
    self.maxDiff = None
    job = create_test_job(
        clustering=create_test_clustering(
            clustering_type=ClusteringMethods.NO_CLUSTER.value),
        split=repair_example(),
        encoding=create_test_encoding(prefix_length=5, padding=True),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.REGRESSION.value,
            prediction_method=RegressionMethods.RANDOM_FOREST.value))
    result, _ = calculate(job)
    self.assertAlmostEqual(result['rmse'], 0.03263757)
    self.assertAlmostEqual(result['mae'], 0.00011685)
    self.assertAlmostEqual(result['rscore'], 0.13776124)
    self.assertAlmostEqual(result['mape'], float('inf'))
def test_regression_kmeans(self):
    """Random-forest regression with k-means clustering on the repair example.

    Uses the ``repair_example()`` split and a padded encoding with prefix
    length 5, then checks ``rmse``, ``mae``, ``rscore`` and ``mape``.
    """
    # unittest reads ``maxDiff`` (camelCase); ``max_diff`` would only create
    # an unused attribute on the test instance.
    self.maxDiff = None
    job = create_test_job(
        clustering=create_test_clustering(
            clustering_type=ClusteringMethods.KMEANS.value),
        split=repair_example(),
        encoding=create_test_encoding(prefix_length=5, padding=True),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.REGRESSION.value,
            prediction_method=RegressionMethods.RANDOM_FOREST.value))
    result, _ = calculate(job)
    self.assertAlmostEqual(result['rmse'], 0.036930128)
    self.assertAlmostEqual(result['mae'], 0.023046561975)
    self.assertAlmostEqual(result['rscore'], 0.99830687)
    self.assertAlmostEqual(result['mape'], 0.5761640)
def test_next_activity_no_cluster(self):
    """Random-forest classification of next activity without clustering.

    Uses the ``repair_example()`` split and a padded encoding with prefix
    length 8, then checks ``f1score``, ``acc``, ``precision``, ``recall`` and
    ``auc``.
    """
    # unittest reads ``maxDiff`` (camelCase); ``max_diff`` would only create
    # an unused attribute on the test instance.
    self.maxDiff = None
    job = create_test_job(
        clustering=create_test_clustering(
            clustering_type=ClusteringMethods.NO_CLUSTER.value),
        split=repair_example(),
        encoding=create_test_encoding(prefix_length=8, padding=True),
        labelling=create_test_labelling(
            label_type=LabelTypes.NEXT_ACTIVITY.value),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value,
            prediction_method=ClassificationMethods.RANDOM_FOREST.value)
    )
    result, _ = calculate(job)
    self.assertAlmostEqual(result['f1score'], 0.542398845)
    self.assertAlmostEqual(result['acc'], 0.809954751)
    self.assertAlmostEqual(result['precision'], 0.623447204)
    self.assertAlmostEqual(result['recall'], 0.52249454423)
    self.assertAlmostEqual(result['auc'], 0)
def test_regression_xgboost(self):
    """XGBoost regression on remaining time must match the expected metrics."""
    model = create_test_predictive_model(
        predictive_model=PredictiveModels.REGRESSION.value,
        prediction_method=RegressionMethods.XGBOOST.value)
    labelling = create_test_labelling(
        label_type=LabelTypes.REMAINING_TIME.value)
    clustering = create_test_clustering(
        clustering_type=ClusteringMethods.NO_CLUSTER.value)
    job = create_test_job(
        predictive_model=model,
        labelling=labelling,
        clustering=clustering,
    )

    metrics, _ = calculate(job)
    del metrics['elapsed_time']
    print(metrics)

    expected = {
        'mae': 0.00011968612670898438,
        'mape': -1,
        'rmse': 0.00011968612670898438,
        'rscore': 0.0
    }
    self.assertDictEqual(metrics, expected)
def test_regression_lasso(self):
    """Lasso regression on remaining time must match the expected metrics."""
    model = create_test_predictive_model(
        predictive_model=PredictiveModels.REGRESSION.value,
        prediction_method=RegressionMethods.LASSO.value)
    labelling = create_test_labelling(
        label_type=LabelTypes.REMAINING_TIME.value)
    clustering = create_test_clustering(
        clustering_type=ClusteringMethods.NO_CLUSTER.value)
    job = create_test_job(
        predictive_model=model,
        labelling=labelling,
        clustering=clustering,
    )

    metrics, _ = calculate(job)
    del metrics['elapsed_time']
    print(metrics)

    expected = {
        'mae': 0.0001388888888888889,
        'mape': -1,
        'rmse': 0.0001552824984374854,
        'rscore': -0.25
    }
    self.assertDictEqual(metrics, expected)
def test_class_nn(self):
    """NN classification on ``concept:name`` reports every expected metric key.

    Only the presence of the keys is checked, not their values.
    """
    model = create_test_predictive_model(
        prediction_method=ClassificationMethods.NN.value)
    labelling = create_test_labelling(
        label_type=LabelTypes.ATTRIBUTE_STRING.value,
        attribute_name='concept:name')
    clustering = create_test_clustering(
        clustering_type=ClusteringMethods.NO_CLUSTER.value)
    job = create_test_job(
        predictive_model=model,
        labelling=labelling,
        clustering=clustering,
    )

    metrics, _ = calculate(job)
    del metrics['elapsed_time']

    expected_keys = (
        'f1score',
        'acc',
        'precision',
        'recall',
        'true_positive',
        'true_negative',
        'false_negative',
        'false_positive',
        'auc',
    )
    for key in expected_keys:
        self.assertIn(key, metrics)
def test_class_nn_binary(self):
    """NN classification on remaining time with a mean threshold.

    The result (without ``elapsed_time``) must match the expected metrics.
    """
    model = create_test_predictive_model(
        prediction_method=ClassificationMethods.NN.value)
    labelling = create_test_labelling(
        label_type=LabelTypes.REMAINING_TIME.value,
        threshold_type=ThresholdTypes.THRESHOLD_MEAN.value)
    clustering = create_test_clustering(
        clustering_type=ClusteringMethods.NO_CLUSTER.value)
    job = create_test_job(
        predictive_model=model,
        labelling=labelling,
        clustering=clustering,
    )

    metrics, _ = calculate(job)
    del metrics['elapsed_time']

    expected = {
        'f1score': 0.0,
        'acc': 0.0,
        'precision': 0.0,
        'recall': 0.0,
        'true_positive': 0,
        'true_negative': 0,
        'false_negative': 2,
        'false_positive': 0,
        'auc': 0.0
    }
    self.assertDictEqual(metrics, expected)
def test_class_perc(self):
    """Perceptron classification on ``concept:name``.

    The result (without ``elapsed_time``) must match the expected metrics.
    """
    model = create_test_predictive_model(
        prediction_method=ClassificationMethods.PERCEPTRON.value)
    labelling = create_test_labelling(
        label_type=LabelTypes.ATTRIBUTE_STRING.value,
        attribute_name='concept:name')
    clustering = create_test_clustering(
        clustering_type=ClusteringMethods.NO_CLUSTER.value)
    job = create_test_job(
        predictive_model=model,
        labelling=labelling,
        clustering=clustering,
    )

    metrics, _ = calculate(job)
    del metrics['elapsed_time']

    expected = {
        'f1score': 0.0,
        'acc': 0.0,
        'precision': 0.0,
        'recall': 0.0,
        'true_positive': 0,
        'true_negative': 0,
        'false_negative': 2,
        'false_positive': 0,
        'auc': 0.0
    }
    self.assertDictEqual(metrics, expected)
def calculate_helper(job):
    """Run ``calculate`` on ``job`` and print how long the call took.

    ``job`` is subscripted with ``'method'`` for the printed label.
    """
    began = time.time()
    calculate(job)
    method_name = job['method']
    elapsed = time.time() - began
    print('Total for %s %s seconds' % (method_name, elapsed))