def create_test_predictive_model(
        predictive_model: str = PredictiveModels.CLASSIFICATION.value,
        prediction_method: str = ClassificationMethods.RANDOM_FOREST.value,
        configuration: dict = None) -> PredictiveModel:
    # Use None instead of a mutable default argument so the dict is not
    # shared across calls
    configuration = configuration if configuration is not None else {}
    return PredictiveModel.init(
        get_prediction_method_config(predictive_model, prediction_method,
                                     configuration))
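# A minimal usage sketch. Assumption: the configuration keys mirror the
# scikit-learn parameter names used in the decision-tree payload below;
# treat both the helper name and the keys as illustrative, not documented API.
def create_example_random_forest_model() -> PredictiveModel:
    return create_test_predictive_model(
        predictive_model=PredictiveModels.CLASSIFICATION.value,
        prediction_method=ClassificationMethods.RANDOM_FOREST.value,
        configuration={'n_estimators': 10, 'max_depth': 3})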
def progetto_padova():
    JOB = Job.objects.get_or_create(
        status=JobStatuses.CREATED.value,
        type=JobTypes.PREDICTION.value,
        split=Split.objects.get_or_create(  # this creates the split of the log
            type=SplitTypes.SPLIT_DOUBLE.value,
            train_log=create_log(  # this imports the log
                import_log(BASE_DIR + RELATIVE_TRAIN_PATH),
                RELATIVE_TRAIN_PATH,
                BASE_DIR,
                import_in_cache=False),
            test_log=create_log(  # this imports the log
                import_log(BASE_DIR + RELATIVE_VALIDATION_PATH),
                RELATIVE_VALIDATION_PATH,
                BASE_DIR,
                import_in_cache=False))[0],
        encoding=Encoding.objects.get_or_create(  # this defines the encoding method
            data_encoding=DataEncodings.LABEL_ENCODER.value,
            value_encoding=ValueEncodings.SIMPLE_INDEX.value,
            add_elapsed_time=False,
            add_remaining_time=False,
            add_executed_events=False,
            add_resources_used=False,
            add_new_traces=False,
            prefix_length=5,
            padding=True,
            task_generation_type=TaskGenerationTypes.ALL_IN_ONE.value,
            features=[])[0],
        labelling=Labelling.objects.get_or_create(  # this defines the label
            type=LabelTypes.ATTRIBUTE_STRING.value,
            attribute_name='label',
            threshold_type=None,
            threshold=None)[0],
        clustering=Clustering.init(ClusteringMethods.NO_CLUSTER.value,
                                   configuration={}),
        predictive_model=PredictiveModel.init(  # this defines the predictive model
            get_prediction_method_config(
                PredictiveModels.CLASSIFICATION.value,
                ClassificationMethods.DECISION_TREE.value,
                payload={
                    'max_depth': 2,
                    'min_samples_split': 2,
                    'min_samples_leaf': 2
                })),
        hyperparameter_optimizer=HyperparameterOptimization.init(
            {  # this defines the hyperparameter optimisation procedure
                'type': HyperparameterOptimizationMethods.HYPEROPT.value,
                'max_evaluations': 10,
                'performance_metric': HyperOptLosses.AUC.value,
                'algorithm_type': HyperOptAlgorithms.TPE.value
            }),
        create_models=True)[0]

    # load log
    train_log, test_log, additional_columns = get_train_test_log(JOB.split)

    # encode
    train_df, test_df = encode_label_logs(train_log, test_log, JOB)

    # train + evaluate
    results, model_split = MODEL[JOB.predictive_model.predictive_model][
        ModelActions.BUILD_MODEL_AND_TEST.value](
            train_df, test_df, _init_clusterer(JOB.clustering, train_df), JOB)
    if JOB.create_models:
        save_models(model_split, JOB)

    # predict
    data_df = pd.concat([train_df, test_df])
    predictions = MODEL[JOB.predictive_model.predictive_model][
        ModelActions.PREDICT.value](JOB, data_df)
    predictions_proba = MODEL[JOB.predictive_model.predictive_model][
        ModelActions.PREDICT_PROBA.value](JOB, data_df)

    # lime
    exp = Explanation.objects.get_or_create(
        type=ExplanationTypes.LIME.value,
        split=JOB.split,  # this defines the analysed log; it may differ from the training split
        predictive_model=JOB.predictive_model,
        job=JOB)[0]
    error, result = explanation(exp.id, int(EXPLANATION_TARGET))
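# Hypothetical entry point: run the full pipeline end to end. Assumes BASE_DIR,
# RELATIVE_TRAIN_PATH, RELATIVE_VALIDATION_PATH and EXPLANATION_TARGET are
# defined as module-level constants, as the code above expects.
if __name__ == '__main__':
    progetto_padova()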