def create_test_clustering(
        clustering_type: str = ClusteringMethods.NO_CLUSTER.value,
        configuration: dict = None) -> Clustering:
    """Build a clustering object through ``Clustering.init``.

    Args:
        clustering_type: clustering method identifier; defaults to
            ``ClusteringMethods.NO_CLUSTER.value``.
        configuration: configuration dict forwarded to ``Clustering.init``.
            ``None`` (the default) stands for an empty configuration.

    Returns:
        The object returned by ``Clustering.init``.
    """
    # Use a fresh dict per call: a literal ``{}`` default is created once at
    # definition time and would be shared (and possibly mutated) across calls.
    if configuration is None:
        configuration = {}
    return Clustering.init(clustering_type, configuration)
def progetto_padova():
    """Run one complete prediction + explanation round on the configured logs.

    Steps, in order:
      1. get-or-create a prediction ``Job`` (double split over the train and
         validation logs, simple-index encoding with prefix length 5,
         attribute-string labelling on ``'label'``, no clustering, a decision
         tree classifier and a hyperopt optimiser);
      2. load and encode the logs, build and test the model, and save it when
         ``job.create_models`` is set;
      3. run ``PREDICT`` and ``PREDICT_PROBA`` on train + test data;
      4. get-or-create a LIME ``Explanation`` and compute it for
         ``EXPLANATION_TARGET``.

    Nothing is returned; the function is run for its side effects.
    """
    split = Split.objects.get_or_create(  # this creates the split of the log
        type=SplitTypes.SPLIT_DOUBLE.value,
        train_log=create_log(  # this imports the log
            import_log(BASE_DIR + RELATIVE_TRAIN_PATH),
            RELATIVE_TRAIN_PATH,
            BASE_DIR,
            import_in_cache=False),
        test_log=create_log(  # this imports the log
            import_log(BASE_DIR + RELATIVE_VALIDATION_PATH),
            RELATIVE_VALIDATION_PATH,
            BASE_DIR,
            import_in_cache=False)
    )[0]

    encoding = Encoding.objects.get_or_create(  # this defines the encoding method
        data_encoding=DataEncodings.LABEL_ENCODER.value,
        value_encoding=ValueEncodings.SIMPLE_INDEX.value,
        add_elapsed_time=False,
        add_remaining_time=False,
        add_executed_events=False,
        add_resources_used=False,
        add_new_traces=False,
        prefix_length=5,
        padding=True,
        task_generation_type=TaskGenerationTypes.ALL_IN_ONE.value,
        features=[]
    )[0]

    labelling = Labelling.objects.get_or_create(  # this defines the label
        type=LabelTypes.ATTRIBUTE_STRING.value,
        attribute_name='label',
        threshold_type=None,
        threshold=None
    )[0]

    clustering = Clustering.init(ClusteringMethods.NO_CLUSTER.value, configuration={})

    predictive_model = PredictiveModel.init(  # this defines the predictive model
        get_prediction_method_config(
            PredictiveModels.CLASSIFICATION.value,
            ClassificationMethods.DECISION_TREE.value,
            payload={
                'max_depth': 2,
                'min_samples_split': 2,
                'min_samples_leaf': 2
            }))

    # this defines the hyperparameter optimisation procedure
    # The metric is a loss (AUC) and the algorithm is TPE; the two values were
    # previously assigned to each other's keys.
    hyperparameter_optimizer = HyperparameterOptimization.init({
        'type': HyperparameterOptimizationMethods.HYPEROPT.value,
        'max_evaluations': 10,
        'performance_metric': HyperOptLosses.AUC.value,
        'algorithm_type': HyperOptAlgorithms.TPE.value
    })

    job = Job.objects.get_or_create(
        status=JobStatuses.CREATED.value,
        type=JobTypes.PREDICTION.value,
        split=split,
        encoding=encoding,
        labelling=labelling,
        clustering=clustering,
        predictive_model=predictive_model,
        hyperparameter_optimizer=hyperparameter_optimizer,
        create_models=True
    )[0]

    # load log (the third returned element, the additional columns, is unused)
    train_log, test_log, _ = get_train_test_log(job.split)

    # encode
    train_df, test_df = encode_label_logs(train_log, test_log, job)

    # train + evaluate (only the model split is needed afterwards)
    _, model_split = MODEL[job.predictive_model.predictive_model][
        ModelActions.BUILD_MODEL_AND_TEST.value](
        train_df, test_df, _init_clusterer(job.clustering, train_df), job)

    if job.create_models:
        save_models(model_split, job)

    # predict: both actions are still invoked, their results are not used here
    data_df = pd.concat([train_df, test_df])
    MODEL[job.predictive_model.predictive_model][
        ModelActions.PREDICT.value](job, data_df)
    MODEL[job.predictive_model.predictive_model][
        ModelActions.PREDICT_PROBA.value](job, data_df)

    # lime
    exp = Explanation.objects.get_or_create(
        type=ExplanationTypes.LIME.value,
        # this defines the analysed log, you can use a different one from the
        # training one
        split=job.split,
        predictive_model=job.predictive_model,
        job=job
    )[0]
    # Keep the two-element unpacking of the original call; values are unused.
    _error, _result = explanation(exp.id, int(EXPLANATION_TARGET))
def generate(split, payload):
    """Get-or-create one prediction job per requested configuration.

    A job is produced for every combination of ``config['methods']``,
    ``config['clusterings']`` and ``config['encodings']``. When
    ``config['encoding']['generation_type']`` equals ``UP_TO`` one job is
    produced for each prefix length from 1 to
    ``config['encoding']['prefix_length']`` inclusive; otherwise a single job
    with exactly that prefix length is produced.

    Args:
        split: split object attached to every job.
        payload: dict with a ``'type'`` entry (prediction type) and a
            ``'config'`` dict. ``config`` must hold ``'methods'``,
            ``'clusterings'``, ``'encodings'`` and ``'encoding'`` (with
            ``'generation_type'``, ``'prefix_length'`` and ``'padding'``);
            ``'labelling'``, ``'hyperparameter_optimizer'``,
            ``'create_models'`` and per-clustering configurations are optional.

    Returns:
        The list of jobs, in generation order.

    NOTE(review): if the same module defines another ``generate`` further
    down, that later definition replaces this one at import time.
    """
    jobs = []

    config = payload['config']
    labelling_config = config.get('labelling', {})
    job_type = JobTypes.PREDICTION.value
    prediction_type = payload['type']

    for method in config['methods']:
        for clustering in config['clusterings']:
            for enc_method in config['encodings']:
                encoding_config = config['encoding']
                up_to = encoding_config['generation_type'] == UP_TO
                if up_to:
                    prefix_lengths = range(1, encoding_config['prefix_length'] + 1)
                else:
                    prefix_lengths = [encoding_config['prefix_length']]

                for prefix_length in prefix_lengths:
                    # The add_* flags are read from the labelling configuration,
                    # as in the original code.
                    encoding = Encoding.objects.get_or_create(
                        data_encoding=DataEncodings.LABEL_ENCODER.value,
                        value_encoding=enc_method,
                        add_elapsed_time=labelling_config.get('add_elapsed_time', False),
                        add_remaining_time=labelling_config.get('add_remaining_time', False),
                        add_executed_events=labelling_config.get('add_executed_events', False),
                        add_resources_used=labelling_config.get('add_resources_used', False),
                        add_new_traces=labelling_config.get('add_new_traces', False),
                        prefix_length=prefix_length,  # TODO static check?
                        padding=encoding_config['padding'] == 'zero_padding',
                        task_generation_type=encoding_config.get('generation_type', 'only_this'),
                        features=encoding_config.get('features', [])
                    )[0]

                    predictive_model = PredictiveModel.init(
                        get_prediction_method_config(prediction_type, method, config))

                    labelling = None
                    if labelling_config != {}:
                        labelling = Labelling.objects.get_or_create(
                            type=labelling_config.get('type', None),  # TODO static check?
                            attribute_name=labelling_config.get('attribute_name', None),
                            threshold_type=labelling_config.get('threshold_type', None),
                            threshold=labelling_config.get('threshold', None)
                        )[0]

                    # NOTE(review): the "no optimiser" fallback differs between
                    # the two generation modes ({'type': None} for UP_TO,
                    # {'type': 'none'} otherwise). This is preserved as found;
                    # confirm which value HyperparameterOptimization.init
                    # expects and unify.
                    no_optimizer = {'type': None if up_to else 'none'}

                    # TODO TEMPORARY workaround: time series prediction jobs
                    # get neither clustering nor hyperparameter optimisation.
                    if predictive_model.predictive_model != PredictiveModels.TIME_SERIES_PREDICTION.value:
                        job_clustering = Clustering.init(
                            clustering, configuration=config.get(clustering, {}))
                        optimizer_config = config.get('hyperparameter_optimizer', no_optimizer)
                    else:
                        job_clustering = Clustering.init(
                            ClusteringMethods.NO_CLUSTER.value, configuration={})
                        optimizer_config = no_optimizer

                    job = Job.objects.get_or_create(
                        status=JobStatuses.CREATED.value,
                        type=job_type,
                        split=split,
                        encoding=encoding,
                        labelling=labelling,
                        clustering=job_clustering,
                        hyperparameter_optimizer=HyperparameterOptimization.init(optimizer_config),
                        predictive_model=predictive_model,
                        create_models=config.get('create_models', False)
                    )[0]

                    check_predictive_model_not_overwrite(job)
                    set_model_name(job)
                    jobs.append(job)

    return jobs
def update(split, payload, generation_type=PredictiveModels.CLASSIFICATION.value):  # TODO adapt to allow selecting the predictive_model to update
    """Get-or-create one update job per requested configuration.

    A job of type ``JobTypes.UPDATE`` is produced for every combination of
    ``config['methods']``, ``config['clusterings']``,
    ``config['incremental_train']`` and ``config['encodings']``. When
    ``config['encoding']['generation_type']`` equals ``UP_TO`` one job is
    produced for each prefix length from 1 to
    ``config['encoding']['prefix_length']`` inclusive; otherwise a single job
    with exactly that prefix length is produced.

    Args:
        split: split object attached to every job.
        payload: dict holding the ``'config'`` dict described above.
        generation_type: predictive model type handed to
            ``get_prediction_method_config``; defaults to classification.

    Returns:
        The list of jobs, in generation order.

    Raises:
        IndexError: when no ``Job`` exists for one of the primary keys listed
            in ``config['incremental_train']``.
    """
    jobs = []

    config = payload['config']
    labelling_config = config.get('labelling', {})

    for method in config['methods']:
        for clustering in config['clusterings']:
            for incremental_base_model in config['incremental_train']:
                for enc_method in config['encodings']:
                    encoding_config = config['encoding']
                    if encoding_config['generation_type'] == UP_TO:
                        prefix_lengths = range(1, encoding_config['prefix_length'] + 1)
                    else:
                        prefix_lengths = [encoding_config['prefix_length']]

                    for prefix_length in prefix_lengths:
                        # The add_* flags are read from the labelling
                        # configuration, as in the original code.
                        encoding = Encoding.objects.get_or_create(  # TODO fixme
                            data_encoding=DataEncodings.LABEL_ENCODER.value,
                            value_encoding=enc_method,
                            add_elapsed_time=labelling_config.get('add_elapsed_time', False),
                            add_remaining_time=labelling_config.get('add_remaining_time', False),
                            add_executed_events=labelling_config.get('add_executed_events', False),
                            add_resources_used=labelling_config.get('add_resources_used', False),
                            add_new_traces=labelling_config.get('add_new_traces', False),
                            prefix_length=prefix_length,  # TODO static check?
                            padding=encoding_config['padding'] == 'zero_padding',
                            task_generation_type=encoding_config.get('generation_type', 'only_this'),
                            features=encoding_config.get('features', [])
                        )[0]

                        labelling = None
                        if labelling_config != {}:
                            labelling = Labelling.objects.get_or_create(
                                type=labelling_config.get('type', None),  # TODO static check?
                                attribute_name=labelling_config.get('attribute_name', None),
                                threshold_type=labelling_config.get('threshold_type', None),
                                threshold=labelling_config.get('threshold', None)
                            )[0]

                        job_clustering = Clustering.init(
                            clustering, configuration=config.get(clustering, {}))

                        # NOTE(review): the whole payload (not payload['config'])
                        # is passed here; confirm this is what
                        # get_prediction_method_config expects.
                        predictive_model = PredictiveModel.init(
                            get_prediction_method_config(generation_type, method, payload))

                        hyperparameter_optimizer = HyperparameterOptimization.init(
                            config.get('hyperparameter_optimizer', None))

                        # Looked up per job, as before; fails with IndexError
                        # if the base job does not exist.
                        base_job = Job.objects.filter(pk=incremental_base_model)[0]

                        job, _ = Job.objects.get_or_create(
                            status=JobStatuses.CREATED.value,
                            type=JobTypes.UPDATE.value,
                            split=split,
                            encoding=encoding,
                            labelling=labelling,
                            clustering=job_clustering,
                            predictive_model=predictive_model,
                            hyperparameter_optimizer=hyperparameter_optimizer,
                            create_models=config.get('create_models', False),
                            incremental_train=base_job
                        )

                        check_predictive_model_not_overwrite(job)
                        set_model_name(job)
                        jobs.append(job)

    return jobs
def generate(split, payload):
    """Get-or-create one prediction job per requested configuration.

    A job is produced for every combination of ``config['methods']``,
    ``config['clusterings']`` and ``config['encodings']``. When
    ``config['encoding']['generation_type']`` equals ``UP_TO`` one job is
    produced for each prefix length from 1 to
    ``config['encoding']['prefix_length']`` inclusive; otherwise a single job
    with exactly that prefix length is produced.

    Args:
        split: split object attached to every job.
        payload: dict with a ``'type'`` entry (prediction type) and a
            ``'config'`` dict. ``config`` must hold ``'methods'``,
            ``'clusterings'``, ``'encodings'`` and ``'encoding'`` (with
            ``'generation_type'``, ``'prefix_length'`` and ``'padding'``);
            ``'labelling'`` and per-clustering configurations are optional.

    Returns:
        The list of jobs, in generation order.

    NOTE(review): if the same module already defines ``generate`` above this
    point, this definition replaces the earlier one at import time.
    """
    jobs = []

    config = payload['config']
    labelling_config = config.get('labelling', {})
    job_type = JobTypes.PREDICTION.value
    prediction_type = payload['type']

    for method in config['methods']:
        for clustering in config['clusterings']:
            for enc_method in config['encodings']:
                encoding_config = config['encoding']
                if encoding_config['generation_type'] == UP_TO:
                    prefix_lengths = range(1, encoding_config['prefix_length'] + 1)
                else:
                    prefix_lengths = [encoding_config['prefix_length']]

                for prefix_length in prefix_lengths:
                    # The add_* flags are read from the labelling configuration,
                    # as in the original code.
                    encoding = Encoding.objects.get_or_create(
                        data_encoding='label_encoder',
                        value_encoding=enc_method,
                        add_elapsed_time=labelling_config.get('add_elapsed_time', False),
                        add_remaining_time=labelling_config.get('add_remaining_time', False),
                        add_executed_events=labelling_config.get('add_executed_events', False),
                        add_resources_used=labelling_config.get('add_resources_used', False),
                        add_new_traces=labelling_config.get('add_new_traces', False),
                        prefix_length=prefix_length,  # TODO static check?
                        padding=encoding_config['padding'] == 'zero_padding',
                        task_generation_type=encoding_config.get('generation_type', 'only_this')
                    )[0]

                    predictive_model = PredictiveModel.init(
                        get_prediction_method_config(prediction_type, method, config))

                    labelling = None
                    if labelling_config != {}:
                        labelling = Labelling.objects.get_or_create(
                            type=labelling_config.get('type', None),  # TODO static check?
                            attribute_name=labelling_config.get('attribute_name', None),
                            threshold_type=labelling_config.get('threshold_type', None),
                            threshold=labelling_config.get('threshold', None)
                        )[0]

                    job = Job.objects.get_or_create(
                        status=JobStatuses.CREATED.value,
                        type=job_type,
                        split=split,
                        encoding=encoding,
                        labelling=labelling,
                        clustering=Clustering.init(
                            clustering, configuration=config.get(clustering, {})),
                        predictive_model=predictive_model
                    )[0]
                    jobs.append(job)

    return jobs