Ejemplo n.º 1
0
def create_test_clustering(
        clustering_type: str = ClusteringMethods.NO_CLUSTER.value,
        configuration: dict = None) -> Clustering:
    """Build a Clustering object through ``Clustering.init``.

    :param clustering_type: clustering method identifier forwarded to
        ``Clustering.init``; defaults to ``ClusteringMethods.NO_CLUSTER.value``
    :param configuration: configuration dict forwarded to ``Clustering.init``;
        when omitted (or ``None``) a new empty dict is used
    :return: whatever ``Clustering.init`` returns
    """
    # A fresh dict per call: a `{}` default would be created once at function
    # definition time and shared (and possibly mutated) across every call.
    if configuration is None:
        configuration = {}
    return Clustering.init(clustering_type, configuration)
Ejemplo n.º 2
0
def progetto_padova():
    """Run a hard-coded build / predict / explain pipeline on two logs.

    Steps, in order:

    1. import the logs at ``BASE_DIR + RELATIVE_TRAIN_PATH`` and
       ``BASE_DIR + RELATIVE_VALIDATION_PATH`` and wrap them in a double split;
    2. get or create the encoding, labelling, clustering, predictive model and
       hyperparameter optimizer, and the prediction job that ties them together;
    3. encode the logs and invoke the ``BUILD_MODEL_AND_TEST`` action;
    4. save the models when the job asks for it;
    5. invoke the ``PREDICT`` and ``PREDICT_PROBA`` actions on the
       concatenation of the encoded train and test frames;
    6. get or create a LIME explanation and run it for ``EXPLANATION_TARGET``.

    The function takes no arguments and returns nothing; intermediate results
    are only bound to local names.
    """
    # --- logs and split (train log is imported before the validation log) ---
    train_log_record = create_log(
        import_log(BASE_DIR + RELATIVE_TRAIN_PATH),
        RELATIVE_TRAIN_PATH,
        BASE_DIR,
        import_in_cache=False)
    validation_log_record = create_log(
        import_log(BASE_DIR + RELATIVE_VALIDATION_PATH),
        RELATIVE_VALIDATION_PATH,
        BASE_DIR,
        import_in_cache=False)
    log_split = Split.objects.get_or_create(
        type=SplitTypes.SPLIT_DOUBLE.value,
        train_log=train_log_record,
        test_log=validation_log_record)[0]

    # --- encoding method ---
    encoding = Encoding.objects.get_or_create(
        data_encoding=DataEncodings.LABEL_ENCODER.value,
        value_encoding=ValueEncodings.SIMPLE_INDEX.value,
        add_elapsed_time=False,
        add_remaining_time=False,
        add_executed_events=False,
        add_resources_used=False,
        add_new_traces=False,
        prefix_length=5,
        padding=True,
        task_generation_type=TaskGenerationTypes.ALL_IN_ONE.value,
        features=[])[0]

    # --- label definition ---
    labelling = Labelling.objects.get_or_create(
        type=LabelTypes.ATTRIBUTE_STRING.value,
        attribute_name='label',
        threshold_type=None,
        threshold=None)[0]

    # --- clustering (none) ---
    clustering = Clustering.init(ClusteringMethods.NO_CLUSTER.value,
                                 configuration={})

    # --- predictive model: decision-tree classification ---
    model_config = get_prediction_method_config(
        PredictiveModels.CLASSIFICATION.value,
        ClassificationMethods.DECISION_TREE.value,
        payload={
            'max_depth': 2,
            'min_samples_split': 2,
            'min_samples_leaf': 2
        })
    predictive_model = PredictiveModel.init(model_config)

    # --- hyperparameter optimisation procedure ---
    # NOTE(review): 'performance_metric' receives a HyperOptAlgorithms value
    # and 'algorithm_type' a HyperOptLosses value; judging by the enum names
    # these look swapped -- confirm against HyperparameterOptimization.init.
    optimizer = HyperparameterOptimization.init({
        'type': HyperparameterOptimizationMethods.HYPEROPT.value,
        'max_evaluations': 10,
        'performance_metric': HyperOptAlgorithms.TPE.value,
        'algorithm_type': HyperOptLosses.AUC.value
    })

    job = Job.objects.get_or_create(
        status=JobStatuses.CREATED.value,
        type=JobTypes.PREDICTION.value,
        split=log_split,
        encoding=encoding,
        labelling=labelling,
        clustering=clustering,
        predictive_model=predictive_model,
        hyperparameter_optimizer=optimizer,
        create_models=True)[0]

    def action(action_name):
        # Looked up afresh on every use, exactly where the action is invoked.
        return MODEL[job.predictive_model.predictive_model][action_name]

    # load log
    train_log, test_log, additional_columns = get_train_test_log(job.split)

    # encode
    train_df, test_df = encode_label_logs(train_log, test_log, job)

    # train + evaluate
    build_and_test = action(ModelActions.BUILD_MODEL_AND_TEST.value)
    clusterer = _init_clusterer(job.clustering, train_df)
    results, model_split = build_and_test(train_df, test_df, clusterer, job)

    if job.create_models:
        save_models(model_split, job)

    # predict (the second call overwrites the result of the first)
    data_df = pd.concat([train_df, test_df])
    results = action(ModelActions.PREDICT.value)(job, data_df)
    results = action(ModelActions.PREDICT_PROBA.value)(job, data_df)

    # lime
    # The split passed here selects the analysed log; a split different from
    # the training one may be used instead.
    exp = Explanation.objects.get_or_create(
        type=ExplanationTypes.LIME.value,
        split=job.split,
        predictive_model=job.predictive_model,
        job=job)[0]
    error, result = explanation(exp.id, int(EXPLANATION_TARGET))
Ejemplo n.º 3
0
def generate(split, payload):
    """Get or create one prediction job per requested configuration.

    Every combination of ``config['methods']``, ``config['clusterings']`` and
    ``config['encodings']`` (with ``config = payload['config']``) is visited.
    When ``config['encoding']['generation_type']`` equals ``UP_TO`` a job is
    produced for each prefix length from 1 up to and including
    ``config['encoding']['prefix_length']``; otherwise a single job with that
    prefix length is produced.

    For a predictive model whose type is
    ``PredictiveModels.TIME_SERIES_PREDICTION.value`` the requested clustering
    and hyperparameter optimizer are ignored in favour of "no cluster" and a
    fallback optimizer configuration.

    :param split: split attached to every job
    :param payload: mapping with the prediction ``'type'`` and the ``'config'``
    :return: list of jobs in creation order
    """
    jobs = []

    config = payload['config']
    labelling_config = config.get('labelling', {})
    job_type = JobTypes.PREDICTION.value
    prediction_type = payload['type']

    def _encoding_for(value_encoding, prefix_length):
        # Encoding row for one value encoding and one prefix length.
        encoding_config = config['encoding']
        return Encoding.objects.get_or_create(
            data_encoding=DataEncodings.LABEL_ENCODER.value,
            value_encoding=value_encoding,
            add_elapsed_time=labelling_config.get('add_elapsed_time', False),
            add_remaining_time=labelling_config.get('add_remaining_time', False),
            add_executed_events=labelling_config.get('add_executed_events', False),
            add_resources_used=labelling_config.get('add_resources_used', False),
            add_new_traces=labelling_config.get('add_new_traces', False),
            prefix_length=prefix_length,
            # TODO static check?
            padding=encoding_config['padding'] == 'zero_padding',
            task_generation_type=encoding_config.get('generation_type', 'only_this'),
            features=encoding_config.get('features', [])
        )[0]

    def _predictive_model_for(method_name):
        # Predictive model for the requested prediction type and method.
        return PredictiveModel.init(
            get_prediction_method_config(prediction_type, method_name, config))

    def _labelling():
        # No labelling object at all when the payload carries no labelling.
        if labelling_config == {}:
            return None
        return Labelling.objects.get_or_create(
            type=labelling_config.get('type', None),
            # TODO static check?
            attribute_name=labelling_config.get('attribute_name', None),
            threshold_type=labelling_config.get('threshold_type', None),
            threshold=labelling_config.get('threshold', None)
        )[0]

    def _job_for(encoding_obj, model_obj, clustering_name, fallback_optimizer_type):
        # Labelling, clustering and optimizer are built in this order, then
        # the job itself is fetched/created and post-processed.
        is_time_series = (model_obj.predictive_model
                          == PredictiveModels.TIME_SERIES_PREDICTION.value)
        labelling_obj = _labelling()

        # TODO TEMPORARY workaround: time-series models bypass the requested
        # clustering and hyperparameter optimizer.
        if is_time_series:
            clustering_obj = Clustering.init(
                ClusteringMethods.NO_CLUSTER.value, configuration={})
            optimizer_config = {'type': fallback_optimizer_type}
        else:
            clustering_obj = Clustering.init(
                clustering_name, configuration=config.get(clustering_name, {}))
            optimizer_config = config.get(
                'hyperparameter_optimizer', {'type': fallback_optimizer_type})
        optimizer_obj = HyperparameterOptimization.init(optimizer_config)

        new_job = Job.objects.get_or_create(
            status=JobStatuses.CREATED.value,
            type=job_type,
            split=split,
            encoding=encoding_obj,
            labelling=labelling_obj,
            clustering=clustering_obj,
            hyperparameter_optimizer=optimizer_obj,
            predictive_model=model_obj,
            create_models=config.get('create_models', False)
        )[0]

        check_predictive_model_not_overwrite(new_job)
        set_model_name(new_job)
        return new_job

    for method in config['methods']:
        for clustering in config['clusterings']:
            for enc_method in config['encodings']:
                encoding_config = config['encoding']
                if encoding_config['generation_type'] == UP_TO:
                    # One job per prefix length 1..prefix_length (inclusive).
                    for prefix_length in range(1, encoding_config['prefix_length'] + 1):
                        encoding_obj = _encoding_for(enc_method, prefix_length)
                        model_obj = _predictive_model_for(method)
                        # NOTE(review): this branch falls back to optimizer
                        # type None while the other branch uses the string
                        # 'none' -- confirm which one is intended.
                        jobs.append(
                            _job_for(encoding_obj, model_obj, clustering, None))
                else:
                    # Single job: here the model is created before the encoding.
                    model_obj = _predictive_model_for(method)
                    encoding_obj = _encoding_for(
                        enc_method, encoding_config['prefix_length'])
                    jobs.append(
                        _job_for(encoding_obj, model_obj, clustering, 'none'))

    return jobs
Ejemplo n.º 4
0
def update(split, payload, generation_type=PredictiveModels.CLASSIFICATION.value):  # TODO adapt to allow selecting the predictive_model to update
    """Get or create one UPDATE job per requested configuration.

    Every combination of ``methods``, ``clusterings``, ``incremental_train``
    and ``encodings`` found in ``payload['config']`` is visited. When
    ``config['encoding']['generation_type']`` equals ``UP_TO`` a job is
    produced for each prefix length from 1 up to and including
    ``config['encoding']['prefix_length']``; otherwise a single job with that
    prefix length is produced.

    :param split: split attached to every job
    :param payload: mapping holding the ``'config'`` section
    :param generation_type: prediction type handed to
        ``get_prediction_method_config``
    :return: list of jobs in creation order
    :raises IndexError: when no Job matches an ``incremental_train`` primary
        key (the filtered queryset is indexed with ``[0]``)
    """
    config = payload['config']
    labelling_config = config.get('labelling', {})
    jobs = []

    for method in config['methods']:
        for clustering in config['clusterings']:
            for incremental_base_model in config['incremental_train']:
                for enc_method in config['encodings']:
                    encoding_config = config['encoding']

                    # The two generation modes differ only in which prefix
                    # lengths get a job.
                    if encoding_config['generation_type'] == UP_TO:
                        prefix_lengths = range(1, encoding_config['prefix_length'] + 1)
                    else:
                        prefix_lengths = [encoding_config['prefix_length']]

                    for prefix_length in prefix_lengths:
                        encoding_obj = Encoding.objects.get_or_create(  # TODO fixme
                            data_encoding=DataEncodings.LABEL_ENCODER.value,
                            value_encoding=enc_method,
                            add_elapsed_time=labelling_config.get('add_elapsed_time', False),
                            add_remaining_time=labelling_config.get('add_remaining_time', False),
                            add_executed_events=labelling_config.get('add_executed_events', False),
                            add_resources_used=labelling_config.get('add_resources_used', False),
                            add_new_traces=labelling_config.get('add_new_traces', False),
                            prefix_length=prefix_length,
                            # TODO static check?
                            padding=encoding_config['padding'] == 'zero_padding',
                            task_generation_type=encoding_config.get('generation_type', 'only_this'),
                            features=encoding_config.get('features', [])
                        )[0]

                        # No labelling object when the payload carries none.
                        if labelling_config == {}:
                            labelling_obj = None
                        else:
                            labelling_obj = Labelling.objects.get_or_create(
                                type=labelling_config.get('type', None),
                                # TODO static check?
                                attribute_name=labelling_config.get('attribute_name', None),
                                threshold_type=labelling_config.get('threshold_type', None),
                                threshold=labelling_config.get('threshold', None)
                            )[0]

                        clustering_obj = Clustering.init(
                            clustering, configuration=config.get(clustering, {}))

                        # NOTE(review): the whole payload (not
                        # payload['config']) is passed here -- confirm this is
                        # what get_prediction_method_config expects.
                        model_obj = PredictiveModel.init(
                            get_prediction_method_config(generation_type, method, payload)
                        )

                        optimizer_obj = HyperparameterOptimization.init(
                            config.get('hyperparameter_optimizer', None))

                        create_models = config.get('create_models', False)

                        # Job whose model is the starting point of the update.
                        base_job = Job.objects.filter(pk=incremental_base_model)[0]

                        job, _created = Job.objects.get_or_create(
                            status=JobStatuses.CREATED.value,
                            type=JobTypes.UPDATE.value,
                            split=split,
                            encoding=encoding_obj,
                            labelling=labelling_obj,
                            clustering=clustering_obj,
                            predictive_model=model_obj,
                            hyperparameter_optimizer=optimizer_obj,
                            create_models=create_models,
                            incremental_train=base_job
                        )

                        check_predictive_model_not_overwrite(job)
                        set_model_name(job)

                        jobs.append(job)
    return jobs
Ejemplo n.º 5
0
def generate(split, payload):
    """Get or create one prediction job per requested configuration.

    Every combination of ``config['methods']``, ``config['clusterings']`` and
    ``config['encodings']`` (with ``config = payload['config']``) is visited.
    When ``config['encoding']['generation_type']`` equals ``UP_TO`` a job is
    produced for each prefix length from 1 up to and including
    ``config['encoding']['prefix_length']``; otherwise a single job with that
    prefix length is produced.

    :param split: split attached to every job
    :param payload: mapping with the prediction ``'type'`` and the ``'config'``
    :return: list of jobs in creation order
    """
    jobs = []

    config = payload['config']
    labelling_config = config.get('labelling', {})
    job_type = JobTypes.PREDICTION.value
    prediction_type = payload['type']

    def _encoding_for(value_encoding, prefix_length):
        # Encoding row for one value encoding and one prefix length.
        encoding_config = config['encoding']
        return Encoding.objects.get_or_create(
            data_encoding='label_encoder',
            value_encoding=value_encoding,
            add_elapsed_time=labelling_config.get('add_elapsed_time', False),
            add_remaining_time=labelling_config.get('add_remaining_time', False),
            add_executed_events=labelling_config.get('add_executed_events', False),
            add_resources_used=labelling_config.get('add_resources_used', False),
            add_new_traces=labelling_config.get('add_new_traces', False),
            prefix_length=prefix_length,
            # TODO static check?
            padding=encoding_config['padding'] == 'zero_padding',
            task_generation_type=encoding_config.get('generation_type', 'only_this')
        )[0]

    def _predictive_model_for(method_name):
        # Predictive model for the requested prediction type and method.
        return PredictiveModel.init(
            get_prediction_method_config(prediction_type, method_name, config))

    def _job_for(encoding_obj, model_obj, clustering_name):
        # Labelling first, then clustering, then the job itself.
        if labelling_config == {}:
            labelling_obj = None
        else:
            labelling_obj = Labelling.objects.get_or_create(
                type=labelling_config.get('type', None),
                # TODO static check?
                attribute_name=labelling_config.get('attribute_name', None),
                threshold_type=labelling_config.get('threshold_type', None),
                threshold=labelling_config.get('threshold', None)
            )[0]

        clustering_obj = Clustering.init(
            clustering_name, configuration=config.get(clustering_name, {}))

        return Job.objects.get_or_create(
            status=JobStatuses.CREATED.value,
            type=job_type,
            split=split,
            encoding=encoding_obj,
            labelling=labelling_obj,
            clustering=clustering_obj,
            predictive_model=model_obj
        )[0]

    for method in config['methods']:
        for clustering in config['clusterings']:
            for enc_method in config['encodings']:
                encoding_config = config['encoding']
                if encoding_config['generation_type'] == UP_TO:
                    # One job per prefix length 1..prefix_length (inclusive).
                    for prefix_length in range(1, encoding_config['prefix_length'] + 1):
                        encoding_obj = _encoding_for(enc_method, prefix_length)
                        model_obj = _predictive_model_for(method)
                        jobs.append(_job_for(encoding_obj, model_obj, clustering))
                else:
                    # Single job: here the model is created before the encoding.
                    model_obj = _predictive_model_for(method)
                    encoding_obj = _encoding_for(
                        enc_method, encoding_config['prefix_length'])
                    jobs.append(_job_for(encoding_obj, model_obj, clustering))

    return jobs