Code example #1
    def setUp(self):
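        # Parse the train/test XES fixtures and cache the metadata the tests
        # need: unique event names and any additional attribute columns.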
        self.train_log = get_log(
            create_test_log(log_name=general_example_train_filename,
                            log_path=general_example_train_filepath))
        self.train_event_names = unique_events(self.train_log)
        self.train_add_col = get_additional_columns(self.train_log)

        self.test_log = get_log(
            create_test_log(log_name=general_example_test_filename,
                            log_path=general_example_test_filepath_xes))
        self.test_event_names = unique_events(self.test_log)
        self.test_add_col = get_additional_columns(self.test_log)
Code example #2
 def setUp(self):
     self.train_log = get_log(
         create_test_log(log_name=general_example_train_filename,
                         log_path=general_example_train_filepath))
     self.test_log = get_log(
         create_test_log(log_name=general_example_test_filename,
                         log_path=general_example_test_filepath_xes))
     self.encoding = create_test_encoding(
         value_encoding=ValueEncodings.BOOLEAN.value,
         prefix_length=2,
         add_elapsed_time=True,
         task_generation_type=TaskGenerationTypes.ONLY_THIS.value)
Code example #3
def get_train_test_log(split: Split):
    """Returns training_log, test_log and additional_columns"""
    # A matching SPLIT_DOUBLE may already have been materialised for these
    # parameters; reuse it unless the ordering is random (not reproducible).
    existing_double = Split.objects.filter(
        type=SplitTypes.SPLIT_DOUBLE.value,
        original_log=split.original_log,
        test_size=split.test_size,
        splitting_method=split.splitting_method)
    if (split.type == SplitTypes.SPLIT_SINGLE.value and existing_double.exists()
            and split.splitting_method != SplitOrderingMethods.SPLIT_RANDOM.value):
        return get_train_test_log(existing_double[0])
    elif split.original_log is not None and (
            not existing_double.exists()
            or split.splitting_method == SplitOrderingMethods.SPLIT_RANDOM.value):
        training_log, test_log = _split_single_log(split)
        additional_columns = get_additional_columns(get_log(split.original_log))

        if split.splitting_method != SplitOrderingMethods.SPLIT_RANDOM.value:
            _ = Split.objects.get_or_create(
                type=SplitTypes.SPLIT_DOUBLE.value,
                original_log=split.original_log,
                test_size=split.test_size,
                splitting_method=split.splitting_method,
                train_log=create_log(EventLog(training_log), '0-' + str(100 - int(split.test_size * 100)) + '.xes'),
                test_log=create_log(EventLog(test_log), str(100 - int(split.test_size * 100)) + '-100.xes'),
                additional_columns=split.additional_columns
            )[0]

        logger.info("\t\tLoaded single log from {}".format(split.original_log.path))
    else:
        # Have to use sklearn to convert some internal data types
        training_log = get_log(split.train_log)
        additional_columns = get_additional_columns(training_log)
        if split.additional_columns is None:
            split.additional_columns = split.train_log.name + split.test_log.name + '_ac.xes'
            split.save()
        training_log, train_log_to_append = train_test_split(training_log, test_size=0, shuffle=False)
        test_log, test_log_to_append = train_test_split(get_log(split.test_log), test_size=0, shuffle=False)
        logger.info("\t\tLoaded double logs from {} and {}.".format(split.train_log.path, split.test_log.path))
    if len(training_log) == 0:
        raise TypeError("Training log is empty. Create a new Split with better parameters")
    return training_log, test_log, additional_columns
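
A hedged sketch of how a caller might use this function; my_split_pk is a hypothetical primary key, and the three-value unpacking mirrors the return statement above:

    # Hypothetical caller; assumes a Split row already exists in the database.
    split = Split.objects.get(pk=my_split_pk)
    training_log, test_log, additional_columns = get_train_test_log(split)
    # An empty training log would have raised TypeError, so training_log has traces here.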
Code example #4
    def test_can_find_log_file(self):
        log = Log.objects.get(name="general_example.xes",
                              path=general_example_filepath)

        log_file = get_log(log)

        self.assertEqual(6, len(log_file))
Code example #5
 def test_global_event_attributes(self):
     log = get_log(
         create_test_log(log_name=general_example_test_filename,
                         log_path=general_example_test_filepath_xes))
     attributes = get_additional_columns(log)
     self.assertListEqual(attributes['event_attributes'],
                          ['Activity', 'Costs', 'Resource', 'org:resource'])
Code example #6
 def setUp(self):
     test_log = get_log(create_test_log(log_name=general_example_test_filename,
                                        log_path=general_example_test_filepath_xes))
     training_log = get_log(create_test_log(log_name=general_example_train_filename,
                                            log_path=general_example_train_filepath))
     self.training_df, self.test_df = encode_label_logs(training_log,
                                                        test_log,
                                                        create_test_job(
                                                            encoding=create_test_encoding(
                                                                value_encoding=ValueEncodings.BOOLEAN.value,
                                                                add_elapsed_time=True
                                                            ),
                                                            predictive_model=create_test_predictive_model(
                                                                predictive_model=PredictiveModels.CLASSIFICATION.value
                                                            )
                                                        ))
Code example #7
    def test_can_find_split_original_file(self):
        log = Log.objects.get(name="general_example.xes",
                              path=general_example_filepath)

        split = Split.objects.get(original_log=log)
        log_file = get_log(split.original_log)

        self.assertEqual(6, len(log_file))
Code example #8
    def setUp(self):
        test_log = get_log(
            create_test_log(log_name=general_example_test_filename,
                            log_path=general_example_test_filepath_xes))
        training_log = get_log(
            create_test_log(log_name=general_example_train_filename,
                            log_path=general_example_train_filepath))
        self.encoding = create_test_encoding(
            value_encoding=ValueEncodings.FREQUENCY.value,
            add_elapsed_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=1)
        self.labelling = create_test_labelling(
            label_type=LabelTypes.REMAINING_TIME.value)

        self.training_df, self.test_df = encode_label_logs(
            training_log, test_log,
            create_test_job(encoding=self.encoding, labelling=self.labelling))
Code example #9
 def setUp(self):
     self.log = get_log(create_test_log(log_name=general_example_test_filename,
                                        log_path=general_example_test_filepath_xes))
     self.labelling = create_test_labelling(label_type=LabelTypes.REMAINING_TIME.value)
     self.encoding = create_test_encoding(
         value_encoding=ValueEncodings.SIMPLE_INDEX.value,
         task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
         add_elapsed_time=True,
         prefix_length=1)
Code example #10
 def test_trace_attributes(self):
     self.log = get_log(create_test_log(log_name=financial_log_filename,
                                        log_path=financial_log_filepath))
     result = trace_attributes(self.log)
     self.assertEqual(2, len(result))
     self.assertDictEqual({'name': 'AMOUNT_REQ', 'type': 'number', 'example': '20000'},
                          result[0])
     self.assertDictEqual({'name': 'REG_DATE', 'type': 'string', 'example': '2011-10-01 00:38:44.546000+02:00'},
                          result[1])
Code example #11
File: views.py Project: stebranchi/predict-python
def get_log_traces_attributes(request, pk):
    log = Log.objects.get(pk=pk)
    try:
        log_file = get_log(log)

    except FileNotFoundError:
        logger.error("Log id: %s, path %s not found", log.id, log.path)
        return Response({'error': 'log file not found'}, status=status.HTTP_404_NOT_FOUND)
    value = get_log_trace_attributes(log_file)
    return Response(value, status=status.HTTP_200_OK)
Code example #12
    def test_replay_prediction(self):
        job = create_test_job(create_models=True)
        runtime_log = create_test_log(
            log_name='runtime_example.xes',
            log_path='cache/log_cache/test_logs/runtime_test.xes')
        log = get_log(runtime_log)
        prediction_task(job.id)
        job.refresh_from_db()

        replay_prediction_task(job, job, log)
Code example #13
 def setUp(self):
     self.train_log = get_log(
         create_test_log(log_name=general_example_train_filename,
                         log_path=general_example_train_filepath))
     self.test_log = get_log(
         create_test_log(log_name=general_example_test_filename,
                         log_path=general_example_test_filepath_xes))
     self.add_col = get_additional_columns(self.train_log)
     self.encoding = create_test_encoding(
         value_encoding=ValueEncodings.COMPLEX.value,
         add_elapsed_time=True,
         task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
         prefix_length=2)
     self.encodingPadding = create_test_encoding(
         value_encoding=ValueEncodings.COMPLEX.value,
         add_elapsed_time=True,
         task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
         prefix_length=10,
         padding=True)
Code example #14
 def setUp(self):
     self.log = get_log(create_test_log(log_name=general_example_test_filename,
                                        log_path=general_example_test_filepath_xes))
     self.event_names = unique_events(self.log)
     self.labelling = create_test_labelling(label_type=LabelTypes.REMAINING_TIME.value)
     self.add_col = get_additional_columns(self.log)
     self.encoding = create_test_encoding(
         value_encoding=ValueEncodings.LAST_PAYLOAD.value,
         add_elapsed_time=True,
         task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
         prefix_length=1)
Code example #15
def _split_single_log(split: Split):
    log = get_log(split.original_log)
    logger.info("\t\tExecute single split ID {}, split_type {}, test_size {}".format(split.id, split.type, split.test_size))
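    # Dispatch to the splitter matching the configured ordering method.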
    if split.splitting_method == SplitOrderingMethods.SPLIT_TEMPORAL.value:
        return _temporal_split(log, split.test_size)
    elif split.splitting_method == SplitOrderingMethods.SPLIT_STRICT_TEMPORAL.value:
        return _temporal_split_strict(log, split.test_size)
    elif split.splitting_method == SplitOrderingMethods.SPLIT_SEQUENTIAL.value:
        return _split_log(log, split.test_size, shuffle=False)
    elif split.splitting_method == SplitOrderingMethods.SPLIT_RANDOM.value:
        return _split_log(log, split.test_size, random_state=None)
    else:
        raise ValueError('splitting method {} not recognized'.format(split.splitting_method))
Code example #16
    def test_eval(self):
        encoding = create_test_encoding(
            value_encoding=ValueEncodings.FREQUENCY.value,
            task_generation_type=TaskGenerationTypes.ALL_IN_ONE.value,
            add_elapsed_time=True,
            prefix_length=12,
            padding=True)
        df = simple_index(
            get_log(create_test_log(log_path=general_example_filepath, log_name=general_example_filename)),
            create_test_labelling(label_type=LabelTypes.REMAINING_TIME.value), encoding)

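        # 15 columns: trace_id, 12 padded prefix positions, elapsed time and the label.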
        self.assertEqual(df.shape, (41, 15))
        row1 = df[df.trace_id == '4'].iloc[4]
        self.assertListEqual(
            ['4', 'register request', 'check ticket', 'examine thoroughly', 'decide', 'reject request', 0, 0, 0,
             0, 0, 0, 0, 520920.0, 0.0], row1.values.tolist())
        self.assertFalse(df.isnull().values.any())
Code example #17
def replay_prediction(replay_job: Job, training_initial_job: Job,
                      trace_id) -> list:
    """Creates a set of event timestamps, then builds a list of requests
        simulating the log as time passes
        :param trace_id: id of the trace to replay
        :param replay_job: job dictionary
        :param training_initial_job: job dictionary
        :return: list of requests
    """

    split = replay_job.split
    log = get_log(split.train_log)
    requests_list = list()
    eventlog = EventLog()
    trace = log[int(trace_id)]
    for key in log.attributes.keys():
        eventlog.attributes[key] = log.attributes[key]
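    # Replay the chosen trace as growing prefixes: one appended trace per
    # prefix length (0 .. len(trace) - 1), each keeping the trace attributes.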
    for index in range(len(trace)):
        new_trace = Trace(trace[0:index])
        for key in trace.attributes:
            new_trace.attributes[key] = trace.attributes[key]
        eventlog.append(new_trace)
    replay_job.case_id = trace_id
    replay_job.event_number = len(trace)
    replay_job.save()
    try:
        logger.info("Sending request for replay_prediction task.")
        r = requests.post(
            url="http://127.0.0.1:8000/runtime/replay_prediction/",
            data=export_log_as_string(eventlog),
            params={
                'jobId': replay_job.id,
                'training_job': training_initial_job.id
            },
            headers={
                'Content-Type': 'text/plain',
                'charset': 'UTF-8'
            })
        requests_list.append(str(r))
    except Exception as e:
        requests_list.append(str(e))
        logger.warning(str(e))

    return requests_list
Code example #18
def replay_core(replay_job: Job, training_initial_job: Job) -> list:
    """Creates a set of event timestamps, then builds a list of requests
        simulating the log as time passes

        :param replay_job: job dictionary
        :param training_initial_job: job dictionary
        :return: list of requests
    """

    split = replay_job.split
    log = get_log(split.train_log)
    requests_list = list()

    eventlog = EventLog()
    for key in log.attributes.keys():
        eventlog.attributes[key] = log.attributes[key]
    for trace in log:
        new_trace = Trace(trace)
        for key in trace.attributes:
            new_trace.attributes[key] = trace.attributes[key]
        eventlog.append(new_trace)

    times = sorted(
        set([event['time:timestamp'] for trace in eventlog
             for event in trace]))

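    # Sample a handful of evenly spaced cut-off timestamps (skipping the
    # first two) and replay the events from the start of the log up to each.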
    for t in times[2::int((len(times) - 2) / 5)]:
        filtered_eventlog = timestamp_filter.apply_events(
            eventlog, times[0].replace(tzinfo=None), t.replace(tzinfo=None))
        trace_list = list()
        event_number = dict()
        for trace in filtered_eventlog:
            trace_list.append(trace.attributes['concept:name'])
            event_number[trace.attributes['concept:name']] = len(trace)
        replay_job.case_id = trace_list
        replay_job.event_number = event_number
        replay_job.save()
        try:  # TODO: check logger usage
            logger.info("Sending request for replay_prediction task.")
            r = requests.post(
                url="http://server:8000/runtime/replay_prediction/",
                data=export_log_as_string(filtered_eventlog),
                params={
                    'jobId': replay_job.id,
                    'training_job': training_initial_job.id
                },
                headers={
                    'Content-Type': 'text/plain',
                    'charset': 'UTF-8'
                })
            requests_list.append(str(r))
        except Exception as e:
            requests_list.append(str(e))
            logger.warning(str(e))

    training_log, test_log, additional_columns = get_train_test_log(
        replay_job.split)
    training_df, _ = encode_label_logs(training_log,
                                       test_log,
                                       replay_job,
                                       additional_columns=additional_columns)

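    # Store each training trace's encoded label as the gold value the
    # replayed predictions will be compared against.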
    gold_values = dict(zip(training_df['trace_id'], training_df['label']))
    parent_id = replay_job.id
    # final_job = duplicate_orm_row(replay_job)  #todo: replace with simple CREATE
    final_job = Job.objects.create(
        created_date=replay_job.created_date,
        modified_date=replay_job.modified_date,
        error=replay_job.error,
        status=replay_job.status,
        type=replay_job.type,
        create_models=replay_job.create_models,
        case_id=replay_job.case_id,
        event_number=replay_job.event_number,
        gold_value=replay_job.gold_value,
        results=replay_job.results,
        parent_job=replay_job.parent_job,
        split=replay_job.split,
        encoding=replay_job.encoding,
        labelling=replay_job.labelling,
        clustering=replay_job.clustering,
        predictive_model=replay_job.predictive_model,
        evaluation=replay_job.evaluation,
        hyperparameter_optimizer=replay_job.hyperparameter_optimizer,
        incremental_train=replay_job.incremental_train)
    final_job.parent_job = Job.objects.filter(pk=parent_id)[0]
    final_job.gold_value = gold_values
    final_job.type = JobTypes.REPLAY_PREDICT.value
    final_job.save()
    return requests_list
Code example #19
 def setUp(self):
     self.log = get_log(create_test_log(log_name=general_example_filename,
                                        log_path=general_example_filepath))
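
The fixtures above all repeat one pattern: register a test file as a Log row with create_test_log, parse it with get_log, then derive metadata from the result. A minimal sketch that distills it, using only the helpers and module-level names shown in these examples (the function name load_example_logs is hypothetical):

    def load_example_logs():
        # create_test_log registers the XES file as a Log row; get_log parses
        # it into an in-memory event log.
        train_log = get_log(create_test_log(log_name=general_example_train_filename,
                                            log_path=general_example_train_filepath))
        test_log = get_log(create_test_log(log_name=general_example_test_filename,
                                           log_path=general_example_test_filepath_xes))
        # Metadata the tests typically derive from a parsed log:
        event_names = unique_events(train_log)                  # distinct activities
        additional_columns = get_additional_columns(train_log)  # extra attributes
        return train_log, test_log, event_names, additional_columns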