Esempio n. 1
0
def prepare_logs(split: Split):
    """Return (training_log, test_log, additional_columns) for a Split.

    A SINGLE split is partitioned on the fly from its original log; a
    DOUBLE split loads the two stored logs directly.
    """
    if split.type == SplitTypes.SPLIT_SINGLE.value:
        original = get_log(split.original_log)
        additional_columns = get_additional_columns(original)
        training_log, test_log = _split_single_log(split)
        logger.info("\t\tLoaded single log from {}".format(
            split.original_log.path))
    else:
        # Have to use sklearn to convert some internal data types
        training_log = get_log(split.train_log)
        additional_columns = get_additional_columns(training_log)
        if split.additional_columns is None:
            # Derive and persist a default additional-columns file name.
            split.additional_columns = split.train_log.name + split.test_log.name + '_ac.xes'
            split.save()
        # test_size=0 keeps every trace; the splitter only normalises types.
        training_log, _ = train_test_split(
            training_log, test_size=0, shuffle=False)
        test_log, _ = train_test_split(
            get_log(split.test_log), test_size=0, shuffle=False)
        logger.info("\t\tLoaded double logs from {} and {}.".format(
            split.train_log.path, split.test_log.path))
    if len(training_log) == 0:
        raise TypeError(
            "Training log is empty. Create a new Split with better parameters")
    return training_log, test_log, additional_columns
Esempio n. 2
0
 def test_multiple_unique_events(self):
     """The union of unique events across train and test logs has 8 entries."""
     training_log = get_log(create_test_log(
         log_path=general_example_train_filepath,
         log_name=general_example_train_filename))
     test_log = get_log(create_test_log(
         log_name=general_example_test_filename,
         log_path=general_example_test_filepath))
     self.assertEqual(8, len(unique_events2(training_log, test_log)))
Esempio n. 3
0
    def setUp(self):
        """Load train/test example logs plus their event names and extra columns."""
        self.train_log = get_log(create_test_log(
            log_name=general_example_train_filename,
            log_path=general_example_train_filepath))
        self.train_event_names = unique_events(self.train_log)
        self.train_add_col = get_additional_columns(self.train_log)

        self.test_log = get_log(create_test_log(
            log_name=general_example_test_filename,
            log_path=general_example_test_filepath))
        self.test_event_names = unique_events(self.test_log)
        self.test_add_col = get_additional_columns(self.test_log)
 def setUp(self):
     """Load the example logs and a SIMPLE_INDEX encoding of prefix length 1."""
     self.test_log = get_log(create_test_log(
         log_name=general_example_test_filename,
         log_path=general_example_test_filepath))
     self.train_log = get_log(create_test_log(
         log_name=general_example_train_filename,
         log_path=general_example_train_filepath))
     self.encoding = create_test_encoding(
         value_encoding=ValueEncodings.SIMPLE_INDEX.value,
         add_elapsed_time=True,
         task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
         prefix_length=1)
 def setUp(self):
     """Parse three cached event logs, timing each load."""
     self.label = LabelContainer(LabelTypes.NO_LABEL.value)
     logs = [
         ('log1', "sepsis", "cache/log_cache/Sepsis Cases - Event Log.xes.gz"),
         ('log2', "financial", "cache/log_cache/financial_log.xes.gz"),
         ('log3', "2017", "cache/log_cache/BPI Challenge 2017.xes.gz"),
     ]
     for attr, tag, path in logs:
         started = time.time()
         setattr(self, attr, get_log(path))
         print("Total for %s %s seconds" % (tag, time.time() - started))
 def setUp(self):
     """Encode the example train/test logs for boolean classification."""
     test_log = get_log(create_test_log(
         log_name=general_example_test_filename,
         log_path=general_example_test_filepath))
     training_log = get_log(create_test_log(
         log_name=general_example_train_filename,
         log_path=general_example_train_filepath))
     encoding = create_test_encoding(
         value_encoding=ValueEncodings.BOOLEAN.value,
         add_elapsed_time=True)
     predictive_model = create_test_predictive_model(
         predictive_model=PredictiveModels.CLASSIFICATION.value)
     job = create_test_job(encoding=encoding,
                           predictive_model=predictive_model)
     self.training_df, self.test_df = encode_label_logs(
         training_log, test_log, job)
Esempio n. 7
0
    def test_can_find_log_file(self):
        """A Log fetched by name and path parses into its 6 traces."""
        stored_log = Log.objects.get(name="general_example.xes",
                                     path=general_example_filepath)
        parsed = get_log(stored_log)
        self.assertEqual(6, len(parsed))
Esempio n. 8
0
 def test_global_event_attributes(self):
     """The example test log exposes the expected event-level attributes."""
     log = get_log(create_test_log(
         log_name=general_example_test_filename,
         log_path=general_example_test_filepath))
     expected = ['Activity', 'Costs', 'Resource', 'org:resource']
     self.assertListEqual(get_additional_columns(log)['event_attributes'],
                          expected)
Esempio n. 9
0
    def test_can_find_split_original_file(self):
        """The original log referenced by a Split parses into 6 traces."""
        stored_log = Log.objects.get(name="general_example.xes",
                                     path=general_example_filepath)
        split = Split.objects.get(original_log=stored_log)
        self.assertEqual(6, len(get_log(split.original_log)))
Esempio n. 10
0
    def setUp(self):
        """Build FREQUENCY-encoded train/test frames labelled by remaining time."""
        test_log = get_log(create_test_log(
            log_name=general_example_test_filename,
            log_path=general_example_test_filepath))
        training_log = get_log(create_test_log(
            log_name=general_example_train_filename,
            log_path=general_example_train_filepath))
        self.encoding = create_test_encoding(
            value_encoding=ValueEncodings.FREQUENCY.value,
            add_elapsed_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=1)
        self.labelling = create_test_labelling(
            label_type=LabelTypes.REMAINING_TIME.value)
        job = create_test_job(encoding=self.encoding,
                              labelling=self.labelling)
        self.training_df, self.test_df = encode_label_logs(
            training_log, test_log, job)
 def test_trace_attributes(self):
     """trace_attributes extracts the two typed trace attributes of the log."""
     self.log = get_log(create_test_log(log_name=financial_log_filename,
                                        log_path=financial_log_filepath))
     result = trace_attributes(self.log)
     self.assertEqual(2, len(result))
     expected_amount = {'name': 'AMOUNT_REQ', 'type': 'number',
                        'example': '20000'}
     expected_reg_date = {'name': 'REG_DATE', 'type': 'string',
                          'example': '2011-10-01 00:38:44.546000+02:00'}
     self.assertDictEqual(expected_amount, result[0])
     self.assertDictEqual(expected_reg_date, result[1])
Esempio n. 12
0
    def test_replay_prediction(self):
        """Smoke-test replay_prediction_task on a freshly-run prediction job."""
        job = create_test_job(create_models=True)
        parsed_log = get_log(create_test_log(
            log_name='runtime_example.xes',
            log_path='cache/log_cache/test_logs/runtime_test.xes'))
        prediction_task(job.id)
        job.refresh_from_db()

        replay_prediction_task(job, job, parsed_log)
 def setUp(self):
     """Load example logs and two COMPLEX encodings (short and zero-padded)."""
     self.train_log = get_log(create_test_log(
         log_name=general_example_train_filename,
         log_path=general_example_train_filepath))
     self.test_log = get_log(create_test_log(
         log_name=general_example_test_filename,
         log_path=general_example_test_filepath))
     self.add_col = get_additional_columns(self.train_log)
     # Both encodings share everything except prefix length and padding.
     shared = dict(
         value_encoding=ValueEncodings.COMPLEX.value,
         add_elapsed_time=True,
         task_generation_type=TaskGenerationTypes.ONLY_THIS.value)
     self.encoding = create_test_encoding(prefix_length=2, **shared)
     self.encodingPadding = create_test_encoding(prefix_length=10,
                                                 padding=True, **shared)
Esempio n. 14
0
 def setUp(self):
     """Load the test log plus event names, labelling, extra columns, encoding."""
     self.log = get_log(create_test_log(
         log_name=general_example_test_filename,
         log_path=general_example_test_filepath))
     self.event_names = unique_events(self.log)
     self.labelling = create_test_labelling(
         label_type=LabelTypes.REMAINING_TIME.value)
     self.add_col = get_additional_columns(self.log)
     self.encoding = create_test_encoding(
         value_encoding=ValueEncodings.LAST_PAYLOAD.value,
         add_elapsed_time=True,
         task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
         prefix_length=1)
 def do_test(encoding):
     """Benchmark labelled encoding of the Sepsis log with *encoding*."""
     started = time.time()
     log = get_log("cache/log_cache/Sepsis Cases - Event Log.xes")
     label = LabelContainer(LabelTypes.REMAINING_TIME.value,
                            add_elapsed_time=True)
     # Rebind the parameter to its full container form.
     encoding = EncodingContainer(encoding, prefix_length=185,
                                  generation_type=ALL_IN_ONE,
                                  padding=ZERO_PADDING)
     log = encode_label_log(log, encoding, PredictiveModels.REGRESSION.value,
                            label, event_names=unique_events(log))
     print(log.shape)
     print("Total for %s %s seconds" % (encoding, time.time() - started))
Esempio n. 16
0
def _split_single_log(split: Split):
    """Split the original log of *split* into (training_log, test_log).

    Dispatches on split.splitting_method: temporal, strict temporal,
    sequential (ordered, no shuffle) or random.

    :raises ValueError: if the splitting method is not recognised.
    """
    log = get_log(split.original_log)
    # Use the module logger instead of print, consistent with the other
    # split helpers in this module.
    logger.info("\t\tExecute single split ID {}, split_type {}, test_size {}".format(
        split.id, split.type, split.test_size))
    if split.splitting_method == SplitOrderingMethods.SPLIT_TEMPORAL.value:
        return _temporal_split(log, split.test_size)
    if split.splitting_method == SplitOrderingMethods.SPLIT_STRICT_TEMPORAL.value:
        return _temporal_split_strict(log, split.test_size)
    if split.splitting_method == SplitOrderingMethods.SPLIT_SEQUENTIAL.value:
        return _split_log(log, split.test_size, shuffle=False)
    if split.splitting_method == SplitOrderingMethods.SPLIT_RANDOM.value:
        # random_state=None draws a fresh split every call.
        return _split_log(log, split.test_size, random_state=None)
    raise ValueError('splitting method {} not recognized'.format(
        split.splitting_method))
    def test_eval(self):
        """simple_index with ALL_IN_ONE generation yields the expected frame."""
        encoding = create_test_encoding(
            value_encoding=ValueEncodings.FREQUENCY.value,
            task_generation_type=TaskGenerationTypes.ALL_IN_ONE.value,
            add_elapsed_time=True,
            prefix_length=12,
            padding=True)
        log = get_log(create_test_log(log_path=general_example_filepath,
                                      log_name=general_example_filename))
        labelling = create_test_labelling(
            label_type=LabelTypes.REMAINING_TIME.value)
        df = simple_index(log, labelling, encoding)

        self.assertEqual(df.shape, (41, 15))
        fifth_row_of_trace_4 = df[df.trace_id == '4'].iloc[4]
        expected = ['4', 'register request', 'check ticket',
                    'examine thoroughly', 'decide', 'reject request',
                    0, 0, 0, 0, 0, 0, 0, 520920.0, 0.0]
        self.assertListEqual(expected, fifth_row_of_trace_4.values.tolist())
        self.assertFalse(df.isnull().values.any())
Esempio n. 18
0
def runtime_task(job, model):
    """Run a runtime prediction for *job* using *model*.

    Marks the job RUNNING, computes the prediction over the job's configured
    log and stores it on the job; on failure records the error and re-raises.
    The job is always saved and published, whatever the outcome.
    """
    print("Start runtime task ID {}".format(job.pk))
    try:
        job.status = JobStatuses.RUNNING.value
        job.save()
        log = Log.objects.get(pk=job.config['log_id'])
        run_log = get_log(log.path)
        result_data = runtime_calculate(run_log, model.to_dict())
        job.result = result_data['prediction']
        job.status = JobStatuses.COMPLETED.value
        job.error = ''
    except Exception as e:
        # repr(e) (instead of str(e.__repr__())) keeps the exception type
        # in the stored and printed message.
        print("error " + repr(e))
        job.status = JobStatuses.ERROR.value
        job.error = repr(e)
        raise e
    finally:
        # Persist and broadcast the final job state even when re-raising.
        job.save()
        publish(job)
def replay_core(replay_job: Job, training_initial_job: Job) -> list:
    """Replay the training log of *replay_job*'s split as growing prefixes.

    Rebuilds the event log, then for each event timestamp (from the third
    onward) POSTs the events up to that time to the replay_prediction
    endpoint. Returns one response/exception string per request sent.
    """

    split = replay_job.split
    log = get_log(split.train_log)
    requests_list = list()

    # Copy the log into fresh EventLog/Trace objects (attributes included)
    # so the filtering below cannot mutate the cached original.
    eventlog = EventLog()
    for key in log.attributes.keys():
        eventlog.attributes[key] = log.attributes[key]
    for trace in log:
        new_trace = Trace(trace)
        for key in trace.attributes:
            new_trace.attributes[key] = trace.attributes[key]
        eventlog.append(new_trace)

    # All distinct event timestamps, in chronological order.
    times = sorted(
        set([event['time:timestamp'] for trace in eventlog
             for event in trace]))

    # Skip the first two timestamps — presumably so each replayed window
    # contains some history; TODO confirm intent.
    for t in times[2:]:
        # Keep only events in [first timestamp, t]; tzinfo is stripped,
        # NOTE(review): looks like timestamp_filter expects naive datetimes
        # — confirm.
        filtered_eventlog = timestamp_filter.apply_events(
            eventlog, times[0].replace(tzinfo=None), t.replace(tzinfo=None))

        try:  #TODO check logger usage
            logger.info("Sending request for replay_prediction task.")
            r = requests.post(
                url="http://server:8000/runtime/replay_prediction/",
                data=export_log_as_string(filtered_eventlog),
                params={
                    'jobId': replay_job.id,
                    'training_job': training_initial_job.id
                },
                headers={
                    'Content-Type': 'text/plain',
                    'charset': 'UTF-8'
                })
            requests_list.append(str(r))
        except Exception as e:
            # Best-effort: a failed request is recorded, not fatal.
            requests_list.append(str(e))
            logger.warning(str(e))
    return requests_list
Esempio n. 20
0
 def setUp(self):
     """Load the repair example log, a NO_LABEL container and extra columns."""
     self.log = get_log("cache/log_cache/repairExample.xes")
     self.label = LabelContainer(LabelTypes.NO_LABEL.value)
     self.add_col = get_additional_columns(self.log)
Esempio n. 21
0
    def test_can_find_split_original_file(self):
        """Split file can be found by id and parses into 6 traces."""
        split = Split.objects.get(id=1)
        self.assertEqual(6, len(get_log(split.original_log)))
Esempio n. 22
0
    def test_can_find_log_file(self):
        """Log file can be found by id and parses into 6 traces."""
        stored_log = Log.objects.get(id=1)
        self.assertEqual(6, len(get_log(stored_log)))
Esempio n. 23
0
 def execute(self, id):
     """Parse this object's log and emit its events list for request *id*."""
     # NOTE(review): `id` shadows the builtin; kept for caller compatibility.
     parsed_log = get_log(self.log)
     self.events_list(parsed_log, id)
Esempio n. 24
0
def get_train_test_log(split: Split):
    """Return (training_log, test_log, additional_columns) for a Split.

    For a SINGLE split an equivalent DOUBLE split is reused when one already
    exists (except random splits, which must be re-drawn); otherwise the
    original log is split and the result is memoised as a new DOUBLE split.
    DOUBLE splits load their two stored logs directly.

    :raises TypeError: if the resulting training log is empty.
    """
    # Every branch keys on the same "equivalent DOUBLE split" query, so
    # build the (lazy) queryset once instead of three times.
    equivalent_doubles = Split.objects.filter(
        type=SplitTypes.SPLIT_DOUBLE.value,
        original_log=split.original_log,
        test_size=split.test_size,
        splitting_method=split.splitting_method)
    if split.type == SplitTypes.SPLIT_SINGLE.value and \
            equivalent_doubles.exists() and \
            split.splitting_method != SplitOrderingMethods.SPLIT_RANDOM.value:
        # Reuse the memoised DOUBLE split instead of re-splitting.
        return get_train_test_log(equivalent_doubles[0])
    elif split.original_log is not None and (
            not equivalent_doubles.exists() or
            split.splitting_method == SplitOrderingMethods.SPLIT_RANDOM.value):
        training_log, test_log = _split_single_log(split)
        additional_columns = get_additional_columns(get_log(
            split.original_log))

        if split.splitting_method != SplitOrderingMethods.SPLIT_RANDOM.value:
            # Memoise the deterministic split so later calls can reuse it.
            _ = Split.objects.get_or_create(
                type=SplitTypes.SPLIT_DOUBLE.value,
                original_log=split.original_log,
                test_size=split.test_size,
                splitting_method=split.splitting_method,
                train_log=create_log(
                    EventLog(training_log),
                    '0-' + str(100 - int(split.test_size * 100)) + '.xes'),
                test_log=create_log(
                    EventLog(test_log),
                    str(100 - int(split.test_size * 100)) + '-100.xes'),
                additional_columns=split.additional_columns)[0]

        logger.info("\t\tLoaded single log from {}".format(
            split.original_log.path))
    else:
        # Have to use sklearn to convert some internal data types
        training_log = get_log(split.train_log)
        additional_columns = get_additional_columns(training_log)
        if split.additional_columns is None:
            split.additional_columns = split.train_log.name + split.test_log.name + '_ac.xes'
            split.save()
        # test_size=0 keeps every trace; the discarded second halves are
        # empty, so bind them to `_` instead of unused names.
        training_log, _ = train_test_split(training_log,
                                           test_size=0,
                                           shuffle=False)
        test_log, _ = train_test_split(get_log(split.test_log),
                                       test_size=0,
                                       shuffle=False)
        logger.info("\t\tLoaded double logs from {} and {}.".format(
            split.train_log.path, split.test_log.path))
    if len(training_log) == 0:
        raise TypeError(
            "Training log is empty. Create a new Split with better parameters")
    return training_log, test_log, additional_columns
 def setUp(self):
     """Parse the general example log into self.log for the tests below."""
     self.log = get_log(create_test_log(
         log_name=general_example_filename,
         log_path=general_example_filepath))