def prepare_logs(split: Split):
    """Return ``(training_log, test_log, additional_columns)`` for *split*.

    For a SPLIT_SINGLE split the original log is divided by
    ``_split_single_log``; for a double split the already-separated train and
    test logs are loaded individually.

    :param split: the Split model instance describing which log(s) to load.
    :raises TypeError: if the resulting training log contains no traces.
    """
    if split.type == SplitTypes.SPLIT_SINGLE.value:
        # Additional columns are derived from the *whole* original log, before splitting.
        additional_columns = get_additional_columns(get_log(
            split.original_log))
        training_log, test_log = _split_single_log(split)
        logger.info("\t\tLoaded single log from {}".format(
            split.original_log.path))
    else:
        # Have to use sklearn to convert some internal data types
        training_log = get_log(split.train_log)
        additional_columns = get_additional_columns(training_log)
        if split.additional_columns is None:
            # Persist a derived name for the additional-columns artifact the
            # first time this double split is used.
            split.additional_columns = split.train_log.name + split.test_log.name + '_ac.xes'
            split.save()
        # test_size=0, shuffle=False: a degenerate sklearn split used purely
        # for its type-coercion side effect — no traces are actually held out.
        training_log, _ = train_test_split(training_log, test_size=0,
                                           shuffle=False)
        test_log, _ = train_test_split(get_log(split.test_log), test_size=0,
                                       shuffle=False)
        logger.info("\t\tLoaded double logs from {} and {}.".format(
            split.train_log.path, split.test_log.path))
    if len(training_log) == 0:
        # NOTE(review): TypeError is an odd choice for an empty-data condition,
        # but callers may catch it — left unchanged.
        raise TypeError(
            "Training log is empty. Create a new Split with better parameters")
    return training_log, test_log, additional_columns
def test_multiple_unique_events(self):
    """The union of event names across the two example logs has 8 entries."""
    example_test = get_log(
        create_test_log(log_name=general_example_test_filename,
                        log_path=general_example_test_filepath))
    example_train = get_log(
        create_test_log(log_path=general_example_train_filepath,
                        log_name=general_example_train_filename))
    self.assertEqual(8, len(unique_events2(example_train, example_test)))
def setUp(self):
    """Load the example train/test logs with their event names and columns."""
    fixtures = (
        ("train", general_example_train_filename, general_example_train_filepath),
        ("test", general_example_test_filename, general_example_test_filepath),
    )
    for prefix, name, path in fixtures:
        loaded = get_log(create_test_log(log_name=name, log_path=path))
        setattr(self, prefix + "_log", loaded)
        setattr(self, prefix + "_event_names", unique_events(loaded))
        setattr(self, prefix + "_add_col", get_additional_columns(loaded))
def setUp(self):
    """Load example logs and build a prefix-1 simple-index encoding."""
    train_entry = create_test_log(log_name=general_example_train_filename,
                                  log_path=general_example_train_filepath)
    self.train_log = get_log(train_entry)
    test_entry = create_test_log(log_name=general_example_test_filename,
                                 log_path=general_example_test_filepath)
    self.test_log = get_log(test_entry)
    self.encoding = create_test_encoding(
        value_encoding=ValueEncodings.SIMPLE_INDEX.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=1)
def setUp(self):
    """Load three benchmark event logs, timing each load.

    The load-and-time boilerplate was triplicated; it is factored into a
    local helper so each fixture is declared on one line. Timing output is
    printed so slow XES parses are visible in the test run.
    """
    self.label = LabelContainer(LabelTypes.NO_LABEL.value)

    def timed_load(tag, path):
        # Load one log and print how long the parse took (message format
        # kept identical to the original output).
        start_time = time.time()
        log = get_log(path)
        print("Total for %s %s seconds" % (tag, time.time() - start_time))
        return log

    self.log1 = timed_load("sepsis", "cache/log_cache/Sepsis Cases - Event Log.xes.gz")
    self.log2 = timed_load("financial", "cache/log_cache/financial_log.xes.gz")
    self.log3 = timed_load("2017", "cache/log_cache/BPI Challenge 2017.xes.gz")
def setUp(self):
    """Boolean-encode the example logs for a classification job."""
    test_log = get_log(
        create_test_log(log_name=general_example_test_filename,
                        log_path=general_example_test_filepath))
    training_log = get_log(
        create_test_log(log_name=general_example_train_filename,
                        log_path=general_example_train_filepath))
    encoding = create_test_encoding(
        value_encoding=ValueEncodings.BOOLEAN.value,
        add_elapsed_time=True)
    model = create_test_predictive_model(
        predictive_model=PredictiveModels.CLASSIFICATION.value)
    job = create_test_job(encoding=encoding, predictive_model=model)
    self.training_df, self.test_df = encode_label_logs(training_log, test_log, job)
def test_can_find_log_file(self):
    """A stored Log row resolves to a parsable file containing six traces."""
    log_entry = Log.objects.get(name="general_example.xes",
                                path=general_example_filepath)
    parsed = get_log(log_entry)
    self.assertEqual(6, len(parsed))
def test_global_event_attributes(self):
    """The example test log exposes the expected global event attributes."""
    example_log = get_log(
        create_test_log(log_name=general_example_test_filename,
                        log_path=general_example_test_filepath))
    expected = ['Activity', 'Costs', 'Resource', 'org:resource']
    columns = get_additional_columns(example_log)
    self.assertListEqual(columns['event_attributes'], expected)
def test_can_find_split_original_file(self):
    """A Split's original_log resolves to a parsable file with six traces."""
    log_entry = Log.objects.get(name="general_example.xes",
                                path=general_example_filepath)
    matching_split = Split.objects.get(original_log=log_entry)
    parsed = get_log(matching_split.original_log)
    self.assertEqual(6, len(parsed))
def setUp(self):
    """Frequency-encode the example logs with remaining-time labelling."""
    test_log = get_log(
        create_test_log(log_name=general_example_test_filename,
                        log_path=general_example_test_filepath))
    training_log = get_log(
        create_test_log(log_name=general_example_train_filename,
                        log_path=general_example_train_filepath))
    self.encoding = create_test_encoding(
        value_encoding=ValueEncodings.FREQUENCY.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=1)
    self.labelling = create_test_labelling(
        label_type=LabelTypes.REMAINING_TIME.value)
    job = create_test_job(encoding=self.encoding, labelling=self.labelling)
    self.training_df, self.test_df = encode_label_logs(training_log, test_log, job)
def test_trace_attributes(self):
    """trace_attributes extracts name/type/example for the financial log."""
    self.log = get_log(create_test_log(log_name=financial_log_filename,
                                       log_path=financial_log_filepath))
    result = trace_attributes(self.log)
    expected = [
        {'name': 'AMOUNT_REQ', 'type': 'number', 'example': '20000'},
        {'name': 'REG_DATE', 'type': 'string',
         'example': '2011-10-01 00:38:44.546000+02:00'},
    ]
    self.assertEqual(2, len(result))
    self.assertDictEqual(expected[0], result[0])
    self.assertDictEqual(expected[1], result[1])
def test_replay_prediction(self):
    """replay_prediction_task runs over a completed prediction job."""
    job = create_test_job(create_models=True)
    replay_log = get_log(create_test_log(
        log_name='runtime_example.xes',
        log_path='cache/log_cache/test_logs/runtime_test.xes'))
    prediction_task(job.id)
    job.refresh_from_db()
    # NOTE(review): the same job is passed as both the replay job and the
    # training job — confirm this is intentional.
    replay_prediction_task(job, job, replay_log)
def setUp(self):
    """Example logs plus complex encodings: prefix 2, and padded prefix 10."""
    self.train_log = get_log(
        create_test_log(log_name=general_example_train_filename,
                        log_path=general_example_train_filepath))
    self.test_log = get_log(
        create_test_log(log_name=general_example_test_filename,
                        log_path=general_example_test_filepath))
    self.add_col = get_additional_columns(self.train_log)
    shared_kwargs = dict(
        value_encoding=ValueEncodings.COMPLEX.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value)
    self.encoding = create_test_encoding(prefix_length=2, **shared_kwargs)
    self.encodingPadding = create_test_encoding(prefix_length=10, padding=True,
                                                **shared_kwargs)
def setUp(self):
    """Example test log, remaining-time labelling, last-payload encoding."""
    log_entry = create_test_log(log_name=general_example_test_filename,
                                log_path=general_example_test_filepath)
    self.log = get_log(log_entry)
    self.event_names = unique_events(self.log)
    self.labelling = create_test_labelling(
        label_type=LabelTypes.REMAINING_TIME.value)
    self.add_col = get_additional_columns(self.log)
    self.encoding = create_test_encoding(
        value_encoding=ValueEncodings.LAST_PAYLOAD.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=1)
def do_test(encoding):
    """Time how long it takes to encode the Sepsis log with *encoding*."""
    t0 = time.time()
    event_log = get_log("cache/log_cache/Sepsis Cases - Event Log.xes")
    label = LabelContainer(LabelTypes.REMAINING_TIME.value,
                           add_elapsed_time=True)
    container = EncodingContainer(encoding, prefix_length=185,
                                  generation_type=ALL_IN_ONE,
                                  padding=ZERO_PADDING)
    names = unique_events(event_log)
    encoded = encode_label_log(event_log, container,
                               PredictiveModels.REGRESSION.value, label,
                               event_names=names)
    print(encoded.shape)
    print("Total for %s %s seconds" % (container, time.time() - t0))
def _split_single_log(split: Split):
    """Split *split*'s original log into ``(training_log, test_log)``.

    Dispatches on ``split.splitting_method`` to the matching strategy.

    :param split: Split model with ``original_log``, ``test_size`` and
        ``splitting_method`` set.
    :raises ValueError: if the splitting method is not recognized.
    """
    log = get_log(split.original_log)
    # Sibling functions (prepare_logs / get_train_test_log) log via `logger`;
    # this used a bare print — switched to logger.info for consistency.
    logger.info("\t\tExecute single split ID {}, split_type {}, test_size {}".format(
        split.id, split.type, split.test_size))
    method = split.splitting_method
    if method == SplitOrderingMethods.SPLIT_TEMPORAL.value:
        return _temporal_split(log, split.test_size)
    if method == SplitOrderingMethods.SPLIT_STRICT_TEMPORAL.value:
        return _temporal_split_strict(log, split.test_size)
    if method == SplitOrderingMethods.SPLIT_SEQUENTIAL.value:
        return _split_log(log, split.test_size, shuffle=False)
    if method == SplitOrderingMethods.SPLIT_RANDOM.value:
        # random_state=None keeps the shuffle nondeterministic on purpose.
        return _split_log(log, split.test_size, random_state=None)
    raise ValueError('splitting method {} not recognized'.format(method))
def test_eval(self):
    """simple_index with ALL_IN_ONE generation yields the expected frame."""
    encoding = create_test_encoding(
        value_encoding=ValueEncodings.FREQUENCY.value,
        task_generation_type=TaskGenerationTypes.ALL_IN_ONE.value,
        add_elapsed_time=True,
        prefix_length=12,
        padding=True)
    example_log = get_log(
        create_test_log(log_path=general_example_filepath,
                        log_name=general_example_filename))
    labelling = create_test_labelling(
        label_type=LabelTypes.REMAINING_TIME.value)
    df = simple_index(example_log, labelling, encoding)
    self.assertEqual(df.shape, (41, 15))
    expected_row = ['4', 'register request', 'check ticket',
                    'examine thoroughly', 'decide', 'reject request',
                    0, 0, 0, 0, 0, 0, 0, 520920.0, 0.0]
    row1 = df[df.trace_id == '4'].iloc[4]
    self.assertListEqual(expected_row, row1.values.tolist())
    self.assertFalse(df.isnull().values.any())
def runtime_task(job, model):
    """Run a runtime prediction for *job* using *model*.

    Marks the job RUNNING, computes the prediction over the job's log, and
    records the result. On failure the job is marked ERROR and the exception
    is re-raised. The job is always saved and published, success or not.

    :param job: Job model instance; its ``config['log_id']`` selects the log.
    :param model: predictive model wrapper exposing ``to_dict()``.
    """
    print("Start runtime task ID {}".format(job.pk))
    try:
        job.status = JobStatuses.RUNNING.value
        job.save()
        log = Log.objects.get(pk=job.config['log_id'])
        run_log = get_log(log.path)
        result_data = runtime_calculate(run_log, model.to_dict())
        job.result = result_data['prediction']
        job.status = JobStatuses.COMPLETED.value
        job.error = ''
    except Exception as e:
        # repr(e) replaces the redundant str(e.__repr__()) — same text.
        print("error " + repr(e))
        job.status = JobStatuses.ERROR.value
        job.error = repr(e)
        # Bare raise preserves the original traceback.
        raise
    finally:
        job.save()
        publish(job)
def replay_core(replay_job: Job, training_initial_job: Job) -> list:
    """Replay the split's training log against the runtime prediction service.

    Rebuilds the training event log, then for each successive timestamp posts
    the log prefix up to that timestamp to the replay_prediction endpoint.

    :param replay_job: Job whose split supplies the training log to replay.
    :param training_initial_job: Job whose id is sent as the training job.
    :return: list of per-request outcomes (response reprs or error strings).
    """
    split = replay_job.split
    log = get_log(split.train_log)
    requests_list = list()
    # Deep-ish copy of the log: attributes are copied explicitly because
    # Trace(trace) copies events but not necessarily trace attributes.
    eventlog = EventLog()
    for key in log.attributes.keys():
        eventlog.attributes[key] = log.attributes[key]
    for trace in log:
        new_trace = Trace(trace)
        for key in trace.attributes:
            new_trace.attributes[key] = trace.attributes[key]
        eventlog.append(new_trace)
    times = sorted(
        set([event['time:timestamp'] for trace in eventlog
             for event in trace]))
    # NOTE(review): the two earliest timestamps are skipped — presumably to
    # guarantee each replayed prefix is non-trivial; confirm the rationale.
    for t in times[2:]:
        # Prefix of the log between the earliest timestamp and t
        # (naive datetimes required by the filter).
        filtered_eventlog = timestamp_filter.apply_events(
            eventlog, times[0].replace(tzinfo=None), t.replace(tzinfo=None))
        try:  # TODO check logger usage
            logger.info("Sending request for replay_prediction task.")
            r = requests.post(
                url="http://server:8000/runtime/replay_prediction/",
                data=export_log_as_string(filtered_eventlog),
                params={
                    'jobId': replay_job.id,
                    'training_job': training_initial_job.id
                },
                headers={
                    'Content-Type': 'text/plain',
                    'charset': 'UTF-8'
                })
            requests_list.append(str(r))
        except Exception as e:
            # Best-effort: a failed request is recorded, not fatal.
            requests_list.append(str(e))
            logger.warning(str(e))
    return requests_list
def setUp(self):
    """Load the repair example log with a no-label container."""
    self.log = get_log("cache/log_cache/repairExample.xes")
    self.label = LabelContainer(LabelTypes.NO_LABEL.value)
    self.add_col = get_additional_columns(self.log)
def test_can_find_split_original_file(self):
    """Split file can be found by id."""
    original = Split.objects.get(id=1).original_log
    parsed = get_log(original)
    self.assertEqual(6, len(parsed))
def test_can_find_log_file(self):
    """Log file can be found by id."""
    stored = Log.objects.get(id=1)
    parsed = get_log(stored)
    self.assertEqual(6, len(parsed))
def execute(self, id):
    """Parse this object's log and feed its events list for *id*.

    NOTE(review): the parameter `id` shadows the builtin, but renaming it
    would change the keyword interface, so it is kept.
    """
    parsed_log = get_log(self.log)
    self.events_list(parsed_log, id)
def get_train_test_log(split: Split):
    """Returns training_log and test_log

    Resolution order:
      1. A SPLIT_SINGLE split with a matching cached SPLIT_DOUBLE (same
         original log, test size and method) delegates to that double split —
         except for random splits, which must be re-drawn every time.
      2. Otherwise a single split (or random split) is performed now; the
         result is cached as a new SPLIT_DOUBLE unless the method is random.
      3. Otherwise the split is already double: load both logs directly.

    :raises TypeError: if the resulting training log contains no traces.
    """
    if split.type == SplitTypes.SPLIT_SINGLE.value and Split.objects.filter(
            type=SplitTypes.SPLIT_DOUBLE.value,
            original_log=split.original_log,
            test_size=split.test_size,
            splitting_method=split.splitting_method
    ).exists(
    ) and split.splitting_method != SplitOrderingMethods.SPLIT_RANDOM.value:
        # Reuse the cached double split instead of re-splitting.
        return get_train_test_log(
            Split.objects.filter(type=SplitTypes.SPLIT_DOUBLE.value,
                                 original_log=split.original_log,
                                 test_size=split.test_size,
                                 splitting_method=split.splitting_method)[0])
    elif split.original_log is not None and (
            not Split.objects.filter(
                type=SplitTypes.SPLIT_DOUBLE.value,
                original_log=split.original_log,
                test_size=split.test_size,
                splitting_method=split.splitting_method).exists() or
            split.splitting_method == SplitOrderingMethods.SPLIT_RANDOM.value):
        training_log, test_log = _split_single_log(split)
        # Additional columns come from the *whole* original log.
        additional_columns = get_additional_columns(get_log(
            split.original_log))
        if split.splitting_method != SplitOrderingMethods.SPLIT_RANDOM.value:
            # Cache the result as a double split; file names encode the
            # percentage ranges, e.g. '0-80.xes' and '80-100.xes'.
            _ = Split.objects.get_or_create(
                type=SplitTypes.SPLIT_DOUBLE.value,
                original_log=split.original_log,
                test_size=split.test_size,
                splitting_method=split.splitting_method,
                train_log=create_log(
                    EventLog(training_log),
                    '0-' + str(100 - int(split.test_size * 100)) + '.xes'),
                test_log=create_log(
                    EventLog(test_log),
                    str(100 - int(split.test_size * 100)) + '-100.xes'),
                additional_columns=split.additional_columns)[0]
        logger.info("\t\tLoaded single log from {}".format(
            split.original_log.path))
    else:
        # Have to use sklearn to convert some internal data types
        training_log = get_log(split.train_log)
        additional_columns = get_additional_columns(training_log)
        if split.additional_columns is None:
            # Persist a derived name for the additional-columns artifact.
            split.additional_columns = split.train_log.name + split.test_log.name + '_ac.xes'
            split.save()
        # test_size=0, shuffle=False: degenerate splits used purely for the
        # sklearn type coercion; the *_to_append halves are empty and unused.
        training_log, train_log_to_append = train_test_split(training_log,
                                                             test_size=0,
                                                             shuffle=False)
        test_log, test_log_to_append = train_test_split(get_log(
            split.test_log),
            test_size=0, shuffle=False)
        logger.info("\t\tLoaded double logs from {} and {}.".format(
            split.train_log.path, split.test_log.path))
    if len(training_log) == 0:
        # NOTE(review): TypeError is unusual for empty data, but callers may
        # depend on it — left unchanged.
        raise TypeError(
            "Training log is empty. Create a new Split with better parameters")
    return training_log, test_log, additional_columns
def setUp(self):
    """Load the general example log used by the tests in this class."""
    entry = create_test_log(log_name=general_example_filename,
                            log_path=general_example_filepath)
    self.log = get_log(entry)