def get_encoded_logs(job: Job, use_cache: bool = True) -> (DataFrame, DataFrame):
    """returns the encoded logs

    returns the training and test DataFrames encoded using the given job configuration, loading from cache if possible
    :param job: job configuration
    :param use_cache: load or not saved datasets from cache
    :return: training and testing DataFrame
    """
    print('\tGetting Dataset')
    if use_cache:
        # Fastest path: a fully labelled (encoded) dataset already cached for this
        # exact split/encoding/labelling combination.
        if LabelledLog.objects.filter(split=job.split,
                                      encoding=job.encoding,
                                      labelling=job.labelling).exists():
            training_df, test_df = get_labelled_logs(job)
        else:
            # Second-level cache: raw train/test logs already loaded and persisted,
            # but not yet encoded with this configuration.
            if job.split.train_log is not None and \
               job.split.test_log is not None and \
               LoadedLog.objects.filter(train_log=job.split.train_log.path,
                                        test_log=job.split.test_log.path).exists():
                training_log, test_log, additional_columns = get_loaded_logs(
                    job.split)
            else:
                # No cache at all: split the log now.
                training_log, test_log, additional_columns = prepare_logs(
                    job.split)
                if job.split.type == SplitTypes.SPLIT_SINGLE.value:
                    # A single log was split on the fly: persist the resulting
                    # train/test logs and turn the split into a double split so
                    # future runs can reuse them. The split row is duplicated so
                    # the original single split is not mutated.
                    job.split = duplicate_orm_row(job.split)
                    job.split.type = SplitTypes.SPLIT_DOUBLE.value
                    train_name = '0-' + str(
                        int(100 - (job.split.test_size * 100)))
                    job.split.train_log = create_log(EventLog(training_log),
                                                     train_name + '.xes')
                    test_name = str(int(100 - (job.split.test_size * 100))) + '-100'
                    job.split.test_log = create_log(EventLog(test_log),
                                                    test_name + '.xes')
                    job.split.additional_columns = str(
                        train_name + test_name)  # TODO: find better naming policy
                    job.save()
                # Populate the loaded-log cache for the (possibly new) split.
                put_loaded_logs(job.split, training_log, test_log,
                                additional_columns)
            # Encode + label, then populate the labelled-log cache.
            training_df, test_df = encode_label_logs(
                training_log,
                test_log,
                job,
                additional_columns=additional_columns)
            put_labelled_logs(job, training_df, test_df)
    else:
        # Cache disabled: always split and encode from scratch, never persist.
        training_log, test_log, additional_columns = prepare_logs(job.split)
        training_df, test_df = encode_label_logs(
            training_log,
            test_log,
            job,
            additional_columns=additional_columns)
    return training_df, test_df
def test_label_remaining_time_with_elapsed_time_custom_threshold(self):
    """Simple-index encoding with a custom remaining-time threshold label."""
    labelling = create_test_labelling(
        label_type=LabelTypes.REMAINING_TIME.value,
        threshold_type=ThresholdTypes.THRESHOLD_CUSTOM.value,
        threshold=40000)
    encoding = create_test_encoding(
        value_encoding=ValueEncodings.SIMPLE_INDEX.value,
        add_elapsed_time=True,
        add_remaining_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=2)
    job = create_test_job(
        encoding=encoding,
        labelling=labelling,
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    encoded_df, _ = encode_label_logs(self.test_log, self.test_log, job)

    self.assertEqual(encoded_df.shape, (2, 5))
    self.assertListEqual(
        encoded_df.columns.values.tolist(),
        ['trace_id', 'prefix_1', 'prefix_2', 'elapsed_time', 'label'])
    row_5 = encoded_df[encoded_df.trace_id == '5'].iloc[0].values.tolist()
    self.assertListEqual(row_5, ['5', 1, 2, 0, 0])
    row_4 = encoded_df[encoded_df.trace_id == '4'].iloc[0].values.tolist()
    self.assertListEqual(row_4, ['4', 1, 1, 0, 0])
def test_label_remaining_time_with_elapsed_time_custom_threshold(self):
    """Boolean encoding with a custom remaining-time threshold label."""
    job = create_test_job(
        encoding=create_test_encoding(
            value_encoding=ValueEncodings.BOOLEAN.value,
            prefix_length=3,
            add_elapsed_time=True,
            add_remaining_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value),
        labelling=create_test_labelling(
            label_type=LabelTypes.REMAINING_TIME.value,
            threshold_type=ThresholdTypes.THRESHOLD_CUSTOM.value,
            threshold=40000),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    _, encoded_df = encode_label_logs(self.test_log, self.test_log, job)

    self.assertEqual(encoded_df.shape, (2, 10))
    row_5 = encoded_df[encoded_df.trace_id == '5'].iloc[0].values.tolist()
    self.assertListEqual(
        row_5,
        ['5', True, True, True, False, False, False, False, 181200.0, False])
    row_4 = encoded_df[encoded_df.trace_id == '4'].iloc[0].values.tolist()
    self.assertListEqual(
        row_4,
        ['4', True, False, True, False, False, False, True, 171660.0, False])
def test_next_activity_zero_padding_elapsed_time(self):
    """Next-activity label with zero padding keeps the elapsed_time column."""
    job = create_test_job(
        encoding=create_test_encoding(
            value_encoding=ValueEncodings.SIMPLE_INDEX.value,
            add_elapsed_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=10,
            padding=True),
        labelling=create_test_labelling(
            label_type=LabelTypes.NEXT_ACTIVITY.value),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    encoded_df, _ = encode_label_logs(self.test_log, self.test_log, job)

    self.assertEqual(encoded_df.shape, (2, 13))
    self.assertTrue('elapsed_time' in encoded_df.columns.values.tolist())
    row_5 = encoded_df[encoded_df.trace_id == '5'].iloc[0].values.tolist()
    self.assertListEqual(row_5,
                         ['5', 1, 3, 2, 2, 2, 0, 0, 0, 0, 1296240.0, 2])
    row_4 = encoded_df[encoded_df.trace_id == '4'].iloc[0].values.tolist()
    self.assertListEqual(row_4, [
        '4', 52903968, 32171502, 17803069, 1149821, 72523760, 0, 0, 0, 0, 0,
        520920.0, 0
    ])
def test_attribute_number(self):
    """Numeric trace-attribute labelling on a boolean encoding."""
    job = create_test_job(
        encoding=create_test_encoding(
            value_encoding=ValueEncodings.BOOLEAN.value,
            prefix_length=2,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value),
        labelling=create_test_labelling(
            label_type=LabelTypes.ATTRIBUTE_NUMBER.value,
            attribute_name='AMOUNT'),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    _, encoded_df = encode_label_logs(self.test_log, self.test_log, job)

    self.assertEqual(encoded_df.shape, (2, 9))
    row_5 = encoded_df[encoded_df.trace_id == '5'].iloc[0].values.tolist()
    self.assertListEqual(
        row_5,
        ['5', True, True, False, False, False, False, False, False])
    row_4 = encoded_df[encoded_df.trace_id == '4'].iloc[0].values.tolist()
    self.assertListEqual(
        row_4,
        ['4', True, False, True, False, False, False, False, True])
def test_next_activity_zero_padding_elapsed_time(self):
    """Next-activity label on a boolean encoding with elapsed time."""
    job = create_test_job(
        encoding=create_test_encoding(
            value_encoding=ValueEncodings.BOOLEAN.value,
            add_elapsed_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=3),
        labelling=create_test_labelling(
            label_type=LabelTypes.NEXT_ACTIVITY.value),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    _, encoded_df = encode_label_logs(self.test_log, self.test_log, job)

    self.assertEqual(encoded_df.shape, (2, 10))
    self.assertTrue('elapsed_time' in encoded_df.columns.values.tolist())
    row_5 = encoded_df[encoded_df.trace_id == '5'].iloc[0].values.tolist()
    self.assertListEqual(
        row_5,
        ['5', True, True, True, False, False, False, False, 181200.0, 'decide'])
    row_4 = encoded_df[encoded_df.trace_id == '4'].iloc[0].values.tolist()
    self.assertListEqual(
        row_4,
        ['4', True, False, True, False, False, False, True, 171660.0, 'decide'])
def test_shape_training(self):
    """Training and test frames come out with the expected shapes."""
    job = create_test_job(
        encoding=self.encoding,
        labelling=self.labelling,
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    train_df, eval_df = encode_label_logs(self.training_log, self.test_log,
                                          job)
    self.assert_shape(train_df, (4, 4))
    self.assert_shape(eval_df, (2, 4))
def test_no_label(self):
    """NO_LABEL labelling still yields a frame of the expected width."""
    job = create_test_job(
        encoding=self.encoding,
        labelling=create_test_labelling(label_type=LabelTypes.NO_LABEL.value),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    _, encoded_df = encode_label_logs(self.train_log, self.test_log, job)
    self.assertEqual(encoded_df.shape, (2, 9))
def test_remaining_time(self):
    """Remaining-time labelling yields a frame of the expected width."""
    job = create_test_job(
        encoding=self.encoding,
        labelling=create_test_labelling(
            label_type=LabelTypes.REMAINING_TIME.value),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    _, encoded_df = encode_label_logs(self.train_log, self.test_log, job)
    self.assertEqual(encoded_df.shape, (2, 11))
def test_row_test(self):
    """Spot-check a single encoded row of the test frame."""
    job = create_test_job(
        encoding=self.encoding,
        labelling=self.labelling,
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    _, eval_df = encode_label_logs(self.training_log, self.test_log, job)
    row = eval_df[(eval_df.trace_id == '4')].iloc[0]
    self.assertEqual(1, row.prefix_1)
    self.assertEqual(0, row.elapsed_time)
    self.assertEqual(0, row.label)
def test_next_activity(self):
    """Next-activity labelling with additional columns from the train log."""
    job = create_test_job(
        encoding=self.encoding,
        labelling=create_test_labelling(
            label_type=LabelTypes.NEXT_ACTIVITY.value),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    _, encoded_df = encode_label_logs(self.train_log, self.test_log, job,
                                      get_additional_columns(self.train_log))
    self.assertEqual(encoded_df.shape, (2, 14))
def test_attribute_number(self):
    """Numeric trace-attribute labelling with additional columns."""
    job = create_test_job(
        encoding=self.encoding,
        labelling=create_test_labelling(
            label_type=LabelTypes.ATTRIBUTE_NUMBER.value,
            attribute_name='AMOUNT'),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    _, encoded_df = encode_label_logs(self.test_log, self.test_log, job,
                                      get_additional_columns(self.test_log))
    self.assertEqual(encoded_df.shape, (2, 15))
def replay_prediction_calculate(job: Job, log) -> dict:
    """Compute predictions for a log streamed in by the replayers.

    :param job: job configuration used for encoding and prediction
    :param log: log model to predict on
    :return: runtime prediction results
    """
    additional_columns = get_additional_columns(log)
    # A zero-sized, unshuffled "split" keeps the whole log in the first frame.
    full_df, _ = train_test_split(log, test_size=0, shuffle=False)
    encoded_df, _ = encode_label_logs(full_df, EventLog(), job,
                                      additional_columns)
    predict = MODEL[job.predictive_model.predictive_model][
        ModelActions.PREDICT.value]
    results = predict(job, encoded_df)
    logger.info("End {} job {}, {} . Results {}".format(
        'runtime', job.predictive_model.predictive_model, get_run(job),
        results))
    return results
def setUp(self):
    """Encode the example train/test logs once for the tests below."""
    test_log = get_log(
        create_test_log(log_name=general_example_test_filename,
                        log_path=general_example_test_filepath))
    training_log = get_log(
        create_test_log(log_name=general_example_train_filename,
                        log_path=general_example_train_filepath))
    job = create_test_job(
        encoding=create_test_encoding(
            value_encoding=ValueEncodings.BOOLEAN.value,
            add_elapsed_time=True),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    self.training_df, self.test_df = encode_label_logs(
        training_log, test_log, job)
def setUp(self):
    """Build a frequency-encoded fixture shared by the tests below."""
    test_log = get_log(
        create_test_log(log_name=general_example_test_filename,
                        log_path=general_example_test_filepath_xes))
    training_log = get_log(
        create_test_log(log_name=general_example_train_filename,
                        log_path=general_example_train_filepath))
    self.encoding = create_test_encoding(
        value_encoding=ValueEncodings.FREQUENCY.value,
        add_elapsed_time=True,
        task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
        prefix_length=1)
    self.labelling = create_test_labelling(
        label_type=LabelTypes.REMAINING_TIME.value)
    job = create_test_job(encoding=self.encoding, labelling=self.labelling)
    self.training_df, self.test_df = encode_label_logs(
        training_log, test_log, job)
def test_add_new_traces(self):
    """add_new_traces adds a new_traces column with per-row counts."""
    job = create_test_job(
        encoding=create_test_encoding(
            value_encoding=ValueEncodings.SIMPLE_INDEX.value,
            add_elapsed_time=True,
            add_new_traces=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=1),
        labelling=create_test_labelling(
            label_type=LabelTypes.REMAINING_TIME.value),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    encoded_df, _ = encode_label_logs(self.test_log, self.test_log, job)

    self.assertEqual(encoded_df.shape, (2, 5))
    self.assertTrue('new_traces' in encoded_df.columns.values.tolist())
    self.assertListEqual(encoded_df['new_traces'].tolist(), [2, 2])
def test_duration(self):
    """DURATION labelling on a two-event simple-index prefix."""
    job = create_test_job(
        encoding=create_test_encoding(
            value_encoding=ValueEncodings.SIMPLE_INDEX.value,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=2),
        labelling=create_test_labelling(label_type=LabelTypes.DURATION.value),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    encoded_df, _ = encode_label_logs(self.test_log, self.test_log, job)

    self.assertEqual(encoded_df.shape, (2, 4))
    self.assertListEqual(encoded_df.columns.values.tolist(),
                         ['trace_id', 'prefix_1', 'prefix_2', 'label'])
    row_5 = encoded_df[encoded_df.trace_id == '5'].iloc[0].values.tolist()
    self.assertListEqual(row_5, ['5', 1, 2, 0])
    row_4 = encoded_df[encoded_df.trace_id == '4'].iloc[0].values.tolist()
    self.assertListEqual(row_4, ['4', 1, 1, 0])
def test_no_label(self):
    """NO_LABEL yields only trace id and prefix columns."""
    job = create_test_job(
        encoding=create_test_encoding(
            value_encoding=ValueEncodings.SIMPLE_INDEX.value,
            add_elapsed_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=2),
        labelling=create_test_labelling(label_type=LabelTypes.NO_LABEL.value),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    encoded_df, _ = encode_label_logs(self.test_log, self.test_log, job)

    self.assertEqual(encoded_df.shape, (2, 3))
    row_5 = encoded_df[encoded_df.trace_id == '5'].iloc[0].values.tolist()
    self.assertListEqual(row_5, ['5', 1, 2])
    row_4 = encoded_df[encoded_df.trace_id == '4'].iloc[0].values.tolist()
    self.assertListEqual(row_4, ['4', 1, 1])
def test_add_resources_used(self):
    """add_resources_used adds a resources_used column with counts."""
    job = create_test_job(
        encoding=create_test_encoding(
            value_encoding=ValueEncodings.BOOLEAN.value,
            add_elapsed_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=2,
            add_resources_used=True),
        labelling=create_test_labelling(
            label_type=LabelTypes.REMAINING_TIME.value),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    _, encoded_df = encode_label_logs(self.train_log, self.test_log, job)

    self.assertEqual(encoded_df.shape, (2, 12))
    self.assertTrue('resources_used' in encoded_df.columns.values.tolist())
    self.assertListEqual(encoded_df['resources_used'].tolist(), [1, 1])
def test_no_label_zero_padding(self):
    """NO_LABEL with zero padding; the add_* flags have no effect."""
    job = create_test_job(
        encoding=create_test_encoding(
            value_encoding=ValueEncodings.COMPLEX.value,
            add_elapsed_time=True,
            add_remaining_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=10,
            padding=True),
        labelling=create_test_labelling(label_type=LabelTypes.NO_LABEL.value),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    _, encoded_df = encode_label_logs(self.train_log, self.test_log, job,
                                      get_additional_columns(self.train_log))
    self.assertEqual(encoded_df.shape, (2, 52))
def test_add_new_traces(self):
    """add_new_traces on a complex encoding with additional columns."""
    job = create_test_job(
        encoding=create_test_encoding(
            value_encoding=ValueEncodings.COMPLEX.value,
            prefix_length=2,
            add_new_traces=True,
            add_elapsed_time=True),
        labelling=create_test_labelling(
            label_type=LabelTypes.REMAINING_TIME.value),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    _, encoded_df = encode_label_logs(self.train_log, self.test_log, job,
                                      get_additional_columns(self.train_log))

    self.assertEqual(encoded_df.shape, (2, 15))
    self.assertTrue('new_traces' in encoded_df.columns.values.tolist())
    self.assertListEqual(encoded_df['new_traces'].tolist(), [0, 0])
def test_next_activity_zero_padding_elapsed_time(self):
    """Next-activity label on a padded complex encoding keeps elapsed_time."""
    job = create_test_job(
        encoding=create_test_encoding(
            value_encoding=ValueEncodings.COMPLEX.value,
            add_elapsed_time=True,
            add_remaining_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=10,
            padding=True),
        labelling=create_test_labelling(
            label_type=LabelTypes.NEXT_ACTIVITY.value),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    _, encoded_df = encode_label_logs(self.train_log, self.test_log, job,
                                      get_additional_columns(self.train_log))

    self.assertEqual(encoded_df.shape, (2, 55))
    self.assertTrue('elapsed_time' in encoded_df.columns.values.tolist())
def test_remaining_time_zero_padding(self):
    """Remaining-time label with prefixes zero-padded out to length 10."""
    job = create_test_job(
        encoding=create_test_encoding(
            value_encoding=ValueEncodings.SIMPLE_INDEX.value,
            add_elapsed_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=10,
            padding=True),
        labelling=create_test_labelling(
            label_type=LabelTypes.REMAINING_TIME.value),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    encoded_df, _ = encode_label_logs(self.test_log, self.test_log, job)

    self.assertEqual(encoded_df.shape, (2, 13))
    row_5 = encoded_df[encoded_df.trace_id == '5'].iloc[0].values.tolist()
    self.assertListEqual(row_5,
                         ['5', 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0])
    row_4 = encoded_df[encoded_df.trace_id == '4'].iloc[0].values.tolist()
    self.assertListEqual(row_4,
                         ['4', 1, 1, 2, 1, 2, 0, 0, 0, 0, 0, 0, 0])
def test_label_remaining_time_with_elapsed_time_custom_threshold(self):
    """Custom remaining-time threshold on a padded complex encoding."""
    job = create_test_job(
        encoding=create_test_encoding(
            value_encoding=ValueEncodings.COMPLEX.value,
            add_elapsed_time=True,
            add_remaining_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=10,
            padding=True),
        labelling=create_test_labelling(
            label_type=LabelTypes.REMAINING_TIME.value,
            threshold_type=ThresholdTypes.THRESHOLD_CUSTOM.value,
            threshold=40000),
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    _, encoded_df = encode_label_logs(self.train_log, self.test_log, job,
                                      get_additional_columns(self.test_log))
    self.assertEqual(encoded_df.shape, (2, 55))
def test_prefix_length_training(self):
    """A prefix length of 3 produces three prefix columns in both frames."""
    job = create_test_job(
        encoding=create_test_encoding(
            value_encoding=ValueEncodings.SIMPLE_INDEX.value,
            add_elapsed_time=True,
            task_generation_type=TaskGenerationTypes.ONLY_THIS.value,
            prefix_length=3),
        labelling=self.labelling,
        predictive_model=create_test_predictive_model(
            predictive_model=PredictiveModels.CLASSIFICATION.value))
    train_df, eval_df = encode_label_logs(self.training_log, self.test_log,
                                          job)

    for column in ("prefix_1", "prefix_2", "prefix_3"):
        self.assertIn(column, train_df.columns.values)
    self.assertEqual((4, 6), train_df.shape)
    self.assertEqual((2, 6), eval_df.shape)
    row = train_df[(train_df.trace_id == '3')].iloc[0]
    self.assertEqual(1, row.prefix_1)
    self.assertEqual(2, row.prefix_2)
    self.assertEqual(1, row.prefix_3)
    self.assertEqual(False, row.label)
    self.assertEqual(0, row.elapsed_time)
def get_encoded_logs(job: Job, use_cache: bool = True) -> (DataFrame, DataFrame):
    """returns the encoded logs

    returns the training and test DataFrames encoded using the given job configuration, loading from cache if possible
    :param job: job configuration
    :param use_cache: load or not saved datasets from cache
    :return: training and testing DataFrame
    """
    logger.info('\tGetting Dataset')
    # Caching is bypassed for time-series prediction jobs (and jobs without a
    # predictive model) — they always re-split and re-encode.
    if use_cache and \
        (job.predictive_model is not None and job.predictive_model.predictive_model != PredictiveModels.TIME_SERIES_PREDICTION.value):
        # Fastest path: a labelled (encoded) dataset cached for this exact
        # split/encoding/labelling combination.
        if LabelledLog.objects.filter(split=job.split,
                                      encoding=job.encoding,
                                      labelling=job.labelling).exists():
            try:
                training_df, test_df = get_labelled_logs(job)
            except FileNotFoundError:  # cache invalidation
                # The cache row exists but its backing file is gone: drop the
                # stale row and retry from scratch via recursion.
                LabelledLog.objects.filter(split=job.split,
                                           encoding=job.encoding,
                                           labelling=job.labelling).delete()
                logger.info('\t\tError pre-labeled cache invalidated!')
                return get_encoded_logs(job, use_cache)
        else:
            # Second-level cache: raw train/test logs already loaded for this
            # split, but not yet encoded with this configuration.
            if job.split.train_log is not None and \
               job.split.test_log is not None and \
               LoadedLog.objects.filter(split=job.split).exists():
                try:
                    training_log, test_log, additional_columns = get_loaded_logs(job.split)
                except FileNotFoundError:  # cache invalidation
                    # Same stale-file handling as above, for the loaded-log cache.
                    LoadedLog.objects.filter(split=job.split).delete()
                    logger.info('\t\tError pre-loaded cache invalidated!')
                    return get_encoded_logs(job, use_cache)
            else:
                # No cache at all: split the log now.
                training_log, test_log, additional_columns = get_train_test_log(job.split)
                if job.split.type == SplitTypes.SPLIT_SINGLE.value:
                    # A single log was split on the fly: persist the resulting
                    # train/test logs and turn the split into a double split so
                    # future runs can reuse them. The split row is re-fetched and
                    # duplicated so the original single split is not mutated.
                    job.split = duplicate_orm_row(Split.objects.filter(pk=job.split.pk)[0])
                    job.split.type = SplitTypes.SPLIT_DOUBLE.value
                    train_name = '0-' + str(int(100 - (job.split.test_size * 100)))
                    job.split.train_log = create_log(
                        EventLog(training_log),
                        train_name + '.xes'
                    )
                    test_name = str(int(100 - (job.split.test_size * 100))) + '-100'
                    job.split.test_log = create_log(
                        EventLog(test_log),
                        test_name + '.xes'
                    )
                    job.split.additional_columns = str(train_name + test_name)  # TODO: find better naming policy
                    job.split.save()
                # Populate the loaded-log cache for the (possibly new) split.
                put_loaded_logs(job.split, training_log, test_log, additional_columns)
            # Encode + label, then populate the labelled-log cache.
            training_df, test_df = encode_label_logs(
                training_log,
                test_log,
                job,
                additional_columns=additional_columns)
            put_labelled_logs(job, training_df, test_df)
    else:
        # Cache disabled or not applicable: split and encode from scratch.
        training_log, test_log, additional_columns = get_train_test_log(job.split)
        training_df, test_df = encode_label_logs(training_log,
                                                 test_log,
                                                 job,
                                                 additional_columns=additional_columns)
    return training_df, test_df
def progetto_padova():
    """Run the full Padova experiment pipeline end to end.

    Creates (or reuses) a prediction Job with a fixed split, encoding,
    labelling, clustering, predictive model and hyperparameter-optimisation
    configuration; then loads and encodes the logs, trains and evaluates the
    model, runs prediction on the whole dataset, and finally computes a LIME
    explanation for EXPLANATION_TARGET.
    """
    JOB = Job.objects.get_or_create(
        status=JobStatuses.CREATED.value,
        type=JobTypes.PREDICTION.value,
        split=Split.objects.get_or_create(  # this creates the split of the log
            type=SplitTypes.SPLIT_DOUBLE.value,
            train_log=create_log(  # this imports the log
                import_log(BASE_DIR + RELATIVE_TRAIN_PATH),
                RELATIVE_TRAIN_PATH,
                BASE_DIR,
                import_in_cache=False),
            test_log=create_log(  # this imports the log
                import_log(BASE_DIR + RELATIVE_VALIDATION_PATH),
                RELATIVE_VALIDATION_PATH,
                BASE_DIR,
                import_in_cache=False))[0],
        encoding=Encoding.objects.get_or_create(  # this defines the encoding method
            data_encoding=DataEncodings.LABEL_ENCODER.value,
            value_encoding=ValueEncodings.SIMPLE_INDEX.value,
            add_elapsed_time=False,
            add_remaining_time=False,
            add_executed_events=False,
            add_resources_used=False,
            add_new_traces=False,
            prefix_length=5,
            padding=True,
            task_generation_type=TaskGenerationTypes.ALL_IN_ONE.value,
            features=[])[0],
        labelling=Labelling.objects.get_or_create(  # this defines the label
            type=LabelTypes.ATTRIBUTE_STRING.value,
            attribute_name='label',
            threshold_type=None,
            threshold=None)[0],
        clustering=Clustering.init(ClusteringMethods.NO_CLUSTER.value,
                                   configuration={}),
        predictive_model=PredictiveModel.init(  # this defines the predictive model
            get_prediction_method_config(
                PredictiveModels.CLASSIFICATION.value,
                ClassificationMethods.DECISION_TREE.value,
                payload={
                    'max_depth': 2,
                    'min_samples_split': 2,
                    'min_samples_leaf': 2
                })),
        hyperparameter_optimizer=HyperparameterOptimization.init(
            {  # this defines the hyperparameter optimisation procedure
                'type': HyperparameterOptimizationMethods.HYPEROPT.value,
                'max_evaluations': 10,
                # FIX: these two values were swapped — the loss (AUC) belongs to
                # performance_metric and the search algorithm (TPE) to
                # algorithm_type.
                'performance_metric': HyperOptLosses.AUC.value,
                'algorithm_type': HyperOptAlgorithms.TPE.value
            }),
        create_models=True)[0]

    # load log
    train_log, test_log, additional_columns = get_train_test_log(JOB.split)

    # encode
    train_df, test_df = encode_label_logs(train_log, test_log, JOB)

    # train + evaluate
    results, model_split = MODEL[JOB.predictive_model.predictive_model][
        ModelActions.BUILD_MODEL_AND_TEST.value](train_df, test_df,
                                                 _init_clusterer(
                                                     JOB.clustering, train_df),
                                                 JOB)

    if JOB.create_models:
        save_models(model_split, JOB)

    # predict — keep predictions and probabilities in separate variables
    # (FIX: the second call previously overwrote the first's results).
    data_df = pd.concat([train_df, test_df])
    predictions = MODEL[JOB.predictive_model.predictive_model][
        ModelActions.PREDICT.value](JOB, data_df)
    prediction_probabilities = MODEL[JOB.predictive_model.predictive_model][
        ModelActions.PREDICT_PROBA.value](JOB, data_df)

    # lime
    exp = Explanation.objects.get_or_create(
        type=ExplanationTypes.LIME.value,
        split=JOB.split,  # this defines the analysed log, you can use a different one from the training one
        predictive_model=JOB.predictive_model,
        job=JOB)[0]
    error, result = explanation(exp.id, int(EXPLANATION_TARGET))
def replay_core(replay_job: Job, training_initial_job: Job) -> list:
    """The function create a set with timestamps of events, then create a list of requests simulating the log in the time passing

    :param replay_job: job dictionary
    :param training_initial_job: job dictionary
    :return: List of requests
    """
    split = replay_job.split
    log = get_log(split.train_log)
    requests_list = list()

    # Deep-copy the log (attributes + traces) so the original is not mutated
    # by the timestamp filtering below.
    eventlog = EventLog()
    for key in log.attributes.keys():
        eventlog.attributes[key] = log.attributes[key]
    for trace in log:
        new_trace = Trace(trace)
        for key in trace.attributes:
            new_trace.attributes[key] = trace.attributes[key]
        eventlog.append(new_trace)

    # All distinct event timestamps, in order; used to slice the log into
    # 5 growing snapshots (starting from the 3rd timestamp).
    times = sorted(
        set([event['time:timestamp'] for trace in eventlog
             for event in trace]))
    for t in times[2::int((len(times) - 2) / 5)]:
        # Snapshot of the log up to time t (timezones stripped for comparison).
        filtered_eventlog = timestamp_filter.apply_events(
            eventlog, times[0].replace(tzinfo=None), t.replace(tzinfo=None))
        trace_list = list()
        event_number = dict()
        for trace in filtered_eventlog:
            trace_list.append(trace.attributes['concept:name'])
            event_number[trace.attributes['concept:name']] = len(trace)
        # Record which traces (and how many events each) are in this snapshot.
        replay_job.case_id = trace_list
        replay_job.event_number = event_number
        replay_job.save()
        try:  # TODO check logger usage
            logger.info("Sending request for replay_prediction task.")
            # Fire the snapshot at the replay-prediction endpoint; the response
            # (or the error) is only recorded, never raised.
            r = requests.post(
                url="http://server:8000/runtime/replay_prediction/",
                data=export_log_as_string(filtered_eventlog),
                params={
                    'jobId': replay_job.id,
                    'training_job': training_initial_job.id
                },
                headers={
                    'Content-Type': 'text/plain',
                    'charset': 'UTF-8'
                })
            requests_list.append(str(r))
        except Exception as e:
            requests_list.append(str(e))
            logger.warning(str(e))

    # Gold labels: encode the full training log once and map trace_id -> label.
    training_log, test_log, additional_columns = get_train_test_log(
        replay_job.split)
    training_df, _ = encode_label_logs(training_log,
                                       test_log,
                                       replay_job,
                                       additional_columns=additional_columns)
    gold_values = dict(zip(training_df['trace_id'], training_df['label']))
    parent_id = replay_job.id
    # Clone the replay job field-by-field into a new REPLAY_PREDICT job that
    # carries the gold values and points back at the original as parent.
    # final_job = duplicate_orm_row(replay_job)  # todo: replace with simple CREATE
    final_job = Job.objects.create(
        created_date=replay_job.created_date,
        modified_date=replay_job.modified_date,
        error=replay_job.error,
        status=replay_job.status,
        type=replay_job.type,
        create_models=replay_job.create_models,
        case_id=replay_job.case_id,
        event_number=replay_job.event_number,
        gold_value=replay_job.gold_value,
        results=replay_job.results,
        parent_job=replay_job.parent_job,
        split=replay_job.split,
        encoding=replay_job.encoding,
        labelling=replay_job.labelling,
        clustering=replay_job.clustering,
        predictive_model=replay_job.predictive_model,
        evaluation=replay_job.evaluation,
        hyperparameter_optimizer=replay_job.hyperparameter_optimizer,
        incremental_train=replay_job.incremental_train)
    final_job.parent_job = Job.objects.filter(pk=parent_id)[0]
    final_job.gold_value = gold_values
    final_job.type = JobTypes.REPLAY_PREDICT.value
    final_job.save()
    return requests_list