def mannage_results(self) -> None:
    self.response, measurements = self.define_response(
        self.status, self.sim_values, self.settings)
    if self.settings['exec_mode'] in ['optimizer', 'tasks_optimizer']:
        # Append to the shared temp file, writing the header only on first use
        temp_path = os.path.join('outputs', self.settings['temp_file'])
        if os.path.getsize(temp_path) > 0:
            sup.create_csv_file(measurements, temp_path, mode='a')
        else:
            sup.create_csv_file_header(measurements, temp_path)
    else:
        print('------ Final results ------')
        for k, v in self.response.items():
            if k != 'params':
                print(k, v, sep=': ')
        self.response.pop('params', None)
        sup.create_csv_file_header(
            [self.response],
            os.path.join('outputs', self.settings['temp_file']))

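# The `sup` calls above come from this project's support-utilities module,
# whose implementation is not shown here. A minimal sketch of what the calls
# appear to assume, built on csv.DictWriter; the `_sketch` names are
# hypothetical, not the project's actual code:
import csv

def create_csv_file_header_sketch(rows, file_path, mode='w'):
    # Write a header row derived from the dict keys, then the rows themselves
    with open(file_path, mode, newline='') as f:
        writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
        writer.writeheader()
        writer.writerows(rows)

def create_csv_file_sketch(rows, file_path, mode='w'):
    # Write (or append) rows without a header, matching the existing columns
    with open(file_path, mode, newline='') as f:
        writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
        writer.writerows(rows)
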
def save_results(measurements, feature, parms):
    if measurements:
        if parms['is_single_exec']:
            output_route = os.path.join('output_files', parms['folder'], 'results')
            model_name, _ = os.path.splitext(parms['model_file'])
            sup.create_csv_file_header(
                measurements,
                os.path.join(output_route,
                             model_name + '_' + feature + '_' +
                             parms['activity'] + '.csv'))
        else:
            shared_path = os.path.join(
                'output_files', feature + '_' + parms['activity'] + '.csv')
            if os.path.exists(shared_path):
                sup.create_csv_file(measurements, shared_path, mode='a')
            else:
                sup.create_csv_file_header(measurements, shared_path)

def print_measures(self):
    """Prints the similarity results detail"""
    print_path = os.path.join(self.output, 'sim_data', 'measures.csv')
    if os.path.exists(print_path):
        sup.create_csv_file(self.measures, print_path, mode='a')
    else:
        sup.create_csv_file_header(self.measures, print_path)

def save_times(times, settings):
    times = [{**{'output': settings['output']}, **times}]
    log_file = os.path.join('outputs', 'execution_times.csv')
    # Write the header on first use; append on subsequent runs
    if os.path.exists(log_file) and os.path.getsize(log_file) > 0:
        sup.create_csv_file(times, log_file, mode='a')
    else:
        sup.create_csv_file_header(times, log_file)

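# The append-or-create branching above recurs in nearly every function in this
# section. A hedged sketch of a shared helper that could replace the repeated
# branches; `save_or_append` is a hypothetical name, not part of the codebase:
def save_or_append(rows, file_path):
    # Append when the file already holds data; otherwise write it with a header
    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
        sup.create_csv_file(rows, file_path, mode='a')
    else:
        sup.create_csv_file_header(rows, file_path)

# With it, save_times(times, settings) would reduce to:
#   times = [{**{'output': settings['output']}, **times}]
#   save_or_append(times, os.path.join('outputs', 'execution_times.csv'))
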
def on_train_end(self, logs=None):
    # Persist per-epoch timing statistics gathered during training
    log_file = os.path.join('output_files', 'training_times.csv')
    data = [{
        'output_folder': self.output_folder,
        'train_epochs': len(self.logs),
        'avg_time': np.mean(self.logs),
        'min_time': np.min(self.logs),
        'max_time': np.max(self.logs)
    }]
    if os.path.exists(log_file):
        sup.create_csv_file(data, log_file, mode='a')
    else:
        sup.create_csv_file_header(data, log_file)

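# `on_train_end` above is one hook of a Keras callback, and `self.logs` is
# assumed to collect per-epoch durations. A minimal sketch of the surrounding
# class, with the on_train_end above as its final hook; the class name and
# timing fields are assumptions:
import time
from tensorflow.keras.callbacks import Callback

class TimingCallbackSketch(Callback):
    def __init__(self, output_folder):
        super().__init__()
        self.output_folder = output_folder
        self.logs = []

    def on_epoch_begin(self, epoch, logs=None):
        self.start_time = time.time()

    def on_epoch_end(self, epoch, logs=None):
        # Record how long the epoch took
        self.logs.append(time.time() - self.start_time)

# Usage: model.fit(x, y, epochs=10, callbacks=[TimingCallbackSketch('run_01')])
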
def print_measures(settings, measurements):
    measures_path = os.path.join(settings['output'], 'sim_data',
                                 'similarity_measures.csv')
    if os.path.exists(measures_path):
        sup.create_csv_file(measurements, measures_path, mode='a')
    else:
        sup.create_csv_file_header(measurements, measures_path)

def save_results(measurements, feature, is_single_exec, parameters):
    output_route = os.path.join('output_files', parameters['folder'])
    model_name, _ = os.path.splitext(parameters['model_file'])
    if measurements:
        if is_single_exec:
            sup.create_csv_file_header(
                measurements,
                os.path.join(output_route,
                             model_name + '_' + feature + '_full_suff.csv'))
        else:
            shared_path = os.path.join(
                'output_files', 'full_' + feature + '_suffix_measures.csv')
            if os.path.exists(shared_path):
                sup.create_csv_file(measurements, shared_path, mode='a')
            else:
                sup.create_csv_file_header(measurements, shared_path)

def save_results(measurements, feature, is_single_exec, model_file, output_folder):
    model_name, _ = os.path.splitext(model_file)
    if measurements:
        if is_single_exec:
            sup.create_csv_file_header(
                measurements,
                os.path.join(output_folder,
                             model_name + '_' + feature + '_full_suff.csv'))
        else:
            shared_path = os.path.join(
                output_folder, 'full_' + feature + '_suffix_measures.csv')
            if os.path.exists(shared_path):
                sup.create_csv_file(measurements, shared_path, mode='a')
            else:
                sup.create_csv_file_header(measurements, shared_path)

def predict_next(timeformat, parameters, is_single_exec=True):
    """Main function of the next-event prediction module.

    Args:
        timeformat (str): event-log date-time format.
        parameters (dict): parameters used in the training step.
        is_single_exec (bool): generate measurements standalone or share
            results with other running experiments (optional).
    """
    global START_TIMEFORMAT
    global INDEX_AC
    global INDEX_RL
    global DIM
    global TBTW
    global EXP

    START_TIMEFORMAT = timeformat
    output_route = os.path.join('output_files', parameters['folder'])
    model_name, _ = os.path.splitext(parameters['model_file'])
    # Load the testing dataframe
    df_test = pd.read_csv(
        os.path.join(output_route, 'parameters', 'test_log.csv'))
    df_test['start_timestamp'] = pd.to_datetime(df_test['start_timestamp'])
    df_test['end_timestamp'] = pd.to_datetime(df_test['end_timestamp'])
    df_test = df_test.drop(columns=['user'])
    df_test = df_test.rename(index=str, columns={"role": "user"})
    # Load the parameters saved during training
    with open(os.path.join(output_route, 'parameters',
                           'model_parameters.json')) as file:
        data = json.load(file)
        EXP = {k: v for k, v in data['exp_desc'].items()}
        print(EXP)
        DIM['samples'] = int(data['dim']['samples'])
        DIM['time_dim'] = int(data['dim']['time_dim'])
        DIM['features'] = int(data['dim']['features'])
        TBTW['max_tbtw'] = float(data['max_tbtw'])
        INDEX_AC = {int(k): v for k, v in data['index_ac'].items()}
        INDEX_RL = {int(k): v for k, v in data['index_rl'].items()}
    # Normalize times-between-events with the same method used in training
    if EXP['norm_method'] == 'max':
        max_tbtw = np.max(df_test.tbtw)
        df_test['tbtw_norm'] = df_test['tbtw'] / max_tbtw
    elif EXP['norm_method'] == 'lognorm':
        df_test['tbtw_log'] = df_test['tbtw'].apply(math.log1p)
        max_tbtw = np.max(df_test.tbtw_log)
        df_test['tbtw_norm'] = df_test['tbtw_log'] / max_tbtw
    ac_alias = create_alias(len(INDEX_AC))
    rl_alias = create_alias(len(INDEX_RL))
    # Next-event selection method and number of repetitions
    variants = [{'imp': 'Random Choice', 'rep': 15},
                {'imp': 'Arg Max', 'rep': 1}]
    # Generation of predictions
    model = load_model(os.path.join(output_route, parameters['model_file']))
    for var in variants:
        measurements = list()
        for i in range(0, var['rep']):
            prefixes = create_pref_suf(df_test, ac_alias, rl_alias)
            prefixes = predict(model, prefixes, ac_alias, rl_alias, var['imp'])
            accuracy = np.sum([x['ac_true'] for x in prefixes]) / len(prefixes)
            if is_single_exec:
                sup.create_csv_file_header(
                    prefixes,
                    os.path.join(output_route,
                                 model_name + '_rep_' + str(i) + '_next.csv'))
            # Save results
            measurements.append({
                **dict(model=os.path.join(output_route,
                                          parameters['model_file']),
                       implementation=var['imp']),
                **{'accuracy': accuracy},
                **EXP})
        if measurements:
            if is_single_exec:
                sup.create_csv_file_header(
                    measurements,
                    os.path.join(output_route, model_name + '_next.csv'))
            else:
                shared_path = os.path.join('output_files',
                                           'next_event_measures.csv')
                if os.path.exists(shared_path):
                    sup.create_csv_file(measurements, shared_path, mode='a')
                else:
                    sup.create_csv_file_header(measurements, shared_path)

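# A hedged usage sketch for predict_next; the folder name, model file, and
# timeformat string below are placeholders, and the parameter keys are
# inferred from the accesses inside the function:
if __name__ == '__main__':
    params = {'folder': 'run_folder',    # subfolder under output_files/
              'model_file': 'model.h5'}  # trained Keras model
    predict_next('%Y-%m-%dT%H:%M:%S', params, is_single_exec=True)
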
def objective(settings):
    """Main application method"""
    # Read settings from the config file
    settings = read_settings(settings)
    # Output folder creation
    if not os.path.exists(settings['output']):
        os.makedirs(settings['output'])
        os.makedirs(os.path.join(settings['output'], 'sim_data'))
    # Copy the event log to the output folder
    copyfile(os.path.join(settings['input'], settings['file']),
             os.path.join(settings['output'], settings['file']))
    # Event log reading
    log = lr.LogReader(os.path.join(settings['output'], settings['file']),
                       settings['timeformat'])
    # Execution steps
    mining_structure(settings, settings['epsilon'], settings['eta'])
    bpmn = br.BpmnReader(os.path.join(
        settings['output'], settings['file'].split('.')[0] + '.bpmn'))
    process_graph = gph.create_process_structure(bpmn)
    # Evaluate alignment
    chk.evaluate_alignment(process_graph, log, settings)
    print("-- Mining Simulation Parameters --")
    parameters, process_stats = par.extract_parameters(log, bpmn, process_graph)
    xml.print_parameters(
        os.path.join(settings['output'],
                     settings['file'].split('.')[0] + '.bpmn'),
        os.path.join(settings['output'],
                     settings['file'].split('.')[0] + '.bpmn'),
        parameters)
    response = dict()
    measurements = list()
    status = STATUS_OK
    sim_values = list()
    process_stats = pd.DataFrame.from_records(process_stats)
    for rep in range(settings['repetitions']):
        print("Experiment #" + str(rep + 1))
        try:
            simulate(settings, rep)
            process_stats = process_stats.append(
                measure_stats(settings, bpmn, rep),
                ignore_index=True, sort=False)
            sim_values.append(gen.mesurement(process_stats, settings, rep))
        except Exception:
            status = STATUS_FAIL
            break
    data = {'alg_manag': settings['alg_manag'],
            'epsilon': settings['epsilon'],
            'eta': settings['eta'],
            'output': settings['output']}
    if status == STATUS_OK:
        loss = 1 - np.mean([x['act_norm'] for x in sim_values])
        # A negative loss (mean similarity above 1) is reported as a failed trial
        if loss < 0:
            response = {'loss': loss, 'params': settings, 'status': STATUS_FAIL}
            measurements.append({**{'loss': loss, 'status': STATUS_FAIL}, **data})
        else:
            response = {'loss': loss, 'params': settings, 'status': status}
            measurements.append({**{'loss': loss, 'status': status}, **data})
    else:
        response = {'params': settings, 'status': status}
        measurements.append({**{'loss': 1, 'status': status}, **data})
    temp_path = os.path.join('outputs', settings['temp_file'])
    if os.path.getsize(temp_path) > 0:
        sup.create_csv_file(measurements, temp_path, mode='a')
    else:
        sup.create_csv_file_header(measurements, temp_path)
    return response

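# `objective` returns the {'loss', 'status'} dict that hyperopt expects, and
# STATUS_OK / STATUS_FAIL are hyperopt constants. A hedged sketch of how an
# optimizer loop could drive it; the search-space bounds, the alg_manag option
# names, and max_evals are assumptions, not values from this codebase:
from hyperopt import fmin, tpe, hp, Trials

space = {'epsilon': hp.uniform('epsilon', 0.0, 1.0),
         'eta': hp.uniform('eta', 0.0, 1.0),
         'alg_manag': hp.choice('alg_manag',
                                ['replacement', 'repair', 'removal'])}
best = fmin(fn=objective, space=space, algo=tpe.suggest,
            max_evals=50, trials=Trials())
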
def predict_suffix(output_folder, model_file, is_single_exec=True):
    """Main function of the suffix prediction module.

    Args:
        output_folder (str): name of the folder containing the trained model.
        model_file (str): file name of the trained model.
        is_single_exec (bool): generate measurements standalone or share
            results with other running experiments (optional).
    """
    global INDEX_AC
    global INDEX_RL
    global DIM
    global TBTW
    global EXP

    max_trace_size = 100
    output_route = os.path.join('..', 'Camargo', 'output_files', output_folder)
    model_name, _ = os.path.splitext(model_file)
    # Load the testing dataframe
    df_test = pd.read_csv(
        os.path.join(output_route, 'parameters', 'test_log.csv'))
    # df_test['start_timestamp'] = pd.to_datetime(df_test['start_timestamp'])
    # df_test['end_timestamp'] = pd.to_datetime(df_test['end_timestamp'])
    # df_test = df_test.drop(columns=['user'])
    # df_test = df_test.rename(index=str, columns={"role": "user"})
    # Load the parameters saved during training
    with open(os.path.join(output_route, 'parameters',
                           'model_parameters.json')) as file:
        data = json.load(file)
        EXP = {k: v for k, v in data['exp_desc'].items()}
        print(EXP)
        DIM['samples'] = int(data['dim']['samples'])
        DIM['time_dim'] = int(data['dim']['time_dim'])
        DIM['features'] = int(data['dim']['features'])
        INDEX_AC = {int(k): v for k, v in data['index_ac'].items()}
        INDEX_RL = {int(k): v for k, v in data['index_rl'].items()}
    ac_alias = create_alias(len(INDEX_AC))
    rl_alias = create_alias(len(INDEX_RL))
    # Next-event selection method and number of repetitions
    variants = [{'imp': 'Random Choice', 'rep': 2},
                {'imp': 'Arg Max', 'rep': 1}]
    # Generation of predictions
    model = load_model(os.path.join(output_route, model_file))
    for var in variants:
        args = dict(df_test=df_test,
                    ac_alias=ac_alias,
                    rl_alias=rl_alias,
                    output_route=output_route,
                    model_file=model_file,
                    imp=var['imp'],
                    max_trace_size=max_trace_size)
        measurements = list()
        for i in range(0, var['rep']):
            results = execute_experiments([2, 5, 8, 10, 15, 20], model, args)
            # Save results
            measurements.append({
                **dict(model=os.path.join(output_route, model_file),
                       implementation=var['imp']),
                **results,
                **EXP})
        if measurements:
            if is_single_exec:
                sup.create_csv_file_header(
                    measurements,
                    os.path.join(output_route, model_name + '_sufix.csv'))
            else:
                shared_path = os.path.join(output_route, 'sufix_measures.csv')
                if os.path.exists(shared_path):
                    sup.create_csv_file(measurements, shared_path, mode='a')
                else:
                    sup.create_csv_file_header(measurements, shared_path)

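# `create_alias`, used above and in predict_next, is not shown in this section.
# A minimal sketch consistent with its use as an index -> short-label map; the
# single-character scheme is an assumption, and indexes beyond the alphabet
# would collide in this simplified form:
import string

def create_alias_sketch(quantity):
    # Map each category index to a letter alias: 0 -> 'a', 1 -> 'b', ...
    characters = string.ascii_letters
    return {i: characters[i % len(characters)] for i in range(quantity)}
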
def predict(timeformat, parameters, is_single_exec=True):
    """Main function of the event log generation module.

    Args:
        timeformat (str): event-log date-time format.
        parameters (dict): parameters used in the training step.
        is_single_exec (bool): generate measurements standalone or share
            results with other running experiments (optional).
    """
    global START_TIMEFORMAT
    global INDEX_AC
    global INDEX_RL
    global DIM
    global TBTW
    global EXP

    START_TIMEFORMAT = timeformat
    output_route = os.path.join('output_files', parameters['folder'])
    model_name, _ = os.path.splitext(parameters['model_file'])
    # Load the testing dataframe
    df_test = pd.read_csv(
        os.path.join(output_route, 'parameters', 'test_log.csv'))
    df_test['start_timestamp'] = pd.to_datetime(df_test['start_timestamp'])
    df_test['end_timestamp'] = pd.to_datetime(df_test['end_timestamp'])
    df_test = df_test.drop(columns=['user'])
    df_test = df_test.rename(index=str, columns={"role": "user"})
    # Load the parameters saved during training
    with open(os.path.join(output_route, 'parameters',
                           'model_parameters.json')) as file:
        data = json.load(file)
        EXP = {k: v for k, v in data['exp_desc'].items()}
        print(EXP)
        DIM['samples'] = int(data['dim']['samples'])
        DIM['time_dim'] = int(data['dim']['time_dim'])
        DIM['features'] = int(data['dim']['features'])
        TBTW['max_tbtw'] = float(data['max_tbtw'])
        INDEX_AC = {int(k): v for k, v in data['index_ac'].items()}
        INDEX_RL = {int(k): v for k, v in data['index_rl'].items()}
    # Next-event selection method and number of repetitions
    variants = [{'imp': 'Random Choice', 'rep': 1},
                {'imp': 'Arg Max', 'rep': 0}]
    # Generation of predictions
    model = load_model(os.path.join(output_route, parameters['model_file']))
    df_test_log = df_test.to_dict('records')
    for var in variants:
        for _ in range(0, var['rep']):
            generated_event_log = generate_traces(
                model, var['imp'], len(df_test.caseid.unique()), 200)
            sim_task = gen.gen_mesurement(df_test_log, generated_event_log, 'task')
            sim_role = gen.gen_mesurement(df_test_log, generated_event_log, 'user')
            if is_single_exec:
                sup.create_csv_file_header(
                    sim_task,
                    os.path.join(output_route, model_name + '_similarity.csv'))
                sup.create_csv_file_header(
                    generated_event_log,
                    os.path.join(output_route, model_name + '_log.csv'))
            # Save results
            measurements = list()
            measurements.append({
                **dict(model=os.path.join(output_route,
                                          parameters['model_file']),
                       implementation=var['imp'],
                       dl_task=np.mean([x['sim_score'] for x in sim_task]),
                       dl_user=np.mean([x['sim_score'] for x in sim_role]),
                       mae=np.mean([x['abs_err'] for x in sim_task]),
                       dlt=np.mean([x['sim_score_t'] for x in sim_task])),
                **EXP})
            if is_single_exec:
                sup.create_csv_file_header(
                    measurements,
                    os.path.join('output_files', model_name + '_measures.csv'))
            else:
                shared_path = os.path.join('output_files', 'total_measures.csv')
                if os.path.exists(shared_path):
                    sup.create_csv_file(measurements, shared_path, mode='a')
                else:
                    sup.create_csv_file_header(measurements, shared_path)