def on_start_opt_clicked(_):
    global ON_EXECUTION
    # "linking function with output"
    res_out.value = ''
    graph_out.clear_output()
    # what happens when we press the button
    temp_file = sup.folder_id()
    if not os.path.exists(os.path.join('outputs', temp_file)):
        open(os.path.join('outputs', temp_file), 'w').close()
    settings = {'file': txt_eventlog.value,
                'repetitions': sl_rep_opt.value,
                'simulation': True,
                'temp_file': temp_file}
    args = {'epsilon': sl_epsilon_range.value,
            'eta': sl_eta_range.value,
            'max_eval': sl_max_evals.value}
    ON_EXECUTION = True
    thread = threading.Thread(target=work, args=(temp_file, ))
    thread.start()
    # Deactivate controls
    change_enablement(box_opt, True)
    results, bayes_trials = sim.hyper_execution(settings, args)
    ON_EXECUTION = False
    # Reactivate controls
    change_enablement(box_opt, False)
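# Usage sketch (assumption, not part of the original source): the handler above follows
# the ipywidgets on_click convention, so it would be attached to the start button roughly
# as shown below; the widget name `btn_start_opt` is hypothetical.
import ipywidgets as widgets

btn_start_opt = widgets.Button(description='Start optimizer')  # hypothetical widget
btn_start_opt.on_click(on_start_opt_clicked)  # the callback receives the button instance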
def define_general_settings(settings):
    """Sets the app general settings"""
    column_names = {'Case ID': 'caseid',
                    'Activity': 'task',
                    'lifecycle:transition': 'event_type',
                    'Resource': 'user'}
    # Event-log reading options
    settings['read_options'] = {'timeformat': '%Y-%m-%dT%H:%M:%S.%f',
                                'column_names': column_names,
                                'one_timestamp': False,
                                'filter_d_attrib': True,
                                'ns_include': True}
    # Folders structure
    settings['input'] = 'inputs'
    settings['output'] = os.path.join('outputs', sup.folder_id())
    # External tools routes
    settings['miner_path'] = os.path.join('external_tools', 'splitminer', 'splitminer.jar')
    settings['bimp_path'] = os.path.join('external_tools', 'bimp', 'qbp-simulator-engine.jar')
    settings['align_path'] = os.path.join('external_tools', 'proconformance', 'ProConformance2.jar')
    settings['aligninfo'] = os.path.join(settings['output'], 'CaseTypeAlignmentResults.csv')
    settings['aligntype'] = os.path.join(settings['output'], 'AlignmentStatistics.csv')
    return settings
def temp_path_redef(self) -> None:
    # Paths redefinition
    self.settings['output'] = os.path.join('outputs', sup.folder_id())
    if self.settings['alg_manag'] == 'repair':
        try:
            self.settings['aligninfo'] = os.path.join(
                self.settings['output'], 'CaseTypeAlignmentResults.csv')
            self.settings['aligntype'] = os.path.join(
                self.settings['output'], 'AlignmentStatistics.csv')
        except Exception as e:
            print(e)
            self.status = STATUS_FAIL
def sbatch_creator(configs):
    for i, _ in enumerate(configs):
        if configs[i]['model_type'] in ['shared_cat', 'seq2seq']:
            exp_name = (os.path.splitext(log)[0].lower().split(' ')[0][:4] + arch)
        elif configs[i]['model_type'] in ['shared_cat_inter', 'seq2seq_inter',
                                          'shared_cat_inter_full']:
            exp_name = (os.path.splitext(log)[0].lower().split(' ')[0][:4] + arch + 'i')
        elif configs[i]['model_type'] in ['shared_cat_snap']:
            exp_name = (os.path.splitext(log)[0].lower().split(' ')[0][:4] + arch + 's')
        elif configs[i]['model_type'] in ['shared_cat_city']:
            exp_name = (os.path.splitext(log)[0].lower().split(' ')[0][:4] + arch + 'c')
        if imp == 2:
            default = ['#!/bin/bash',
                       '#SBATCH --partition=gpu',
                       '#SBATCH --gres=gpu:tesla:1',
                       '#SBATCH -J ' + exp_name,
                       '#SBATCH -N 1',
                       '#SBATCH --mem=14000',
                       '#SBATCH -t 72:00:00',
                       'module load cuda/10.0',
                       'module load python/3.6.3/virtenv',
                       'source activate lstm_pip']
        else:
            default = ['#!/bin/bash',
                       '#SBATCH --partition=amd',
                       '#SBATCH -J ' + exp_name,
                       '#SBATCH -N 1',
                       '#SBATCH --mem=14000',
                       '#SBATCH -t 72:00:00',
                       'module load cuda/10.0',
                       'module load python/3.6.3/virtenv',
                       'source activate lstm_pip']

        def format_option(short, parm):
            return (' -' + short + ' None'
                    if configs[i][parm] in [None, 'nan', '', np.nan]
                    else ' -' + short + ' ' + str(configs[i][parm]))

        options = 'python lstm.py -f ' + log + ' -i ' + str(imp)
        options += ' -a training'
        options += ' -o True'
        options += format_option('l', 'lstm_act')
        options += format_option('y', 'l_sizes')
        options += format_option('d', 'dense_act')
        options += format_option('n', 'norm_method')
        options += format_option('m', 'model_type')
        options += format_option('p', 'optimizers')
        if arch == 'sh':
            options += format_option('z', 'n_sizes')
        default.append(options)
        file_name = sup.folder_id()
        sup.create_text_file(default, os.path.join(output_folder, file_name))
def main(argv):
    settings = dict()
    args = dict()
    # Exec mode: 'single' or 'optimizer'
    settings['exec_mode'] = 'single'
    # Parameters are either fixed manually here or caught from the console for batch operations
    if not argv:
        # Event-log filename
        settings['file'] = 'Production.xes.gz'
        settings['repetitions'] = 1
        settings['simulation'] = True
        if settings['exec_mode'] == 'single':
            # Split Miner settings [0..1]
            settings['epsilon'] = 0.7
            settings['eta'] = 0.7
            # 'removal', 'replacement', 'repairment'
            settings['alg_manag'] = 'removal'
            # Single execution
            sim.single_exec(settings)
        else:
            args['epsilon'] = [0.3, 0.7]
            args['eta'] = [0.3, 0.7]
            args['max_eval'] = 2
            settings['temp_file'] = sup.folder_id()
            # Execute optimizer
            if not os.path.exists(os.path.join('outputs', settings['temp_file'])):
                open(os.path.join('outputs', settings['temp_file']), 'w').close()
            sim.hyper_execution(settings, args)
    else:
        # Catch parameters from the console
        try:
            opts, _ = getopt.getopt(argv, "hf:e:n:m:r:",
                                    ['eventlog=', "epsilon=", "eta=",
                                     "alg_manag=", "repetitions="])
            for opt, arg in opts:
                key = catch_parameter(opt)
                if key in ['epsilon', 'eta']:
                    settings[key] = float(arg)
                elif key == 'repetitions':
                    settings[key] = int(arg)
                else:
                    settings[key] = arg
        except getopt.GetoptError:
            print('Invalid option')
            sys.exit(2)
        settings['simulation'] = True
        sim.single_exec(settings)
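# Minimal sketch (assumption): `catch_parameter` is referenced above but not shown in this
# listing. Judging from the getopt option string "hf:e:n:m:r:" and the long options, it
# plausibly maps each console switch to the settings key used in main(); the mapping below
# is illustrative only.
def catch_parameter(opt):
    """Map a console switch to its settings key (illustrative only)."""
    switch = {'-h': 'help',
              '-f': 'file', '--eventlog': 'file',
              '-e': 'epsilon', '--epsilon': 'epsilon',
              '-n': 'eta', '--eta': 'eta',
              '-m': 'alg_manag', '--alg_manag': 'alg_manag',
              '-r': 'repetitions', '--repetitions': 'repetitions'}
    return switch[opt]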
def read_settings(settings):
    """Catch parameters from console or defined in code"""
    config = cp.ConfigParser(interpolation=None)
    config.read("./config.ini")
    # Basic settings
    settings['input'] = config.get('FOLDERS', 'inputs')
    settings['output'] = os.path.join(config.get('FOLDERS', 'outputs'), sup.folder_id())
    settings['timeformat'] = config.get('EXECUTION', 'timeformat')
    # Conditional settings
    settings['miner_path'] = reformat_path(config.get('EXTERNAL', 'splitminer'))
    if settings['alg_manag'] == 'repairment':
        settings['align_path'] = reformat_path(config.get('EXTERNAL', 'proconformance'))
        settings['aligninfo'] = os.path.join(settings['output'],
                                             config.get('ALIGNMENT', 'aligninfo'))
        settings['aligntype'] = os.path.join(settings['output'],
                                             config.get('ALIGNMENT', 'aligntype'))
    if settings['simulation']:
        settings['bimp_path'] = reformat_path(config.get('EXTERNAL', 'bimp'))
    return settings
def sbatch_creator(file_list, activity):
    exp_name = activity[:4]
    for file in file_list:
        if imp == 2:
            default = ['#!/bin/bash',
                       '#SBATCH --partition=gpu',
                       '#SBATCH --gres=gpu:tesla:1',
                       '#SBATCH -J ' + exp_name,
                       '#SBATCH -N 1',
                       '#SBATCH --mem=7000',
                       '#SBATCH -t 24:00:00',
                       'module load cuda/10.0',
                       'module load python/3.6.3/virtenv',
                       'source activate lstm_pip']
        else:
            default = ['#!/bin/bash',
                       '#SBATCH --partition=main',
                       '#SBATCH -J ' + exp_name,
                       '#SBATCH -N 1',
                       '#SBATCH --mem=7000',
                       '#SBATCH -t 24:00:00',
                       'module load cuda/10.0',
                       'module load python/3.6.3/virtenv',
                       'source activate lstm_pip']
        default.append('python lstm.py' +
                       ' -a ' + activity +
                       ' -c ' + file['folder'] +
                       ' -b "' + file['file'] + '"' +
                       ' -o True' +
                       ' -x False' +
                       ' -t 100')
        file_name = sup.folder_id()
        sup.create_text_file(default, os.path.join(output_folder, file_name))
def __init__(self, params):
    """constructor"""
    self.log = self.load_log(params)
    self.output_folder = os.path.join('output_files', sup.folder_id())
    # Split validation partitions
    self.log_train = pd.DataFrame()
    self.log_test = pd.DataFrame()
    # Activities and roles indexes
    self.ac_index = dict()
    self.index_ac = dict()
    self.rl_index = dict()
    self.index_rl = dict()
    # Training examples
    self.examples = dict()
    # Embedded dimensions
    self.ac_weights = list()
    self.rl_weights = list()
    # Preprocess the event-log
    self.preprocess(params)
    # Train model
    m_loader = mload.ModelLoader(params)
    m_loader.train(params['model_type'],
                   self.examples,
                   self.ac_weights,
                   self.rl_weights,
                   self.output_folder)
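# Usage sketch (assumption, names are hypothetical): the constructor above belongs to a
# trainer class that loads the log, preprocesses it and launches training in one step,
# so a caller only needs to build the params dict and instantiate the class.
params = {'file_name': 'Production.csv',  # hypothetical event-log name
          'model_type': 'shared_cat'}     # key read by the constructor above
trainer = ModelTrainer(params)            # hypothetical class name for this __init__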
def extract_features(parms):
    # ADAPTATION: the output file and auxiliary file routes were customised
    fname = os.path.splitext(parms['file_name'])[0]
    output_path = os.path.join('outputs', fname + '_complete')
    aux_files_path = os.path.join('outputs', sup.folder_id())
    process_temp_folder(aux_files_path)
    # ADAPTATION: support for .csv event logs and single timestamps was added
    df = read_log(parms)
    # Creating L*
    # prf[0] - all prefixes
    # prf[1] - ids of prefixes, so that we know who is who
    # prf[2] - metadata of each prefix (start/end time)
    # prf[3] - timestamps
    # prf[4] - complete prefixes
    prf = create_all_prefixes(df)
    pk.dump(prf[0], open(os.path.join(aux_files_path, fname + 'prefixes.p'), 'wb'))
    pk.dump(prf[1], open(os.path.join(aux_files_path, fname + 'ids.p'), 'wb'))
    pk.dump(prf[2], open(os.path.join(aux_files_path, fname + 'intervals.p'), 'wb'))
    pk.dump(prf[3], open(os.path.join(aux_files_path, fname + 'ts.p'), 'wb'))
    pk.dump(prf[4], open(os.path.join(aux_files_path, fname + 'complete.p'), 'wb'))
    # pk.dump(prf[5], open('outcome.p', 'wb'))
    prefixes = list(unpk(os.path.join(aux_files_path, fname + 'prefixes.p')))
    ids = list(unpk(os.path.join(aux_files_path, fname + 'ids.p')))
    intervals = list(unpk(os.path.join(aux_files_path, fname + 'intervals.p')))
    # ts = list(unpk(os.path.join(aux_files_path, 'ts.p')))
    complete = list(unpk(os.path.join(aux_files_path, fname + 'complete.p')))
    int_start = []
    int_end = []
    int_event_id = []
    for i in intervals:
        int_start.append(i[0])
        int_end.append(i[1])
        int_event_id.append(i[2])
    df_prefixes = pd.DataFrame({'id': ids,
                                'event_id': int_event_id,
                                'start_time': int_start,
                                'end_time': int_end,
                                'complete': complete})  # , 'outcome': outcomes})
    df_prefixes['prefix'] = ''
    for i, p in enumerate(prefixes):
        df_prefixes.at[i, 'prefix'] = p  # ','.join(p)
    dataset = feature_encoding_new(df_prefixes)
    # ADAPTATION: the outcome Y was removed since it is not used in our approach;
    # additionally, the features of all prefixes were calculated
    intercase_df = pd.DataFrame(dataset,
                                columns=['id', 'event_id', 'elapsed', 'lasttask',
                                         'l1', 'l2', 'l3',
                                         'city1', 'city2', 'city3', 'city4', 'city5',
                                         'snap1', 'snap2', 'snap3', 'snap4', 'snap5'])
    # ADAPTATION: the merge with the original event log was included
    df = df.merge(intercase_df, on='event_id', how='left')
    df = df.drop(['end_time', 'start_time', 'event_id', 'id', 'elapsed', 'lasttask'],
                 axis=1)
    df.to_csv(output_path + '.csv', header=True)
    process_temp_folder(aux_files_path)
    os.rmdir(aux_files_path)
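# Illustrative helper (assumption): `unpk` is referenced above but not shown in this
# listing; it plausibly just deserialises one of the pickle files written with pk.dump.
def unpk(path):
    """Load a pickled object from disk (illustrative only)."""
    with open(path, 'rb') as handle:
        return pk.load(handle)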
def train(path_trainings_log, event_log_name, id_key_for_log):
    # Load the training set
    log_train = pd.read_csv(path_trainings_log, encoding='unicode_escape')
    ac_index = dict(activities_df.values.tolist())
    rl_index = dict(roles_df.values.tolist())
    index_ac = {v: k for k, v in ac_index.items()}
    index_rl = {v: k for k, v in rl_index.items()}
    equi = {'ac_index': 'activities', 'rl_index': 'roles', 'dur_norm': 'times'}
    columns = list(equi.keys())
    vec = {'prefixes': dict(),
           'next_evt': dict(),
           'max_dur': np.max(log_train.dur)}
    temp_data = list()
    log_df = log_train.to_dict('records')
    key = 'end_timestamp'
    # Sort events by case id and end timestamp
    log_df = sorted(log_df, key=lambda x: (x['caseid'], x[key]))
    for key, group in itertools.groupby(log_df, key=lambda x: x['caseid']):
        trace = list(group)
        temp_dict = dict()
        for x in columns:
            serie = [y[x] for y in trace]
            if x == 'ac_index':
                serie.insert(0, ac_index['Start'])
                serie.append(ac_index['End'])
            elif x == 'rl_index':
                serie.insert(0, rl_index['Start'])
                serie.append(rl_index['End'])
            else:
                serie.insert(0, 0)
                serie.append(0)
            temp_dict = {**{x: serie}, **temp_dict}
        temp_dict = {**{'caseid': key}, **temp_dict}
        temp_data.append(temp_dict)
    # n-gram definition
    for i, _ in enumerate(temp_data):
        for x in columns:
            serie = list(ngrams(temp_data[i][x], parameters['n_size'],
                                pad_left=True, left_pad_symbol=0))
            print("serie", i, x, serie)
            y_serie = [x[-1] for x in serie]
            serie = serie[:-1]
            # print("serie", i, x, serie)
            y_serie = y_serie[1:]
            # print("y_serie", i, x, y_serie)
            vec['prefixes'][equi[x]] = (vec['prefixes'][equi[x]] + serie
                                        if i > 0 else serie)
            vec['next_evt'][equi[x]] = (vec['next_evt'][equi[x]] + y_serie
                                        if i > 0 else y_serie)
    # Transform task, dur and role prefixes into vectors
    for value in equi.values():
        vec['prefixes'][value] = np.array(vec['prefixes'][value])
        vec['next_evt'][value] = np.array(vec['next_evt'][value])
    # Reshape dur to (prefixes, n-gram size, 1), i.e. time-distributed
    vec['prefixes']['times'] = vec['prefixes']['times'].reshape(
        (vec['prefixes']['times'].shape[0],
         vec['prefixes']['times'].shape[1], 1))
    # One-hot encode target values
    vec['next_evt']['activities'] = ku.to_categorical(
        vec['next_evt']['activities'], num_classes=len(ac_index))
    vec['next_evt']['roles'] = ku.to_categorical(
        vec['next_evt']['roles'], num_classes=len(rl_index))
    # Load embedded matrix
    ac_weights = load_embedded(index_ac, 'ac_' + event_log_name + '.emb')
    rl_weights = load_embedded(index_rl, 'rl_' + event_log_name + '.emb')
    folder_id = sup.folder_id() + id_key_for_log
    output_folder = os.path.join('output_files', folder_id)
    # Export parameters
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
        os.makedirs(os.path.join(output_folder, 'parameters'))
    f = open(os.path.join(output_folder, 'description'), "w")
    f.write(path_trainings_log)
    f.write(event_log_name)
    f.write(id_key_for_log)
    f.close()
    parameters['index_ac'] = index_ac
    parameters['index_rl'] = index_rl
    if parameters['model_type'] in ['shared_cat', 'shared_cat_inter']:
        print("IF")
        parameters['dim'] = dict(
            samples=str(vec['prefixes']['activities'].shape[0]),
            time_dim=str(vec['prefixes']['activities'].shape[1]),
            features=str(len(ac_index)))
    else:
        parameters['dim'] = dict(
            samples=str(vec['encoder_input_data']['activities'].shape[0]),
            time_dim=str(vec['encoder_input_data']['activities'].shape[1]),
            features=str(len(ac_index)))
    parameters['max_dur'] = str(vec['max_dur'])
    sup.create_json(parameters, os.path.join(output_folder, 'parameters',
                                             'model_parameters.json'))
    # Train the model
    m_loader = mload.ModelLoader(parameters)
    m_loader.train(parameters['model_type'], vec, ac_weights, rl_weights,
                   output_folder)
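# Worked example (illustrative, matching the ngrams call above with pad_left=True and
# left_pad_symbol=0): for a toy activity sequence [1, 2, 3] and n_size=5, the prefixes
# and next-event targets pair up as follows.
from nltk.util import ngrams

toy = [1, 2, 3]
grams = list(ngrams(toy, 5, pad_left=True, left_pad_symbol=0))
# grams == [(0, 0, 0, 0, 1), (0, 0, 0, 1, 2), (0, 0, 1, 2, 3)]
targets = [g[-1] for g in grams]   # last event of every n-gram
prefixes = grams[:-1]              # drop the full-trace gram
targets = targets[1:]              # align so prefix i predicts targets[i]
# prefixes == [(0, 0, 0, 0, 1), (0, 0, 0, 1, 2)]
# targets  == [2, 3]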
def training_model(timeformat, args, no_loops=False):
    """Main method of the training module.

    Args:
        timeformat (str): event-log date-time format.
        args (dict): parameters for training the network.
        no_loops (boolean): remove loops from the event-log (optional).
    """
    parameters = dict()
    log = lr.LogReader(os.path.join('input_files', args['file_name']),
                       timeformat, timeformat, one_timestamp=True)
    _, resource_table = rl.read_resource_pool(log, sim_percentage=0.50)
    # Role discovery
    log_df_resources = pd.DataFrame.from_records(resource_table)
    log_df_resources = log_df_resources.rename(index=str, columns={"resource": "user"})
    # Dataframe creation
    log_df = pd.DataFrame.from_records(log.data)
    log_df = log_df.merge(log_df_resources, on='user', how='left')
    log_df = log_df[log_df.task != 'Start']
    log_df = log_df[log_df.task != 'End']
    log_df = log_df.reset_index(drop=True)
    if no_loops:
        log_df = nsup.reduce_loops(log_df)
    # Index creation
    ac_index = create_index(log_df, 'task')
    ac_index['start'] = 0
    ac_index['end'] = len(ac_index)
    index_ac = {v: k for k, v in ac_index.items()}
    rl_index = create_index(log_df, 'role')
    rl_index['start'] = 0
    rl_index['end'] = len(rl_index)
    index_rl = {v: k for k, v in rl_index.items()}
    # Load embedded matrix
    ac_weights = load_embedded(index_ac,
                               'ac_' + args['file_name'].split('.')[0] + '.emb')
    rl_weights = load_embedded(index_rl,
                               'rl_' + args['file_name'].split('.')[0] + '.emb')
    # Calculate relative times
    log_df = add_calculated_features(log_df, ac_index, rl_index)
    # Split validation datasets
    log_df_train, log_df_test = nsup.split_train_test(log_df, 0.3)  # 70%/30%
    # Input vectorization
    vec = vectorization(log_df_train, ac_index, rl_index, args)
    # Parameters export
    output_folder = os.path.join('output_files', sup.folder_id())
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
        os.makedirs(os.path.join(output_folder, 'parameters'))
    parameters['event_log'] = args['file_name']
    parameters['exp_desc'] = args
    parameters['index_ac'] = index_ac
    parameters['index_rl'] = index_rl
    parameters['dim'] = dict(samples=str(vec['prefixes']['x_ac_inp'].shape[0]),
                             time_dim=str(vec['prefixes']['x_ac_inp'].shape[1]),
                             features=str(len(ac_index)))
    parameters['max_tbtw'] = vec['max_tbtw']
    sup.create_json(parameters, os.path.join(output_folder, 'parameters',
                                             'model_parameters.json'))
    sup.create_csv_file_header(log_df_test.to_dict('records'),
                               os.path.join(output_folder, 'parameters',
                                            'test_log.csv'))
    if args['model_type'] == 'joint':
        mj.training_model(vec, ac_weights, rl_weights, output_folder, args)
    elif args['model_type'] == 'shared':
        msh.training_model(vec, ac_weights, rl_weights, output_folder, args)
    elif args['model_type'] == 'specialized':
        msp.training_model(vec, ac_weights, rl_weights, output_folder, args)
    elif args['model_type'] == 'concatenated':
        mcat.training_model(vec, ac_weights, rl_weights, output_folder, args)
    elif args['model_type'] == 'shared_cat':
        mshcat.training_model(vec, ac_weights, rl_weights, output_folder, args)
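# Illustrative sketch (assumption): `create_index` is not included in this listing; it
# plausibly assigns consecutive integer codes to the distinct values of a column, starting
# at 1 so that index 0 stays free for the artificial 'start' label added above.
def create_index(log_df, column):
    """Map each distinct value of `column` to an integer alias (illustrative only)."""
    values = sorted(set(log_df[column].values.tolist()))
    return {value: i + 1 for i, value in enumerate(values)}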