def read_stats(settings, bpmn, rep):
    """Reads the stats of one simulation repetition.
    Args:
        settings (dict): path to jar and file names.
        bpmn: BPMN model of the process.
        rep (int): repetition number.
    """
    m_settings = dict()
    m_settings['output'] = settings['output']
    m_settings['file'] = settings['file']
    column_names = {'resource': 'user'}
    m_settings['read_options'] = settings['read_options']
    m_settings['read_options']['timeformat'] = '%Y-%m-%d %H:%M:%S.%f'
    m_settings['read_options']['column_names'] = column_names
    temp = lr.LogReader(
        os.path.join(m_settings['output'], 'sim_data',
                     m_settings['file'].split('.')[0]
                     + '_' + str(rep + 1) + '.csv'),
        m_settings['read_options'])
    process_graph = gph.create_process_structure(bpmn)
    results_replayer = rpl.LogReplayer(process_graph, temp, settings,
                                       source='simulation',
                                       run_num=rep + 1)
    temp_stats = results_replayer.process_stats
    temp_stats['role'] = temp_stats['resource']
    return temp_stats
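# Illustrative sketch only: the shape of the `settings` dict that read_stats
# expects, inferred from the keys it accesses. All values below are
# hypothetical placeholders, not defaults of the pipeline.
example_settings = {
    'output': 'outputs/20200101_experiment',   # folder containing 'sim_data'
    'file': 'example_log.xes',                 # original event-log file name
    'read_options': {
        # 'timeformat' and 'column_names' are overwritten inside read_stats
        'timeformat': '%Y-%m-%d %H:%M:%S.%f',
        'column_names': {'resource': 'user'},
    },
}
# stats = read_stats(example_settings, bpmn, rep=0)  # `bpmn` comes from BpmnReader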
def read_log(parms):
    """Reads the event log and derives start/end times for each event."""
    parms['read_options']['filter_d_attrib'] = True
    log = lr.LogReader(os.path.join('input_files', parms['file_name']),
                       parms['read_options'])
    log_df = pd.DataFrame(log.data)
    log_df['end_time'] = log_df['end_timestamp'].astype(np.int64) // 10**9
    if parms['one_timestamp']:
        log_df = log_df.to_dict('records')
        log_df = sorted(log_df, key=lambda x: x['caseid'])
        for key, group in itertools.groupby(log_df, key=lambda x: x['caseid']):
            events = list(group)
            events = sorted(events, key=itemgetter('end_timestamp'))
            for i in range(0, len(events)):
                # In the one-timestamp approach the first activity of the
                # trace is taken as (nearly) instantaneous, since there is no
                # previous timestamp to derive a duration from
                if i == 0:
                    events[i]['start_time'] = events[i]['end_time'] - 1
                else:
                    events[i]['start_time'] = events[i - 1]['end_time']
        log_df = pd.DataFrame(log_df)
    else:
        log_df['start_time'] = log_df['start_timestamp'].astype(
            np.int64) // 10**9
    log_df = log_df[log_df.task != 'Start']
    log_df = log_df[log_df.task != 'End']
    log_df = log_df.reset_index(drop=True)
    log_df.sort_values(by=['caseid', 'start_time'],
                       ascending=[True, True], inplace=True)
    log_df.reset_index(inplace=True, drop=True)
    log_df['event_id'] = log_df.index
    return log_df
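# Minimal, self-contained sketch of the one-timestamp rule used above: each
# event starts when the previous event of the same case ended, and the first
# event of a case is treated as (nearly) instantaneous. Toy data only.
import itertools
from operator import itemgetter

toy = [{'caseid': 'c1', 'end_time': 100},
       {'caseid': 'c1', 'end_time': 160},
       {'caseid': 'c1', 'end_time': 200}]
for _, group in itertools.groupby(sorted(toy, key=itemgetter('caseid')),
                                  key=itemgetter('caseid')):
    events = sorted(group, key=itemgetter('end_time'))
    for i, event in enumerate(events):
        event['start_time'] = (event['end_time'] - 1 if i == 0
                               else events[i - 1]['end_time'])
print(toy)
# [{'caseid': 'c1', 'end_time': 100, 'start_time': 99},
#  {'caseid': 'c1', 'end_time': 160, 'start_time': 100},
#  {'caseid': 'c1', 'end_time': 200, 'start_time': 160}]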
def training_model(parameters, timeformat, no_loops=False):
    """Main method of the embedding training module.
    Args:
        parameters (dict): parameters for training the embedding network.
        timeformat (str): event-log date-time format.
        no_loops (boolean): remove loops from the event-log (optional).
    """
    log = lr.LogReader(os.path.join('input_files', parameters['file_name']),
                       timeformat, timeformat, one_timestamp=True)
    # Pre-processing tasks
    _, resource_table = rl.read_resource_pool(log, sim_percentage=0.50)  # Role discovery
    log_df_resources = pd.DataFrame.from_records(resource_table)
    log_df_resources = log_df_resources.rename(index=str,
                                               columns={"resource": "user"})
    # Dataframe creation
    log_df = pd.DataFrame.from_records(log.data)
    log_df = log_df.merge(log_df_resources, on='user', how='left')
    log_df = log_df[log_df.task != 'Start']
    log_df = log_df[log_df.task != 'End']
    log_df = log_df.reset_index(drop=True)
    if no_loops:
        log_df = nsup.reduce_loops(log_df)
    # Index creation
    ac_index = create_index(log_df, 'task')
    ac_index['start'] = 0
    ac_index['end'] = len(ac_index)
    index_ac = {v: k for k, v in ac_index.items()}

    rl_index = create_index(log_df, 'role')
    rl_index['start'] = 0
    rl_index['end'] = len(rl_index)
    index_rl = {v: k for k, v in rl_index.items()}

    # Define the number of embedding dimensions as the fourth root of the
    # number of activity-role combinations
    dim_number = math.ceil(
        len(list(itertools.product(*[list(ac_index.items()),
                                     list(rl_index.items())])))**0.25)
    ac_weights, rl_weights = train_embedded(log_df, ac_index,
                                            rl_index, dim_number)

    sup.create_file_from_list(
        reformat_matrix(index_ac, ac_weights),
        os.path.join('input_files', 'embedded_matix',
                     'ac_' + parameters['file_name'].split('.')[0] + '.emb'))
    sup.create_file_from_list(
        reformat_matrix(index_rl, rl_weights),
        os.path.join('input_files', 'embedded_matix',
                     'rl_' + parameters['file_name'].split('.')[0] + '.emb'))
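# Worked example of the dimension rule above (standalone, illustrative figures
# only): with 25 activity categories and 4 role categories there are 100
# activity-role combinations, and ceil(100 ** 0.25) = ceil(3.16...) = 4
# embedding dimensions.
import math

n_activities, n_roles = 25, 4
dim_number = math.ceil((n_activities * n_roles) ** 0.25)
print(dim_number)  # 4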
def load_log(params):
    """Loads the event log and drops helper columns and dummy activities."""
    params['read_options']['filter_d_attrib'] = False
    log = lr.LogReader(os.path.join('input_files', params['file_name']),
                       params['read_options'])
    log_df = pd.DataFrame(log.data)
    # Drop columns added by previous processing steps, if present
    if set(['Unnamed: 0', 'role']).issubset(set(log_df.columns)):
        log_df.drop(columns=['Unnamed: 0', 'role'], inplace=True)
    log_df = log_df[~log_df.task.isin(['Start', 'End'])]
    return log_df
def measure_stats(settings, bpmn, rep):
    """Measures process statistics from one BIMP simulation repetition.
    Args:
        settings (dict): path to jar and file names.
        bpmn: BPMN model of the process.
        rep (int): repetition number.
    """
    timeformat = '%Y-%m-%d %H:%M:%S.%f'
    temp = lr.LogReader(os.path.join(settings['output'], 'sim_data',
                                     settings['file'].split('.')[0]
                                     + '_' + str(rep + 1) + '.csv'),
                        timeformat)
    process_graph = gph.create_process_structure(bpmn)
    _, _, temp_stats = rpl.replay(process_graph, temp,
                                  source='simulation', run_num=rep + 1)
    temp_stats = pd.DataFrame.from_records(temp_stats)
    temp_stats['role'] = temp_stats['resource']
    return temp_stats
def read_inputs(timeformat, log_columns_numbers, log_file_name,
                bpmn_file_name, ns_include=True):
    """Reads and parses the event log and the BPMN model."""
    log, bpmn = None, None
    try:
        log = lr.LogReader(log_file_name, timeformat, timeformat,
                           log_columns_numbers, ns_include)
        bpmn = br.BpmnReader(bpmn_file_name)
    except IOError as e:
        print('Input error: ' + str(e))
    except Exception as e:
        print('Unexpected error...' + '\n' + str(e))
    return log, bpmn
def read_inputs(self) -> None:
    # Output folder creation
    if not os.path.exists(self.settings['output']):
        os.makedirs(self.settings['output'])
        os.makedirs(os.path.join(self.settings['output'], 'sim_data'))
    # Event log reading
    self.log = lr.LogReader(
        os.path.join(self.settings['input'], self.settings['file']),
        self.settings['read_options'])
    # Create customized event-log for the external tools
    xes.XesWriter(self.log, self.settings)
    # Execution steps
    self.mining_structure(self.settings)
    self.bpmn = br.BpmnReader(
        os.path.join(self.settings['output'],
                     self.settings['file'].split('.')[0] + '.bpmn'))
    self.process_graph = gph.create_process_structure(self.bpmn)
def read_inputs(self, **kwargs) -> None:
    # Output folder creation
    if not os.path.exists(self.settings['output']):
        os.makedirs(self.settings['output'])
        os.makedirs(os.path.join(self.settings['output'], 'sim_data'))
    # Event log reading
    self.log = lr.LogReader(
        os.path.join(self.settings['input'], self.settings['file']),
        self.settings['read_options'])
    # Time splitting 80-20
    self.split_timeline(0.2,
                        self.settings['read_options']['one_timestamp'])
    # Create customized event-log for the external tools
    xes.XesWriter(self.log, self.settings)
    # Execution steps
    self.mining_structure(self.settings)
    self.bpmn = br.BpmnReader(
        os.path.join(self.settings['output'],
                     self.settings['file'].split('.')[0] + '.bpmn'))
    self.process_graph = gph.create_process_structure(self.bpmn)
    # Replaying test partition
    print("-- Reading test partition --")
    try:
        test_replayer = rpl.LogReplayer(
            self.process_graph,
            self.get_traces(self.log_test,
                            self.settings['read_options']['one_timestamp']),
            self.settings)
        self.process_stats = test_replayer.process_stats
        self.process_stats = pd.DataFrame.from_records(self.process_stats)
        self.log_test = test_replayer.conformant_traces
    except AssertionError as e:
        print(e)
        self.status = STATUS_FAIL
        print("-- End of trial --")
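# split_timeline is not shown in this section; below is a minimal, hypothetical
# sketch of a temporal 80/20 hold-out consistent with the call above: order
# events by timestamp and keep the last 20% for testing. The project's actual
# implementation may differ (e.g., splitting by whole cases).
import pandas as pd

def split_timeline_sketch(log_df, test_size=0.2, timestamp_col='end_timestamp'):
    # Sort chronologically, then cut the tail of the timeline as the test set
    log_df = log_df.sort_values(timestamp_col).reset_index(drop=True)
    cut = int(len(log_df) * (1 - test_size))
    return log_df.iloc[:cut], log_df.iloc[cut:]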
def single_exec(settings):
    """Main application method."""
    # Read settings from config file
    settings = read_settings(settings)
    # Output folder creation
    if not os.path.exists(settings['output']):
        os.makedirs(settings['output'])
        os.makedirs(os.path.join(settings['output'], 'sim_data'))
    # Copy event-log to output folder
    copyfile(os.path.join(settings['input'], settings['file']),
             os.path.join(settings['output'], settings['file']))
    # Event log reading
    log = lr.LogReader(os.path.join(settings['output'], settings['file']),
                       settings['timeformat'])
    # Execution steps
    mining_structure(settings, settings['epsilon'], settings['eta'])
    bpmn = br.BpmnReader(os.path.join(settings['output'],
                                      settings['file'].split('.')[0] + '.bpmn'))
    process_graph = gph.create_process_structure(bpmn)
    # Evaluate alignment
    chk.evaluate_alignment(process_graph, log, settings)

    print("-- Mining Simulation Parameters --")
    parameters, process_stats = par.extract_parameters(log, bpmn, process_graph)
    xml.print_parameters(os.path.join(settings['output'],
                                      settings['file'].split('.')[0] + '.bpmn'),
                         os.path.join(settings['output'],
                                      settings['file'].split('.')[0] + '.bpmn'),
                         parameters)
    response = list()
    status = 'ok'
    sim_values = list()
    if settings['simulation']:
        process_stats = pd.DataFrame.from_records(process_stats)
        for rep in range(settings['repetitions']):
            print("Experiment #" + str(rep + 1))
            try:
                simulate(settings, rep)
                process_stats = process_stats.append(
                    measure_stats(settings, bpmn, rep),
                    ignore_index=True, sort=False)
                sim_values.append(gen.mesurement(process_stats, settings, rep))
            except Exception:
                status = 'fail'
                break
    data = {'alg_manag': settings['alg_manag'],
            'epsilon': settings['epsilon'],
            'eta': settings['eta'],
            'output': settings['output']}
    if status == 'ok':
        loss = (1 - np.mean([x['act_norm'] for x in sim_values]))
        if loss < 0:
            response.append({**{'loss': loss, 'status': 'fail'}, **data})
        else:
            response.append({**{'loss': loss, 'status': status}, **data})
    else:
        response.append({**{'loss': 1, 'status': status}, **data})
    return response
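# Minimal sketch (toy numbers only) of the objective computed above: the loss
# is one minus the mean of the per-repetition 'act_norm' similarity scores,
# so higher similarity drives the loss towards zero.
import numpy as np

toy_sim_values = [{'act_norm': 0.82}, {'act_norm': 0.78}, {'act_norm': 0.80}]
loss = 1 - np.mean([x['act_norm'] for x in toy_sim_values])
print(round(loss, 2))  # 0.2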
def training_model(timeformat, args, no_loops=False):
    """Main method of the training module.
    Args:
        timeformat (str): event-log date-time format.
        args (dict): parameters for training the network.
        no_loops (boolean): remove loops from the event-log (optional).
    """
    parameters = dict()
    log = lr.LogReader(os.path.join('input_files', args['file_name']),
                       timeformat, timeformat, one_timestamp=True)
    _, resource_table = rl.read_resource_pool(log, sim_percentage=0.50)  # Role discovery
    log_df_resources = pd.DataFrame.from_records(resource_table)
    log_df_resources = log_df_resources.rename(index=str,
                                               columns={"resource": "user"})
    # Dataframe creation
    log_df = pd.DataFrame.from_records(log.data)
    log_df = log_df.merge(log_df_resources, on='user', how='left')
    log_df = log_df[log_df.task != 'Start']
    log_df = log_df[log_df.task != 'End']
    log_df = log_df.reset_index(drop=True)
    if no_loops:
        log_df = nsup.reduce_loops(log_df)
    # Index creation
    ac_index = create_index(log_df, 'task')
    ac_index['start'] = 0
    ac_index['end'] = len(ac_index)
    index_ac = {v: k for k, v in ac_index.items()}

    rl_index = create_index(log_df, 'role')
    rl_index['start'] = 0
    rl_index['end'] = len(rl_index)
    index_rl = {v: k for k, v in rl_index.items()}

    # Load embedded matrix
    ac_weights = load_embedded(index_ac,
                               'ac_' + args['file_name'].split('.')[0] + '.emb')
    rl_weights = load_embedded(index_rl,
                               'rl_' + args['file_name'].split('.')[0] + '.emb')
    # Calculate relative times
    log_df = add_calculated_features(log_df, ac_index, rl_index)
    # Split validation datasets
    log_df_train, log_df_test = nsup.split_train_test(log_df, 0.3)  # 70%/30%
    # Input vectorization
    vec = vectorization(log_df_train, ac_index, rl_index, args)
    # Parameters export
    output_folder = os.path.join('output_files', sup.folder_id())
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
        os.makedirs(os.path.join(output_folder, 'parameters'))
    parameters['event_log'] = args['file_name']
    parameters['exp_desc'] = args
    parameters['index_ac'] = index_ac
    parameters['index_rl'] = index_rl
    parameters['dim'] = dict(
        samples=str(vec['prefixes']['x_ac_inp'].shape[0]),
        time_dim=str(vec['prefixes']['x_ac_inp'].shape[1]),
        features=str(len(ac_index)))
    parameters['max_tbtw'] = vec['max_tbtw']
    sup.create_json(parameters,
                    os.path.join(output_folder, 'parameters',
                                 'model_parameters.json'))
    sup.create_csv_file_header(log_df_test.to_dict('records'),
                               os.path.join(output_folder, 'parameters',
                                            'test_log.csv'))
    # Train the selected model architecture
    if args['model_type'] == 'joint':
        mj.training_model(vec, ac_weights, rl_weights, output_folder, args)
    elif args['model_type'] == 'shared':
        msh.training_model(vec, ac_weights, rl_weights, output_folder, args)
    elif args['model_type'] == 'specialized':
        msp.training_model(vec, ac_weights, rl_weights, output_folder, args)
    elif args['model_type'] == 'concatenated':
        mcat.training_model(vec, ac_weights, rl_weights, output_folder, args)
    elif args['model_type'] == 'shared_cat':
        mshcat.training_model(vec, ac_weights, rl_weights, output_folder, args)
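# create_index is used above but not shown in this section. A plausible
# minimal version, consistent with how ac_index / rl_index are used (category
# -> integer starting at 1, with 'start' = 0 and 'end' added by the caller),
# could look like the sketch below. This is an assumption for illustration,
# not necessarily the project's implementation.
def create_index_sketch(log_df, column):
    # Map each distinct categorical value to a stable integer index
    index = dict()
    for i, value in enumerate(sorted(log_df[column].dropna().unique()), start=1):
        index[value] = i
    return index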