def create_process_structure(bpmn, drawing=False):
    # Loading of the BPMN structure into a directed graph
    g = load_process_structure(bpmn)
    if drawing:
        graph_network_x(g)
    sup.print_done_task()
    return g
def get_csv_events_data(self):
    """Reads and parses all the events information from a csv file."""
    sup.print_performed_task('Reading log traces ')
    log = pd.read_csv(self.input, dtype={'user': str})
    if self.one_timestamp:
        self.column_names['Complete Timestamp'] = 'end_timestamp'
        log = log.rename(columns=self.column_names)
        log = log.astype({'caseid': object})
        log = (log[(log.task != 'Start') & (log.task != 'End')]
               .reset_index(drop=True))
        if self.filter_d_attrib:
            log = log[['caseid', 'task', 'user', 'end_timestamp']]
        log['end_timestamp'] = pd.to_datetime(log['end_timestamp'],
                                              format=self.timeformat)
    else:
        self.column_names['Start Timestamp'] = 'start_timestamp'
        self.column_names['Complete Timestamp'] = 'end_timestamp'
        log = log.rename(columns=self.column_names)
        log = log.astype({'caseid': object})
        log = (log[(log.task != 'Start') & (log.task != 'End')]
               .reset_index(drop=True))
        if self.filter_d_attrib:
            log = log[['caseid', 'task', 'user',
                       'start_timestamp', 'end_timestamp']]
        log['start_timestamp'] = pd.to_datetime(log['start_timestamp'],
                                                format=self.timeformat)
        log['end_timestamp'] = pd.to_datetime(log['end_timestamp'],
                                              format=self.timeformat)
    log['user'] = log['user'].fillna('SYS')
    self.data = log.to_dict('records')
    self.append_csv_start_end()
    self.split_event_transitions()
    sup.print_done_task()
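# --- Example (sketch) ----------------------------------------------------
# A minimal, self-contained illustration of the column-mapping and
# timestamp-parsing pattern the reader above relies on. The column names,
# time format, and sample rows are assumptions for illustration only.
import io

import pandas as pd

CSV = io.StringIO(
    "Case ID,Activity,user,Complete Timestamp\n"
    "1,A,ana,2019-05-01 10:00:00\n"
    "1,B,bob,2019-05-01 10:30:00\n")
column_names = {'Case ID': 'caseid',
                'Activity': 'task',
                'Complete Timestamp': 'end_timestamp'}
log = pd.read_csv(CSV, dtype={'user': str}).rename(columns=column_names)
log['end_timestamp'] = pd.to_datetime(log['end_timestamp'],
                                      format='%Y-%m-%d %H:%M:%S')
print(log.to_dict('records'))
# --------------------------------------------------------------------------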
def align_traces(self):
    """This method is the kernel of the alignment process."""
    aligned_traces = list()
    i = 0
    size = len(self.traces)
    for trace in self.traces:
        # Remove Start and End events
        trace = [x for x in trace if x['task'] not in ['Start', 'End']]
        try:
            # Alignment of each trace
            aligned_trace = self.process_trace(trace)
            if self.one_timestamp:
                aligned_trace = sorted(aligned_trace,
                                       key=itemgetter('end_timestamp'))
                aligned_trace = self.append_start_end(aligned_trace)
                aligned_traces.extend(aligned_trace)
            else:
                # Completeness check and reformatting
                aligned_trace = self.trace_verification(aligned_trace)
                if aligned_trace:
                    aligned_trace = self.append_start_end(aligned_trace)
                    aligned_traces.extend(aligned_trace)
        except Exception:
            # Skip traces that cannot be aligned
            # (the original used the no-op statement `next` here)
            pass
        sup.print_progress(((i / max(size - 1, 1)) * 100),
                           'Aligning log traces with model ')
        i += 1
    sup.print_done_task()
    return aligned_traces
def predict(model, prefixes, imp, max_trace_size):
    """Generate business process suffixes using a keras trained model.
    Args:
        model (keras model): keras trained model.
        prefixes (list): list of prefixes.
        imp (str): method of next event selection.
        max_trace_size (int): max size of the trace.
    """
    # Generation of predictions
    for prefix in prefixes:
        # Activities and roles input shape(1,5)
        x_ac_ngram = np.append(
            np.zeros(DIM['time_dim']),
            np.array(prefix['ac_pref']),
            axis=0)[-DIM['time_dim']:].reshape((1, DIM['time_dim']))
        x_rl_ngram = np.append(
            np.zeros(DIM['time_dim']),
            np.array(prefix['rl_pref']),
            axis=0)[-DIM['time_dim']:].reshape((1, DIM['time_dim']))
        # times input shape(1,5,1)
        x_t_ngram = np.array([np.append(
            np.zeros(DIM['time_dim']),
            np.array(prefix['t_pref']),
            axis=0)[-DIM['time_dim']:].reshape((DIM['time_dim'], 1))])
        acum_tbtw = 0
        ac_suf, rl_suf = list(), list()
        for _ in range(1, max_trace_size):
            predictions = model.predict([x_ac_ngram, x_rl_ngram, x_t_ngram])
            if imp == 'Random Choice':
                # Use this to get a random choice following as PDF the predictions
                pos = np.random.choice(np.arange(0, len(predictions[0][0])),
                                       p=predictions[0][0])
                pos1 = np.random.choice(np.arange(0, len(predictions[1][0])),
                                        p=predictions[1][0])
            elif imp == 'Arg Max':
                # Use this to get the max prediction
                pos = np.argmax(predictions[0][0])
                pos1 = np.argmax(predictions[1][0])
            # Update the n-grams with the new predictions
            x_ac_ngram = np.append(x_ac_ngram, [[pos]], axis=1)
            x_ac_ngram = np.delete(x_ac_ngram, 0, 1)
            x_rl_ngram = np.append(x_rl_ngram, [[pos1]], axis=1)
            x_rl_ngram = np.delete(x_rl_ngram, 0, 1)
            x_t_ngram = np.append(x_t_ngram, [predictions[2]], axis=1)
            x_t_ngram = np.delete(x_t_ngram, 0, 1)
            # Stop if the next prediction is the end of the trace,
            # otherwise continue until the defined max_size
            ac_suf.append(pos)
            rl_suf.append(pos1)
            if EXP['norm_method'] == 'lognorm':
                acum_tbtw += math.expm1(
                    predictions[2][0][0] * TBTW['max_tbtw'])
            else:
                acum_tbtw += np.rint(
                    predictions[2][0][0] * TBTW['max_tbtw'])
            if INDEX_AC[pos] == 'end':
                break
        prefix['ac_suff_pred'] = ac_suf
        prefix['rl_suff_pred'] = rl_suf
        prefix['rem_time_pred'] = acum_tbtw
    sup.print_done_task()
    return prefixes
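# --- Example (sketch) ----------------------------------------------------
# The prediction loop above maintains a fixed-size n-gram as a rolling
# window: append the newest index on axis 1, drop the oldest. A toy run
# with an assumed window size of 5:
import numpy as np

time_dim = 5
window = np.zeros((1, time_dim))  # e.g. an activity-index n-gram
for new_index in [3, 7, 2]:
    window = np.append(window, [[new_index]], axis=1)  # push newest
    window = np.delete(window, 0, 1)                   # pop oldest
print(window)  # [[0. 0. 3. 7. 2.]]
# --------------------------------------------------------------------------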
def define_interarrival_tasks(process_graph, conformed_traces):
    # Analysis of start tasks
    tasks = analize_first_tasks(process_graph)
    inter_arrival_times = find_inter_arrival(tasks, conformed_traces)
    # for task in tasks:
    #     process_graph.node[task]['type']
    sup.print_done_task()
    return inter_arrival_times
def replay(process_graph, traces):
    start_tasks_list, end_tasks_list = find_start_finish_tasks(process_graph)
    subsec_set = create_subsec_set(process_graph)
    parallel_gt_exec = parallel_execution_list(process_graph)
    not_conformant_traces = list()
    conformant_traces = list()
    for index in range(0, len(traces)):
        trace = traces[index][1:-1]  # Take out start and end event
        current_node = find_task_node(process_graph, trace[0]['task'])
        last_node = find_task_node(process_graph, trace[-1]['task'])
        # Check if it is a complete trace
        if (current_node in start_tasks_list) and (last_node in end_tasks_list):
            temp_gt_exec = parallel_gt_exec
            cursor = list()
            cursor.append(current_node)
            removal_allowed = True
            is_conformant = True
            for i in range(1, len(trace)):
                next_node = find_task_node(process_graph, trace[i]['task'])
                # Loop management
                if next_node == cursor[-1]:
                    process_graph.node[next_node]['executions'] += 1
                else:
                    try:
                        cursor, prev_node = update_cursor(
                            next_node, process_graph, cursor)
                    except Exception:
                        is_conformant = False
                        break
                    for element in reversed(cursor[:-1]):
                        # Process AND gate
                        if process_graph.node[element]['type'] == 'gate3':
                            gate = [d for d in temp_gt_exec
                                    if d['nod_num'] == element][0]
                            gate.update(dict(executed=gate['executed'] + 1))
                            if gate['executed'] < gate['num_paths']:
                                removal_allowed = False
                            else:
                                removal_allowed = True
                                cursor.remove(element)
                        # Process task
                        elif process_graph.node[element]['type'] == 'task':
                            if (element, next_node) in subsec_set:
                                if removal_allowed:
                                    cursor.remove(element)
                        # Process other
                        else:
                            if removal_allowed:
                                cursor.remove(element)
            if not is_conformant:
                not_conformant_traces.append(trace)
            else:
                # Append the original trace
                conformant_traces.append(traces[index])
        else:
            # Not a complete trace
            not_conformant_traces.append(trace)
        sup.print_progress(((index / max(len(traces) - 1, 1)) * 100),
                           'Replaying process traces ')
    sup.print_done_task()
    return conformant_traces, not_conformant_traces
def predict(model, prefixes, ac_alias, rl_alias, imp, max_trace_size):
    """Generate business process suffixes using a keras trained model.
    Args:
        model (keras model): keras trained model.
        prefixes (list): list of prefixes.
        ac_alias (dict): aliases of activities.
        rl_alias (dict): aliases of roles.
        imp (str): method of next event selection.
        max_trace_size (int): max size of the trace.
    """
    # Generation of predictions
    for prefix in prefixes:
        x_trace = list()
        x_ac_ngram = np.array([prefix['ac_pref']])
        x_rl_ngram = np.array([prefix['rl_pref']])
        x_t_ngram = np.array([prefix['t_pref']])
        acum_tbtw = 0
        ac_suf, rl_suf = '', ''
        for _ in range(1, max_trace_size):
            predictions = model.predict([x_ac_ngram, x_rl_ngram, x_t_ngram])
            if imp == 'Random Choice':
                # Use this to get a random choice following as PDF the predictions
                pos = np.random.choice(np.arange(0, len(predictions[0][0])),
                                       p=predictions[0][0])
                pos1 = np.random.choice(np.arange(0, len(predictions[1][0])),
                                        p=predictions[1][0])
            elif imp == 'Arg Max':
                # Use this to get the max prediction
                pos = np.argmax(predictions[0][0])
                pos1 = np.argmax(predictions[1][0])
            x_trace.append([pos, pos1, predictions[2][0][0]])
            # Add prediction to n_gram
            x_ac_ngram = np.append(x_ac_ngram, [[pos]], axis=1)
            x_ac_ngram = np.delete(x_ac_ngram, 0, 1)
            x_rl_ngram = np.append(x_rl_ngram, [[pos1]], axis=1)
            x_rl_ngram = np.delete(x_rl_ngram, 0, 1)
            x_t_ngram = np.append(x_t_ngram, [predictions[2]], axis=1)
            x_t_ngram = np.delete(x_t_ngram, 0, 1)
            # Stop if the next prediction is the end of the trace,
            # otherwise continue until the defined max_size
            if INDEX_AC[pos] == 'end':
                break
            else:
                ac_suf += ac_alias[pos]
                rl_suf += rl_alias[pos1]
                if EXP['norm_method'] == 'lognorm':
                    acum_tbtw += math.expm1(
                        predictions[2][0][0] * TBTW['max_tbtw'])
                else:
                    acum_tbtw += np.rint(
                        predictions[2][0][0] * TBTW['max_tbtw'])
        prefix['ac_suf_pred'] = ac_suf
        prefix['rl_suf_pred'] = rl_suf
        prefix['rem_time_pred'] = acum_tbtw
        # sup.print_progress((((case + 1) / num_cases) * 100),
        #                    'Generating process traces ')
        # case += 1
    sup.print_done_task()
    return prefixes
def get_xes_events_data(self, filename, start_timeformat, end_timeformat,
                        ns_include, one_timestamp):
    """Reads and parses all the events information from a xes file."""
    temp_data = list()
    tree = ET.parse(filename)
    root = tree.getroot()
    if ns_include:
        # TODO: review how to load the namespace automatically from the root
        ns = {'xes': root.tag.split('}')[0].strip('{')}
        tags = dict(trace='xes:trace', string='xes:string',
                    event='xes:event', date='xes:date')
    else:
        ns = {'xes': ''}
        tags = dict(trace='trace', string='string',
                    event='event', date='date')
    traces = root.findall(tags['trace'], ns)
    i = 0
    sup.print_performed_task('Reading log traces ')
    for trace in traces:
        caseid = ''
        for string in trace.findall(tags['string'], ns):
            if string.attrib['key'] == 'concept:name':
                caseid = string.attrib['value']
        for event in trace.findall(tags['event'], ns):
            task = ''
            user = ''
            event_type = ''
            complete_timestamp = ''
            for string in event.findall(tags['string'], ns):
                if string.attrib['key'] == 'concept:name':
                    task = string.attrib['value']
                if string.attrib['key'] == 'org:resource':
                    user = string.attrib['value']
                if string.attrib['key'] == 'lifecycle:transition':
                    event_type = string.attrib['value'].lower()
                if string.attrib['key'] == 'Complete_Timestamp':
                    complete_timestamp = string.attrib['value']
                    if complete_timestamp != 'End':
                        complete_timestamp = datetime.datetime.strptime(
                            complete_timestamp, end_timeformat)
            timestamp = ''
            for date in event.findall(tags['date'], ns):
                if date.attrib['key'] == 'time:timestamp':
                    timestamp = date.attrib['value']
                    try:
                        timestamp = datetime.datetime.strptime(
                            timestamp[:-6], start_timeformat)
                    except ValueError:
                        timestamp = datetime.datetime.strptime(
                            timestamp, start_timeformat)
            if not (task == '0' or task == '-1'):
                temp_data.append(
                    dict(caseid=caseid, task=task, event_type=event_type,
                         user=user, start_timestamp=timestamp,
                         end_timestamp=complete_timestamp))
        i += 1
    raw_data = temp_data
    temp_data = self.reorder_xes(temp_data, one_timestamp)
    sup.print_done_task()
    return temp_data, raw_data
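# --- Example (sketch) ----------------------------------------------------
# Why the strptime try/except above exists: XES timestamps often carry a
# UTC offset such as '+02:00' that a naive format string cannot parse, so
# the last six characters are stripped first and the unstripped value is
# only tried as a fallback. The format and value below are assumptions.
import datetime

stamp = '2019-05-01T10:00:00.000+02:00'
fmt = '%Y-%m-%dT%H:%M:%S.%f'
try:
    ts = datetime.datetime.strptime(stamp[:-6], fmt)
except ValueError:
    ts = datetime.datetime.strptime(stamp, fmt)
print(ts)  # 2019-05-01 10:00:00
# --------------------------------------------------------------------------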
def predict(model, prefixes, ac_alias, rl_alias, imp):
    """Predict the next event using a keras trained model and evaluate
    its accuracy against the expected event.
    Args:
        model (keras model): keras trained model.
        prefixes (list): list of prefixes.
        ac_alias (dict): aliases of activities.
        rl_alias (dict): aliases of roles.
        imp (str): method of next event selection.
    """
    # Generation of predictions
    for prefix in prefixes:
        # Activities and roles input shape(1,5)
        x_ac_ngram = np.append(
            np.zeros(DIM['time_dim']),
            np.array(prefix['ac_pref']),
            axis=0)[-DIM['time_dim']:].reshape((1, DIM['time_dim']))
        x_rl_ngram = np.append(
            np.zeros(DIM['time_dim']),
            np.array(prefix['rl_pref']),
            axis=0)[-DIM['time_dim']:].reshape((1, DIM['time_dim']))
        # times input shape(1,5,1)
        x_t_ngram = np.array([np.append(
            np.zeros(DIM['time_dim']),
            np.array(prefix['t_pref']),
            axis=0)[-DIM['time_dim']:].reshape((DIM['time_dim'], 1))])
        predictions = model.predict([x_ac_ngram, x_rl_ngram, x_t_ngram])
        if imp == 'Random Choice':
            # Use this to get a random choice following as PDF the predictions
            pos = np.random.choice(np.arange(0, len(predictions[0][0])),
                                   p=predictions[0][0])
            pos1 = np.random.choice(np.arange(0, len(predictions[1][0])),
                                    p=predictions[1][0])
        elif imp == 'Arg Max':
            # Use this to get the max prediction
            pos = np.argmax(predictions[0][0])
            pos1 = np.argmax(predictions[1][0])
        # Activities accuracy evaluation
        if pos == prefix['ac_next']:
            prefix['ac_true'] = 1
        else:
            prefix['ac_true'] = 0
        # Roles accuracy evaluation
        if pos1 == prefix['rl_next']:
            prefix['rl_true'] = 1
        else:
            prefix['rl_true'] = 0
    sup.print_done_task()
    return prefixes
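# --- Example (sketch) ----------------------------------------------------
# The two selection modes used throughout ('Random Choice' vs 'Arg Max'):
# sample the next index from the predicted distribution, or take its mode.
# The probability vector is a made-up example.
import numpy as np

probs = np.array([0.1, 0.6, 0.3])
sampled = np.random.choice(np.arange(0, len(probs)), p=probs)  # stochastic
greedy = np.argmax(probs)                                      # deterministic
print(sampled, greedy)  # e.g. 1 1
# --------------------------------------------------------------------------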
def role_discovery(data, drawing, sim_percentage):
    tasks = list(set(list(map(lambda x: x[0], data))))
    try:
        tasks.remove('Start')
    except Exception:
        pass
    tasks = [dict(index=i, data=tasks[i]) for i in range(0, len(tasks))]
    users = list(set(list(map(lambda x: x[1], data))))
    try:
        users.remove('Start')
    except Exception:
        pass
    users = [dict(index=i, data=users[i]) for i in range(0, len(users))]
    data_transform = list(map(
        lambda x: [find_index(tasks, x[0]), find_index(users, x[1])], data))
    unique = list(set(tuple(i) for i in data_transform))
    unique = [list(i) for i in unique]
    # Building of a task-size profile of task executions per resource
    profiles = build_profile(users, det_freq_matrix(unique, data_transform),
                             len(tasks))
    sup.print_performed_task('Analysing resource pool ')
    # Building of a correlation matrix between resource profiles
    correlation_matrix = det_correlation_matrix(profiles)
    # Creation of a relation network between resources
    g = nx.Graph()
    for user in users:
        g.add_node(user['index'])
    for relation in correlation_matrix:
        # Create edges between nodes, excluding the self-correlation of each
        # element and the pairs below the similarity threshold
        if (relation['distance'] > sim_percentage
                and relation['x'] != relation['y']):
            g.add_edge(relation['x'], relation['y'],
                       weight=relation['distance'])
    # Extraction of fully connected subgraphs as roles
    sub_graphs = list(nx.connected_component_subgraphs(g))
    # Role definition from the graph
    roles = role_definition(sub_graphs, users)
    # Plot creation (optional)
    if drawing:
        graph_network(g, sub_graphs)
    sup.print_done_task()
    return roles
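# --- Note (sketch) --------------------------------------------------------
# nx.connected_component_subgraphs() was removed in NetworkX 2.4. On newer
# releases, an equivalent replacement for the extraction step above is:
import networkx as nx

g = nx.Graph()
g.add_edges_from([(0, 1), (1, 2), (3, 4)])
sub_graphs = [g.subgraph(c).copy() for c in nx.connected_components(g)]
print([sorted(sg.nodes) for sg in sub_graphs])  # [[0, 1, 2], [3, 4]]
# --------------------------------------------------------------------------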
def get_mxml_events_data(self, filename, start_timeformat, end_timeformat):
    """Reads and parses all the events information from an MXML file."""
    temp_data = list()
    tree = ET.parse(filename)
    root = tree.getroot()
    process = root.find('Process')
    procInstas = process.findall('ProcessInstance')
    i = 0
    for procIns in procInstas:
        sup.print_progress(((i / max(len(procInstas) - 1, 1)) * 100),
                           'Reading log traces ')
        caseid = procIns.get('id')
        auditTrail = procIns.findall('AuditTrailEntry')
        for trail in auditTrail:
            task = ''
            user = ''
            event_type = ''
            type_task = ''
            attributes = trail.find('Data').findall('Attribute')
            for attr in attributes:
                if attr.get('name') == 'concept:name':
                    task = attr.text
                if attr.get('name') == 'lifecycle:transition':
                    event_type = attr.text
                if attr.get('name') == 'org:resource':
                    user = attr.text
                if attr.get('name') == 'type_task':
                    type_task = attr.text
            event_type = trail.find('EventType').text
            timestamp = datetime.datetime.strptime(
                trail.find('Timestamp').text[:-6], start_timeformat)
            temp_data.append(
                dict(caseid=caseid, task=task, event_type=event_type,
                     user=user, start_timestamp=timestamp,
                     end_timestamp=timestamp))
        i += 1
    raw_data = temp_data
    temp_data = self.reorder_mxml(temp_data)
    sup.print_done_task()
    return temp_data, raw_data
def _predict_event_log_shared_cat(self, parms):
    """Generate business process traces using a keras trained model.
    Args:
        parms (dict): generation parameters such as num_cases,
            the input dimensions, and the activities index.
    """
    sup.print_performed_task('Generating traces')
    generated_event_log = list()
    for case in range(0, parms['num_cases']):
        x_trace = list()
        x_ac_ngram = np.zeros(
            (1, parms['dim']['time_dim']), dtype=np.float32)
        x_rl_ngram = np.zeros(
            (1, parms['dim']['time_dim']), dtype=np.float32)
        x_t_ngram = np.zeros(
            (1, parms['dim']['time_dim'], 1), dtype=np.float32)
        # TODO: add intercase support
        for _ in range(1, self.max_trace_size):
            predictions = self.model.predict(
                [x_ac_ngram, x_rl_ngram, x_t_ngram])
            if self.imp == 'Random Choice':
                # Use this to get a random choice following as PDF
                pos = np.random.choice(
                    np.arange(0, len(predictions[0][0])),
                    p=predictions[0][0])
                pos1 = np.random.choice(
                    np.arange(0, len(predictions[1][0])),
                    p=predictions[1][0])
            elif self.imp == 'Arg Max':
                # Use this to get the max prediction
                pos = np.argmax(predictions[0][0])
                pos1 = np.argmax(predictions[1][0])
            x_trace.append([pos, pos1, predictions[2][0][0]])
            # Add prediction to n_gram
            x_ac_ngram = np.append(x_ac_ngram, [[pos]], axis=1)
            x_ac_ngram = np.delete(x_ac_ngram, 0, 1)
            x_rl_ngram = np.append(x_rl_ngram, [[pos1]], axis=1)
            x_rl_ngram = np.delete(x_rl_ngram, 0, 1)
            x_t_ngram = np.append(x_t_ngram, [predictions[2]], axis=1)
            x_t_ngram = np.delete(x_t_ngram, 0, 1)
            # Stop if the next prediction is the end of the trace,
            # otherwise continue until the defined max_size
            if parms['index_ac'][pos] == 'end':
                break
        generated_event_log.extend(self.decode_trace(parms, x_trace, case))
    sup.print_done_task()
    return generated_event_log
def define_probabilities(process_graph, bpmn, log, type):
    # Analysis of gateway probabilities
    if type == 1:
        gateways = analize_gateways(process_graph, log)
    elif type == 2:
        gateways = analize_gateways_random(process_graph, log)
    elif type == 3:
        gateways = analize_gateways_equi(process_graph, log)
    # Creating the response list
    response = list()
    gateways = normalize_probabilities(process_graph, gateways)
    for gateway in gateways:
        gatewayId = process_graph.node[gateway['gate']]['id']
        for path in gateway['targets']:
            sequence_id = bpmn.find_sequence_id(
                process_graph.node[gateway['gate']]['id'],
                process_graph.node[path['out_node']]['id'])
            response.append(dict(gatewayid=gatewayId,
                                 elementid=sequence_id,
                                 prob=path['probability']))
    sup.print_done_task()
    return response
def discover_roles(self):
    associations = lambda x: (self.tasks[x['task']], self.users[x['user']])
    self.data['ac_rl'] = self.data.apply(associations, axis=1)
    freq_matrix = (self.data.groupby(by='ac_rl')['task']
                   .count()
                   .reset_index()
                   .rename(columns={'task': 'freq'}))
    freq_matrix = {x['ac_rl']: x['freq']
                   for x in freq_matrix.to_dict('records')}
    profiles = self.build_profile(freq_matrix)
    sup.print_progress(20, 'Analysing resource pool ')
    # Building of a correlation matrix between resource profiles
    correl_matrix = self.det_correl_matrix(profiles)
    sup.print_progress(40, 'Analysing resource pool ')
    # Creation of a relation network between resources
    g = nx.Graph()
    for user in self.users.values():
        g.add_node(user)
    for rel in correl_matrix:
        # Create edges between nodes, excluding the same elements
        # and those below the similarity threshold
        if rel['distance'] > self.sim_threshold and rel['x'] != rel['y']:
            g.add_edge(rel['x'], rel['y'], weight=rel['distance'])
    sup.print_progress(60, 'Analysing resource pool ')
    # Extraction of fully connected subgraphs as roles
    sub_graphs = list(nx.connected_component_subgraphs(g))
    sup.print_progress(80, 'Analysing resource pool ')
    # Role definition from the graph
    roles = self.role_definition(sub_graphs)
    # Plot creation (optional)
    # if drawing:
    #     graph_network(g, sub_graphs)
    sup.print_progress(100, 'Analysing resource pool ')
    sup.print_done_task()
    return roles
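# --- Example (sketch) ----------------------------------------------------
# One plausible reading of the profile-correlation step: Pearson correlation
# between the per-task execution-frequency profiles of two resources. The
# profiles are made up, and det_correl_matrix() may compute its distance
# differently; this only illustrates the idea of thresholding similarity.
import numpy as np

profile_a = np.array([5, 0, 2, 1])  # task execution counts of resource a
profile_b = np.array([4, 1, 2, 0])  # task execution counts of resource b
distance = np.corrcoef(profile_a, profile_b)[0, 1]
print(distance)  # pairs above the similarity threshold become graph edges
# --------------------------------------------------------------------------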
def align_traces(log, settings):
    """This method is the kernel of the alignment process."""
    evaluate_alignment(settings)
    optimal_alignments = read_alignment_info(settings['aligninfo'])
    traces_alignments = traces_alignment_type(settings['aligntype'])
    raw_traces = log.get_raw_traces()
    aligned_traces = list()
    i = 0
    size = len(raw_traces)
    for raw_trace in raw_traces:
        try:
            # Alignment of each trace
            aligned_trace = process_trace(raw_trace, optimal_alignments,
                                          traces_alignments)
            # Conformity check and reformatting
            aligned_trace = trace_verification(aligned_trace, raw_trace)
            if aligned_trace:
                aligned_traces.extend(aligned_trace)
        except Exception as e:
            print(str(e))
        sup.print_progress(((i / max(size - 1, 1)) * 100),
                           'Aligning log traces with model ')
        i += 1
    sup.print_done_task()
    return aligned_traces
def define_probabilities(self) -> None:
    """Defines the gateways' probabilities according to a specified method."""
    sup.print_performed_task("Analysing gateways' probabilities")
    # Analysis of gateway probabilities
    if self.method == 'discovery':
        gateways = self.analize_gateways()
    elif self.method == 'random':
        gateways = self.analize_gateways_random()
    elif self.method == 'equiprobable':
        gateways = self.analize_gateways_equi()
    # Fix 0 probabilities and floating-point errors in the sums
    gateways = self.normalize_probabilities(gateways)
    # Creating the response list
    gids = lambda x: self.process_graph.node[x['gate']]['id']
    gateways['gatewayid'] = gateways.apply(gids, axis=1)
    gids = lambda x: self.process_graph.node[x['t_path']]['id']
    gateways['out_path_id'] = gateways.apply(gids, axis=1)
    self.probabilities = gateways[['gatewayid',
                                   'out_path_id',
                                   'prob']].to_dict('records')
    sup.print_done_task()
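# --- Example (sketch) ----------------------------------------------------
# The idea behind "fix 0 probabilities and float error sums": round each
# branch probability and push the rounding residue into the largest branch
# so every gateway sums to exactly 1.0. The rounding policy here is an
# assumption; normalize_probabilities() may differ in detail.
def normalize(probs, ndigits=2):
    rounded = [round(p, ndigits) for p in probs]
    residue = round(1.0 - sum(rounded), ndigits)
    idx = rounded.index(max(rounded))
    rounded[idx] = round(rounded[idx] + residue, ndigits)
    return rounded

print(normalize([0.333333, 0.333333, 0.333333]))  # [0.34, 0.33, 0.33]
# --------------------------------------------------------------------------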
def replay(self) -> None:
    """Replays the event-log traces over the BPMN model."""
    for index in range(0, len(self.traces)):
        t_times = list()
        trace = self.traces[index][1:-1]  # remove start and end event
        # Check if it is a complete trace
        current_node = self.find_task_node(self.model, trace[0]['task'])
        last_node = self.find_task_node(self.model, trace[-1]['task'])
        if current_node not in self.start_tasks_list:
            self.not_conformant_traces.append(trace)
            continue
        if last_node not in self.end_tasks_list:
            self.not_conformant_traces.append(trace)
            continue
        # Initialize
        temp_gt_exec = self.parallel_gt_exec
        cursor = [current_node]
        remove = True
        is_conformant = True
        # ----time recording------
        t_times = self.save_record(t_times, trace, 0)
        # ------------------------
        for i in range(1, len(trace)):
            nnode = self.find_task_node(self.model, trace[i]['task'])
            # Loop management
            if nnode == cursor[-1]:
                t_times = self.save_record(t_times, trace, i, nnode)
                self.model.node[nnode]['executions'] += 1
                continue
            try:
                cursor, pnode = self.update_cursor(nnode, self.model, cursor)
                # ----time recording------
                t_times = self.save_record(t_times, trace, i, pnode)
                self.model.node[nnode]['executions'] += 1
                # ------------------------
            except Exception:
                is_conformant = False
                break
            for element in reversed(cursor[:-1]):
                element_type = self.model.node[element]['type']
                # Process AND gate
                if element_type == 'gate3':
                    gate = [d for d in temp_gt_exec
                            if d['nod_num'] == element][0]
                    gate.update({'executed': gate['executed'] + 1})
                    if gate['executed'] < gate['num_paths']:
                        remove = False
                    else:
                        remove = True
                        cursor.remove(element)
                # Process task
                elif element_type == 'task':
                    if (element, nnode) in self.subsec_set and remove:
                        cursor.remove(element)
                # Process other
                elif remove:
                    cursor.remove(element)
        if is_conformant:
            # Append the original trace
            self.conformant_traces.extend(self.traces[index])
            self.process_stats.extend(t_times)
        else:
            self.not_conformant_traces.extend(trace)
        sup.print_progress(((index / max(len(self.traces) - 1, 1)) * 100),
                           'Replaying process traces ')
    self.calculate_process_metrics()
    sup.print_done_task()
def _predict_suffix_shared_cat(self, parms):
    """Generate business process suffixes using a keras trained model.
    Args:
        parms (dict): prediction parameters such as the model type,
            the input dimensions, and the activities index.
    """
    # Generation of predictions
    results = list()
    for i, _ in enumerate(self.spl['prefixes']['activities']):
        # Activities and roles input shape(1,5)
        x_ac_ngram = np.append(
            np.zeros(parms['dim']['time_dim']),
            np.array(self.spl['prefixes']['activities'][i]),
            axis=0)[-parms['dim']['time_dim']:].reshape(
                (1, parms['dim']['time_dim']))
        x_rl_ngram = np.append(
            np.zeros(parms['dim']['time_dim']),
            np.array(self.spl['prefixes']['roles'][i]),
            axis=0)[-parms['dim']['time_dim']:].reshape(
                (1, parms['dim']['time_dim']))
        # Times input shape(1,5,1)
        x_t_ngram = np.array([
            np.append(np.zeros(parms['dim']['time_dim']),
                      np.array(self.spl['prefixes']['times'][i]),
                      axis=0)[-parms['dim']['time_dim']:].reshape(
                          (parms['dim']['time_dim'], 1))])
        if parms['model_type'] in ['shared_cat', 'cnn_lstm']:
            inputs = [x_ac_ngram, x_rl_ngram, x_t_ngram]
        elif parms['model_type'] in [
                'shared_cat_inter', 'shared_cat_inter_full',
                'shared_cat_rd', 'shared_cat_wl', 'shared_cat_cx',
                'cnn_lstm_inter', 'cnn_lstm_inter_full',
                'shared_cat_city', 'shared_cat_snap']:
            inter_attr_num = self.spl['prefixes']['inter_attr'][i].shape[1]
            x_inter_ngram = np.array([
                np.append(np.zeros(
                    (parms['dim']['time_dim'], inter_attr_num)),
                    self.spl['prefixes']['inter_attr'][i],
                    axis=0)[-parms['dim']['time_dim']:].reshape(
                        (parms['dim']['time_dim'], inter_attr_num))])
            inputs = [x_ac_ngram, x_rl_ngram, x_t_ngram, x_inter_ngram]
        pref_size = len(self.spl['prefixes']['activities'][i])
        acum_dur = list()
        ac_suf, rl_suf = list(), list()
        for _ in range(1, self.max_trace_size):
            predictions = self.model.predict(inputs)
            if self.imp == 'Random Choice':
                # Use this to get a random choice following as PDF the predictions
                pos = np.random.choice(np.arange(0, len(predictions[0][0])),
                                       p=predictions[0][0])
                pos1 = np.random.choice(np.arange(0, len(predictions[1][0])),
                                        p=predictions[1][0])
            elif self.imp == 'Arg Max':
                # Use this to get the max prediction
                pos = np.argmax(predictions[0][0])
                pos1 = np.argmax(predictions[1][0])
            # Update the n-grams with the new predictions
            x_ac_ngram = np.append(x_ac_ngram, [[pos]], axis=1)
            x_ac_ngram = np.delete(x_ac_ngram, 0, 1)
            x_rl_ngram = np.append(x_rl_ngram, [[pos1]], axis=1)
            x_rl_ngram = np.delete(x_rl_ngram, 0, 1)
            x_t_ngram = np.append(x_t_ngram, [predictions[2]], axis=1)
            x_t_ngram = np.delete(x_t_ngram, 0, 1)
            if parms['model_type'] in ['shared_cat', 'cnn_lstm']:
                inputs = [x_ac_ngram, x_rl_ngram, x_t_ngram]
            elif parms['model_type'] in [
                    'shared_cat_inter', 'shared_cat_inter_full',
                    'shared_cat_rd', 'shared_cat_wl', 'shared_cat_cx',
                    'cnn_lstm_inter', 'cnn_lstm_inter_full',
                    'shared_cat_city', 'shared_cat_snap']:
                x_inter_ngram = np.append(x_inter_ngram, [predictions[3]],
                                          axis=1)
                x_inter_ngram = np.delete(x_inter_ngram, 0, 1)
                inputs = [x_ac_ngram, x_rl_ngram, x_t_ngram, x_inter_ngram]
            # Stop if the next prediction is the end of the trace,
            # otherwise continue until the defined max_size
            ac_suf.append(pos)
            rl_suf.append(pos1)
            acum_dur.append(self.rescale(predictions[2][0][0], parms))
            if parms['index_ac'][pos] == 'end':
                break
        results.append({
            'ac_pref': self.spl['prefixes']['activities'][i],
            'ac_pred': ac_suf,
            'ac_expect': self.spl['suffixes']['activities'][i],
            'rl_pref': self.spl['prefixes']['roles'][i],
            'rl_pred': rl_suf,
            'rl_expect': self.spl['suffixes']['roles'][i],
            'tm_pref': [self.rescale(x, parms)
                        for x in self.spl['prefixes']['times'][i]],
            'tm_pred': acum_dur,
            'tm_expect': [self.rescale(x, parms)
                          for x in self.spl['suffixes']['times'][i]],
            'pref_size': pref_size})
    sup.print_done_task()
    return results
def _predict_suffix_seq2seq(self, parms):
    """Generate business process suffixes using a keras trained model.
    Args:
        parms (dict): prediction parameters such as the model type,
            the normalization method, and the max duration.
    """
    # Generation of predictions
    results = list()
    for i in range(0, len(self.spl['prefixes']['activities'])):
        act_prefix = self.spl['prefixes']['activities'][i].reshape(
            (1, self.spl['prefixes']['activities'][i].shape[0]))
        rl_prefix = self.spl['prefixes']['roles'][i].reshape(
            (1, self.spl['prefixes']['roles'][i].shape[0]))
        times_prefix = self.spl['prefixes']['times'][i].reshape(
            (1, self.spl['prefixes']['times'][i].shape[0],
             self.spl['prefixes']['times'][i].shape[1]))
        if parms['model_type'] == 'seq2seq':
            inputs = [act_prefix, rl_prefix, times_prefix]
        elif parms['model_type'] == 'seq2seq_inter':
            inter_prefix = self.spl['prefixes']['inter_attr'][i].reshape(
                (1, self.spl['prefixes']['inter_attr'][i].shape[0],
                 self.spl['prefixes']['inter_attr'][i].shape[1]))
            inputs = [act_prefix, rl_prefix, times_prefix, inter_prefix]
        pref_size = len(
            [x for x in self.spl['prefixes']['activities'][i][1:] if x > 0])
        predictions = self.model.predict(inputs)
        if self.imp == 'Random Choice':
            # Use this to get a random choice following as PDF
            act_pred = [np.random.choice(np.arange(0, len(x)), p=x)
                        for x in predictions[0][0]]
            rl_pred = [np.random.choice(np.arange(0, len(x)), p=x)
                       for x in predictions[1][0]]
        elif self.imp == 'Arg Max':
            # Use this to get the max prediction
            act_pred = [np.argmax(x) for x in predictions[0][0]]
            rl_pred = [np.argmax(x) for x in predictions[1][0]]
        # Accuracy of the first predicted activity and role.
        # (`results` is a list, so these flags are stored per record here;
        # the original appended to `results['ac_true']`, which would raise
        # a TypeError.)
        ac_true = (1 if act_pred[0] == self.spl['suffixes']['activities'][i][0]
                   else 0)
        rl_true = (1 if rl_pred[0] == self.spl['suffixes']['roles'][i][0]
                   else 0)
        # Activities suffixes
        idx = self.define_pred_index(act_pred, parms)
        act_pred = act_pred[:idx]
        rl_pred = rl_pred[:idx]
        time_pred = predictions[2][0][:idx]
        if parms['norm_method'] == 'lognorm':
            time_pred = np.expm1(np.multiply(time_pred, parms['max_dur']))
        else:
            time_pred = np.rint(np.multiply(time_pred, parms['max_dur']))
        if parms['norm_method'] == 'lognorm':
            time_expected = np.expm1(
                np.multiply(self.spl['suffixes']['times'][i],
                            parms['max_dur']))
        else:
            time_expected = np.rint(
                np.multiply(self.spl['suffixes']['times'][i],
                            parms['max_dur']))
        # Append results
        results.append({
            'ac_pref': self.spl['prefixes']['activities'][i],
            'ac_pred': act_pred,
            'ac_expec': self.spl['suffixes']['activities'][i],
            'ac_true': ac_true,
            'rl_pref': self.spl['prefixes']['roles'][i],
            'rl_pred': rl_pred,
            'rl_expec': self.spl['suffixes']['roles'][i],
            'rl_true': rl_true,
            'tm_pref': self.spl['prefixes']['times'][i],
            'tm_pred': time_pred,
            'tm_expect': time_expected,
            'pref_size': pref_size})
    sup.print_done_task()
    return results
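# --- Example (sketch) ----------------------------------------------------
# The 'lognorm' denormalization above assumes training targets were
# log1p-compressed and scaled by max_dur, so predictions are inverted with
# expm1. The normalization constant and raw duration below are assumptions.
import numpy as np

max_dur = np.log1p(86400.0)        # assumed normalization constant
raw = 3600.0                       # a duration in seconds
scaled = np.log1p(raw) / max_dur   # what the network would be trained on
restored = np.expm1(scaled * max_dur)
print(round(float(restored), 6))   # 3600.0
# --------------------------------------------------------------------------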
def extract_parameters(log, bpmn, process_graph):
    if bpmn is not None and log is not None:
        bpmnId = bpmn.getProcessId()
        startEventId = bpmn.getStartEventId()
        # -------------------------------------------------------------------
        # Analysing the resource pool (schedule: LV917 or 247)
        roles, resource_table = rl.read_resource_pool(log, drawing=False,
                                                      sim_percentage=0.5)
        resource_pool, time_table, resource_table = sch.analize_schedules(
            resource_table, log, True, '247')
        # -------------------------------------------------------------------
        # Process replaying
        conformed_traces, not_conformed_traces, process_stats = rpl.replay(
            process_graph, log)
        # -------------------------------------------------------------------
        # Adding the role to the process stats
        for stat in process_stats:
            role = list(filter(lambda x: x['resource'] == stat['resource'],
                               resource_table))[0]['role']
            stat['role'] = role
        # -------------------------------------------------------------------
        # Determination of the first tasks to calculate the arrival rate
        inter_arrival_times = arr.define_interarrival_tasks(
            process_graph, conformed_traces)
        arrival_rate_bimp = td.get_task_distribution(inter_arrival_times, 50)
        arrival_rate_bimp['startEventId'] = startEventId
        # -------------------------------------------------------------------
        # Gateway probabilities: 1=Historical, 2=Random, 3=Equiprobable
        sequences = gt.define_probabilities(process_graph, bpmn, log, 1)
        # -------------------------------------------------------------------
        # Task id information
        elements_data = list()
        i = 0
        task_list = list(filter(
            lambda x: process_graph.node[x]['type'] == 'task',
            list(nx.nodes(process_graph))))
        for task in task_list:
            task_name = process_graph.node[task]['name']
            task_id = process_graph.node[task]['id']
            values = list(filter(lambda x: x['task'] == task_name,
                                 process_stats))
            task_processing = [x['processing_time'] for x in values]
            dist = td.get_task_distribution(task_processing)
            # Most frequent role executing the task
            max_role, max_count = '', 0
            role_sorted = sorted(values, key=lambda x: x['role'])
            for key2, group2 in itertools.groupby(role_sorted,
                                                  key=lambda x: x['role']):
                group_count = list(group2)
                if len(group_count) > max_count:
                    max_count = len(group_count)
                    max_role = key2
            elements_data.append(
                dict(id=sup.gen_id(),
                     elementid=task_id,
                     type=dist['dname'],
                     name=task_name,
                     mean=str(dist['dparams']['mean']),
                     arg1=str(dist['dparams']['arg1']),
                     arg2=str(dist['dparams']['arg2']),
                     resource=find_resource_id(resource_pool, max_role)))
            sup.print_progress(((i / max(len(task_list) - 1, 1)) * 100),
                               'Analysing tasks data ')
            i += 1
        sup.print_done_task()
        parameters = dict(arrival_rate=arrival_rate_bimp,
                          time_table=time_table,
                          resource_pool=resource_pool,
                          elements_data=elements_data,
                          sequences=sequences,
                          instances=len(conformed_traces),
                          bpmnId=bpmnId)
        return parameters, process_stats
def _predict_next_event_shared_cat(self, parameters):
    """Predict the next event using a keras trained model.
    Args:
        parameters (dict): prediction parameters such as the model type,
            the input dimensions, and the normalization settings.
    """
    # Generation of predictions
    results = list()
    for i, _ in enumerate(self.spl['prefixes']['activities']):
        # Activities and roles input shape(1,5)
        x_ac_ngram = (np.append(
            np.zeros(parameters['dim']['time_dim']),
            np.array(self.spl['prefixes']['activities'][i]),
            axis=0)[-parameters['dim']['time_dim']:].reshape(
                (1, parameters['dim']['time_dim'])))
        x_rl_ngram = (np.append(
            np.zeros(parameters['dim']['time_dim']),
            np.array(self.spl['prefixes']['roles'][i]),
            axis=0)[-parameters['dim']['time_dim']:].reshape(
                (1, parameters['dim']['time_dim'])))
        # times input shape(1,5,1)
        x_t_ngram = (np.array([
            np.append(np.zeros(parameters['dim']['time_dim']),
                      np.array(self.spl['prefixes']['times'][i]),
                      axis=0)[-parameters['dim']['time_dim']:].reshape(
                          (parameters['dim']['time_dim'], 1))]))
        # Add intercase features if necessary
        if parameters['model_type'] in ['shared_cat', 'cnn_lstm']:
            inputs = [x_ac_ngram, x_rl_ngram, x_t_ngram]
        elif parameters['model_type'] in [
                'shared_cat_inter', 'shared_cat_inter_full',
                'shared_cat_rd', 'shared_cat_wl', 'shared_cat_cx',
                'cnn_lstm_inter', 'cnn_lstm_inter_full',
                'shared_cat_city', 'shared_cat_snap']:
            # intercase input shape(1,5,n)
            inter_attr_num = self.spl['prefixes']['inter_attr'][i].shape[1]
            x_inter_ngram = np.array([
                np.append(np.zeros(
                    (parameters['dim']['time_dim'], inter_attr_num)),
                    self.spl['prefixes']['inter_attr'][i],
                    axis=0)[-parameters['dim']['time_dim']:].reshape(
                        (parameters['dim']['time_dim'], inter_attr_num))])
            inputs = [x_ac_ngram, x_rl_ngram, x_t_ngram, x_inter_ngram]
        # Predict
        predictions = self.model.predict(inputs)
        if self.imp == 'Random Choice':
            # Use this to get a random choice following as PDF
            pos = np.random.choice(np.arange(0, len(predictions[0][0])),
                                   p=predictions[0][0])
            pos1 = np.random.choice(np.arange(0, len(predictions[1][0])),
                                    p=predictions[1][0])
        elif self.imp == 'Arg Max':
            # Use this to get the max prediction
            pos = np.argmax(predictions[0][0])
            pos1 = np.argmax(predictions[1][0])
        # Save results
        results.append({
            'ac_prefix': self.spl['prefixes']['activities'][i],
            'ac_expect': self.spl['next_evt']['activities'][i],
            'ac_pred': pos,
            'rl_prefix': self.spl['prefixes']['roles'][i],
            'rl_expect': self.spl['next_evt']['roles'][i],
            'rl_pred': pos1,
            'tm_prefix': [self.rescale(x, parameters)
                          for x in self.spl['prefixes']['times'][i]],
            'tm_expect': self.rescale(self.spl['next_evt']['times'][i],
                                      parameters),
            'tm_pred': self.rescale(predictions[2][0][0], parameters)})
    sup.print_done_task()
    return results
def get_xes_events_data(self):
    """Reads and parses all the events information from a xes file."""
    temp_data = list()
    tree = ET.parse(self.input)
    root = tree.getroot()
    if self.ns_include:
        ns = {'xes': root.tag.split('}')[0].strip('{')}
        tags = dict(trace='xes:trace', string='xes:string',
                    event='xes:event', date='xes:date')
    else:
        ns = {'xes': ''}
        tags = dict(trace='trace', string='string',
                    event='event', date='date')
    traces = root.findall(tags['trace'], ns)
    i = 0
    sup.print_performed_task('Reading log traces ')
    for trace in traces:
        temp_trace = list()
        caseid = ''
        for string in trace.findall(tags['string'], ns):
            if string.attrib['key'] == 'concept:name':
                caseid = string.attrib['value']
        for event in trace.findall(tags['event'], ns):
            task = ''
            user = ''
            event_type = ''
            for string in event.findall(tags['string'], ns):
                if string.attrib['key'] == 'concept:name':
                    task = string.attrib['value']
                if string.attrib['key'] == 'org:resource':
                    user = string.attrib['value']
                if string.attrib['key'] == 'lifecycle:transition':
                    event_type = string.attrib['value'].lower()
            timestamp = ''
            for date in event.findall(tags['date'], ns):
                if date.attrib['key'] == 'time:timestamp':
                    timestamp = date.attrib['value']
                    try:
                        timestamp = datetime.datetime.strptime(
                            timestamp[:-6], self.timeformat)
                    except ValueError:
                        timestamp = datetime.datetime.strptime(
                            timestamp, self.timeformat)
            # By default remove Start and End events;
            # they will be re-added later to standardize the log
            if task not in ['0', '-1', 'Start', 'End', 'start', 'end']:
                if ((not self.one_timestamp) or
                        (self.one_timestamp and event_type == 'complete')):
                    temp_trace.append(
                        dict(caseid=caseid,
                             task=task,
                             event_type=event_type,
                             user=user,
                             timestamp=timestamp))
        if temp_trace:
            temp_trace = self.append_xes_start_end(temp_trace)
            temp_data.extend(temp_trace)
        i += 1
    self.raw_data = temp_data
    self.data = self.reorder_xes(temp_data)
    sup.print_done_task()
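# --- Example (sketch) ----------------------------------------------------
# How the namespace handling above recovers the XES namespace URI from the
# root tag of the parsed document. The XML snippet is a made-up example.
import xml.etree.ElementTree as ET

root = ET.fromstring(
    '<log xmlns="http://www.xes-standard.org/"><trace/></log>')
ns = {'xes': root.tag.split('}')[0].strip('{')}
print(ns)  # {'xes': 'http://www.xes-standard.org/'}
print(root.findall('xes:trace', ns))  # namespace-qualified lookup
# --------------------------------------------------------------------------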
def replay(process_graph, log, source='log', run_num=0):
    subsec_set = create_subsec_set(process_graph)
    parallel_gt_exec = parallel_execution_list(process_graph)
    not_conformant_traces = list()
    conformant_traces = list()
    process_stats = list()
    traces = log.get_traces()
    for index in range(0, len(traces)):
        trace_times = list()
        trace = traces[index]
        temp_gt_exec = parallel_gt_exec
        cursor = list()
        current_node = find_task_node(process_graph, trace[0]['task'])
        cursor.append(current_node)
        removal_allowed = True
        is_conformant = True
        # ----time recording------
        trace_times.append(create_record(trace, 0))
        # ------------------------
        for i in range(1, len(trace)):
            next_node = find_task_node(process_graph, trace[i]['task'])
            # Loop management
            if next_node == cursor[-1]:
                prev_record = find_previous_record(
                    trace_times, process_graph.node[next_node]['name'])
                trace_times.append(create_record(trace, i, prev_record))
                process_graph.node[next_node]['executions'] += 1
            else:
                try:
                    cursor, prev_node = update_cursor(next_node,
                                                      process_graph, cursor)
                    # ----time recording------
                    prev_record = find_previous_record(
                        trace_times, process_graph.node[prev_node]['name'])
                    trace_times.append(create_record(trace, i, prev_record))
                    process_graph.node[next_node]['executions'] += 1
                    # ------------------------
                except Exception:
                    is_conformant = False
                    break
                for element in reversed(cursor[:-1]):
                    # Process AND gate
                    if process_graph.node[element]['type'] == 'gate3':
                        gate = [d for d in temp_gt_exec
                                if d['nod_num'] == element][0]
                        gate.update(dict(executed=gate['executed'] + 1))
                        if gate['executed'] < gate['num_paths']:
                            removal_allowed = False
                        else:
                            removal_allowed = True
                            cursor.remove(element)
                    # Process task
                    elif process_graph.node[element]['type'] == 'task':
                        if (element, next_node) in subsec_set:
                            if removal_allowed:
                                cursor.remove(element)
                    # Process other
                    else:
                        if removal_allowed:
                            cursor.remove(element)
        if not is_conformant:
            not_conformant_traces.append(trace)
        else:
            conformant_traces.append(trace)
            process_stats.extend(trace_times)
        sup.print_progress(((index / max(len(traces) - 1, 1)) * 100),
                           'Replaying process traces ')
    # ------Filter records and calculate stats------
    process_stats = list(filter(
        lambda x: (x['task'] != 'Start' and x['task'] != 'End'
                   and x['resource'] != 'AUTO'), process_stats))
    process_stats = calculate_process_metrics(process_stats)
    [x.update(dict(source=source, run_num=run_num)) for x in process_stats]
    # -----------------------------------------------
    sup.print_done_task()
    # ------Conformance percentage------------------
    # print('Conformance percentage: ' +
    #       str(sup.ffloat((len(conformant_traces) / len(traces)) * 100, 2)) +
    #       '%')
    # ----------------------------------------------
    return conformant_traces, not_conformant_traces, process_stats