def analize_first_tasks(self, process_graph):
    """
    Extracts the first tasks of the process

    Parameters
    ----------
    process_graph : Networkx di-graph

    Returns
    -------
    list of tasks
    """
    tasks_list = list()
    for node in process_graph.nodes:
        if process_graph.node[node]['type'] == 'task':
            tasks_list.append(
                self.find_tasks_predecesors(process_graph, node))
    in_tasks = list()
    i = 0
    for task in tasks_list:
        # max(..., 1) guards against a ZeroDivisionError on single-task lists
        sup.print_progress(((i / max(len(tasks_list) - 1, 1)) * 100),
                           'Defining inter-arrival rate ')
        for path in task['sources']:
            for in_task in path['in_tasks']:
                if process_graph.node[in_task]['type'] == 'start':
                    in_tasks.append(
                        process_graph.node[task['task']]['name'])
        i += 1
    return list(set(in_tasks))
def analize_gateways(process_graph, log):
    nodes_list = list()
    for node in process_graph.nodes:
        if process_graph.node[node]['type'] == 'gate':
            nodes_list.append(analize_gateway_structure(process_graph, node))
    i = 0
    for node in nodes_list:
        if len(nodes_list) > 1:
            sup.print_progress(((i / (len(nodes_list) - 1)) * 100),
                               'Analysing gateways probabilities ')
        else:
            sup.print_progress(((i / len(nodes_list)) * 100),
                               'Analysing gateways probabilities ')
        total_ocurrences = 0
        for path in node['targets']:
            ocurrences = 0
            for out_task in path['out_tasks']:
                ocurrences += process_graph.node[out_task]['executions']
            path['ocurrences'] = ocurrences
            total_ocurrences += path['ocurrences']
        for path in node['targets']:
            if total_ocurrences > 0:
                probability = path['ocurrences'] / total_ocurrences
                # print(node['gate'],
                #       process_graph.node[path['out_node']]['name'],
                #       path['ocurrences'], probability, sep=' ')
            else:
                probability = 0
            path['probability'] = round(probability, 2)
        i += 1
    return nodes_list
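
# A minimal worked example of the normalization step above, with hypothetical
# occurrence counts: each branch probability is its share of the gateway's
# total outgoing executions, rounded to two decimals (zero when the gateway
# was never executed).
occurrences = [30, 70, 0]
total = sum(occurrences)
probabilities = [round(o / total, 2) if total > 0 else 0 for o in occurrences]
print(probabilities)  # [0.3, 0.7, 0.0]
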
def align_traces(self):
    """
    This method is the kernel of the alignment process
    """
    aligned_traces = list()
    i = 0
    size = len(self.traces)
    for trace in self.traces:
        # Remove Start and End events
        trace = [x for x in trace if x['task'] not in ['Start', 'End']]
        try:
            # Alignment of each trace
            aligned_trace = self.process_trace(trace)
            if self.one_timestamp:
                aligned_trace = sorted(aligned_trace,
                                       key=itemgetter('end_timestamp'))
                aligned_trace = self.append_start_end(aligned_trace)
                aligned_traces.extend(aligned_trace)
            else:
                # Completeness check and reformatting
                aligned_trace = self.trace_verification(aligned_trace)
                if aligned_trace:
                    aligned_trace = self.append_start_end(aligned_trace)
                    aligned_traces.extend(aligned_trace)
        except Exception:
            # The original bare `next` was a no-op; skip unalignable traces
            pass
        sup.print_progress(((i / max(size - 1, 1)) * 100),
                           'Aligning log traces with model ')
        i += 1
    sup.print_done_task()
    return aligned_traces
def get_csv_events_data(self, log_columns_numbers,
                        start_timeformat, end_timeformat):
    """Reads and parses all the events information from a CSV file"""
    flength = sup.file_size(self.input)
    i = 0
    temp_data = list()
    with open(self.input, 'r') as csvfile:
        filereader = csv.reader(csvfile, delimiter=',', quotechar='"')
        next(filereader, None)  # skip the headers
        for row in filereader:
            sup.print_progress(((i / max(flength - 1, 1)) * 100),
                               'Reading log traces ')
            timestamp = ''
            complete_timestamp = ''
            if row[log_columns_numbers[1]] != 'End':
                timestamp = datetime.datetime.strptime(
                    row[log_columns_numbers[4]], start_timeformat)
                complete_timestamp = datetime.datetime.strptime(
                    row[log_columns_numbers[5]], end_timeformat)
            temp_data.append(
                dict(caseid=row[log_columns_numbers[0]],
                     task=row[log_columns_numbers[1]],
                     event_type=row[log_columns_numbers[2]],
                     user=row[log_columns_numbers[3]],
                     start_timestamp=timestamp,
                     end_timestamp=complete_timestamp))
            i += 1
    # For CSV input the raw data and the parsed data are the same list
    return temp_data, temp_data
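
# A hedged sketch of the row layout the reader above expects; the column
# order and time format below are illustrative assumptions, configured in
# practice through log_columns_numbers and the two time-format arguments.
import datetime

row = ['case1', 'Register', 'complete', 'John',
       '2019-01-01 09:00:00', '2019-01-01 09:05:00']
log_columns_numbers = [0, 1, 2, 3, 4, 5]
start = datetime.datetime.strptime(row[log_columns_numbers[4]],
                                   '%Y-%m-%d %H:%M:%S')
print(start.isoformat())  # 2019-01-01T09:00:00
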
def replay(process_graph, traces):
    start_tasks_list, end_tasks_list = find_start_finish_tasks(process_graph)
    subsec_set = create_subsec_set(process_graph)
    parallel_gt_exec = parallel_execution_list(process_graph)
    not_conformant_traces = list()
    conformant_traces = list()
    for index in range(0, len(traces)):
        trace = traces[index][1:-1]  # take out the start and end events
        current_node = find_task_node(process_graph, trace[0]['task'])
        last_node = find_task_node(process_graph, trace[-1]['task'])
        # Check whether it is a complete trace
        if (current_node in start_tasks_list) and (last_node in end_tasks_list):
            temp_gt_exec = parallel_gt_exec
            cursor = list()
            cursor.append(current_node)
            removal_allowed = True
            is_conformant = True
            for i in range(1, len(trace)):
                next_node = find_task_node(process_graph, trace[i]['task'])
                # Loop management
                if next_node == cursor[-1]:
                    process_graph.node[next_node]['executions'] += 1
                else:
                    try:
                        cursor, prev_node = update_cursor(next_node,
                                                          process_graph,
                                                          cursor)
                    except Exception:
                        is_conformant = False
                        break
                    for element in reversed(cursor[:-1]):
                        # Process AND gateway
                        if process_graph.node[element]['type'] == 'gate3':
                            gate = [d for d in temp_gt_exec
                                    if d['nod_num'] == element][0]
                            gate.update(dict(executed=gate['executed'] + 1))
                            if gate['executed'] < gate['num_paths']:
                                removal_allowed = False
                            else:
                                removal_allowed = True
                                cursor.remove(element)
                        # Process task
                        elif process_graph.node[element]['type'] == 'task':
                            if (element, next_node) in subsec_set:
                                if removal_allowed:
                                    cursor.remove(element)
                        # Process other elements
                        else:
                            if removal_allowed:
                                cursor.remove(element)
            if not is_conformant:
                not_conformant_traces.append(trace)
            else:
                conformant_traces.append(traces[index])  # append the original one
        else:
            # Not a complete trace
            not_conformant_traces.append(trace)
        sup.print_progress(((index / max(len(traces) - 1, 1)) * 100),
                           'Replaying process traces ')
    sup.print_done_task()
    return conformant_traces, not_conformant_traces
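
# A minimal sketch of the AND-gateway ('gate3') bookkeeping used during
# replay: a parallel gateway may only be removed from the cursor once all of
# its paths have been executed. The gate record below is an illustrative
# assumption mirroring the 'nod_num'/'num_paths'/'executed' keys used above.
gate = {'nod_num': 7, 'num_paths': 2, 'executed': 0}
for _ in range(gate['num_paths']):
    gate['executed'] += 1
    removal_allowed = gate['executed'] >= gate['num_paths']
    print(removal_allowed)  # False, then True
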
def create_nodes(g, total_elements, index, array, node_type,
                 node_name, node_id):
    for element in array:
        sup.print_progress(((index / (total_elements - 1)) * 100),
                           'Loading of bpmn structure from file ')
        g.add_node(index, type=node_type, name=element[node_name],
                   id=element[node_id], executions=0,
                   processing_times=list(), waiting_times=list(),
                   multi_tasking=list(), temp_enable=None, temp_start=None,
                   temp_end=None, tsk_act=False, gtact=False, xor_gtdir=0,
                   gt_num_paths=0, gt_visited_paths=0)
        index += 1
    return index
def analize_gateways_random(process_graph, log):
    nodes_list = list()
    for node in process_graph.nodes:
        if process_graph.node[node]['type'] == 'gate':
            nodes_list.append(analize_gateway_structure(process_graph, node))
    i = 0
    for node in nodes_list:
        sup.print_progress(((i / max(len(nodes_list) - 1, 1)) * 100),
                           'Analysing gateways random probabilities ')
        prob_array = np.random.dirichlet(np.ones(len(node['targets'])), size=1)
        # Use a separate index so the progress counter i is not clobbered
        for j in range(0, len(node['targets'])):
            node['targets'][j]['probability'] = round(float(prob_array[0][j]), 2)
        i += 1
    return nodes_list
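
# A minimal sketch of the Dirichlet draw above: a flat Dirichlet over n
# branches yields random branch probabilities that sum to one (the per-branch
# rounding applied above can make the rounded values sum slightly off one).
import numpy as np

probs = np.random.dirichlet(np.ones(3), size=1)[0]
print([round(float(p), 2) for p in probs])  # e.g. [0.21, 0.46, 0.33]
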
def get_mxml_events_data(self, filename, start_timeformat, end_timeformat):
    """Reads and parses all the events information from an MXML file"""
    temp_data = list()
    tree = ET.parse(filename)
    root = tree.getroot()
    process = root.find('Process')
    procInstas = process.findall('ProcessInstance')
    i = 0
    for procIns in procInstas:
        sup.print_progress(((i / max(len(procInstas) - 1, 1)) * 100),
                           'Reading log traces ')
        caseid = procIns.get('id')
        auditTrail = procIns.findall('AuditTrailEntry')
        for trail in auditTrail:
            task = ''
            user = ''
            attributes = trail.find('Data').findall('Attribute')
            for attr in attributes:
                if attr.get('name') == 'concept:name':
                    task = attr.text
                if attr.get('name') == 'org:resource':
                    user = attr.text
            event_type = trail.find('EventType').text
            # Trim the six-character UTC offset so strptime can parse
            timestamp = datetime.datetime.strptime(
                trail.find('Timestamp').text[:-6], start_timeformat)
            temp_data.append(
                dict(caseid=caseid, task=task, event_type=event_type,
                     user=user, start_timestamp=timestamp,
                     end_timestamp=timestamp))
        i += 1
    raw_data = temp_data
    temp_data = self.reorder_mxml(temp_data)
    sup.print_done_task()
    return temp_data, raw_data
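
# A minimal sketch of the timestamp handling above: MXML timestamps such as
# '2019-01-01T09:00:00+01:00' end with a six-character UTC offset that a
# plain strptime format cannot parse, hence the [:-6] trim.
import datetime

raw = '2019-01-01T09:00:00+01:00'
ts = datetime.datetime.strptime(raw[:-6], '%Y-%m-%dT%H:%M:%S')
print(ts)  # 2019-01-01 09:00:00
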
def analize_first_tasks(process_graph):
    tasks_list = list()
    for node in process_graph.nodes:
        if process_graph.node[node]['type'] == 'task':
            tasks_list.append(find_tasks_predecesors(process_graph, node))
    in_tasks = list()
    i = 0
    for task in tasks_list:
        sup.print_progress(((i / max(len(tasks_list) - 1, 1)) * 100),
                           'Defining inter-arrival rate ')
        for path in task['sources']:
            for in_task in path['in_tasks']:
                if process_graph.node[in_task]['type'] == 'start':
                    in_tasks.append(process_graph.node[task['task']]['name'])
        i += 1
    return list(set(in_tasks))
def role_discovery(data, drawing, sim_percentage):
    tasks = list(set(list(map(lambda x: x[0], data))))
    try:
        tasks.remove('Start')
    except Exception:
        pass
    tasks = [dict(index=i, data=tasks[i]) for i in range(0, len(tasks))]
    users = list(set(list(map(lambda x: x[1], data))))
    try:
        users.remove('Start')
    except Exception:
        pass
    users = [dict(index=i, data=users[i]) for i in range(0, len(users))]
    data_transform = list(map(lambda x: [find_index(tasks, x[0]),
                                         find_index(users, x[1])], data))
    unique = list(set(tuple(i) for i in data_transform))
    unique = [list(i) for i in unique]
    # Building of a task-size profile of task executions per resource
    profiles = build_profile(users, det_freq_matrix(unique, data_transform),
                             len(tasks))
    sup.print_progress(20, 'Analysing resource pool ')
    # Building of a correlation matrix between resource profiles
    correlation_matrix = det_correlation_matrix(profiles)
    sup.print_progress(40, 'Analysing resource pool ')
    # Creation of a relation network between resources
    g = nx.Graph()
    for user in users:
        g.add_node(user['index'])
    for relation in correlation_matrix:
        # Create edges between nodes, excluding self-correlations and
        # those below the similarity threshold (sim_percentage)
        if relation['distance'] > sim_percentage and relation['x'] != relation['y']:
            g.add_edge(relation['x'], relation['y'],
                       weight=relation['distance'])
    sup.print_progress(60, 'Analysing resource pool ')
    # Extraction of fully connected subgraphs as roles
    sub_graphs = list(nx.connected_component_subgraphs(g))
    sup.print_progress(80, 'Analysing resource pool ')
    # Role definition from the graph
    roles = role_definition(sub_graphs, users)
    # Plot creation (optional)
    if drawing:
        graph_network(g, sub_graphs)
    sup.print_progress(100, 'Analysing resource pool ')
    sup.print_done_task()
    return roles
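
# A minimal sketch of the role-extraction idea above, with a hypothetical
# similarity relation: resources whose profile correlation exceeds the
# threshold are linked, and each connected component becomes one role.
# nx.connected_components is used here; the code above relies on the older
# nx.connected_component_subgraphs, which was removed in NetworkX 2.4.
import networkx as nx

similarities = {(0, 1): 0.9, (1, 2): 0.8, (3, 4): 0.95, (0, 4): 0.2}
g = nx.Graph()
g.add_nodes_from(range(5))
for (x, y), distance in similarities.items():
    if distance > 0.7:
        g.add_edge(x, y, weight=distance)
print(list(nx.connected_components(g)))  # [{0, 1, 2}, {3, 4}]
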
def discover_roles(self):
    associations = lambda x: (self.tasks[x['task']], self.users[x['user']])
    self.data['ac_rl'] = self.data.apply(associations, axis=1)
    freq_matrix = (self.data.groupby(by='ac_rl')['task']
                   .count()
                   .reset_index()
                   .rename(columns={'task': 'freq'}))
    freq_matrix = {x['ac_rl']: x['freq']
                   for x in freq_matrix.to_dict('records')}
    profiles = self.build_profile(freq_matrix)
    sup.print_progress(20, 'Analysing resource pool ')
    # Building of a correlation matrix between resource profiles
    correl_matrix = self.det_correl_matrix(profiles)
    sup.print_progress(40, 'Analysing resource pool ')
    # Creation of a relation network between resources
    g = nx.Graph()
    for user in self.users.values():
        g.add_node(user)
    for rel in correl_matrix:
        # Create edges between nodes, excluding the same elements
        # and those below the similarity threshold
        if rel['distance'] > self.sim_threshold and rel['x'] != rel['y']:
            g.add_edge(rel['x'], rel['y'], weight=rel['distance'])
    sup.print_progress(60, 'Analysing resource pool ')
    # Extraction of fully connected subgraphs as roles
    sub_graphs = list(nx.connected_component_subgraphs(g))
    sup.print_progress(80, 'Analysing resource pool ')
    # Role definition from the graph
    roles = self.role_definition(sub_graphs)
    # Plot creation (optional)
    # if drawing:
    #     graph_network(g, sub_graphs)
    sup.print_progress(100, 'Analysing resource pool ')
    sup.print_done_task()
    return roles
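
# A minimal sketch of the activity-resource frequency matrix built above,
# assuming pandas is available; the integer task/user encodings are
# illustrative.
import pandas as pd

data = pd.DataFrame([{'task': 0, 'user': 1},
                     {'task': 0, 'user': 1},
                     {'task': 2, 'user': 3}])
data['ac_rl'] = data.apply(lambda x: (x['task'], x['user']), axis=1)
freq = (data.groupby(by='ac_rl')['task']
        .count()
        .reset_index()
        .rename(columns={'task': 'freq'}))
print({x['ac_rl']: x['freq'] for x in freq.to_dict('records')})
# {(0, 1): 2, (2, 3): 1}
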
def align_traces(log, settings):
    """This method is the kernel of the whole alignment process"""
    evaluate_alignment(settings)
    optimal_alignments = read_alignment_info(settings['aligninfo'])
    traces_alignments = traces_alignment_type(settings['aligntype'])
    raw_traces = log.get_raw_traces()
    aligned_traces = list()
    i = 0
    size = len(raw_traces)
    for raw_trace in raw_traces:
        try:
            # Alignment of each trace
            aligned_trace = process_trace(raw_trace, optimal_alignments,
                                          traces_alignments)
            # Conformity check and reformatting
            aligned_trace = trace_verification(aligned_trace, raw_trace)
            if aligned_trace:
                aligned_traces.extend(aligned_trace)
        except Exception as e:
            print(str(e))
        sup.print_progress(((i / max(size - 1, 1)) * 100),
                           'Aligning log traces with model ')
        i += 1
    sup.print_done_task()
    return aligned_traces
def replay(self) -> None:
    """
    Replays the event-log traces over the BPMN model
    """
    for index in range(0, len(self.traces)):
        t_times = list()
        trace = self.traces[index][1:-1]  # remove the start and end events
        # Check whether it is a complete trace
        current_node = self.find_task_node(self.model, trace[0]['task'])
        last_node = self.find_task_node(self.model, trace[-1]['task'])
        if current_node not in self.start_tasks_list:
            self.not_conformant_traces.append(trace)
            continue
        if last_node not in self.end_tasks_list:
            self.not_conformant_traces.append(trace)
            continue
        # Initialize
        temp_gt_exec = self.parallel_gt_exec
        cursor = [current_node]
        remove = True
        is_conformant = True
        # ----time recording------
        t_times = self.save_record(t_times, trace, 0)
        # ------------------------
        for i in range(1, len(trace)):
            nnode = self.find_task_node(self.model, trace[i]['task'])
            # Loop management
            if nnode == cursor[-1]:
                t_times = self.save_record(t_times, trace, i, nnode)
                self.model.node[nnode]['executions'] += 1
                continue
            try:
                cursor, pnode = self.update_cursor(nnode, self.model, cursor)
                # ----time recording------
                t_times = self.save_record(t_times, trace, i, pnode)
                self.model.node[nnode]['executions'] += 1
                # ------------------------
            except Exception:
                is_conformant = False
                break
            for element in reversed(cursor[:-1]):
                element_type = self.model.node[element]['type']
                # Process AND gateway
                if element_type == 'gate3':
                    gate = [d for d in temp_gt_exec
                            if d['nod_num'] == element][0]
                    gate.update({'executed': gate['executed'] + 1})
                    if gate['executed'] < gate['num_paths']:
                        remove = False
                    else:
                        remove = True
                        cursor.remove(element)
                # Process task
                elif element_type == 'task':
                    if (element, nnode) in self.subsec_set and remove:
                        cursor.remove(element)
                # Process other elements
                elif remove:
                    cursor.remove(element)
        if is_conformant:
            # Append the original trace
            self.conformant_traces.extend(self.traces[index])
            self.process_stats.extend(t_times)
        else:
            self.not_conformant_traces.extend(trace)
        sup.print_progress(((index / max(len(self.traces) - 1, 1)) * 100),
                           'Replaying process traces ')
    self.calculate_process_metrics()
    sup.print_done_task()
def extract_parameters(log, bpmn, process_graph):
    if bpmn is not None and log is not None:
        bpmnId = bpmn.getProcessId()
        startEventId = bpmn.getStartEventId()
        # -------------------------------------------------------------------
        # Analysing the resource pool and its schedules (LV917 or 247)
        roles, resource_table = rl.read_resource_pool(log, drawing=False,
                                                      sim_percentage=0.5)
        resource_pool, time_table, resource_table = sch.analize_schedules(
            resource_table, log, True, '247')
        # -------------------------------------------------------------------
        # Process replaying
        conformed_traces, not_conformed_traces, process_stats = rpl.replay(
            process_graph, log)
        # -------------------------------------------------------------------
        # Adding the role to the process stats
        for stat in process_stats:
            role = list(filter(lambda x: x['resource'] == stat['resource'],
                               resource_table))[0]['role']
            stat['role'] = role
        # -------------------------------------------------------------------
        # Determination of the first tasks to calculate the arrival rate
        inter_arrival_times = arr.define_interarrival_tasks(process_graph,
                                                            conformed_traces)
        arrival_rate_bimp = td.get_task_distribution(inter_arrival_times, 50)
        arrival_rate_bimp['startEventId'] = startEventId
        # -------------------------------------------------------------------
        # Gateway probabilities: 1=Historical, 2=Random, 3=Equiprobable
        sequences = gt.define_probabilities(process_graph, bpmn, log, 1)
        # -------------------------------------------------------------------
        # Task id information
        elements_data = list()
        i = 0
        task_list = list(filter(
            lambda x: process_graph.node[x]['type'] == 'task',
            list(nx.nodes(process_graph))))
        for task in task_list:
            task_name = process_graph.node[task]['name']
            task_id = process_graph.node[task]['id']
            values = list(filter(lambda x: x['task'] == task_name,
                                 process_stats))
            task_processing = [x['processing_time'] for x in values]
            dist = td.get_task_distribution(task_processing)
            # Most frequent role per task (values sorted before groupby)
            max_role, max_count = '', 0
            role_sorted = sorted(values, key=lambda x: x['role'])
            for key2, group2 in itertools.groupby(role_sorted,
                                                  key=lambda x: x['role']):
                group_count = list(group2)
                if len(group_count) > max_count:
                    max_count = len(group_count)
                    max_role = key2
            elements_data.append(
                dict(id=sup.gen_id(), elementid=task_id, type=dist['dname'],
                     name=task_name, mean=str(dist['dparams']['mean']),
                     arg1=str(dist['dparams']['arg1']),
                     arg2=str(dist['dparams']['arg2']),
                     resource=find_resource_id(resource_pool, max_role)))
            sup.print_progress(((i / max(len(task_list) - 1, 1)) * 100),
                               'Analysing tasks data ')
            i += 1
        sup.print_done_task()
        parameters = dict(arrival_rate=arrival_rate_bimp,
                          time_table=time_table,
                          resource_pool=resource_pool,
                          elements_data=elements_data,
                          sequences=sequences,
                          instances=len(conformed_traces),
                          bpmnId=bpmnId)
        return parameters, process_stats
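
# A minimal sketch of the most-frequent-role selection above, with
# illustrative records: itertools.groupby only groups consecutive equal keys,
# which is why the values are sorted by role first.
import itertools

values = [{'role': 'clerk'}, {'role': 'manager'}, {'role': 'clerk'}]
max_role, max_count = '', 0
role_sorted = sorted(values, key=lambda x: x['role'])
for key, group in itertools.groupby(role_sorted, key=lambda x: x['role']):
    size = len(list(group))
    if size > max_count:
        max_count, max_role = size, key
print(max_role)  # clerk
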
def replay(process_graph, log, source='log', run_num=0):
    subsec_set = create_subsec_set(process_graph)
    parallel_gt_exec = parallel_execution_list(process_graph)
    not_conformant_traces = list()
    conformant_traces = list()
    process_stats = list()
    traces = log.get_traces()
    for index in range(0, len(traces)):
        trace_times = list()
        trace = traces[index]
        temp_gt_exec = parallel_gt_exec
        cursor = list()
        current_node = find_task_node(process_graph, trace[0]['task'])
        cursor.append(current_node)
        removal_allowed = True
        is_conformant = True
        # ----time recording------
        trace_times.append(create_record(trace, 0))
        # ------------------------
        for i in range(1, len(trace)):
            next_node = find_task_node(process_graph, trace[i]['task'])
            # Loop management
            if next_node == cursor[-1]:
                prev_record = find_previous_record(
                    trace_times, process_graph.node[next_node]['name'])
                trace_times.append(create_record(trace, i, prev_record))
                process_graph.node[next_node]['executions'] += 1
            else:
                try:
                    cursor, prev_node = update_cursor(next_node,
                                                      process_graph, cursor)
                    # ----time recording------
                    prev_record = find_previous_record(
                        trace_times, process_graph.node[prev_node]['name'])
                    trace_times.append(create_record(trace, i, prev_record))
                    process_graph.node[next_node]['executions'] += 1
                    # ------------------------
                except Exception:
                    is_conformant = False
                    break
                for element in reversed(cursor[:-1]):
                    # Process AND gateway
                    if process_graph.node[element]['type'] == 'gate3':
                        gate = [d for d in temp_gt_exec
                                if d['nod_num'] == element][0]
                        gate.update(dict(executed=gate['executed'] + 1))
                        if gate['executed'] < gate['num_paths']:
                            removal_allowed = False
                        else:
                            removal_allowed = True
                            cursor.remove(element)
                    # Process task
                    elif process_graph.node[element]['type'] == 'task':
                        if (element, next_node) in subsec_set:
                            if removal_allowed:
                                cursor.remove(element)
                    # Process other elements
                    else:
                        if removal_allowed:
                            cursor.remove(element)
        if not is_conformant:
            not_conformant_traces.append(trace)
        else:
            conformant_traces.append(trace)
            process_stats.extend(trace_times)
        sup.print_progress(((index / max(len(traces) - 1, 1)) * 100),
                           'Replaying process traces ')
    # ------Filtering records and calculating stats-----
    process_stats = list(filter(
        lambda x: x['task'] != 'Start' and x['task'] != 'End'
        and x['resource'] != 'AUTO', process_stats))
    process_stats = calculate_process_metrics(process_stats)
    [x.update(dict(source=source, run_num=run_num)) for x in process_stats]
    # ---------------------------------------------------
    sup.print_done_task()
    # ------Conformance percentage-----------------------
    # print('Conformance percentage: '
    #       + str(sup.ffloat((len(conformant_traces) / len(traces)) * 100, 2))
    #       + '%')
    # ---------------------------------------------------
    return conformant_traces, not_conformant_traces, process_stats