Code example #1
    def analize_first_tasks(self, process_graph):
        """
        Extracts the first tasks of the process

        Parameters
        ----------
        process_graph : NetworkX DiGraph

        Returns
        -------
        list of tasks

        """
        tasks_list = list()
        for node in process_graph.nodes:
            if process_graph.node[node]['type']=='task':
                tasks_list.append(
                    self.find_tasks_predecesors(process_graph,node))
        in_tasks = list()
        i=0
        for task in tasks_list:
            sup.print_progress(((i / (len(tasks_list)-1))* 100),
                               'Defining inter-arrival rate ')
            for path in task['sources']:
                for in_task in path['in_tasks']:
                    if process_graph.node[in_task]['type']=='start':
                        in_tasks.append(
                            process_graph.node[task['task']]['name'])
            i+=1
        return list(set(in_tasks))
Code example #2
def analize_gateways(process_graph,log):
    nodes_list = list()
    for node in process_graph.nodes:
        if process_graph.node[node]['type']=='gate':
            nodes_list.append(analize_gateway_structure(process_graph,node))

    i=0
    for node in nodes_list:
        if len(nodes_list) > 1:
            sup.print_progress(((i / (len(nodes_list)-1))* 100),'Analysing gateways probabilities ')
        else:
            sup.print_progress(((i / (len(nodes_list)))* 100),'Analysing gateways probabilities ')

        total_ocurrences = 0
        for path in node['targets']:
            ocurrences = 0
            for out_task in path['out_tasks']:
                ocurrences += process_graph.node[out_task]['executions']
            path['ocurrences'] = ocurrences
            total_ocurrences += path['ocurrences']
        for path in node['targets']:
            if total_ocurrences > 0:
                probability = path['ocurrences']/total_ocurrences
#                print(node['gate'],process_graph.node[path['out_node']]['name'], path['ocurrences'],probability, sep=' ')
            else:
                probability = 0
            path['probability'] = round(probability,2)
        i+=1
    return nodes_list
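The branch probabilities computed above are simply each path's execution count divided by the gateway's total, rounded to two decimals, with a zero fallback when nothing was observed. A minimal self-contained sketch of that normalization (the targets list below is a made-up stand-in for the structure produced by analize_gateway_structure):

def normalize_branch_probabilities(targets):
    # Assign each outgoing path a probability proportional to its occurrences
    total = sum(path['ocurrences'] for path in targets)
    for path in targets:
        probability = path['ocurrences'] / total if total > 0 else 0
        path['probability'] = round(probability, 2)
    return targets

paths = [{'ocurrences': 6}, {'ocurrences': 3}, {'ocurrences': 1}]
print(normalize_branch_probabilities(paths))
# -> probabilities 0.6, 0.3 and 0.1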
Code example #3
    def align_traces(self):
        """
        This method is the kernel of the alignment process
        """
        aligned_traces = list()
        i = 0
        size = len(self.traces)
        for trace in self.traces:
            # Remove Start and End events
            trace = [x for x in trace if x['task'] not in ['Start', 'End']]
            try:
                # Alignment of each trace
                aligned_trace = self.process_trace(trace)
                if self.one_timestamp:
                    aligned_trace = sorted(aligned_trace,
                                           key=itemgetter('end_timestamp'))
                    aligned_trace = self.append_start_end(aligned_trace)
                    aligned_traces.extend(aligned_trace)
                else:
                    # Completeness check and reformatting
                    aligned_trace = self.trace_verification(aligned_trace)
                    if aligned_trace:
                        aligned_trace = self.append_start_end(aligned_trace)
                        aligned_traces.extend(aligned_trace)
            except Exception:
                pass  # skip traces that cannot be aligned
            sup.print_progress(((i / (size - 1)) * 100),
                               'Aligning log traces with model ')
            i += 1
        sup.print_done_task()
        return aligned_traces
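Note that the recurring progress expression (i / (size - 1)) * 100 raises ZeroDivisionError whenever the collection holds a single element; code example #2 guards that case explicitly, this one does not. A hedged sketch of a small wrapper that would avoid the crash (safe_progress is hypothetical, not part of Simod):

def safe_progress(i, size):
    # Completion percentage that tolerates one-element collections
    if size <= 1:
        return 100.0
    return (i / (size - 1)) * 100

print(safe_progress(0, 1))   # 100.0 instead of ZeroDivisionError
print(safe_progress(2, 5))   # 50.0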
Code example #4
    def get_csv_events_data(self, log_columns_numbers, start_timeformat,
                            end_timeformat):
        """Reads and parses all the events information from a csv file"""
        flength = sup.file_size(self.input)
        i = 0
        temp_data = list()
        with open(self.input, 'r') as csvfile:
            filereader = csv.reader(csvfile, delimiter=',', quotechar='"')
            next(filereader, None)  # skip the headers
            for row in filereader:
                sup.print_progress(((i / (flength - 1)) * 100),
                                   'Reading log traces ')
                timestamp = ''
                complete_timestamp = ''
                if row[log_columns_numbers[1]] != 'End':
                    timestamp = datetime.datetime.strptime(
                        row[log_columns_numbers[4]], start_timeformat)
                    complete_timestamp = datetime.datetime.strptime(
                        row[log_columns_numbers[5]], end_timeformat)
                temp_data.append(
                    dict(caseid=row[log_columns_numbers[0]],
                         task=row[log_columns_numbers[1]],
                         event_type=row[log_columns_numbers[2]],
                         user=row[log_columns_numbers[3]],
                         start_timestamp=timestamp,
                         end_timestamp=complete_timestamp))
                i += 1
        # For CSV logs the raw and the processed data are identical
        return temp_data, temp_data
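The start_timeformat and end_timeformat arguments are standard strptime patterns. Assuming, for illustration only, that the log stores timestamps such as 2019-05-01 09:30:00 (the real format depends on the input file):

import datetime

start_timeformat = '%Y-%m-%d %H:%M:%S'  # assumed example format
ts = datetime.datetime.strptime('2019-05-01 09:30:00', start_timeformat)
print(ts.isoformat())  # 2019-05-01T09:30:00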
Code example #5
def replay(process_graph, traces):
    start_tasks_list, end_tasks_list = find_start_finish_tasks(process_graph)
    subsec_set = create_subsec_set(process_graph)
    parallel_gt_exec = parallel_execution_list(process_graph)
    not_conformant_traces = list()
    conformant_traces = list()
    for index in range(0, len(traces)):
        trace = traces[index][1:-1] # Take out start and end event
        current_node = find_task_node(process_graph, trace[0]['task'])
        last_node = find_task_node(process_graph, trace[-1]['task'])
        # Check if it is a complete trace
        if (current_node in start_tasks_list) and (last_node in end_tasks_list):
            temp_gt_exec = parallel_gt_exec
            cursor = list()
            cursor.append(current_node)
            removal_allowed = True
            is_conformant = True
            for i in range(1, len(trace)):
                next_node = find_task_node(process_graph,trace[i]['task'])
                # If loop management
                if next_node == cursor[-1]:
                    process_graph.node[next_node]['executions'] += 1
                else:
                    try:
                        cursor, prev_node = update_cursor(next_node, process_graph, cursor)
                    except Exception:
                        is_conformant = False
                        break
                    for element in reversed(cursor[:-1]):
                        # Process AND
                        if process_graph.node[element]['type'] == 'gate3':
                            gate = [d for d in temp_gt_exec if d['nod_num'] == element][0]
                            gate.update(dict(executed= gate['executed'] + 1))
                            if gate['executed'] < gate['num_paths']:
                                removal_allowed = False
                            else:
                                removal_allowed = True
                                cursor.remove(element)
                        # Process Task
                        elif process_graph.node[element]['type'] == 'task':
                            if (element,next_node) in subsec_set:
                                if removal_allowed:
                                    cursor.remove(element)
                        # Process other
                        else:
                            if removal_allowed:
                                cursor.remove(element)
            if not is_conformant:
                not_conformant_traces.append(trace)
            else:
                conformant_traces.append(traces[index]) # Append the original one
        else:
            # If it is not a complete trace
            not_conformant_traces.append(trace)
        sup.print_progress(((index / (len(traces)-1))* 100),'Replaying process traces ')
    sup.print_done_task()
    return conformant_traces, not_conformant_traces
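The trickiest part of the replay is the AND-join handling: a 'gate3' node may only leave the cursor once every one of its incoming parallel paths has executed. A reduced stand-alone illustration of that counter logic (the gate dict mirrors the entries of parallel_execution_list):

# One AND-join with three incoming parallel paths
gate = {'nod_num': 7, 'executed': 0, 'num_paths': 3}
for _ in range(3):  # one event arrives per parallel branch
    gate['executed'] += 1
    removal_allowed = gate['executed'] >= gate['num_paths']
    print(gate['executed'], removal_allowed)
# the cursor entry is only removed on the third (last) branch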
Code example #6
File: process_structure.py Project: dtdi/Simod
def create_nodes(g, total_elements, index, array, node_type, node_name, node_id):
    i = 0
    while i < len(array):
        sup.print_progress(((index / (total_elements - 1)) * 100),
                           'Loading of bpmn structure from file ')
        g.add_node(index, type=node_type, name=array[i][node_name],
                   id=array[i][node_id], executions=0, processing_times=list(),
                   waiting_times=list(), multi_tasking=list(),
                   temp_enable=None, temp_start=None, temp_end=None,
                   tsk_act=False, gtact=False, xor_gtdir=0, gt_num_paths=0,
                   gt_visited_paths=0)
        index += 1
        i += 1
    return index
Code example #7
def analize_gateways_random(process_graph,log):
    nodes_list = list()
    for node in process_graph.nodes:
        if process_graph.node[node]['type']=='gate':
            nodes_list.append(analize_gateway_structure(process_graph,node))
    i = 0
    for node in nodes_list:
        sup.print_progress(((i / (len(nodes_list) - 1)) * 100),
                           'Analysing gateways random probabilities ')
        prob_array = np.random.dirichlet(np.ones(len(node['targets'])), size=1)
        # Use a separate index so the outer progress counter is not clobbered
        for j in range(0, len(node['targets'])):
            node['targets'][j]['probability'] = round(float(prob_array[0][j]), 2)
        i += 1
    return nodes_list
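np.random.dirichlet is a convenient way to draw a random probability vector, since every sample sums to 1 by construction. A standalone illustration of the call used above:

import numpy as np

n_paths = 3
prob_array = np.random.dirichlet(np.ones(n_paths), size=1)  # shape (1, n_paths)
probabilities = [round(float(p), 2) for p in prob_array[0]]
print(probabilities, float(sum(prob_array[0])))  # e.g. [0.2, 0.45, 0.35] 1.0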
Code example #8
    def get_mxml_events_data(self, filename, start_timeformat, end_timeformat):
        """read and parse all the events information from a MXML file"""
        temp_data = list()
        tree = ET.parse(filename)
        root = tree.getroot()
        process = root.find('Process')
        procInstas = process.findall('ProcessInstance')
        i = 0
        for procIns in procInstas:
            sup.print_progress(((i / (len(procInstas) - 1)) * 100),
                               'Reading log traces ')
            caseid = procIns.get('id')
            complete_timestamp = ''
            auditTrail = procIns.findall('AuditTrailEntry')
            for trail in auditTrail:
                task = ''
                user = ''
                event_type = ''
                type_task = ''
                timestamp = ''
                attributes = trail.find('Data').findall('Attribute')
                for attr in attributes:
                    if (attr.get('name') == 'concept:name'):
                        task = attr.text
                    if (attr.get('name') == 'lifecycle:transition'):
                        event_type = attr.text
                    if (attr.get('name') == 'org:resource'):
                        user = attr.text
                    if (attr.get('name') == 'type_task'):
                        type_task = attr.text
                work_flow_ele = trail.find('WorkflowModelElement').text
                # The EventType element overrides the lifecycle:transition attribute
                event_type = trail.find('EventType').text
                originator = trail.find('Originator').text
                # Parse the timestamp after stripping the timezone offset
                timestamp = datetime.datetime.strptime(
                    trail.find('Timestamp').text[:-6], start_timeformat)
                temp_data.append(
                    dict(caseid=caseid,
                         task=task,
                         event_type=event_type,
                         user=user,
                         start_timestamp=timestamp,
                         end_timestamp=timestamp))

            i += 1
        raw_data = temp_data
        temp_data = self.reorder_mxml(temp_data)
        sup.print_done_task()
        return temp_data, raw_data
Code example #9
def analize_first_tasks(process_graph):
    tasks_list = list()
    for node in process_graph.nodes:
        if process_graph.node[node]['type'] == 'task':
            tasks_list.append(find_tasks_predecesors(process_graph, node))
    in_tasks = list()
    i = 0
    for task in tasks_list:
        sup.print_progress(((i / (len(tasks_list) - 1)) * 100),
                           'Defining inter-arrival rate ')
        for path in task['sources']:
            for in_task in path['in_tasks']:
                if process_graph.node[in_task]['type'] == 'start':
                    in_tasks.append(process_graph.node[task['task']]['name'])
        i += 1
    return list(set(in_tasks))
Code example #10
File: role_discovery.py Project: AndreRdz7/Simod
def role_discovery(data, drawing, sim_percentage):
    tasks = list(set(list(map(lambda x: x[0], data))))
    try:
        tasks.remove('Start')
    except Exception:
        pass
    tasks = [dict(index=i, data=tasks[i]) for i in range(0, len(tasks))]
    users = list(set(list(map(lambda x: x[1], data))))
    try:
        users.remove('Start')
    except Exception:
        pass
    users = [dict(index=i, data=users[i]) for i in range(0, len(users))]
    data_transform = list(map(lambda x: [find_index(tasks, x[0]),find_index(users, x[1])], data ))
    unique = list(set(tuple(i) for i in data_transform))
    unique = [list(i) for i in unique]
    # [print(uni) for uni in users]
    # building of a task-size profile of task execution per resource
    profiles = build_profile(users,det_freq_matrix(unique,data_transform),len(tasks))
    sup.print_progress(((20 / 100)* 100),'Analysing resource pool ')
    # building of a correlation matrix between resource profiles
    correlation_matrix = det_correlation_matrix(profiles)
    sup.print_progress(((40 / 100)* 100),'Analysing resource pool ')
    # creation of a relation network between resources
    g = nx.Graph()
    for user in users:
        g.add_node(user['index'])
    for relation in correlation_matrix:
        # create edges between nodes, excluding self-correlations
        # and pairs below the sim_percentage similarity threshold
        if relation['distance'] > sim_percentage and relation['x'] != relation['y']:
            g.add_edge(relation['x'],relation['y'],weight=relation['distance'])
    sup.print_progress(((60 / 100)* 100),'Analysing resource pool ')
    # extraction of fully connected subgraphs as roles
    sub_graphs = list(nx.connected_component_subgraphs(g))
    sup.print_progress(((80 / 100)* 100),'Analysing resource pool ')
    # role definition from graph
    roles = role_definition(sub_graphs,users)
    # plot creation (optional)
    if drawing:
        graph_network(g, sub_graphs)
    sup.print_progress(((100 / 100)* 100),'Analysing resource pool ')
    sup.print_done_task()
    return roles
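nx.connected_component_subgraphs was removed in NetworkX 2.4, so on current versions the same role extraction step can be sketched with nx.connected_components instead:

import networkx as nx

g = nx.Graph()
g.add_edges_from([(0, 1), (1, 2), (3, 4)])  # two similarity clusters
sub_graphs = [g.subgraph(c).copy() for c in nx.connected_components(g)]
print([sorted(sg.nodes) for sg in sub_graphs])  # [[0, 1, 2], [3, 4]]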
Code example #11
File: role_discovery.py Project: dtdi/Simod
    def discover_roles(self):
        associations = lambda x: (self.tasks[x['task']], self.users[x['user']])
        self.data['ac_rl'] = self.data.apply(associations, axis=1)

        freq_matrix = (self.data.groupby(by='ac_rl')['task']
                       .count()
                       .reset_index()
                       .rename(columns={'task': 'freq'}))
        freq_matrix = {x['ac_rl']: x['freq'] for x in freq_matrix.to_dict('records')}

        profiles = self.build_profile(freq_matrix)

        sup.print_progress(((20 / 100) * 100), 'Analysing resource pool ')
        # building of a correlation matrix between resource profiles
        correl_matrix = self.det_correl_matrix(profiles)
        sup.print_progress(((40 / 100) * 100), 'Analysing resource pool ')
        # creation of a relation network between resources
        g = nx.Graph()
        for user in self.users.values():
            g.add_node(user)
        for rel in correl_matrix:
            # create edges between nodes, excluding the same elements
            # and those below the similarity threshold
            if rel['distance'] > self.sim_threshold and rel['x'] != rel['y']:
                g.add_edge(rel['x'],
                           rel['y'],
                           weight=rel['distance'])
        sup.print_progress(((60 / 100) * 100), 'Analysing resource pool ')
        # extraction of fully connected subgraphs as roles
        sub_graphs = list(nx.connected_component_subgraphs(g))
        sup.print_progress(((80 / 100) * 100), 'Analysing resource pool ')
        # role definition from graph
        roles = self.role_definition(sub_graphs)
        # plot creation (optional)
        # if drawing == True:
        #     graph_network(g, sub_graphs)
        sup.print_progress(((100 / 100) * 100), 'Analysing resource pool ')
        sup.print_done_task()
        return roles
Code example #12
File: traces_alignment.py Project: AndreRdz7/Simod
def align_traces(log, settings):
    """this method is the kernel of all the alignment process"""
    evaluate_alignment(settings)
    optimal_alignments = read_alignment_info(settings['aligninfo'])
    traces_alignments = traces_alignment_type(settings['aligntype'])
    raw_traces=log.get_raw_traces()
    aligned_traces = list()
    i = 0
    size = len(raw_traces)
    for raw_trace in raw_traces:
        try:
            # Alignment of each trace
            aligned_trace = process_trace(raw_trace, optimal_alignments,
                                          traces_alignments)
            # Conformity check and reformatting
            aligned_trace = trace_verification(aligned_trace, raw_trace)
            if aligned_trace:
                aligned_traces.extend(aligned_trace)
        except Exception as e:
            print(str(e))
        sup.print_progress(((i / (size-1))* 100),'Aligning log traces with model ')
        i += 1
    sup.print_done_task()
    return aligned_traces
Code example #13
File: log_replayer.py Project: Zohaib94/Simod
    def replay(self) -> None:
        """
        Replays the event-log traces over the BPMN model
        """
        for index in range(0, len(self.traces)):
            t_times = list()
            trace = self.traces[index][1:-1]  # remove start and end event
            # Check if it is a complete trace
            current_node = self.find_task_node(self.model, trace[0]['task'])
            last_node = self.find_task_node(self.model, trace[-1]['task'])
            if current_node not in self.start_tasks_list:
                self.not_conformant_traces.append(trace)
                continue
            if last_node not in self.end_tasks_list:
                self.not_conformant_traces.append(trace)
                continue
            # Initialize
            temp_gt_exec = self.parallel_gt_exec
            cursor = [current_node]
            remove = True
            is_conformant = True
            # ----time recording------
            t_times = self.save_record(t_times, trace, 0)
            # ------------------------
            for i in range(1, len(trace)):
                nnode = self.find_task_node(self.model, trace[i]['task'])
                # If loop management
                if nnode == cursor[-1]:
                    t_times = self.save_record(t_times, trace, i, nnode)
                    self.model.node[nnode]['executions'] += 1
                    continue
                try:
                    cursor, pnode = self.update_cursor(nnode, self.model,
                                                       cursor)
                    # ----time recording------
                    t_times = self.save_record(t_times, trace, i, pnode)
                    self.model.node[nnode]['executions'] += 1
                    # ------------------------
                except Exception:
                    is_conformant = False
                    break
                for element in reversed(cursor[:-1]):
                    element_type = self.model.node[element]['type']
                    # Process AND
                    if element_type == 'gate3':
                        gate = [
                            d for d in temp_gt_exec if d['nod_num'] == element
                        ][0]
                        gate.update({'executed': gate['executed'] + 1})
                        if gate['executed'] < gate['num_paths']:
                            remove = False
                        else:
                            remove = True
                            cursor.remove(element)
                    # Process Task
                    elif element_type == 'task':
                        if (element, nnode) in self.subsec_set and remove:
                            cursor.remove(element)
                    # Process other
                    elif remove:
                        cursor.remove(element)
            if is_conformant:
                # Append the original one
                self.conformant_traces.extend(self.traces[index])
                self.process_stats.extend(t_times)
            else:
                self.not_conformant_traces.extend(trace)
            sup.print_progress(((index / (len(self.traces) - 1)) * 100),
                               'Replaying process traces ')
        self.calculate_process_metrics()
        sup.print_done_task()
Code example #14
def extract_parameters(log, bpmn, process_graph):
    if bpmn is not None and log is not None:
        bpmnId = bpmn.getProcessId()
        startEventId = bpmn.getStartEventId()
        # Creation of process graph
        #-------------------------------------------------------------------
        # Analysing resource pool LV917 or 247
        roles, resource_table = rl.read_resource_pool(log,
                                                      drawing=False,
                                                      sim_percentage=0.5)
        resource_pool, time_table, resource_table = sch.analize_schedules(
            resource_table, log, True, '247')
        #-------------------------------------------------------------------
        # Process replaying
        conformed_traces, not_conformed_traces, process_stats = rpl.replay(
            process_graph, log)
        # -------------------------------------------------------------------
        # Adding role to process stats
        for stat in process_stats:
            role = list(
                filter(lambda x: x['resource'] == stat['resource'],
                       resource_table))[0]['role']
            stat['role'] = role
        #-------------------------------------------------------------------
        # Determination of the first tasks to calculate the arrival rate
        inter_arrival_times = arr.define_interarrival_tasks(
            process_graph, conformed_traces)
        arrival_rate_bimp = (td.get_task_distribution(inter_arrival_times, 50))
        arrival_rate_bimp['startEventId'] = startEventId
        #-------------------------------------------------------------------
        # Gateway probabilities: 1=Historical, 2=Random, 3=Equiprobable
        sequences = gt.define_probabilities(process_graph, bpmn, log, 1)
        #-------------------------------------------------------------------
        # Tasks id information
        elements_data = list()
        i = 0
        task_list = list(
            filter(lambda x: process_graph.node[x]['type'] == 'task',
                   list(nx.nodes(process_graph))))
        for task in task_list:
            task_name = process_graph.node[task]['name']
            task_id = process_graph.node[task]['id']
            values = list(
                filter(lambda x: x['task'] == task_name, process_stats))
            task_processing = [x['processing_time'] for x in values]
            dist = td.get_task_distribution(task_processing)
            max_role, max_count = '', 0
            role_sorted = sorted(values, key=lambda x: x['role'])
            for key2, group2 in itertools.groupby(role_sorted,
                                                  key=lambda x: x['role']):
                group_count = list(group2)
                if len(group_count) > max_count:
                    max_count = len(group_count)
                    max_role = key2
            elements_data.append(
                dict(id=sup.gen_id(),
                     elementid=task_id,
                     type=dist['dname'],
                     name=task_name,
                     mean=str(dist['dparams']['mean']),
                     arg1=str(dist['dparams']['arg1']),
                     arg2=str(dist['dparams']['arg2']),
                     resource=find_resource_id(resource_pool, max_role)))
            sup.print_progress(((i / (len(task_list) - 1)) * 100),
                               'Analysing tasks data ')
            i += 1
        sup.print_done_task()
        parameters = dict(arrival_rate=arrival_rate_bimp,
                          time_table=time_table,
                          resource_pool=resource_pool,
                          elements_data=elements_data,
                          sequences=sequences,
                          instances=len(conformed_traces),
                          bpmnId=bpmnId)
        return parameters, process_stats
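The groupby loop above picks the role that most often executed each task; itertools.groupby only merges adjacent items, which is why the records must be sorted by role first. A reduced sketch of the same idiom with made-up records:

import itertools

values = [{'role': 'clerk'}, {'role': 'manager'}, {'role': 'clerk'}]
max_role, max_count = '', 0
role_sorted = sorted(values, key=lambda x: x['role'])
for key, group in itertools.groupby(role_sorted, key=lambda x: x['role']):
    group_count = list(group)
    if len(group_count) > max_count:
        max_count = len(group_count)
        max_role = key
print(max_role)  # 'clerk'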
Code example #15
def replay(process_graph, log, source='log', run_num=0):
    subsec_set = create_subsec_set(process_graph)
    parallel_gt_exec = parallel_execution_list(process_graph)
    not_conformant_traces = list()
    conformant_traces=list()
    process_stats=list()
    traces = log.get_traces()
    for index in range(0,len(traces)):
        trace_times = list()
        trace = traces[index]
        temp_gt_exec = parallel_gt_exec
        cursor = list()
        current_node = find_task_node(process_graph,trace[0]['task'])
        cursor.append(current_node)
        removal_allowed = True
        is_conformant = True
        #----time recording------
        trace_times.append(create_record(trace, 0))
        #------------------------
        for i in range(1, len(trace)):
            next_node = find_task_node(process_graph,trace[i]['task'])
            # If loop management
            if next_node == cursor[-1]:
                prev_record = find_previous_record(trace_times, process_graph.node[next_node]['name'])
                trace_times.append(create_record(trace, i, prev_record))
                process_graph.node[next_node]['executions'] += 1
            else:
                try:
                    cursor, prev_node = update_cursor(next_node, process_graph, cursor)
                    #----time recording------
                    prev_record = find_previous_record(trace_times, process_graph.node[prev_node]['name'])
                    trace_times.append(create_record(trace, i, prev_record))
                    process_graph.node[next_node]['executions'] += 1
                    #------------------------
                except Exception:
                    is_conformant = False
                    break
                for element in reversed(cursor[:-1]):
                    # Process AND
                    if process_graph.node[element]['type'] == 'gate3':
                        gate = [d for d in temp_gt_exec if d['nod_num'] == element][0]
                        gate.update(dict(executed= gate['executed'] + 1))
                        if gate['executed'] < gate['num_paths']:
                            removal_allowed = False
                        else:
                            removal_allowed = True
                            cursor.remove(element)
                    # Process Task
                    elif process_graph.node[element]['type'] == 'task':
                        if (element,next_node) in subsec_set:
                            if removal_allowed:
                                cursor.remove(element)
                    # Process other
                    else:
                        if removal_allowed:
                            cursor.remove(element)
        if not is_conformant:
            not_conformant_traces.append(trace)
        else:
            conformant_traces.append(trace)
            process_stats.extend(trace_times)
        sup.print_progress(((index / (len(traces)-1))* 100),'Replaying process traces ')
    #------Filtering records and calculating stats---
    process_stats = list(
        filter(lambda x: x['task'] not in ['Start', 'End']
               and x['resource'] != 'AUTO', process_stats))
    process_stats = calculate_process_metrics(process_stats)
    for x in process_stats:
        x.update(dict(source=source, run_num=run_num))
    #----------------------------------------------
    sup.print_done_task()
    #------conformance percentage------------------
#    print('Conformance percentage: ' + str(sup.ffloat((len(conformant_traces)/len(traces)) * 100,2)) + '%')
    #----------------------------------------------
    return conformant_traces, not_conformant_traces, process_stats