Code example #1
File: process_structure.py Project: dtdi/Simod
def create_process_structure(bpmn, drawing=False):
    # Loading of bpmn structure into a directed graph
    g = load_process_structure(bpmn)
    if drawing:
        graph_network_x(g)
    sup.print_done_task()
    return g
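A minimal usage sketch (hedged: the bpmn argument stands in for a model object parsed elsewhere in Simod, and the drawing call shown is plain networkx, only a guess at what graph_network_x does):

import networkx as nx

# Illustrative stand-in for the graph load_process_structure would return
g = nx.DiGraph()
g.add_edges_from([('Start', 'Register'), ('Register', 'Approve'), ('Approve', 'End')])
print(list(g.successors('Register')))   # ['Approve']
# nx.draw_networkx(g)   # roughly what graph_network_x(g) might render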
Code example #2
 def get_csv_events_data(self):
     """
     reads and parse all the events information from a csv file
     """
     sup.print_performed_task('Reading log traces ')
     log = pd.read_csv(self.input, dtype={'user': str})
     if self.one_timestamp:
         self.column_names['Complete Timestamp'] = 'end_timestamp'
         log = log.rename(columns=self.column_names)
         log = log.astype({'caseid': object})
         log = (log[(log.task != 'Start') & (log.task != 'End')]
                .reset_index(drop=True))
         if self.filter_d_attrib:
             log = log[['caseid', 'task', 'user', 'end_timestamp']]
         log['end_timestamp'] = pd.to_datetime(log['end_timestamp'],
                                               format=self.timeformat)
     else:
         self.column_names['Start Timestamp'] = 'start_timestamp'
         self.column_names['Complete Timestamp'] = 'end_timestamp'
         log = log.rename(columns=self.column_names)
         log = log.astype({'caseid': object})
         log = (log[(log.task != 'Start') & (log.task != 'End')]
                .reset_index(drop=True))
         if self.filter_d_attrib:
             log = log[['caseid', 'task', 'user',
                        'start_timestamp', 'end_timestamp']]
         log['start_timestamp'] = pd.to_datetime(log['start_timestamp'],
                                                 format=self.timeformat)
         log['end_timestamp'] = pd.to_datetime(log['end_timestamp'],
                                               format=self.timeformat)
     # Assignment instead of inplace, which can act on a copy in newer pandas
     log['user'] = log['user'].fillna('SYS')
     self.data = log.to_dict('records')
     self.append_csv_start_end()
     self.split_event_transitions()
     sup.print_done_task()
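The rename/parse pattern above can be exercised in isolation; a self-contained sketch with toy data (the column names are illustrative, not taken from a real Simod log):

import pandas as pd

log = pd.DataFrame({'caseid': ['1', '1'],
                    'task': ['A', 'B'],
                    'user': ['u1', None],
                    'Complete Timestamp': ['2019-01-01 10:00:00',
                                           '2019-01-01 10:05:00']})
log = log.rename(columns={'Complete Timestamp': 'end_timestamp'})
log['end_timestamp'] = pd.to_datetime(log['end_timestamp'],
                                      format='%Y-%m-%d %H:%M:%S')
log['user'] = log['user'].fillna('SYS')   # same effect as the call above
print(log.dtypes)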
Code example #3
 def align_traces(self):
     """
     This method is the kernel of the alignment process
     """
     aligned_traces = list()
     i = 0
     size = len(self.traces)
     for trace in self.traces:
         # Remove Start and End events
         trace = [x for x in trace if x['task'] not in ['Start', 'End']]
         try:
             # Alignment of each trace
             aligned_trace = self.process_trace(trace)
             if self.one_timestamp:
                 aligned_trace = sorted(aligned_trace,
                                        key=itemgetter('end_timestamp'))
                 aligned_trace = self.append_start_end(aligned_trace)
                 aligned_traces.extend(aligned_trace)
             else:
              # completeness check and reformatting
              aligned_trace = self.trace_verification(aligned_trace)
                 if aligned_trace:
                     aligned_trace = self.append_start_end(aligned_trace)
                     aligned_traces.extend(aligned_trace)
         except Exception:
             continue
         sup.print_progress(((i / (size - 1)) * 100),
                            'Aligning log traces with model ')
         i += 1
     sup.print_done_task()
     return aligned_traces
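The sort key above comes from operator.itemgetter, which builds a callable that looks up a dictionary key; a quick standalone illustration:

from operator import itemgetter

events = [{'task': 'B', 'end_timestamp': 2}, {'task': 'A', 'end_timestamp': 1}]
print(sorted(events, key=itemgetter('end_timestamp')))   # A's event first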
Code example #4
def predict(model, prefixes, imp, max_trace_size):
    """Generate business process suffixes using a keras trained model.
    Args:
        model (keras model): keras trained model.
        prefixes (list): list of prefixes.
        imp (str): method of next event selection.
        max_trace_size (int): maximum length of the generated suffix.
    """
    # Generation of predictions
    for prefix in prefixes:
        # Activities and roles input shape(1,5)
        x_ac_ngram = np.append(
                np.zeros(DIM['time_dim']),
                np.array(prefix['ac_pref']),
                axis=0)[-DIM['time_dim']:].reshape((1, DIM['time_dim']))

        x_rl_ngram = np.append(
                np.zeros(DIM['time_dim']),
                np.array(prefix['rl_pref']),
                axis=0)[-DIM['time_dim']:].reshape((1, DIM['time_dim']))

        # times input shape(1,5,1)
        x_t_ngram = np.array([np.append(
                np.zeros(DIM['time_dim']),
                np.array(prefix['t_pref']),
                axis=0)[-DIM['time_dim']:].reshape((DIM['time_dim'], 1))])
        acum_tbtw = 0
        ac_suf, rl_suf = list(), list()
        for _ in range(1, max_trace_size):
            predictions = model.predict([x_ac_ngram, x_rl_ngram, x_t_ngram])
            if imp == 'Random Choice':
                # Sample the next index using the predictions as a PDF
                pos = np.random.choice(np.arange(0, len(predictions[0][0])), p=predictions[0][0])
                pos1 = np.random.choice(np.arange(0, len(predictions[1][0])), p=predictions[1][0])
            elif imp == 'Arg Max':
                # Take the index with the highest predicted probability
                pos = np.argmax(predictions[0][0])
                pos1 = np.argmax(predictions[1][0])
            # Add the prediction to the n-grams
            x_ac_ngram = np.append(x_ac_ngram, [[pos]], axis=1)
            x_ac_ngram = np.delete(x_ac_ngram, 0, 1)
            x_rl_ngram = np.append(x_rl_ngram, [[pos1]], axis=1)
            x_rl_ngram = np.delete(x_rl_ngram, 0, 1)
            x_t_ngram = np.append(x_t_ngram, [predictions[2]], axis=1)
            x_t_ngram = np.delete(x_t_ngram, 0, 1)
            # Stop if the next prediction is the end of the trace,
            # otherwise continue until max_trace_size
            ac_suf.append(pos)
            rl_suf.append(pos1)
            if EXP['norm_method'] == 'lognorm':
                acum_tbtw += math.expm1(predictions[2][0][0] * TBTW['max_tbtw'])
            else:
                acum_tbtw += np.rint(predictions[2][0][0] * TBTW['max_tbtw'])
            if INDEX_AC[pos] == 'end':
                break
        prefix['ac_suff_pred'] = ac_suf
        prefix['rl_suff_pred'] = rl_suf
        prefix['rem_time_pred'] = acum_tbtw
    sup.print_done_task()
    return prefixes
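The n-gram update inside the loop is a fixed-width sliding window: the newest prediction is appended on the right and the oldest entry dropped on the left. A minimal numpy sketch with window size 5, matching the shape comments above:

import numpy as np

time_dim = 5
x_ac_ngram = np.zeros((1, time_dim))
for pos in [7, 3, 9]:                                     # pretend predictions
    x_ac_ngram = np.append(x_ac_ngram, [[pos]], axis=1)   # grow to (1, 6)
    x_ac_ngram = np.delete(x_ac_ngram, 0, 1)              # shrink back to (1, 5)
print(x_ac_ngram)                                         # [[0. 0. 7. 3. 9.]]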
Code example #5
def define_interarrival_tasks(process_graph, conformed_traces):
	# Analysis of start tasks
	tasks = analize_first_tasks(process_graph)
	inter_arrival_times = find_inter_arrival(tasks, conformed_traces)

	# for task in tasks:
	# 	process_graph.node[task]['type']
	sup.print_done_task()
	return inter_arrival_times
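As a generic illustration of what inter-arrival analysis computes (hedged: only a guess at find_inter_arrival's behaviour, not taken from Simod), inter-arrival times are the gaps between consecutive case start times:

import numpy as np

starts = np.array([0.0, 30.0, 45.0, 110.0])   # case start times in seconds
print(np.diff(np.sort(starts)))               # [30. 15. 65.]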
Code example #6
def replay(process_graph, traces):
    start_tasks_list, end_tasks_list = find_start_finish_tasks(process_graph)
    subsec_set = create_subsec_set(process_graph)
    parallel_gt_exec = parallel_execution_list(process_graph)
    not_conformant_traces = list()
    conformant_traces = list()
    for index in range(0, len(traces)):
        trace = traces[index][1:-1] # Take out start and end event
        current_node = find_task_node(process_graph, trace[0]['task'])
        last_node = find_task_node(process_graph, trace[-1]['task'])
        # Check if it is a complete trace
        if (current_node in start_tasks_list) and (last_node in end_tasks_list):
            temp_gt_exec = parallel_gt_exec
            cursor = list()
            cursor.append(current_node)
            removal_allowed = True
            is_conformant = True
            for i in range(1, len(trace)):
                next_node = find_task_node(process_graph,trace[i]['task'])
                # Self-loop management
                if next_node == cursor[-1]:
                    process_graph.node[next_node]['executions'] += 1
                else:
                    try:
                        cursor, prev_node = update_cursor(next_node, process_graph, cursor)
                    except Exception:
                        is_conformant = False
                        break
                    for element in reversed(cursor[:-1]):
                        # Process AND
                        if process_graph.node[element]['type'] == 'gate3':
                            gate = [d for d in temp_gt_exec if d['nod_num'] == element][0]
                            gate.update(dict(executed=gate['executed'] + 1))
                            if gate['executed'] < gate['num_paths']:
                                removal_allowed = False
                            else:
                                removal_allowed = True
                                cursor.remove(element)
                        # Process Task
                        elif process_graph.node[element]['type'] == 'task':
                            if (element,next_node) in subsec_set:
                                if removal_allowed:
                                    cursor.remove(element)
                        # Process other
                        else:
                            if removal_allowed:
                                cursor.remove(element)
            if not is_conformant:
                not_conformant_traces.append(trace)
            else:
                conformant_traces.append(traces[index]) # Append the original one
        else:
            # If it is not a complete trace
            not_conformant_traces.append(trace)
        sup.print_progress(((index / (len(traces)-1))* 100),'Replaying process traces ')
    sup.print_done_task()
    return conformant_traces, not_conformant_traces
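Note that process_graph.node[...] relies on the pre-2.4 NetworkX API; G.node was removed in NetworkX 2.4 in favor of G.nodes. On a current NetworkX the equivalent attribute access is:

import networkx as nx

g = nx.DiGraph()
g.add_node(0, type='task', executions=0)
g.nodes[0]['executions'] += 1   # replaces g.node[0]['executions']
print(g.nodes[0])               # {'type': 'task', 'executions': 1}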
Code example #7
def predict(model, prefixes, ac_alias, rl_alias, imp, max_trace_size):
    """Generate business process suffixes using a keras trained model.
    Args:
        model (keras model): keras trained model.
        prefixes (list): list of prefixes.
        ac_alias (dict): aliases of the activities.
        rl_alias (dict): aliases of the roles.
        imp (str): method of next event selection.
        max_trace_size (int): max size of the trace
    """
    # Generation of predictions
    for prefix in prefixes:
        x_trace = list()
        x_ac_ngram = np.array([prefix['ac_pref']])
        x_rl_ngram = np.array([prefix['rl_pref']])
        x_t_ngram = np.array([prefix['t_pref']])

        acum_tbtw = 0
        ac_suf, rl_suf = '', ''
        for _ in range(1, max_trace_size):
            predictions = model.predict([x_ac_ngram, x_rl_ngram, x_t_ngram])
            if imp == 'Random Choice':
                # Sample the next index using the predictions as a PDF
                pos = np.random.choice(np.arange(0, len(predictions[0][0])), p=predictions[0][0])
                pos1 = np.random.choice(np.arange(0, len(predictions[1][0])), p=predictions[1][0])
            elif imp == 'Arg Max':
                # Take the index with the highest predicted probability
                pos = np.argmax(predictions[0][0])
                pos1 = np.argmax(predictions[1][0])
            x_trace.append([pos, pos1, predictions[2][0][0]])
            # Add prediction to n-gram
            x_ac_ngram = np.append(x_ac_ngram, [[pos]], axis=1)
            x_ac_ngram = np.delete(x_ac_ngram, 0, 1)
            x_rl_ngram = np.append(x_rl_ngram, [[pos1]], axis=1)
            x_rl_ngram = np.delete(x_rl_ngram, 0, 1)
            x_t_ngram = np.append(x_t_ngram, [predictions[2]], axis=1)
            x_t_ngram = np.delete(x_t_ngram, 0, 1)
            # Stop if the next prediction is the end of the trace,
            # otherwise continue until max_trace_size
            if INDEX_AC[pos] == 'end':
                break
            else:
                ac_suf += ac_alias[pos]
                rl_suf += rl_alias[pos1]
                if EXP['norm_method'] == 'lognorm':
                    acum_tbtw += math.expm1(predictions[2][0][0] * TBTW['max_tbtw'])
                else:
                    acum_tbtw += np.rint(predictions[2][0][0] * TBTW['max_tbtw'])

        prefix['ac_suf_pred'] = ac_suf
        prefix['rl_suf_pred'] = rl_suf
        prefix['rem_time_pred'] = acum_tbtw
        # sup.print_progress((((case+1) / num_cases)* 100), 'Generating process traces ')
        # case += 1
    sup.print_done_task()
    return prefixes
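The 'Random Choice' branch samples the next index from the network's output distribution; np.random.choice with the p argument does exactly that. A tiny self-contained demo:

import numpy as np

probs = np.array([0.1, 0.7, 0.2])   # softmax output over 3 activities
pos = np.random.choice(np.arange(len(probs)), p=probs)
print(pos)                          # 1 about 70% of the time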
Code example #8
    def get_xes_events_data(self, filename, start_timeformat, end_timeformat, ns_include, one_timestamp):
        """Reads and parses all the event information from a XES file."""
        temp_data = list()
        tree = ET.parse(filename)
        root = tree.getroot()
        if ns_include:
            # TODO: review how to load the namespace automatically from the root
            ns = {'xes': root.tag.split('}')[0].strip('{')}
            tags = dict(trace='xes:trace',string='xes:string',event='xes:event',date='xes:date')
        else:
            ns = {'xes':''}
            tags = dict(trace='trace',string='string',event='event',date='date')
        traces = root.findall(tags['trace'], ns)
        i = 0
        sup.print_performed_task('Reading log traces ')
        for trace in traces:
#            sup.print_progress(((i / (len(traces) - 1)) * 100), 'Reading log traces ')
            caseid = ''
            for string in trace.findall(tags['string'], ns):
                if string.attrib['key'] == 'concept:name':
                    caseid = string.attrib['value']
            for event in trace.findall(tags['event'], ns):
                task = ''
                user = ''
                event_type = ''
                complete_timestamp = ''
                for string in event.findall(tags['string'], ns):
                    if string.attrib['key'] == 'concept:name':
                        task = string.attrib['value']                        
                    if string.attrib['key'] == 'org:resource':
                        user = string.attrib['value']
                    if string.attrib['key'] == 'lifecycle:transition':
                        event_type = string.attrib['value'].lower()
                    if string.attrib['key'] == 'Complete_Timestamp':
                        complete_timestamp = string.attrib['value']
                        if complete_timestamp != 'End':
                            complete_timestamp = datetime.datetime.strptime(complete_timestamp, end_timeformat)
                timestamp = ''
                for date in event.findall(tags['date'], ns):
                    if date.attrib['key'] == 'time:timestamp':
                        timestamp = date.attrib['value']
                        try:
                            timestamp = datetime.datetime.strptime(timestamp[:-6], start_timeformat)
                        except ValueError:
                            timestamp = datetime.datetime.strptime(timestamp, start_timeformat)
                if task not in ('0', '-1'):
                    temp_data.append(
                        dict(caseid=caseid, task=task, event_type=event_type, user=user, start_timestamp=timestamp,
                             end_timestamp=complete_timestamp))
            i += 1
        raw_data = temp_data
        temp_data = self.reorder_xes(temp_data, one_timestamp)
        sup.print_done_task()
        return temp_data, raw_data
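The namespace handling above splits the Clark-notation tag ('{uri}localname') that ElementTree produces; a standalone sketch:

import xml.etree.ElementTree as ET

root = ET.fromstring('<log xmlns="http://www.xes-standard.org/"><trace/></log>')
ns = {'xes': root.tag.split('}')[0].strip('{')}
print(ns)                              # {'xes': 'http://www.xes-standard.org/'}
print(root.findall('xes:trace', ns))   # the one trace element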
Code example #9
def predict(model, prefixes, ac_alias, rl_alias, imp):
    """Generate business process suffixes using a keras trained model.
    Args:
        model (keras model): keras trained model.
        prefixes (list): list of prefixes.
        ac_alias (dict): aliases of the activities.
        rl_alias (dict): aliases of the roles.
        imp (str): method of next event selection.
    """
    # Generation of predictions
    for prefix in prefixes:

        # Activities and roles input shape(1,5)
        x_ac_ngram = np.append(np.zeros(DIM['time_dim']),
                               np.array(prefix['ac_pref']),
                               axis=0)[-DIM['time_dim']:].reshape(
                                   (1, DIM['time_dim']))

        x_rl_ngram = np.append(np.zeros(DIM['time_dim']),
                               np.array(prefix['rl_pref']),
                               axis=0)[-DIM['time_dim']:].reshape(
                                   (1, DIM['time_dim']))

        # times input shape(1,5,1)
        x_t_ngram = np.array([
            np.append(np.zeros(DIM['time_dim']),
                      np.array(prefix['t_pref']),
                      axis=0)[-DIM['time_dim']:].reshape((DIM['time_dim'], 1))
        ])

        predictions = model.predict([x_ac_ngram, x_rl_ngram, x_t_ngram])
        if imp == 'Random Choice':
            # Sample the next index using the predictions as a PDF
            pos = np.random.choice(np.arange(0, len(predictions[0][0])),
                                   p=predictions[0][0])
            pos1 = np.random.choice(np.arange(0, len(predictions[1][0])),
                                    p=predictions[1][0])
        elif imp == 'Arg Max':
            # Take the index with the highest predicted probability
            pos = np.argmax(predictions[0][0])
            pos1 = np.argmax(predictions[1][0])
        # Activities accuracy evaluation
        if pos == prefix['ac_next']:
            prefix['ac_true'] = 1
        else:
            prefix['ac_true'] = 0
        # Roles accuracy evaluation
        if pos1 == prefix['rl_next']:
            prefix['rl_true'] = 1
        else:
            prefix['rl_true'] = 0
    sup.print_done_task()
    return prefixes
Code example #10
def role_discovery(data, drawing, sim_percentage):
    tasks = list(set(list(map(lambda x: x[0], data))))
    try:
        tasks.remove('Start')
    except Exception:
        pass
    tasks = [dict(index=i, data=tasks[i]) for i in range(0, len(tasks))]
    users = list(set(list(map(lambda x: x[1], data))))
    try:
        users.remove('Start')
    except Exception:
        pass
    users = [dict(index=i, data=users[i]) for i in range(0, len(users))]
    data_transform = list(
        map(lambda x: [find_index(tasks, x[0]),
                       find_index(users, x[1])], data))
    unique = list(set(tuple(i) for i in data_transform))
    unique = [list(i) for i in unique]
    # [print(uni) for uni in users]
    # building of a task-size profile of task execution per resource
    profiles = build_profile(users, det_freq_matrix(unique, data_transform),
                             len(tasks))
    sup.print_performed_task('Analysing resource pool ')
    #    sup.print_progress(((20 / 100)* 100),'Analysing resource pool ')
    # building of a correlation matrix between resources' profiles
    correlation_matrix = det_correlation_matrix(profiles)
    #    sup.print_progress(((40 / 100)* 100),'Analysing resource pool ')
    # creation of a relation network between resources
    g = nx.Graph()
    for user in users:
        g.add_node(user['index'])
    for relation in correlation_matrix:
        # creation of edges between nodes, excluding self-correlations
        # and pairs below the sim_percentage similarity threshold
        if relation['distance'] > sim_percentage and relation['x'] != relation[
                'y']:
            g.add_edge(relation['x'],
                       relation['y'],
                       weight=relation['distance'])
    #    sup.print_progress(((60 / 100)* 100),'Analysing resource pool ')
    # extraction of fully connected subgraphs as roles
    sub_graphs = list(nx.connected_component_subgraphs(g))
    #    sup.print_progress(((80 / 100)* 100),'Analysing resource pool ')
    # role definition from graph
    roles = role_definition(sub_graphs, users)
    # plot creation (optional)
    if drawing:
        graph_network(g, sub_graphs)
    #    sup.print_progress(((100 / 100)* 100),'Analysing resource pool ')
    sup.print_done_task()
    return roles
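nx.connected_component_subgraphs was deprecated in NetworkX 2.1 and removed in 2.4, so this example only runs on older NetworkX versions. The documented replacement:

import networkx as nx

g = nx.Graph([(1, 2), (3, 4)])
sub_graphs = [g.subgraph(c).copy() for c in nx.connected_components(g)]
print([sorted(s.nodes) for s in sub_graphs])   # [[1, 2], [3, 4]]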
Code example #11
    def get_mxml_events_data(self, filename, start_timeformat, end_timeformat):
        """read and parse all the events information from a MXML file"""
        temp_data = list()
        tree = ET.parse(filename)
        root = tree.getroot()
        process = root.find('Process')
        procInstas = process.findall('ProcessInstance')
        i = 0
        for procIns in procInstas:
            sup.print_progress(((i / (len(procInstas) - 1)) * 100),
                               'Reading log traces ')
            caseid = procIns.get('id')
            complete_timestamp = ''
            auditTrail = procIns.findall('AuditTrailEntry')
            for trail in auditTrail:
                task = ''
                user = ''
                event_type = ''
                type_task = ''
                timestamp = ''
                attributes = trail.find('Data').findall('Attribute')
                for attr in attributes:
                    if (attr.get('name') == 'concept:name'):
                        task = attr.text
                    if (attr.get('name') == 'lifecycle:transition'):
                        event_type = attr.text
                    if (attr.get('name') == 'org:resource'):
                        user = attr.text
                    if (attr.get('name') == 'type_task'):
                        type_task = attr.text
                work_flow_ele = trail.find('WorkflowModelElement').text
                event_type = trail.find('EventType').text
                timestamp = trail.find('Timestamp').text
                originator = trail.find('Originator').text
                timestamp = datetime.datetime.strptime(
                    trail.find('Timestamp').text[:-6], start_timeformat)
                temp_data.append(
                    dict(caseid=caseid,
                         task=task,
                         event_type=event_type,
                         user=user,
                         start_timestamp=timestamp,
                         end_timestamp=timestamp))

            i += 1
        raw_data = temp_data
        temp_data = self.reorder_mxml(temp_data)
        sup.print_done_task()
        return temp_data, raw_data
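The [:-6] slice drops a '+HH:MM' UTC offset before parsing. Since Python 3.7, strptime's %z directive accepts the colon-separated offset directly, so the slice can be avoided (a sketch of the alternative, not what Simod does):

import datetime

ts = '2019-01-01T10:00:00+01:00'
parsed = datetime.datetime.strptime(ts, '%Y-%m-%dT%H:%M:%S%z')
print(parsed.utcoffset())   # 1:00:00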
Code example #12
    def _predict_event_log_shared_cat(self, parms):
        """Generate business process traces using a keras trained model.
        Args:
            model (keras model): keras trained model.
            imp (str): method of next event selection.
            num_cases (int): number of traces to generate.
            max_trace_size (int): max size of the trace
        """
        sup.print_performed_task('Generating traces')
        generated_event_log = list()
        for case in range(0, parms['num_cases']):
            x_trace = list()
            x_ac_ngram = np.zeros(
                (1, parms['dim']['time_dim']), dtype=np.float32)
            x_rl_ngram = np.zeros(
                (1, parms['dim']['time_dim']), dtype=np.float32)
            x_t_ngram = np.zeros(
                (1, parms['dim']['time_dim'], 1), dtype=np.float32)
            # TODO: add intercase support
            for _ in range(1, self.max_trace_size):
                predictions = self.model.predict([x_ac_ngram, x_rl_ngram, x_t_ngram])
                if self.imp == 'Random Choice':
                    # Sample the next index using the predictions as a PDF
                    pos = np.random.choice(
                        np.arange(0, len(predictions[0][0])),
                        p=predictions[0][0])
                    pos1 = np.random.choice(
                        np.arange(0, len(predictions[1][0])),
                        p=predictions[1][0])
                elif self.imp == 'Arg Max':
                    # Take the index with the highest predicted probability
                    pos = np.argmax(predictions[0][0])
                    pos1 = np.argmax(predictions[1][0])
                x_trace.append([pos, pos1, predictions[2][0][0]])
                # Add prediction to n-gram
                x_ac_ngram = np.append(x_ac_ngram, [[pos]], axis=1)
                x_ac_ngram = np.delete(x_ac_ngram, 0, 1)
                x_rl_ngram = np.append(x_rl_ngram, [[pos1]], axis=1)
                x_rl_ngram = np.delete(x_rl_ngram, 0, 1)
                x_t_ngram = np.append(x_t_ngram, [predictions[2]], axis=1)
                x_t_ngram = np.delete(x_t_ngram, 0, 1)

                # Stop if the next prediction is the end of the trace,
                # otherwise continue until max_trace_size
                if parms['index_ac'][pos] == 'end':
                    break
            generated_event_log.extend(self.decode_trace(parms, x_trace, case))
        sup.print_done_task()
        return generated_event_log
Code example #13
def define_probabilities(process_graph, bpmn, log, type):
    # Analysis of gateway probabilities
    if type == 1:
        gateways = analize_gateways(process_graph, log)
    elif type == 2:
        gateways = analize_gateways_random(process_graph, log)
    elif type == 3:
        gateways = analize_gateways_equi(process_graph, log)
    # Creating response list
    response = list()
    gateways = normalize_probabilities(process_graph, gateways)
    for gateway in gateways:
        # print("gateway prob", process_graph.node[gateway['gate']]['id'])
        gatewayId = process_graph.node[gateway['gate']]['id']
        for path in gateway['targets']:
            sequence_id = bpmn.find_sequence_id(process_graph.node[gateway['gate']]['id'],
                                                process_graph.node[path['out_node']]['id'])
            response.append(dict(gatewayid=gatewayId,
                                 elementid=sequence_id,
                                 prob=path['probability']))
    sup.print_done_task()
    return response
Code example #14
File: role_discovery.py Project: dtdi/Simod
 def discover_roles(self):
     associations = lambda x: (self.tasks[x['task']], self.users[x['user']])
     self.data['ac_rl'] = self.data.apply(associations, axis=1)
 
     freq_matrix = (self.data.groupby(by='ac_rl')['task']
                    .count()
                    .reset_index()
                    .rename(columns={'task': 'freq'}))
     freq_matrix = {x['ac_rl']: x['freq'] for x in freq_matrix.to_dict('records')}
     
     profiles = self.build_profile(freq_matrix)
 
     sup.print_progress(((20 / 100)* 100),'Analysing resource pool ')
     # building of a correlation matrix between resources' profiles
     correl_matrix = self.det_correl_matrix(profiles)
     sup.print_progress(((40 / 100)* 100),'Analysing resource pool ')
     # creation of a relation network between resources
     g = nx.Graph()
     for user in self.users.values():
         g.add_node(user)
     for rel in correl_matrix:
         # creation of edges between nodes excluding the same elements
         # and those below the similarity threshold 
         if rel['distance'] > self.sim_threshold and rel['x'] != rel['y']:
             g.add_edge(rel['x'],
                        rel['y'],
                        weight=rel['distance'])
     sup.print_progress(((60 / 100) * 100),'Analysing resource pool ')
     # extraction of fully connected subgraphs as roles
     sub_graphs = list(nx.connected_component_subgraphs(g))
     sup.print_progress(((80 / 100) * 100),'Analysing resource pool ')
     # role definition from graph
     roles = self.role_definition(sub_graphs)
     # plot creation (optional)
     # if drawing == True:
     #     graph_network(g, sub_graphs)
     sup.print_progress(((100 / 100)* 100),'Analysing resource pool ')
     sup.print_done_task()
     return roles
Code example #15
File: traces_alignment.py Project: AndreRdz7/Simod
def align_traces(log, settings):
    """this method is the kernel of all the alignment process"""
    evaluate_alignment(settings)
    optimal_alignments = read_alignment_info(settings['aligninfo'])
    traces_alignments = traces_alignment_type(settings['aligntype'])
    raw_traces = log.get_raw_traces()
    aligned_traces = list()
    i = 0
    size = len(raw_traces)
    for raw_trace in raw_traces:
        try:
            # Alignment of each trace
            aligned_trace = process_trace(raw_trace, optimal_alignments, traces_alignments)
            # Conformity check and reformatting
            aligned_trace = trace_verification(aligned_trace, raw_trace)
            if aligned_trace:
                aligned_traces.extend(aligned_trace)
        except Exception as e:
            print(str(e))
        sup.print_progress(((i / (size-1))* 100),'Aligning log traces with model ')
        i += 1
    sup.print_done_task()
    return aligned_traces
Code example #16
    def define_probabilities(self) -> None:
        """
        Defines the gateways' probabilities according with an spcified method

        """
        sup.print_performed_task('Analysing gateways` probabilities')
        # Analisys of gateways probabilities
        if self.method == 'discovery':
            gateways = self.analize_gateways()
        elif self.method == 'random':
            gateways = self.analize_gateways_random()
        elif self.method == 'equiprobable':
            gateways = self.analize_gateways_equi()
        # Fix 0 probabilities and float error sums
        gateways = self.normalize_probabilities(gateways)
        # Creating response list
        gids = lambda x: self.process_graph.node[x['gate']]['id']
        gateways['gatewayid'] = gateways.apply(gids, axis=1)
        gids = lambda x: self.process_graph.node[x['t_path']]['id']
        gateways['out_path_id'] = gateways.apply(gids, axis=1)
        self.probabilities = gateways[['gatewayid', 'out_path_id',
                                       'prob']].to_dict('records')
        sup.print_done_task()
Code example #17
File: log_replayer.py Project: Zohaib94/Simod
 def replay(self) -> None:
     """
     Replays the event-log traces over the BPMN model
     """
     for index in range(0, len(self.traces)):
         t_times = list()
         trace = self.traces[index][1:-1]  # remove start and end event
         # Check if it is a complete trace
         current_node = self.find_task_node(self.model, trace[0]['task'])
         last_node = self.find_task_node(self.model, trace[-1]['task'])
         if current_node not in self.start_tasks_list:
             self.not_conformant_traces.append(trace)
             continue
         if last_node not in self.end_tasks_list:
             self.not_conformant_traces.append(trace)
             continue
         # Initialize
         temp_gt_exec = self.parallel_gt_exec
         cursor = [current_node]
         remove = True
         is_conformant = True
         # ----time recording------
         t_times = self.save_record(t_times, trace, 0)
         # ------------------------
         for i in range(1, len(trace)):
             nnode = self.find_task_node(self.model, trace[i]['task'])
             # Self-loop management
             if nnode == cursor[-1]:
                 t_times = self.save_record(t_times, trace, i, nnode)
                 self.model.node[nnode]['executions'] += 1
                 continue
             try:
                 cursor, pnode = self.update_cursor(nnode, self.model,
                                                    cursor)
                 # ----time recording------
                 t_times = self.save_record(t_times, trace, i, pnode)
                 self.model.node[nnode]['executions'] += 1
                 # ------------------------
             except Exception:
                 is_conformant = False
                 break
             for element in reversed(cursor[:-1]):
                 element_type = self.model.node[element]['type']
                 # Process AND
                 if element_type == 'gate3':
                     gate = [
                         d for d in temp_gt_exec if d['nod_num'] == element
                     ][0]
                     gate.update({'executed': gate['executed'] + 1})
                     if gate['executed'] < gate['num_paths']:
                         remove = False
                     else:
                         remove = True
                         cursor.remove(element)
                 # Process Task
                 elif element_type == 'task':
                     if (element, nnode) in self.subsec_set and remove:
                         cursor.remove(element)
                 # Process other
                 elif remove:
                     cursor.remove(element)
         if is_conformant:
             # Append the original one
             self.conformant_traces.extend(self.traces[index])
             self.process_stats.extend(t_times)
         else:
             self.not_conformant_traces.extend(trace)
         sup.print_progress(((index / (len(self.traces) - 1)) * 100),
                            'Replaying process traces ')
     self.calculate_process_metrics()
     sup.print_done_task()
Code example #18
    def _predict_suffix_shared_cat(self, parms):
        """Generate business process suffixes using a keras trained model.
        Args:
            model (keras model): keras trained model.
            prefixes (list): list of prefixes.
            ac_index (dict): index of activities.
            rl_index (dict): index of roles.
            imp (str): method of next event selection.
        """
        # Generation of predictions
        results = list()
        for i, _ in enumerate(self.spl['prefixes']['activities']):
            # Activities and roles input shape(1,5)
            x_ac_ngram = np.append(np.zeros(parms['dim']['time_dim']),
                                   np.array(
                                       self.spl['prefixes']['activities'][i]),
                                   axis=0)[-parms['dim']['time_dim']:].reshape(
                                       (1, parms['dim']['time_dim']))

            x_rl_ngram = np.append(np.zeros(parms['dim']['time_dim']),
                                   np.array(self.spl['prefixes']['roles'][i]),
                                   axis=0)[-parms['dim']['time_dim']:].reshape(
                                       (1, parms['dim']['time_dim']))

            # Times input shape(1,5,1)
            x_t_ngram = np.array([
                np.append(np.zeros(parms['dim']['time_dim']),
                          np.array(self.spl['prefixes']['times'][i]),
                          axis=0)[-parms['dim']['time_dim']:].reshape(
                              (parms['dim']['time_dim'], 1))
            ])
            if parms['model_type'] in ['shared_cat', 'cnn_lstm']:
                inputs = [x_ac_ngram, x_rl_ngram, x_t_ngram]
            elif parms['model_type'] in [
                    'shared_cat_inter', 'shared_cat_inter_full',
                    'shared_cat_rd', 'shared_cat_wl', 'shared_cat_cx',
                    'cnn_lstm_inter', 'cnn_lstm_inter_full', 'shared_cat_city',
                    'shared_cat_snap'
            ]:
                inter_attr_num = self.spl['prefixes']['inter_attr'][i].shape[1]
                x_inter_ngram = np.array([
                    np.append(np.zeros(
                        (parms['dim']['time_dim'], inter_attr_num)),
                              self.spl['prefixes']['inter_attr'][i],
                              axis=0)[-parms['dim']['time_dim']:].reshape(
                                  (parms['dim']['time_dim'], inter_attr_num))
                ])
                inputs = [x_ac_ngram, x_rl_ngram, x_t_ngram, x_inter_ngram]

            pref_size = len(self.spl['prefixes']['activities'][i])
            acum_dur = list()
            ac_suf, rl_suf = list(), list()
            for _ in range(1, self.max_trace_size):
                predictions = self.model.predict(inputs)
                if self.imp == 'Random Choice':
                    # Sample the next index using the predictions as a PDF
                    pos = np.random.choice(np.arange(0,
                                                     len(predictions[0][0])),
                                           p=predictions[0][0])
                    pos1 = np.random.choice(np.arange(0,
                                                      len(predictions[1][0])),
                                            p=predictions[1][0])
                elif self.imp == 'Arg Max':
                    # Take the index with the highest predicted probability
                    pos = np.argmax(predictions[0][0])
                    pos1 = np.argmax(predictions[1][0])
                # Add the prediction to the n-grams
                x_ac_ngram = np.append(x_ac_ngram, [[pos]], axis=1)
                x_ac_ngram = np.delete(x_ac_ngram, 0, 1)
                x_rl_ngram = np.append(x_rl_ngram, [[pos1]], axis=1)
                x_rl_ngram = np.delete(x_rl_ngram, 0, 1)
                x_t_ngram = np.append(x_t_ngram, [predictions[2]], axis=1)
                x_t_ngram = np.delete(x_t_ngram, 0, 1)
                if parms['model_type'] in ['shared_cat', 'cnn_lstm']:
                    inputs = [x_ac_ngram, x_rl_ngram, x_t_ngram]
                elif parms['model_type'] in [
                        'shared_cat_inter', 'shared_cat_inter_full',
                        'shared_cat_rd', 'shared_cat_wl', 'shared_cat_cx',
                        'cnn_lstm_inter', 'cnn_lstm_inter_full',
                        'shared_cat_city', 'shared_cat_snap'
                ]:
                    x_inter_ngram = np.append(x_inter_ngram, [predictions[3]],
                                              axis=1)
                    x_inter_ngram = np.delete(x_inter_ngram, 0, 1)
                    inputs = [x_ac_ngram, x_rl_ngram, x_t_ngram, x_inter_ngram]
                # Stop if the next prediction is the end of the trace,
                # otherwise continue until max_trace_size
                ac_suf.append(pos)
                rl_suf.append(pos1)
                acum_dur.append(self.rescale(predictions[2][0][0], parms))
                if parms['index_ac'][pos] == 'end':
                    break
            results.append({
                'ac_pref': self.spl['prefixes']['activities'][i],
                'ac_pred': ac_suf,
                'ac_expect': self.spl['suffixes']['activities'][i],
                'rl_pref': self.spl['prefixes']['roles'][i],
                'rl_pred': rl_suf,
                'rl_expect': self.spl['suffixes']['roles'][i],
                'tm_pref': [self.rescale(x, parms)
                            for x in self.spl['prefixes']['times'][i]],
                'tm_pred': acum_dur,
                'tm_expect': [self.rescale(x, parms)
                              for x in self.spl['suffixes']['times'][i]],
                'pref_size': pref_size
            })
        sup.print_done_task()
        return results
Code example #19
    def _predict_suffix_seq2seq(self, parms):
        """Generate business process suffixes using a keras trained model.
        Args:
            model (keras model): keras trained model.
            prefixes (list): list of prefixes.
            ac_index (dict): index of activities.
            rl_index (dict): index of roles.
            imp (str): method of next event selection.
        """
        # Generation of predictions
        results = list()
        for i in range(0, len(self.spl['prefixes']['activities'])):
            act_prefix = self.spl['prefixes']['activities'][i].reshape(
                (1, self.spl['prefixes']['activities'][i].shape[0]))
            rl_prefix = self.spl['prefixes']['roles'][i].reshape(
                (1, self.spl['prefixes']['roles'][i].shape[0]))
            times_prefix = self.spl['prefixes']['times'][i].reshape(
                (1, self.spl['prefixes']['times'][i].shape[0],
                 self.spl['prefixes']['times'][i].shape[1]))
            if parms['model_type'] == 'seq2seq':
                inputs = [act_prefix, rl_prefix, times_prefix]
            elif parms['model_type'] == 'seq2seq_inter':
                inter_prefix = self.spl['prefixes']['inter_attr'][i].reshape(
                    (1, self.spl['prefixes']['inter_attr'][i].shape[0],
                     self.spl['prefixes']['inter_attr'][i].shape[1]))
                inputs = [act_prefix, rl_prefix, times_prefix, inter_prefix]

            pref_size = len([
                x for x in self.spl['prefixes']['activities'][i][1:] if x > 0
            ])
            predictions = self.model.predict(inputs)
            if self.imp == 'Random Choice':
                # Sample each next index using the predictions as a PDF
                act_pred = [
                    np.random.choice(np.arange(0, len(x)), p=x)
                    for x in predictions[0][0]
                ]
                rl_pred = [
                    np.random.choice(np.arange(0, len(x)), p=x)
                    for x in predictions[1][0]
                ]
            elif self.imp == 'Arg Max':
                # Take the index with the highest predicted probability
                act_pred = [np.argmax(x) for x in predictions[0][0]]
                rl_pred = [np.argmax(x) for x in predictions[1][0]]
            # Activities and roles accuracy evaluation; results is a list,
            # so the flags are stored with the record appended below
            ac_true = 1 if act_pred[0] == self.spl['suffixes']['activities'][i][0] else 0
            rl_true = 1 if rl_pred[0] == self.spl['suffixes']['roles'][i][0] else 0
            # Activities suffixes
            idx = self.define_pred_index(act_pred, parms)
            act_pred = act_pred[:idx]
            rl_pred = rl_pred[:idx]
            time_pred = predictions[2][0][:idx]
            if parms['norm_method'] == 'lognorm':
                time_pred = np.expm1(np.multiply(time_pred, parms['max_dur']))
            else:
                time_pred = np.rint(np.multiply(time_pred, parms['max_dur']))

            time_expected = 0
            if parms['norm_method'] == 'lognorm':
                time_expected = np.expm1(
                    np.multiply(self.spl['suffixes']['times'][i],
                                parms['max_dur']))
            else:
                time_expected = np.rint(
                    np.multiply(self.spl['suffixes']['times'][i],
                                parms['max_dur']))
            # Append results
            results.append({
                'ac_pref': self.spl['prefixes']['activities'][i],
                'ac_pred': act_pred,
                'ac_expec': self.spl['suffixes']['activities'][i],
                'ac_true': ac_true,
                'rl_pref': self.spl['prefixes']['roles'][i],
                'rl_pred': rl_pred,
                'rl_expec': self.spl['suffixes']['roles'][i],
                'rl_true': rl_true,
                'tm_pref': self.spl['prefixes']['times'][i],
                'tm_pred': time_pred,
                'tm_expect': time_expected,
                'pref_size': pref_size
            })
        sup.print_done_task()
        return results
Code example #20
def extract_parameters(log, bpmn, process_graph):
    if bpmn is not None and log is not None:
        bpmnId = bpmn.getProcessId()
        startEventId = bpmn.getStartEventId()
        # Creation of process graph
        #-------------------------------------------------------------------
        # Analysing resource pool LV917 or 247
        roles, resource_table = rl.read_resource_pool(log,
                                                      drawing=False,
                                                      sim_percentage=0.5)
        resource_pool, time_table, resource_table = sch.analize_schedules(
            resource_table, log, True, '247')
        #-------------------------------------------------------------------
        # Process replaying
        conformed_traces, not_conformed_traces, process_stats = rpl.replay(
            process_graph, log)
        # -------------------------------------------------------------------
        # Adding role to process stats
        for stat in process_stats:
            role = list(
                filter(lambda x: x['resource'] == stat['resource'],
                       resource_table))[0]['role']
            stat['role'] = role
        #-------------------------------------------------------------------
        # Determination of first tasks to calculate the arrival rate
        inter_arrival_times = arr.define_interarrival_tasks(
            process_graph, conformed_traces)
        arrival_rate_bimp = td.get_task_distribution(inter_arrival_times, 50)
        arrival_rate_bimp['startEventId'] = startEventId
        #-------------------------------------------------------------------
        # Gateways probabilities 1=Historical, 2=Random, 3=Equiprobable
        sequences = gt.define_probabilities(process_graph, bpmn, log, 1)
        #-------------------------------------------------------------------
        # Tasks id information
        elements_data = list()
        i = 0
        task_list = list(
            filter(lambda x: process_graph.node[x]['type'] == 'task',
                   list(nx.nodes(process_graph))))
        for task in task_list:
            task_name = process_graph.node[task]['name']
            task_id = process_graph.node[task]['id']
            values = list(
                filter(lambda x: x['task'] == task_name, process_stats))
            task_processing = [x['processing_time'] for x in values]
            dist = td.get_task_distribution(task_processing)
            max_role, max_count = '', 0
            role_sorted = sorted(values, key=lambda x: x['role'])
            for key2, group2 in itertools.groupby(role_sorted,
                                                  key=lambda x: x['role']):
                group_count = list(group2)
                if len(group_count) > max_count:
                    max_count = len(group_count)
                    max_role = key2
            elements_data.append(
                dict(id=sup.gen_id(),
                     elementid=task_id,
                     type=dist['dname'],
                     name=task_name,
                     mean=str(dist['dparams']['mean']),
                     arg1=str(dist['dparams']['arg1']),
                     arg2=str(dist['dparams']['arg2']),
                     resource=find_resource_id(resource_pool, max_role)))
            sup.print_progress(((i / (len(task_list) - 1)) * 100),
                               'Analysing tasks data ')
            i += 1
        sup.print_done_task()
        parameters = dict(arrival_rate=arrival_rate_bimp,
                          time_table=time_table,
                          resource_pool=resource_pool,
                          elements_data=elements_data,
                          sequences=sequences,
                          instances=len(conformed_traces),
                          bpmnId=bpmnId)
        return parameters, process_stats
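itertools.groupby only groups consecutive equal keys, which is why the code sorts by role before grouping. A minimal demonstration:

import itertools

values = [{'role': 'B'}, {'role': 'A'}, {'role': 'B'}]
srt = sorted(values, key=lambda x: x['role'])
for key, group in itertools.groupby(srt, key=lambda x: x['role']):
    print(key, len(list(group)))   # A 1, then B 2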
Code example #21
    def _predict_next_event_shared_cat(self, parameters):
        """Generate business process suffixes using a keras trained model.
        Args:
            model (keras model): keras trained model.
            prefixes (list): list of prefixes.
            ac_index (dict): index of activities.
            rl_index (dict): index of roles.
            imp (str): method of next event selection.
        """
        # Generation of predictions
        results = list()
        for i, _ in enumerate(self.spl['prefixes']['activities']):
            # Activities and roles input shape(1,5)
            x_ac_ngram = (np.append(
                np.zeros(parameters['dim']['time_dim']),
                np.array(self.spl['prefixes']['activities'][i]),
                axis=0)[-parameters['dim']['time_dim']:].reshape(
                    (1, parameters['dim']['time_dim'])))

            x_rl_ngram = (np.append(
                np.zeros(parameters['dim']['time_dim']),
                np.array(self.spl['prefixes']['roles'][i]),
                axis=0)[-parameters['dim']['time_dim']:].reshape(
                    (1, parameters['dim']['time_dim'])))

            # times input shape(1,5,1)
            x_t_ngram = (np.array([
                np.append(np.zeros(parameters['dim']['time_dim']),
                          np.array(self.spl['prefixes']['times'][i]),
                          axis=0)[-parameters['dim']['time_dim']:].reshape(
                              (parameters['dim']['time_dim'], 1))
            ]))
            # add intercase features if necessary
            if parameters['model_type'] in ['shared_cat', 'cnn_lstm']:
                inputs = [x_ac_ngram, x_rl_ngram, x_t_ngram]
            elif parameters['model_type'] in [
                    'shared_cat_inter', 'shared_cat_inter_full',
                    'shared_cat_rd', 'shared_cat_wl', 'shared_cat_cx',
                    'cnn_lstm_inter', 'cnn_lstm_inter_full', 'shared_cat_city',
                    'shared_cat_snap'
            ]:
                # times input shape(1,5,1)
                inter_attr_num = (
                    self.spl['prefixes']['inter_attr'][i].shape[1])
                x_inter_ngram = np.array([
                    np.append(np.zeros(
                        (parameters['dim']['time_dim'], inter_attr_num)),
                              self.spl['prefixes']['inter_attr'][i],
                              axis=0)[-parameters['dim']['time_dim']:].reshape(
                                  (parameters['dim']['time_dim'],
                                   inter_attr_num))
                ])
                inputs = [x_ac_ngram, x_rl_ngram, x_t_ngram, x_inter_ngram]
            # predict
            predictions = self.model.predict(inputs)
            if self.imp == 'Random Choice':
                # Sample the next index using the predictions as a PDF
                pos = np.random.choice(np.arange(0, len(predictions[0][0])),
                                       p=predictions[0][0])
                pos1 = np.random.choice(np.arange(0, len(predictions[1][0])),
                                        p=predictions[1][0])
            elif self.imp == 'Arg Max':
                # Take the index with the highest predicted probability
                pos = np.argmax(predictions[0][0])
                pos1 = np.argmax(predictions[1][0])

            # save results
            results.append({
                'ac_prefix': self.spl['prefixes']['activities'][i],
                'ac_expect': self.spl['next_evt']['activities'][i],
                'ac_pred': pos,
                'rl_prefix': self.spl['prefixes']['roles'][i],
                'rl_expect': self.spl['next_evt']['roles'][i],
                'rl_pred': pos1,
                'tm_prefix': [self.rescale(x, parameters)
                              for x in self.spl['prefixes']['times'][i]],
                'tm_expect': self.rescale(self.spl['next_evt']['times'][i],
                                          parameters),
                'tm_pred': self.rescale(predictions[2][0][0], parameters)
            })
        sup.print_done_task()
        return results
Code example #22
File: log_reader.py Project: dtdi/Simod
 def get_xes_events_data(self):
     """
     reads and parse all the events information from a xes file
     """
     temp_data = list()
     tree = ET.parse(self.input)
     root = tree.getroot()
     if self.ns_include:
         ns = {'xes': root.tag.split('}')[0].strip('{')}
         tags = dict(trace='xes:trace',
                     string='xes:string',
                     event='xes:event',
                     date='xes:date')
     else:
         ns = {'xes': ''}
         tags = dict(trace='trace',
                     string='string',
                     event='event',
                     date='date')
     traces = root.findall(tags['trace'], ns)
     i = 0
     sup.print_performed_task('Reading log traces ')
     for trace in traces:
         temp_trace = list()
         caseid = ''
         for string in trace.findall(tags['string'], ns):
             if string.attrib['key'] == 'concept:name':
                 caseid = string.attrib['value']
         for event in trace.findall(tags['event'], ns):
             task = ''
             user = ''
             event_type = ''
             for string in event.findall(tags['string'], ns):
                 if string.attrib['key'] == 'concept:name':
                     task = string.attrib['value']
                 if string.attrib['key'] == 'org:resource':
                     user = string.attrib['value']
                 if string.attrib['key'] == 'lifecycle:transition':
                     event_type = string.attrib['value'].lower()
             timestamp = ''
             for date in event.findall(tags['date'], ns):
                 if date.attrib['key'] == 'time:timestamp':
                     timestamp = date.attrib['value']
                     try:
                         timestamp = datetime.datetime.strptime(
                             timestamp[:-6], self.timeformat)
                     except ValueError:
                         timestamp = datetime.datetime.strptime(
                             timestamp, self.timeformat)
             # By default remove Start and End events;
             # standardized ones will be added later
             if task not in ['0', '-1', 'Start', 'End', 'start', 'end']:
                 if ((not self.one_timestamp) or
                     (self.one_timestamp and event_type == 'complete')):
                     temp_trace.append(
                         dict(caseid=caseid,
                              task=task,
                              event_type=event_type,
                              user=user,
                              timestamp=timestamp))
         if temp_trace:
             temp_trace = self.append_xes_start_end(temp_trace)
         temp_data.extend(temp_trace)
         i += 1
     self.raw_data = temp_data
     self.data = self.reorder_xes(temp_data)
     sup.print_done_task()
Code example #23
def replay(process_graph, log, source='log', run_num=0):
    subsec_set = create_subsec_set(process_graph)
    parallel_gt_exec = parallel_execution_list(process_graph)
    not_conformant_traces = list()
    conformant_traces = list()
    process_stats = list()
    traces = log.get_traces()
    for index in range(0,len(traces)):
        trace_times = list()
        trace = traces[index]
        temp_gt_exec = parallel_gt_exec
        cursor = list()
        current_node = find_task_node(process_graph,trace[0]['task'])
        cursor.append(current_node)
        removal_allowed = True
        is_conformant = True
        #----time recording------
        trace_times.append(create_record(trace, 0))
        #------------------------
        for i in range(1, len(trace)):
            next_node = find_task_node(process_graph,trace[i]['task'])
            # Self-loop management
            if next_node == cursor[-1]:
                prev_record = find_previous_record(trace_times, process_graph.node[next_node]['name'])
                trace_times.append(create_record(trace, i, prev_record))
                process_graph.node[next_node]['executions'] += 1
            else:
                try:
                    cursor, prev_node = update_cursor(next_node, process_graph, cursor)
                    #----time recording------
                    prev_record = find_previous_record(trace_times, process_graph.node[prev_node]['name'])
                    trace_times.append(create_record(trace, i, prev_record))
                    process_graph.node[next_node]['executions'] += 1
                    #------------------------
                except Exception:
                    is_conformant = False
                    break
                for element in reversed(cursor[:-1]):
                    # Process AND
                    if process_graph.node[element]['type'] == 'gate3':
                        gate = [d for d in temp_gt_exec if d['nod_num'] == element][0]
                        gate.update(dict(executed= gate['executed'] + 1))
                        if gate['executed'] < gate['num_paths']:
                            removal_allowed = False
                        else:
                            removal_allowed = True
                            cursor.remove(element)
                    # Process Task
                    elif process_graph.node[element]['type'] == 'task':
                        if (element,next_node) in subsec_set:
                            if removal_allowed:
                                cursor.remove(element)
                    # Process other
                    else:
                        if removal_allowed:
                            cursor.remove(element)
        if not is_conformant:
            not_conformant_traces.append(trace)
        else:
            conformant_traces.append(trace)
            process_stats.extend(trace_times)
        sup.print_progress(((index / (len(traces)-1))* 100),'Replaying process traces ')
    #------Filtering records and calculate stats---
    process_stats = list(filter(
        lambda x: x['task'] not in ('Start', 'End') and x['resource'] != 'AUTO',
        process_stats))
    process_stats = calculate_process_metrics(process_stats)
    [x.update(dict(source=source, run_num=run_num)) for x in process_stats]
    #----------------------------------------------
    sup.print_done_task()
    #------conformance percentage------------------
#    print('Conformance percentage: ' + str(sup.ffloat((len(conformant_traces)/len(traces)) * 100,2)) + '%')
    #----------------------------------------------
    return conformant_traces, not_conformant_traces, process_stats