Example No. 1
 def get_csv_events_data(self):
     """
     Reads and parses all the event information from a CSV file.
     """
     sup.print_performed_task('Reading log traces ')
     log = pd.read_csv(self.input, dtype={'user': str})
     if self.one_timestamp:
         self.column_names['Complete Timestamp'] = 'end_timestamp'
         log = log.rename(columns=self.column_names)
         log = log.astype({'caseid': object})
         log = (log[(log.task != 'Start') & (log.task != 'End')]
                .reset_index(drop=True))
         if self.filter_d_attrib:
             log = log[['caseid', 'task', 'user', 'end_timestamp']]
         log['end_timestamp'] = pd.to_datetime(log['end_timestamp'],
                                               format=self.timeformat)
     else:
         self.column_names['Start Timestamp'] = 'start_timestamp'
         self.column_names['Complete Timestamp'] = 'end_timestamp'
         log = log.rename(columns=self.column_names)
         log = log.astype({'caseid': object})
         log = (log[(log.task != 'Start') & (log.task != 'End')]
                .reset_index(drop=True))
         if self.filter_d_attrib:
             log = log[['caseid', 'task', 'user',
                        'start_timestamp', 'end_timestamp']]
         log['start_timestamp'] = pd.to_datetime(log['start_timestamp'],
                                                 format=self.timeformat)
         log['end_timestamp'] = pd.to_datetime(log['end_timestamp'],
                                               format=self.timeformat)
     # Assign back instead of using inplace fillna; inplace on a column
     # selection is deprecated and can fail silently in recent pandas.
     log['user'] = log['user'].fillna('SYS')
     self.data = log.to_dict('records')
     self.append_csv_start_end()
     self.split_event_transitions()
     sup.print_done_task()
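A minimal usage sketch (not from the source): the method relies on instance attributes set elsewhere in the class, namely `input`, `column_names`, `timeformat`, `one_timestamp`, and `filter_d_attrib`. The host class name, file path, and CSV column names below are hypothetical.

    reader = LogReader()                      # hypothetical host class
    reader.input = 'event_log.csv'            # hypothetical path
    reader.column_names = {'Case ID': 'caseid',
                           'Activity': 'task',
                           'Resource': 'user'}
    reader.timeformat = '%Y-%m-%d %H:%M:%S'   # must match the CSV
    reader.one_timestamp = True
    reader.filter_d_attrib = False
    reader.get_csv_events_data()              # populates reader.data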
Example No. 2
 def predict(self, params, model, spl, imp):
     self.model = model
     self.spl = spl
     self.imp = imp
     predictor = self._get_predictor(params['model_type'])
     sup.print_performed_task('Predicting next events')
     return predictor(params)
Example No. 3
 def predict(self, params, model, spl, imp):
     self.model = model
     self.spl = spl
     self.max_trace_size = params['max_trace_size']
     self.imp = imp
     predictor = self._get_predictor(params['model_type'])
     sup.print_performed_task('Predicting suffixes')
     return predictor(params)
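Both `predict` variants above delegate to `self._get_predictor(params['model_type'])`, which is not shown in these examples. A hedged sketch of what such a dispatcher typically looks like; the dictionary keys and handler names are hypothetical:

    def _get_predictor(self, model_type):
        # Hypothetical mapping from model type to a bound prediction method.
        dispatch = {'shared_cat': self._predict_shared_cat,
                    'seq2seq': self._predict_seq2seq}
        return dispatch[model_type]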
Example No. 4
    def get_xes_events_data(self, filename, start_timeformat,
                            end_timeformat, ns_include, one_timestamp):
        """reads and parse all the events information from a xes file"""
        temp_data = list()
        tree = ET.parse(filename)
        root = tree.getroot()
        if ns_include:
            # TODO: figure out how to load the namespace automatically from the root
            ns = {'xes': root.tag.split('}')[0].strip('{')}
            tags = dict(trace='xes:trace', string='xes:string',
                        event='xes:event', date='xes:date')
        else:
            ns = {'xes': ''}
            tags = dict(trace='trace', string='string',
                        event='event', date='date')
        traces = root.findall(tags['trace'], ns)
        i = 0
        sup.print_performed_task('Reading log traces ')
        for trace in traces:
#            sup.print_progress(((i / (len(traces) - 1)) * 100), 'Reading log traces ')
            caseid = ''
            for string in trace.findall(tags['string'], ns):
                if string.attrib['key'] == 'concept:name':
                    caseid = string.attrib['value']
            for event in trace.findall(tags['event'], ns):
                task = ''
                user = ''
                event_type = ''
                complete_timestamp = ''
                for string in event.findall(tags['string'], ns):
                    if string.attrib['key'] == 'concept:name':
                        task = string.attrib['value']                        
                    if string.attrib['key'] == 'org:resource':
                        user = string.attrib['value']
                    if string.attrib['key'] == 'lifecycle:transition':
                        event_type = string.attrib['value'].lower()
                    if string.attrib['key'] == 'Complete_Timestamp':
                        complete_timestamp = string.attrib['value']
                        if complete_timestamp != 'End':
                            complete_timestamp = datetime.datetime.strptime(complete_timestamp, end_timeformat)
                timestamp = ''
                for date in event.findall(tags['date'], ns):
                    if date.attrib['key'] == 'time:timestamp':
                        timestamp = date.attrib['value']
                        try:
                            timestamp = datetime.datetime.strptime(timestamp[:-6], start_timeformat)
                        except ValueError:
                            timestamp = datetime.datetime.strptime(timestamp, start_timeformat)
                if task not in ('0', '-1'):
                    temp_data.append(
                        dict(caseid=caseid, task=task, event_type=event_type, user=user, start_timestamp=timestamp,
                             end_timestamp=complete_timestamp))
            i += 1
        raw_data = temp_data
        temp_data = self.reorder_xes(temp_data, one_timestamp)
        sup.print_done_task()
        return temp_data, raw_data
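A hedged usage sketch, assuming an instance `reader` of the host class: the two time formats must match the log's string timestamps, and the `timestamp[:-6]` slice in the parser drops a trailing UTC-offset suffix such as '+02:00' before `strptime`. The file name and formats below are hypothetical.

    events, raw = reader.get_xes_events_data('log.xes',
                                             '%Y-%m-%dT%H:%M:%S',
                                             '%Y-%m-%dT%H:%M:%S',
                                             ns_include=True,
                                             one_timestamp=False)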
Example No. 5
def role_discovery(data, drawing, sim_percentage):
    """Discovers resource roles by correlating task-execution profiles."""
    tasks = list(set(map(lambda x: x[0], data)))
    try:
        tasks.remove('Start')
    except ValueError:
        pass
    tasks = [dict(index=i, data=tasks[i]) for i in range(len(tasks))]
    users = list(set(map(lambda x: x[1], data)))
    try:
        users.remove('Start')
    except ValueError:
        pass
    users = [dict(index=i, data=users[i]) for i in range(len(users))]
    data_transform = list(
        map(lambda x: [find_index(tasks, x[0]),
                       find_index(users, x[1])], data))
    unique = list(set(tuple(i) for i in data_transform))
    unique = [list(i) for i in unique]
    # building of a task-size profile of task execution per resource
    profiles = build_profile(users, det_freq_matrix(unique, data_transform),
                             len(tasks))
    sup.print_performed_task('Analysing resource pool ')
    #    sup.print_progress(((20 / 100)* 100),'Analysing resource pool ')
    # building of a correlation matrix between resources' profiles
    correlation_matrix = det_correlation_matrix(profiles)
    #    sup.print_progress(((40 / 100)* 100),'Analysing resource pool ')
    # creation of a relation network between resources
    g = nx.Graph()
    for user in users:
        g.add_node(user['index'])
    for relation in correlation_matrix:
        # creation of edges between nodes, excluding self-correlations
        # and those below the sim_percentage similarity threshold
        if (relation['distance'] > sim_percentage
                and relation['x'] != relation['y']):
            g.add_edge(relation['x'],
                       relation['y'],
                       weight=relation['distance'])
    # sup.print_progress(((60 / 100) * 100), 'Analysing resource pool ')
    # extraction of fully connected subgraphs as roles
    # NOTE: connected_component_subgraphs was removed in NetworkX 2.4;
    # build the subgraphs from connected_components instead.
    sub_graphs = [g.subgraph(c).copy() for c in nx.connected_components(g)]
    #    sup.print_progress(((80 / 100)* 100),'Analysing resource pool ')
    # role definition from graph
    roles = role_definition(sub_graphs, users)
    # plot creation (optional)
    if drawing:
        graph_network(g, sub_graphs)
    # sup.print_progress(((100 / 100) * 100), 'Analysing resource pool ')
    sup.print_done_task()
    return roles
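A hedged usage sketch, assuming the helper functions used above (`find_index`, `build_profile`, `det_freq_matrix`, `det_correlation_matrix`, `role_definition`, `graph_network`) are in scope. Each input record is a `[task, user]` pair; `sim_percentage` is the correlation threshold above which two resources land in the same role. The data below is hypothetical.

    # Hypothetical event data as [task, user] pairs.
    data = [['Register', 'alice'], ['Register', 'bob'],
            ['Approve', 'carol'], ['Approve', 'alice']]
    roles = role_discovery(data, drawing=False, sim_percentage=0.7)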
Example No. 6
    def _predict_event_log_shared_cat(self, parms):
        """Generate business process traces using a trained Keras model.

        Args:
            parms (dict): generation parameters; 'num_cases' sets how many
                traces to generate and 'dim' gives the model input sizes.
                The model, the selection method (self.imp) and the maximum
                trace size are read from instance attributes.
        """
        sup.print_performed_task('Generating traces')
        generated_event_log = list()
        for case in range(0, parms['num_cases']):
            x_trace = list()
            x_ac_ngram = np.zeros(
                (1, parms['dim']['time_dim']), dtype=np.float32)
            x_rl_ngram = np.zeros(
                (1, parms['dim']['time_dim']), dtype=np.float32)
            x_t_ngram = np.zeros(
                (1, parms['dim']['time_dim'], 1), dtype=np.float32)
            # TODO: add intercase support
            for _ in range(1, self.max_trace_size):
                predictions = self.model.predict([x_ac_ngram, x_rl_ngram, x_t_ngram])
                if self.imp == 'Random Choice':
                    # Sample the next event following the predicted
                    # probability distribution
                    pos = np.random.choice(
                        np.arange(0, len(predictions[0][0])),
                        p=predictions[0][0])
                    pos1 = np.random.choice(
                        np.arange(0, len(predictions[1][0])),
                        p=predictions[1][0])
                elif self.imp == 'Arg Max':
                    # Take the most probable next event
                    pos = np.argmax(predictions[0][0])
                    pos1 = np.argmax(predictions[1][0])
                x_trace.append([pos, pos1, predictions[2][0][0]])
                # Add the prediction to the n-gram (sliding window)
                x_ac_ngram = np.append(x_ac_ngram, [[pos]], axis=1)
                x_ac_ngram = np.delete(x_ac_ngram, 0, 1)
                x_rl_ngram = np.append(x_rl_ngram, [[pos1]], axis=1)
                x_rl_ngram = np.delete(x_rl_ngram, 0, 1)
                x_t_ngram = np.append(x_t_ngram, [predictions[2]], axis=1)
                x_t_ngram = np.delete(x_t_ngram, 0, 1)

                # Stop if the next prediction is the end of the trace;
                # otherwise continue up to the defined max trace size
                if parms['index_ac'][pos] == 'end':
                    break
            generated_event_log.extend(self.decode_trace(parms, x_trace, case))
        sup.print_done_task()
        return generated_event_log
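The paired `np.append`/`np.delete` calls above implement a fixed-size sliding window over the n-gram inputs: the newest prediction is pushed in on the right and the oldest element dropped on the left. A self-contained illustration of that update:

    import numpy as np

    window = np.zeros((1, 5), dtype=np.float32)            # n-gram of length 5
    for new_token in [3, 7, 1]:
        window = np.append(window, [[new_token]], axis=1)  # push on the right
        window = np.delete(window, 0, 1)                   # drop the oldest
    # window is now [[0., 0., 3., 7., 1.]]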
Example No. 7
    def define_probabilities(self) -> None:
        """
        Defines the gateways' probabilities according with an spcified method

        """
        sup.print_performed_task("Analysing gateways' probabilities")
        # Analysis of gateways probabilities
        if self.method == 'discovery':
            gateways = self.analize_gateways()
        elif self.method == 'random':
            gateways = self.analize_gateways_random()
        elif self.method == 'equiprobable':
            gateways = self.analize_gateways_equi()
        else:
            # Guard against 'gateways' being undefined further down
            raise ValueError('Unsupported method: %s' % self.method)
        # Fix 0 probabilities and float error sums
        gateways = self.normalize_probabilities(gateways)
        # Creating response list
        # (Graph.node was removed in NetworkX 2.4; use Graph.nodes)
        gids = lambda x: self.process_graph.nodes[x['gate']]['id']
        gateways['gatewayid'] = gateways.apply(gids, axis=1)
        gids = lambda x: self.process_graph.nodes[x['t_path']]['id']
        gateways['out_path_id'] = gateways.apply(gids, axis=1)
        self.probabilities = gateways[['gatewayid', 'out_path_id',
                                       'prob']].to_dict('records')
        sup.print_done_task()
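`normalize_probabilities` is not shown; per the comment above it repairs zero probabilities and floating-point rounding in the sums. A minimal sketch of that idea for one gateway's outgoing paths, offered as an illustration rather than the author's implementation:

    def normalize(probs, eps=1e-6):
        # Replace zeros with a small epsilon, then rescale to sum to 1.0.
        probs = [p if p > 0 else eps for p in probs]
        total = sum(probs)
        return [p / total for p in probs]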
Example No. 8
 def get_xes_events_data(self):
     """
      Reads and parses all the event information from an XES file.
     """
     temp_data = list()
     tree = ET.parse(self.input)
     root = tree.getroot()
     if self.ns_include:
         ns = {'xes': root.tag.split('}')[0].strip('{')}
         tags = dict(trace='xes:trace',
                     string='xes:string',
                     event='xes:event',
                     date='xes:date')
     else:
         ns = {'xes': ''}
         tags = dict(trace='trace',
                     string='string',
                     event='event',
                     date='date')
     traces = root.findall(tags['trace'], ns)
     i = 0
     sup.print_performed_task('Reading log traces ')
     for trace in traces:
         temp_trace = list()
         caseid = ''
         for string in trace.findall(tags['string'], ns):
             if string.attrib['key'] == 'concept:name':
                 caseid = string.attrib['value']
         for event in trace.findall(tags['event'], ns):
             task = ''
             user = ''
             event_type = ''
             for string in event.findall(tags['string'], ns):
                 if string.attrib['key'] == 'concept:name':
                     task = string.attrib['value']
                 if string.attrib['key'] == 'org:resource':
                     user = string.attrib['value']
                 if string.attrib['key'] == 'lifecycle:transition':
                     event_type = string.attrib['value'].lower()
             timestamp = ''
             for date in event.findall(tags['date'], ns):
                 if date.attrib['key'] == 'time:timestamp':
                     timestamp = date.attrib['value']
                     try:
                         timestamp = datetime.datetime.strptime(
                             timestamp[:-6], self.timeformat)
                     except ValueError:
                         timestamp = datetime.datetime.strptime(
                             timestamp, self.timeformat)
             # By default remove Start and End events
             # but will be added to standardize
             if task not in ['0', '-1', 'Start', 'End', 'start', 'end']:
              if (not self.one_timestamp) or event_type == 'complete':
                     temp_trace.append(
                         dict(caseid=caseid,
                              task=task,
                              event_type=event_type,
                              user=user,
                              timestamp=timestamp))
         if temp_trace:
             temp_trace = self.append_xes_start_end(temp_trace)
         temp_data.extend(temp_trace)
         i += 1
     self.raw_data = temp_data
     self.data = self.reorder_xes(temp_data)
     sup.print_done_task()
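The try/except around `strptime` exists because XES timestamps often carry a UTC-offset suffix: the first attempt slices off the last six characters (e.g. '+02:00'); if that fails, the full string is parsed. A standalone illustration with a hypothetical format:

    import datetime

    fmt = '%Y-%m-%dT%H:%M:%S.%f'                 # hypothetical self.timeformat
    stamp = '2019-05-04T10:15:30.000+02:00'
    try:
        parsed = datetime.datetime.strptime(stamp[:-6], fmt)
    except ValueError:
        parsed = datetime.datetime.strptime(stamp, fmt)
    # parsed == datetime.datetime(2019, 5, 4, 10, 15, 30)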