def create_process_structure(bpmn, drawing=False, verbose=True):
    # Load the BPMN structure into a directed graph
    g = load_process_structure(bpmn, verbose)
    if drawing:
        graph_network_x(g)
    if verbose:
        sup.print_done_task()
    return g
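For context, a minimal self-contained sketch of the same idea with networkx; the node and edge data below are made up for illustration, since `load_process_structure` and `graph_network_x` are project-specific helpers:

import networkx as nx

# Hypothetical stand-in for load_process_structure: nodes are BPMN
# elements, edges are sequence flows
g = nx.DiGraph()
g.add_node('start', type='startevent')
g.add_node('task_a', type='task')
g.add_node('task_b', type='task')
g.add_node('end', type='endevent')
g.add_edges_from([('start', 'task_a'), ('task_a', 'task_b'),
                  ('task_b', 'end')])
print(list(nx.topological_sort(g)))  # ['start', 'task_a', 'task_b', 'end']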
Example #2
    def get_xes_events_data(self):
        """Reads and parses all the event data from a XES file."""
        log = pm4py.read_xes(self.input)
        try:
            source = log.attributes['source']
        except (KeyError, AttributeError):
            source = ''
        # Flatten the log: one record per event, tagged with its case id
        flattened_log = [{
            **event, 'caseid': trace.attributes['concept:name']
        } for trace in log for event in trace]
        temp_data = pd.DataFrame(flattened_log)
        # Round-trip through strftime/to_datetime to normalize the
        # timestamps to the configured precision
        temp_data['time:timestamp'] = temp_data.apply(
            lambda x: x['time:timestamp'].strftime(self.timeformat), axis=1)
        temp_data['time:timestamp'] = pd.to_datetime(
            temp_data['time:timestamp'], format=self.timeformat)
        temp_data.rename(columns={
            'concept:name': 'task',
            'lifecycle:transition': 'event_type',
            'org:resource': 'user',
            'time:timestamp': 'timestamp'
        },
                         inplace=True)
        # Drop artificial Start/End activities and keep only the
        # start/complete lifecycle transitions
        temp_data = (
            temp_data[~temp_data.task.isin(['Start', 'End', 'start', 'end'])].
            reset_index(drop=True))
        temp_data = (temp_data[temp_data.event_type.isin(
            ['start', 'complete'])].reset_index(drop=True))
        if source == 'com.qbpsimulator':
            if len(temp_data.iloc[0].elementId.split('_')) > 1:
                # QBP simulator logs encode the element type as an id prefix
                temp_data['etype'] = temp_data.apply(
                    lambda x: x.elementId.split('_')[0], axis=1)
                temp_data = (temp_data[temp_data.etype == 'Task'].reset_index(
                    drop=True))
        self.raw_data = temp_data.to_dict('records')
        if self.verbose:
            sup.print_performed_task('Rearranging log traces ')
        self.data = self.reorder_xes(temp_data)
        self.data = pd.DataFrame(self.data)
        self.data.drop_duplicates(inplace=True)
        self.data = self.data.to_dict('records')
        self.append_csv_start_end()
        if self.verbose:
            sup.print_done_task()
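The flattening idiom above (one dict per event, with the trace's `concept:name` copied in as `caseid`) can also be delegated to pm4py itself. A rough standalone sketch, assuming a hypothetical `example.xes` file and a pm4py version whose `read_xes` returns an iterable `EventLog`:

import pandas as pd
import pm4py

log = pm4py.read_xes('example.xes')  # hypothetical input file
# Manual flattening, as in the method above
events = [{**event, 'caseid': trace.attributes['concept:name']}
          for trace in log for event in trace]
df = pd.DataFrame(events)
# Built-in alternative: the case id ends up in 'case:concept:name'
df = pm4py.convert_to_dataframe(log)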
Example #3
    def discover_roles(self):
        # map every event to its (task index, user index) pair
        associations = lambda x: (self.tasks[x['task']], self.users[x['user']])
        self.data['ac_rl'] = self.data.apply(associations, axis=1)

        freq_matrix = (self.data.groupby(
            by='ac_rl')['task'].count().reset_index().rename(
                columns={'task': 'freq'}))
        freq_matrix = {
            x['ac_rl']: x['freq']
            for x in freq_matrix.to_dict('records')
        }

        profiles = self.build_profile(freq_matrix)

        sup.print_progress(20, 'Analysing resource pool ')
        # build a correlation matrix between resource profiles
        correl_matrix = self.det_correl_matrix(profiles)
        sup.print_progress(40, 'Analysing resource pool ')
        # create a relationship network between resources
        g = nx.Graph()
        for user in self.users.values():
            g.add_node(user)
        for rel in correl_matrix:
            # add edges between distinct nodes whose similarity is
            # above the threshold
            if rel['distance'] > self.sim_threshold and rel['x'] != rel['y']:
                g.add_edge(rel['x'], rel['y'], weight=rel['distance'])
        sup.print_progress(60, 'Analysing resource pool ')
        # extract the connected components of the network as roles
        sub_graphs = list(nx.connected_components(g))
        sup.print_progress(80, 'Analysing resource pool ')
        # derive the role definitions from the graph components
        roles = self.role_definition(sub_graphs)
        # plot creation (optional)
        # if drawing == True:
        #     graph_network(g, sub_graphs)
        sup.print_progress(100, 'Analysing resource pool ')
        sup.print_done_task()
        return roles
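A condensed sketch of the grouping step: link resources whose activity-frequency profiles correlate above a threshold and read roles off the connected components. The profiles below are made up, and Pearson correlation stands in for whatever `det_correl_matrix` computes:

import networkx as nx
import numpy as np

profiles = {
    'ana':  np.array([5, 0, 2, 0]),   # activity-frequency vectors
    'bob':  np.array([4, 1, 2, 0]),
    'carl': np.array([0, 6, 0, 3]),
}
threshold = 0.7
g = nx.Graph()
g.add_nodes_from(profiles)
names = list(profiles)
for i, u in enumerate(names):
    for v in names[i + 1:]:
        corr = np.corrcoef(profiles[u], profiles[v])[0, 1]
        if corr > threshold:
            g.add_edge(u, v, weight=corr)
# Each connected component is one role
roles = list(nx.connected_components(g))
print(roles)  # e.g. [{'ana', 'bob'}, {'carl'}]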
Example #4
    def get_csv_events_data(self):
        """Reads and parses all the event information from a CSV file."""
        if self.verbose:
            sup.print_performed_task('Reading log traces ')
        log = pd.read_csv(self.input)
        # Map the raw column headers to the internal names
        self.column_names['Complete Timestamp'] = 'end_timestamp'
        timestamp_cols = ['end_timestamp']
        if not self.one_timestamp:
            self.column_names['Start Timestamp'] = 'start_timestamp'
            timestamp_cols = ['start_timestamp', 'end_timestamp']
        log = log.rename(columns=self.column_names)
        log = log.astype({'caseid': object})
        # Drop the artificial Start/End events
        log = (log[(log.task != 'Start')
                   & (log.task != 'End')].reset_index(drop=True))
        if self.filter_d_attrib:
            # Keep only the core attributes, discarding extra data columns
            log = log[['caseid', 'task', 'user'] + timestamp_cols]
        for col in timestamp_cols:
            log[col] = pd.to_datetime(log[col], format=self.timeformat)
        self.data = log.to_dict('records')
        self.append_csv_start_end()
        self.split_event_transitions()
        if self.verbose:
            sup.print_done_task()
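The same rename/filter/parse pipeline in standalone form; the column mapping, file name, and time format below are hypothetical stand-ins for `self.column_names`, `self.input`, and `self.timeformat`:

import pandas as pd

column_names = {'Case ID': 'caseid', 'Activity': 'task',
                'Resource': 'user', 'Complete Timestamp': 'end_timestamp'}
log = pd.read_csv('event_log.csv')  # hypothetical input file
log = log.rename(columns=column_names).astype({'caseid': object})
# Drop artificial Start/End events before parsing timestamps
log = log[~log.task.isin(['Start', 'End'])].reset_index(drop=True)
log['end_timestamp'] = pd.to_datetime(log['end_timestamp'],
                                      format='%Y-%m-%dT%H:%M:%S.%f')
records = log.to_dict('records')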
Example #5
    def _predict_next_event_shared_cat(self, parameters, vectorizer):
        """Predict the next event of business process prefixes using a
        trained keras model.
        Args:
            parameters (dict): model dimensions and execution settings.
            vectorizer (str): input vectorization variant
                ('basic' or 'inter').
        """
        # Generation of predictions
        results = list()
        for i, _ in enumerate(self.spl['prefixes']['activities']):
            # Activities and roles inputs, shape (1, time_dim)
            x_ac_ngram = (np.append(
                np.zeros(parameters['dim']['time_dim']),
                np.array(self.spl['prefixes']['activities'][i]),
                axis=0)[-parameters['dim']['time_dim']:].reshape(
                    (1, parameters['dim']['time_dim'])))

            x_rl_ngram = (np.append(
                np.zeros(parameters['dim']['time_dim']),
                np.array(self.spl['prefixes']['roles'][i]),
                axis=0)[-parameters['dim']['time_dim']:].reshape(
                    (1, parameters['dim']['time_dim'])))

            # Times input, shape (1, time_dim, n_time_features)
            times_attr_num = (self.spl['prefixes']['times'][i].shape[1])
            x_t_ngram = np.array([
                np.append(np.zeros(
                    (parameters['dim']['time_dim'], times_attr_num)),
                          self.spl['prefixes']['times'][i],
                          axis=0)[-parameters['dim']['time_dim']:].reshape(
                              (parameters['dim']['time_dim'], times_attr_num))
            ])

            # add intercase features if necessary
            if vectorizer in ['basic']:
                inputs = [x_ac_ngram, x_rl_ngram, x_t_ngram]
            elif vectorizer in ['inter']:
                # Intercase attributes input,
                # shape (1, time_dim, n_inter_features)
                inter_attr_num = (
                    self.spl['prefixes']['inter_attr'][i].shape[1])
                x_inter_ngram = np.array([
                    np.append(np.zeros(
                        (parameters['dim']['time_dim'], inter_attr_num)),
                              self.spl['prefixes']['inter_attr'][i],
                              axis=0)[-parameters['dim']['time_dim']:].reshape(
                                  (parameters['dim']['time_dim'],
                                   inter_attr_num))
                ])
                inputs = [x_ac_ngram, x_rl_ngram, x_t_ngram, x_inter_ngram]
            # predict
            preds = self.model.predict(inputs)
            if self.imp == 'random_choice':
                # Draw the next index at random, using the predicted
                # probabilities as a PDF
                pos = np.random.choice(np.arange(0, len(preds[0][0])),
                                       p=preds[0][0])
                pos1 = np.random.choice(np.arange(0, len(preds[1][0])),
                                        p=preds[1][0])
            elif self.imp == 'arg_max':
                # Take the index with the highest predicted probability
                pos = np.argmax(preds[0][0])
                pos1 = np.argmax(preds[1][0])

            # save results
            predictions = [pos, pos1, preds[2][0][0]]
            if not parameters['one_timestamp']:
                predictions.extend([preds[2][0][1]])
            results.append(
                self.create_result_record(i, self.spl, predictions,
                                          parameters))
        sup.print_done_task()
        return results
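The two selection modes reduce to sampling from, versus taking the argmax of, the predicted distribution; a tiny numpy sketch with a made-up probability vector:

import numpy as np

probs = np.array([0.1, 0.6, 0.3])   # hypothetical model output for one step
# 'random_choice': draw the next index using the predictions as a PDF
pos_random = np.random.choice(np.arange(len(probs)), p=probs)
# 'arg_max': always take the most likely index
pos_argmax = np.argmax(probs)       # 1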
Example #6
    def _predict_suffix_shared_cat(self, parms, vectorizer):
        """Generate business process suffixes using a trained keras model.
        Args:
            parms (dict): model dimensions and execution settings.
            vectorizer (str): input vectorization variant
                ('basic' or 'inter').
        """
        # Generation of predictions
        results = list()
        for i, _ in enumerate(self.spl['prefixes']['activities']):
            # Activities and roles inputs, shape (1, time_dim)
            x_ac_ngram = (np.append(
                np.zeros(parms['dim']['time_dim']),
                np.array(self.spl['prefixes']['activities'][i]),
                axis=0)[-parms['dim']['time_dim']:].reshape(
                    (1, parms['dim']['time_dim'])))

            x_rl_ngram = (np.append(
                np.zeros(parms['dim']['time_dim']),
                np.array(self.spl['prefixes']['roles'][i]),
                axis=0)[-parms['dim']['time_dim']:].reshape(
                    (1, parms['dim']['time_dim'])))

            times_attr_num = (self.spl['prefixes']['times'][i].shape[1])
            x_t_ngram = np.array([
                np.append(np.zeros((parms['dim']['time_dim'], times_attr_num)),
                          self.spl['prefixes']['times'][i],
                          axis=0)[-parms['dim']['time_dim']:].reshape(
                              (parms['dim']['time_dim'], times_attr_num))
            ])
            if vectorizer in ['basic']:
                inputs = [x_ac_ngram, x_rl_ngram, x_t_ngram]
            elif vectorizer in ['inter']:
                inter_attr_num = self.spl['prefixes']['inter_attr'][i].shape[1]
                x_inter_ngram = np.array([
                    np.append(np.zeros(
                        (parms['dim']['time_dim'], inter_attr_num)),
                              self.spl['prefixes']['inter_attr'][i],
                              axis=0)[-parms['dim']['time_dim']:].reshape(
                                  (parms['dim']['time_dim'], inter_attr_num))
                ])
                inputs = [x_ac_ngram, x_rl_ngram, x_t_ngram, x_inter_ngram]

            pref_size = len(self.spl['prefixes']['activities'][i])
            acum_dur, acum_wait = list(), list()
            ac_suf, rl_suf = list(), list()
            for _ in range(1, self.max_trace_size):
                preds = self.model.predict(inputs)
                if self.imp == 'random_choice':
                    # Draw the next index at random, using the predicted
                    # probabilities as a PDF
                    pos = np.random.choice(np.arange(0, len(preds[0][0])),
                                           p=preds[0][0])
                    pos1 = np.random.choice(np.arange(0, len(preds[1][0])),
                                            p=preds[1][0])
                elif self.imp == 'arg_max':
                    # Take the index with the highest predicted probability
                    pos = np.argmax(preds[0][0])
                    pos1 = np.argmax(preds[1][0])
                # Slide the n-gram windows: append the new prediction
                # and drop the oldest step
                x_ac_ngram = np.append(x_ac_ngram, [[pos]], axis=1)
                x_ac_ngram = np.delete(x_ac_ngram, 0, 1)
                x_rl_ngram = np.append(x_rl_ngram, [[pos1]], axis=1)
                x_rl_ngram = np.delete(x_rl_ngram, 0, 1)
                x_t_ngram = np.append(x_t_ngram, [preds[2]], axis=1)
                x_t_ngram = np.delete(x_t_ngram, 0, 1)
                if vectorizer in ['basic']:
                    inputs = [x_ac_ngram, x_rl_ngram, x_t_ngram]
                elif vectorizer in ['inter']:
                    x_inter_ngram = np.append(x_inter_ngram, [preds[3]],
                                              axis=1)
                    x_inter_ngram = np.delete(x_inter_ngram, 0, 1)
                    inputs = [x_ac_ngram, x_rl_ngram, x_t_ngram, x_inter_ngram]
                # Accumulate the suffix; stop early if the predicted
                # activity is the trace end, otherwise continue up to
                # max_trace_size
                ac_suf.append(pos)
                rl_suf.append(pos1)
                acum_dur.append(preds[2][0][0])
                if not parms['one_timestamp']:
                    acum_wait.append(preds[2][0][1])
                if parms['index_ac'][pos] == 'end':
                    break
            # save results
            predictions = [ac_suf, rl_suf, acum_dur]
            if not parms['one_timestamp']:
                predictions.extend([acum_wait])
            results.append(
                self.create_result_record(i, self.spl, predictions, parms,
                                          pref_size))
        sup.print_done_task()
        return results
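The autoregressive loop above relies on a fixed-size sliding window: append the new prediction on the time axis, drop the oldest step. A minimal numpy sketch of that window update with made-up activity indices:

import numpy as np

time_dim = 5
x_ac_ngram = np.zeros((1, time_dim))      # e.g. [[0., 0., 0., 0., 0.]]
for predicted_pos in [3, 7, 2]:           # made-up predicted activity indices
    # Append the prediction and drop the oldest step, keeping shape (1, 5)
    x_ac_ngram = np.append(x_ac_ngram, [[predicted_pos]], axis=1)
    x_ac_ngram = np.delete(x_ac_ngram, 0, 1)
print(x_ac_ngram)  # [[0. 0. 3. 7. 2.]]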