def get_csv_events_data(self):
    """Reads and parses all the events information from a CSV file."""
    sup.print_performed_task('Reading log traces ')
    log = pd.read_csv(self.input, dtype={'user': str})
    # Map the raw headers to internal names; single-timestamp logs only
    # carry a completion time, otherwise start and end are both parsed
    self.column_names['Complete Timestamp'] = 'end_timestamp'
    timestamp_columns = ['end_timestamp']
    if not self.one_timestamp:
        self.column_names['Start Timestamp'] = 'start_timestamp'
        timestamp_columns = ['start_timestamp', 'end_timestamp']
    log = log.rename(columns=self.column_names)
    log = log.astype({'caseid': object})
    # Drop dummy Start/End events; standardized ones are re-added
    # later by append_csv_start_end()
    log = (log[(log.task != 'Start') & (log.task != 'End')]
           .reset_index(drop=True))
    if self.filter_d_attrib:
        log = log[['caseid', 'task', 'user'] + timestamp_columns]
    for column in timestamp_columns:
        log[column] = pd.to_datetime(log[column], format=self.timeformat)
    # Events without an assigned resource are attributed to the system
    log['user'] = log['user'].fillna('SYS')
    self.data = log.to_dict('records')
    self.append_csv_start_end()
    self.split_event_transitions()
    sup.print_done_task()
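# Standalone sketch of the core pandas steps performed above, run on a tiny
# in-memory frame instead of self.input. The raw headers ('Case ID',
# 'Activity', 'Resource', 'Complete Timestamp') and the time format are
# illustrative assumptions; only the transformations mirror the method.
import pandas as pd

raw = pd.DataFrame({'Case ID': ['1', '1'],
                    'Activity': ['Check request', 'End'],
                    'Resource': ['user_1', None],
                    'Complete Timestamp': ['2019-05-01 10:15:00',
                                           '2019-05-01 10:20:00']})
column_names = {'Case ID': 'caseid', 'Activity': 'task',
                'Resource': 'user', 'Complete Timestamp': 'end_timestamp'}
log = raw.rename(columns=column_names)
log = log[(log.task != 'Start') & (log.task != 'End')].reset_index(drop=True)
log['end_timestamp'] = pd.to_datetime(log['end_timestamp'],
                                      format='%Y-%m-%d %H:%M:%S')
log['user'] = log['user'].fillna('SYS')
print(log.to_dict('records'))  # one record: the 'End' dummy event was dropped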
def predict(self, params, model, spl, imp):
    """Predicts the next events using the predictor that matches
    params['model_type']."""
    self.model = model
    self.spl = spl
    self.imp = imp
    predictor = self._get_predictor(params['model_type'])
    sup.print_performed_task('Predicting next events')
    return predictor(params)
def predict(self, params, model, spl, imp):
    """Predicts the remaining suffix of each trace using the predictor
    that matches params['model_type']."""
    self.model = model
    self.spl = spl
    self.max_trace_size = params['max_trace_size']
    self.imp = imp
    predictor = self._get_predictor(params['model_type'])
    sup.print_performed_task('Predicting suffixes')
    return predictor(params)
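# _get_predictor is referenced by both predict() variants but not shown in
# this excerpt. A minimal sketch of how such a dispatcher could look,
# assuming a dict keyed by model type; the 'shared_cat' key and the error
# handling are illustrative assumptions, not the confirmed implementation.
def _get_predictor(self, model_type):
    # One prediction routine per supported model type
    dispatcher = {'shared_cat': self._predict_event_log_shared_cat}
    try:
        return dispatcher[model_type]
    except KeyError:
        raise ValueError('Unsupported model type: ' + str(model_type))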
def get_xes_events_data(self, filename, start_timeformat, end_timeformat,
                        ns_include, one_timestamp):
    """Reads and parses all the events information from a XES file."""
    temp_data = list()
    tree = ET.parse(filename)
    root = tree.getroot()
    if ns_include:
        # TODO: review how to load the namespace automatically from the root
        ns = {'xes': root.tag.split('}')[0].strip('{')}
        tags = dict(trace='xes:trace', string='xes:string',
                    event='xes:event', date='xes:date')
    else:
        ns = {'xes': ''}
        tags = dict(trace='trace', string='string',
                    event='event', date='date')
    traces = root.findall(tags['trace'], ns)
    sup.print_performed_task('Reading log traces ')
    for trace in traces:
        caseid = ''
        for string in trace.findall(tags['string'], ns):
            if string.attrib['key'] == 'concept:name':
                caseid = string.attrib['value']
        for event in trace.findall(tags['event'], ns):
            task = ''
            user = ''
            event_type = ''
            complete_timestamp = ''
            for string in event.findall(tags['string'], ns):
                if string.attrib['key'] == 'concept:name':
                    task = string.attrib['value']
                if string.attrib['key'] == 'org:resource':
                    user = string.attrib['value']
                if string.attrib['key'] == 'lifecycle:transition':
                    event_type = string.attrib['value'].lower()
                if string.attrib['key'] == 'Complete_Timestamp':
                    complete_timestamp = string.attrib['value']
                    if complete_timestamp != 'End':
                        complete_timestamp = datetime.datetime.strptime(
                            complete_timestamp, end_timeformat)
            timestamp = ''
            for date in event.findall(tags['date'], ns):
                if date.attrib['key'] == 'time:timestamp':
                    timestamp = date.attrib['value']
                    try:
                        # Strip the UTC offset suffix if present
                        timestamp = datetime.datetime.strptime(
                            timestamp[:-6], start_timeformat)
                    except ValueError:
                        timestamp = datetime.datetime.strptime(
                            timestamp, start_timeformat)
            if not (task == '0' or task == '-1'):
                temp_data.append(
                    dict(caseid=caseid, task=task, event_type=event_type,
                         user=user, start_timestamp=timestamp,
                         end_timestamp=complete_timestamp))
    raw_data = temp_data
    temp_data = self.reorder_xes(temp_data, one_timestamp)
    sup.print_done_task()
    return temp_data, raw_data
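# A minimal XES fragment of the shape the parser above expects, for
# reference. The attribute keys (concept:name, org:resource,
# lifecycle:transition, time:timestamp) follow the XES standard; the
# values are made up for illustration.
import xml.etree.ElementTree as ET

SAMPLE_XES = """<log xmlns="http://www.xes-standard.org/">
  <trace>
    <string key="concept:name" value="case_1"/>
    <event>
      <string key="concept:name" value="Approve request"/>
      <string key="org:resource" value="user_7"/>
      <string key="lifecycle:transition" value="complete"/>
      <date key="time:timestamp" value="2019-05-01T10:15:00.000+02:00"/>
    </event>
  </trace>
</log>"""
root = ET.fromstring(SAMPLE_XES)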
def role_discovery(data, drawing, sim_percentage):
    """Discovers resource roles by clustering resources whose
    task-execution profiles are correlated."""
    tasks = list(set(list(map(lambda x: x[0], data))))
    try:
        tasks.remove('Start')
    except Exception:
        pass
    tasks = [dict(index=i, data=tasks[i]) for i in range(0, len(tasks))]
    users = list(set(list(map(lambda x: x[1], data))))
    try:
        users.remove('Start')
    except Exception:
        pass
    users = [dict(index=i, data=users[i]) for i in range(0, len(users))]
    data_transform = list(
        map(lambda x: [find_index(tasks, x[0]), find_index(users, x[1])],
            data))
    unique = list(set(tuple(i) for i in data_transform))
    unique = [list(i) for i in unique]
    # building of a task-frequency profile of task execution per resource
    profiles = build_profile(users, det_freq_matrix(unique, data_transform),
                             len(tasks))
    sup.print_performed_task('Analysing resource pool ')
    # building of a correlation matrix between resource profiles
    correlation_matrix = det_correlation_matrix(profiles)
    # creation of a relation network between resources
    g = nx.Graph()
    for user in users:
        g.add_node(user['index'])
    for relation in correlation_matrix:
        # create edges between nodes, excluding self-correlations and
        # pairs below the sim_percentage similarity threshold
        if (relation['distance'] > sim_percentage
                and relation['x'] != relation['y']):
            g.add_edge(relation['x'], relation['y'],
                       weight=relation['distance'])
    # extraction of connected components as roles
    # (connected_component_subgraphs was removed in NetworkX 2.4)
    sub_graphs = [g.subgraph(c).copy() for c in nx.connected_components(g)]
    # role definition from graph
    roles = role_definition(sub_graphs, users)
    # plot creation (optional)
    if drawing:
        graph_network(g, sub_graphs)
    sup.print_done_task()
    return roles
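# Toy illustration of the grouping idea used above: resources whose task
# frequency profiles correlate above the threshold end up in the same
# connected component, i.e. the same role. The profiles and the threshold
# are made-up values; only the numpy/networkx calls are real.
import networkx as nx
import numpy as np

profiles = {0: [5, 0, 1], 1: [4, 0, 2], 2: [0, 6, 0]}  # resource -> task freqs
threshold = 0.7
g = nx.Graph()
g.add_nodes_from(profiles)
for x in profiles:
    for y in profiles:
        if x < y:
            r = np.corrcoef(profiles[x], profiles[y])[0, 1]  # Pearson correlation
            if r > threshold:
                g.add_edge(x, y, weight=r)
roles = [sorted(c) for c in nx.connected_components(g)]
print(roles)  # [[0, 1], [2]]: resources 0 and 1 behave alike and share a role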
def _predict_event_log_shared_cat(self, parms):
    """Generates business process traces using a trained Keras model.

    Args:
        parms (dict): generation parameters, including 'num_cases'
            (number of traces to generate), 'dim' (n-gram dimensions)
            and 'index_ac' (activity index). The trained model, the
            next-event selection method and the maximum trace size are
            taken from the instance (self.model, self.imp,
            self.max_trace_size).
    """
    sup.print_performed_task('Generating traces')
    generated_event_log = list()
    for case in range(0, parms['num_cases']):
        x_trace = list()
        x_ac_ngram = np.zeros(
            (1, parms['dim']['time_dim']), dtype=np.float32)
        x_rl_ngram = np.zeros(
            (1, parms['dim']['time_dim']), dtype=np.float32)
        x_t_ngram = np.zeros(
            (1, parms['dim']['time_dim'], 1), dtype=np.float32)
        # TODO: add intercase support
        for _ in range(1, self.max_trace_size):
            predictions = self.model.predict([x_ac_ngram, x_rl_ngram,
                                              x_t_ngram])
            if self.imp == 'Random Choice':
                # sample the next activity and role following the
                # predicted probability distributions
                pos = np.random.choice(
                    np.arange(0, len(predictions[0][0])),
                    p=predictions[0][0])
                pos1 = np.random.choice(
                    np.arange(0, len(predictions[1][0])),
                    p=predictions[1][0])
            elif self.imp == 'Arg Max':
                # take the most likely activity and role
                pos = np.argmax(predictions[0][0])
                pos1 = np.argmax(predictions[1][0])
            x_trace.append([pos, pos1, predictions[2][0][0]])
            # Slide the n-grams one step: append the prediction and
            # drop the oldest entry
            x_ac_ngram = np.append(x_ac_ngram, [[pos]], axis=1)
            x_ac_ngram = np.delete(x_ac_ngram, 0, 1)
            x_rl_ngram = np.append(x_rl_ngram, [[pos1]], axis=1)
            x_rl_ngram = np.delete(x_rl_ngram, 0, 1)
            x_t_ngram = np.append(x_t_ngram, [predictions[2]], axis=1)
            x_t_ngram = np.delete(x_t_ngram, 0, 1)
            # Stop if the next prediction is the end of the trace,
            # otherwise continue until max_trace_size
            if parms['index_ac'][pos] == 'end':
                break
        generated_event_log.extend(self.decode_trace(parms, x_trace, case))
    sup.print_done_task()
    return generated_event_log
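# Standalone illustration of the two next-event selection rules used above:
# 'Random Choice' samples an index according to the predicted distribution,
# while 'Arg Max' always takes the most likely index. The probability
# vector is made up.
import numpy as np

probs = np.array([0.1, 0.6, 0.3])
sampled = np.random.choice(np.arange(len(probs)), p=probs)  # varies per call
greedy = np.argmax(probs)                                   # always 1 here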
def define_probabilities(self) -> None:
    """Defines the gateways' probabilities according to a specified
    method."""
    sup.print_performed_task("Analysing gateways' probabilities")
    # Analysis of gateways probabilities
    if self.method == 'discovery':
        gateways = self.analize_gateways()
    elif self.method == 'random':
        gateways = self.analize_gateways_random()
    elif self.method == 'equiprobable':
        gateways = self.analize_gateways_equi()
    # Fix zero probabilities and floating-point rounding in the sums
    gateways = self.normalize_probabilities(gateways)
    # Creating response list
    # (attribute access uses .nodes; .node was removed in NetworkX 2.4)
    gate_id = lambda x: self.process_graph.nodes[x['gate']]['id']
    gateways['gatewayid'] = gateways.apply(gate_id, axis=1)
    path_id = lambda x: self.process_graph.nodes[x['t_path']]['id']
    gateways['out_path_id'] = gateways.apply(path_id, axis=1)
    self.probabilities = gateways[['gatewayid', 'out_path_id',
                                   'prob']].to_dict('records')
    sup.print_done_task()
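# normalize_probabilities() is not shown in this excerpt. A minimal sketch
# of what such a step could do, assuming `gateways` is a DataFrame with a
# 'gate' column and a 'prob' column: rescale each gateway's outgoing
# probabilities so they sum to exactly 1, with an equiprobable fallback
# when every path of a gateway has zero probability. Illustrative only.
import numpy as np
import pandas as pd

def normalize_probabilities_sketch(gateways: pd.DataFrame) -> pd.DataFrame:
    totals = gateways.groupby('gate')['prob'].transform('sum')
    sizes = gateways.groupby('gate')['prob'].transform('size')
    gateways['prob'] = np.where(totals == 0,
                                1.0 / sizes,          # equiprobable fallback
                                gateways['prob'] / totals.replace(0, 1))
    return gateways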
def get_xes_events_data(self):
    """Reads and parses all the events information from a XES file."""
    temp_data = list()
    tree = ET.parse(self.input)
    root = tree.getroot()
    if self.ns_include:
        ns = {'xes': root.tag.split('}')[0].strip('{')}
        tags = dict(trace='xes:trace', string='xes:string',
                    event='xes:event', date='xes:date')
    else:
        ns = {'xes': ''}
        tags = dict(trace='trace', string='string',
                    event='event', date='date')
    traces = root.findall(tags['trace'], ns)
    sup.print_performed_task('Reading log traces ')
    for trace in traces:
        temp_trace = list()
        caseid = ''
        for string in trace.findall(tags['string'], ns):
            if string.attrib['key'] == 'concept:name':
                caseid = string.attrib['value']
        for event in trace.findall(tags['event'], ns):
            task = ''
            user = ''
            event_type = ''
            for string in event.findall(tags['string'], ns):
                if string.attrib['key'] == 'concept:name':
                    task = string.attrib['value']
                if string.attrib['key'] == 'org:resource':
                    user = string.attrib['value']
                if string.attrib['key'] == 'lifecycle:transition':
                    event_type = string.attrib['value'].lower()
            timestamp = ''
            for date in event.findall(tags['date'], ns):
                if date.attrib['key'] == 'time:timestamp':
                    timestamp = date.attrib['value']
                    try:
                        # Strip the UTC offset suffix if present
                        timestamp = datetime.datetime.strptime(
                            timestamp[:-6], self.timeformat)
                    except ValueError:
                        timestamp = datetime.datetime.strptime(
                            timestamp, self.timeformat)
            # By default remove Start and End events; standardized ones
            # are re-added by append_xes_start_end()
            if task not in ['0', '-1', 'Start', 'End', 'start', 'end']:
                if ((not self.one_timestamp) or
                        (self.one_timestamp and event_type == 'complete')):
                    temp_trace.append(
                        dict(caseid=caseid, task=task,
                             event_type=event_type, user=user,
                             timestamp=timestamp))
        if temp_trace:
            temp_trace = self.append_xes_start_end(temp_trace)
            temp_data.extend(temp_trace)
    self.raw_data = temp_data
    self.data = self.reorder_xes(temp_data)
    sup.print_done_task()
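# Why the namespace extraction above works: ElementTree stores tags in
# Clark notation, '{uri}tag', so splitting on '}' and stripping '{'
# recovers the namespace URI for use in findall(). Standalone check:
import xml.etree.ElementTree as ET

root = ET.fromstring(
    '<log xmlns="http://www.xes-standard.org/"><trace/></log>')
print(root.tag)                       # {http://www.xes-standard.org/}log
ns = {'xes': root.tag.split('}')[0].strip('{')}
print(root.findall('xes:trace', ns))  # locates the trace element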