def match_predefined_time(self):
    """
    Performs the matching between the information given by the
    hyper-opt and the BPMN model and resources data.

    Returns
    -------
    elements_data : DataFrame
    """
    elements_data = list()
    # Predefined task records creation
    default_record = {'type': 'EXPONENTIAL', 'mean': '0', 'arg2': '0'}
    for task, value in self.pdef_values.items():
        record = {**{'id': sup.gen_id(),
                     'name': str(task),
                     'arg1': str(value)},
                  **default_record}
        elements_data.append(record)
    # Check if there are tasks without a predefined time
    pdef_tasks = list(self.pdef_values.keys())
    not_included = [task for task in self.tasks if task not in pdef_tasks]
    default_record = {'type': 'EXPONENTIAL',
                      'mean': '0',
                      'arg1': '60',
                      'arg2': '0'}
    for task in not_included:
        elements_data.append({**{'id': sup.gen_id(), 'name': task},
                              **default_record})
    elements_data = pd.DataFrame(elements_data)
    # Matching with model info
    elements_data = elements_data.merge(
        self.model_data[['name', 'elementid']],
        on='name', how='left').sort_values(by='name')
    return elements_data
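# Illustrative sketch (not part of the class): the {**specific, **defaults}
# merge above yields one simulation record per task. Given a hypothetical
# pdef_values entry {'Review request': 300}, the resulting record is:
#
#     {'id': '<generated id>', 'name': 'Review request', 'arg1': '300',
#      'type': 'EXPONENTIAL', 'mean': '0', 'arg2': '0'}
#
# i.e. the predefined time lands in 'arg1' and the distribution fields
# come from the shared defaults.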
def mine_processing_time(self):
    """
    Performs the mining of activity durations from data.

    Returns
    -------
    elements_data : DataFrame
    """
    elements_data = list()
    for task in self.tasks:
        s_key = 'duration' if self.one_timestamp else 'processing_time'
        task_processing = (self.process_stats[
            self.process_stats.task == task][s_key].tolist())
        dist = pdf.DistributionFinder(task_processing).distribution
        elements_data.append({'id': sup.gen_id(),
                              'type': dist['dname'],
                              'name': task,
                              'mean': str(dist['dparams']['mean']),
                              'arg1': str(dist['dparams']['arg1']),
                              'arg2': str(dist['dparams']['arg2'])})
    elements_data = pd.DataFrame(elements_data)
    elements_data = elements_data.merge(
        self.model_data[['name', 'elementid']], on='name', how='left')
    return elements_data
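# Note (assumption inferred from the usage above, not a documented API):
# DistributionFinder is expected to expose a `.distribution` dict shaped as
#
#     {'dname': '<distribution name>',
#      'dparams': {'mean': ..., 'arg1': ..., 'arg2': ...}}
#
# whose fields map one-to-one onto the simulator's element record, e.g.:
#
#     dist = pdf.DistributionFinder([10.0, 12.5, 11.2]).distribution
#     dist['dname']            # e.g. 'NORMAL'
#     dist['dparams']['mean']  # fitted parameter, stringified for the record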
def add_start_end_info(self, elements_data):
    """
    Appends simulation records for the Start and End events,
    assigned to the SYSTEM resource.
    """
    # Records creation
    temp_elements_data = list()
    default_record = {'type': 'FIXED',
                      'mean': '0',
                      'arg1': '0',
                      'arg2': '0'}
    for task in ['Start', 'End']:
        temp_elements_data.append({**{'id': sup.gen_id(), 'name': task},
                                   **default_record})
    temp_elements_data = pd.DataFrame(temp_elements_data)
    temp_elements_data = temp_elements_data.merge(
        self.model_data[['name', 'elementid']],
        on='name', how='left').sort_values(by='name')
    temp_elements_data['r_name'] = 'SYSTEM'
    # Resource id addition
    resource_id = (pd.DataFrame.from_dict(self.resource_pool)[['id', 'name']]
                   .rename(columns={'id': 'resource', 'name': 'r_name'}))
    temp_elements_data = (temp_elements_data
                          .merge(resource_id, on='r_name', how='left')
                          .drop(columns=['r_name']))
    # Appending to the elements data
    temp_elements_data = temp_elements_data.to_dict('records')
    elements_data.extend(temp_elements_data)
    return elements_data
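# Sketch of the resource-id join above, with hypothetical data: merging on
# the temporary 'r_name' column attaches the resource id, after which the
# helper column is dropped:
#
#     events = pd.DataFrame([{'name': 'Start', 'r_name': 'SYSTEM'}])
#     pool = pd.DataFrame([{'resource': 'QBP_1', 'r_name': 'SYSTEM'}])
#     events.merge(pool, on='r_name', how='left').drop(columns=['r_name'])
#     #     name resource
#     # 0  Start    QBP_1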
def default_values(self):
    """
    Assigns a default exponential distribution to each activity, using
    the observed mean duration as the distribution parameter.

    Returns
    -------
    elements_data : list of dict
    """
    elements_data = list()
    for task in self.tasks:
        s_key = 'duration' if self.one_timestamp else 'processing_time'
        task_processing = (self.process_stats[
            self.process_stats.task == task][s_key].tolist())
        try:
            mean_time = np.mean(task_processing) if task_processing else 0
        except Exception:
            mean_time = 0
        elements_data.append({'id': sup.gen_id(),
                              'type': 'EXPONENTIAL',
                              'name': task,
                              'mean': str(0),
                              'arg1': str(np.round(mean_time, 2)),
                              'arg2': str(0)})
    elements_data = pd.DataFrame(elements_data)
    elements_data = elements_data.merge(
        self.model_data[['name', 'elementid']], on='name', how='left')
    return elements_data.to_dict('records')
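# Note: np.mean([]) does not raise, it returns nan (with a RuntimeWarning),
# so the `if task_processing else 0` guard is what actually keeps empty
# task lists from producing 'nan' records; the except clause is a fallback:
#
#     np.mean([])      # nan  (RuntimeWarning: Mean of empty slice)
#     np.mean([5, 7])  # 6.0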
def analize_schedules(resource_table, log, default=False, dtype=None):
    """
    Builds the resource pool and its timetable. Only the default
    timetable creation is currently implemented.
    """
    resource_pool = list()
    if default:
        time_table, resource_table = create_timetables(resource_table,
                                                       dtype=dtype)
        # One pool entry per role, sized by the number of its resources
        data = sorted(resource_table, key=lambda x: x['role'])
        for key, group in itertools.groupby(data, key=lambda x: x['role']):
            group_resources = [x['resource'] for x in group]
            resource_pool.append(
                dict(id=sup.gen_id(),
                     name=key,
                     total_amount=str(len(group_resources)),
                     costxhour="20",
                     timetable_id="QBP_DEFAULT_TIMETABLE"))
        resource_pool[0]['id'] = 'QBP_DEFAULT_RESOURCE'
        resource_pool.append(
            dict(id='0',
                 name='Role 0',
                 total_amount='1',
                 costxhour="0",
                 timetable_id="QBP_DEFAULT_TIMETABLE"))
    else:
        # Mining schedules from the log is not implemented; failing early
        # avoids returning an undefined time_table below
        raise NotImplementedError('schedule mining from the log '
                                  'is not supported yet')
    return resource_pool, time_table, resource_table
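# Sketch: itertools.groupby only groups *consecutive* equal keys, which is
# why resource_table is sorted by 'role' before grouping above:
#
#     data = [{'role': 'B'}, {'role': 'A'}, {'role': 'B'}]
#     [k for k, _ in itertools.groupby(data, key=lambda x: x['role'])]
#     # -> ['B', 'A', 'B']  (unsorted input: three groups)
#     srt = sorted(data, key=lambda x: x['role'])
#     [k for k, _ in itertools.groupby(srt, key=lambda x: x['role'])]
#     # -> ['A', 'B']       (sorted input: one group per role)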
def extract_parameters(log, bpmn, process_graph):
    """
    Extracts the simulation parameters required by the BIMP simulator
    (arrival rate, timetables, resource pool, task distributions and
    gateway probabilities) from the event log and the BPMN model.
    """
    if bpmn is not None and log is not None:
        bpmnId = bpmn.getProcessId()
        startEventId = bpmn.getStartEventId()
        # -------------------------------------------------------------------
        # Analysing resource pool: LV917 or 247 roles
        roles, resource_table = rl.read_resource_pool(log,
                                                      drawing=False,
                                                      sim_percentage=0.5)
        resource_pool, time_table, resource_table = sch.analize_schedules(
            resource_table, log, True, '247')
        # -------------------------------------------------------------------
        # Process replaying
        conformed_traces, not_conformed_traces, process_stats = rpl.replay(
            process_graph, log)
        # -------------------------------------------------------------------
        # Adding role to process stats
        for stat in process_stats:
            role = list(filter(lambda x: x['resource'] == stat['resource'],
                               resource_table))[0]['role']
            stat['role'] = role
        # -------------------------------------------------------------------
        # Determination of first tasks to calculate the arrival rate
        inter_arrival_times = arr.define_interarrival_tasks(process_graph,
                                                            conformed_traces)
        arrival_rate_bimp = td.get_task_distribution(inter_arrival_times, 50)
        arrival_rate_bimp['startEventId'] = startEventId
        # -------------------------------------------------------------------
        # Gateway probabilities: 1=Historical, 2=Random, 3=Equiprobable
        sequences = gt.define_probabilities(process_graph, bpmn, log, 1)
        # -------------------------------------------------------------------
        # Tasks id information
        elements_data = list()
        i = 0
        task_list = list(filter(
            lambda x: process_graph.node[x]['type'] == 'task',
            list(nx.nodes(process_graph))))
        for task in task_list:
            task_name = process_graph.node[task]['name']
            task_id = process_graph.node[task]['id']
            values = list(filter(lambda x: x['task'] == task_name,
                                 process_stats))
            task_processing = [x['processing_time'] for x in values]
            dist = td.get_task_distribution(task_processing)
            # Assign the task to the role that executed it most often
            max_role, max_count = '', 0
            role_sorted = sorted(values, key=lambda x: x['role'])
            for key2, group2 in itertools.groupby(role_sorted,
                                                  key=lambda x: x['role']):
                group_count = list(group2)
                if len(group_count) > max_count:
                    max_count = len(group_count)
                    max_role = key2
            elements_data.append(
                dict(id=sup.gen_id(),
                     elementid=task_id,
                     type=dist['dname'],
                     name=task_name,
                     mean=str(dist['dparams']['mean']),
                     arg1=str(dist['dparams']['arg1']),
                     arg2=str(dist['dparams']['arg2']),
                     resource=find_resource_id(resource_pool, max_role)))
            # max(..., 1) guards against division by zero for one-task models
            sup.print_progress(((i / max(len(task_list) - 1, 1)) * 100),
                               'Analysing tasks data ')
            i += 1
        sup.print_done_task()
        parameters = dict(arrival_rate=arrival_rate_bimp,
                          time_table=time_table,
                          resource_pool=resource_pool,
                          elements_data=elements_data,
                          sequences=sequences,
                          instances=len(conformed_traces),
                          bpmnId=bpmnId)
        return parameters, process_stats
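# Usage sketch (hypothetical objects; the readers shown here are assumptions
# for illustration, not this module's API):
#
#     log = lr.LogReader('event_log.xes')                  # hypothetical
#     bpmn = br.BpmnReader('model.bpmn')                   # hypothetical
#     graph = gph.create_process_structure(bpmn)           # hypothetical
#     parameters, process_stats = extract_parameters(log, bpmn, graph)
#     sorted(parameters)
#     # -> ['arrival_rate', 'bpmnId', 'elements_data', 'instances',
#     #     'resource_pool', 'sequences', 'time_table']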