def execute_script():
    """Demo: compare raw elapsed seconds against business-hours seconds."""
    start_epoch = 100000000
    end_epoch = 110000000
    start_dt = datetime.datetime.fromtimestamp(start_epoch)
    end_dt = datetime.datetime.fromtimestamp(end_epoch)
    # raw wall-clock difference in seconds
    print(end_epoch - start_epoch)
    # default business hours: all the days of the week except Saturday and Sunday
    # are working days.
    default_bh = BusinessHours(start_dt, end_dt, worktiming=[[7, 12.5], [13, 17]])
    print(default_bh.getseconds())
    # let's calculate the business hours using a proper work calendar.
    calendar_bh = BusinessHours(start_dt, end_dt,
                                worktiming=[[7, 12.25], [13.25, 17]],
                                workcalendar=Italy())
    print(calendar_bh.getseconds())
def insert_time_from_previous(log, parameters=None):
    """
    Inserts the time from the previous event, both in normal and business hours

    Parameters
    -------------
    log
        Event log
    parameters
        Parameters of the algorithm

    Returns
    -------------
    enriched_log
        Enriched log (with the time passed from the previous event)
    """
    if parameters is None:
        parameters = {}

    timestamp_key = parameters.get(constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                   xes.DEFAULT_TIMESTAMP_KEY)
    worktiming = parameters.get("worktiming", [7, 17])
    weekends = parameters.get("weekends", [6, 7])

    if not type(log) is EventLog:
        log = log_converter.apply(log)

    log = sorting.sort_timestamp_log(log, timestamp_key)

    for trace in log:
        if trace:
            # the first event of a trace has no predecessor
            trace[0]["@@passed_time_from_previous"] = 0
            trace[0]["@@approx_bh_passed_time_from_previous"] = 0
            for idx in range(1, len(trace)):
                prev_ts = trace[idx - 1][timestamp_key]
                curr_ts = trace[idx][timestamp_key]
                trace[idx]["@@passed_time_from_previous"] = (
                    curr_ts - prev_ts).total_seconds()
                bh = BusinessHours(prev_ts.replace(tzinfo=None),
                                   curr_ts.replace(tzinfo=None),
                                   worktiming=worktiming,
                                   weekends=weekends)
                trace[idx]["@@approx_bh_passed_time_from_previous"] = bh.getseconds()
    return log
def get_case_dispersion_avg(log, parameters=None):
    """
    Gets the average time interlapsed between case ends

    Parameters
    --------------
    log
        Trace log
    parameters
        Parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> attribute of the log to be used as timestamp

    Returns
    --------------
    case_arrival_avg
        Average time interlapsed between case starts
    """
    if parameters is None:
        parameters = {}

    business_hours = exec_utils.get_param_value(Parameters.BUSINESS_HOURS, parameters, False)
    worktiming = exec_utils.get_param_value(Parameters.WORKTIMING, parameters, [7, 17])
    weekends = exec_utils.get_param_value(Parameters.WEEKENDS, parameters, [6, 7])
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                               DEFAULT_TIMESTAMP_KEY)

    # completion timestamp of each non-empty trace, in ascending order
    end_times = sorted(trace[-1][timestamp_key] for trace in log
                       if trace and timestamp_key in trace[0])

    gaps = []
    for earlier, later in zip(end_times, end_times[1:]):
        if business_hours:
            bh = BusinessHours(earlier.replace(tzinfo=None),
                               later.replace(tzinfo=None),
                               worktiming=worktiming,
                               weekends=weekends)
            gaps.append(bh.getseconds())
        else:
            gaps.append((later - earlier).total_seconds())

    if gaps:
        return statistics.mean(gaps)
    return 0.0
def get_case_arrival_avg(log, parameters=None):
    """
    Gets the average time interlapsed between case starts

    Parameters
    --------------
    log
        Trace log
    parameters
        Parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> attribute of the log to be used as timestamp
            "business_hours" -> compute differences in business hours (default: False)
            "worktiming" -> work shift as [start_hour, end_hour] (default: [7, 17])
            "weekends" -> indexes of the weekend days (default: [6, 7])

    Returns
    --------------
    case_arrival_avg
        Average time interlapsed between case starts
    """
    if parameters is None:
        parameters = {}

    business_hours = parameters.get("business_hours", False)
    worktiming = parameters.get("worktiming", [7, 17])
    weekends = parameters.get("weekends", [6, 7])
    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY, DEFAULT_TIMESTAMP_KEY)

    case_start_time = sorted(trace[0][timestamp_key] for trace in log
                             if trace and timestamp_key in trace[0])

    case_diff_start_time = []
    for i in range(len(case_start_time) - 1):
        if business_hours:
            bh = BusinessHours(case_start_time[i].replace(tzinfo=None),
                               case_start_time[i + 1].replace(tzinfo=None),
                               worktiming=worktiming,
                               weekends=weekends)
            case_diff_start_time.append(bh.getseconds())
        else:
            case_diff_start_time.append(
                (case_start_time[i + 1] - case_start_time[i]).total_seconds())

    if case_diff_start_time:
        # BUGFIX: function name/docstring promise the AVERAGE, and the typed sibling
        # implementation of get_case_arrival_avg uses statistics.mean; this variant
        # incorrectly returned statistics.median.
        return statistics.mean(case_diff_start_time)
    return 0.0
def get_remaining_time_from_log(log, max_len_trace=100000, parameters=None):
    """
    Gets the remaining time for the instances given a log and a trace index

    Parameters
    ------------
    log
        Log
    max_len_trace
        Index (maximum number of events per trace that are considered; each output
        list is padded to this length)
    parameters
        Parameters of the algorithm, including:
            constants.PARAMETER_CONSTANT_TIMESTAMP_KEY -> timestamp attribute
            "business_hours" -> compute remaining time in business hours (default: False)
            "worktiming" -> work shift (default: [7, 17])
            "weekends" -> indexes of the weekend days (default: [6, 7])

    Returns
    ------------
    list
        List of remaining times (one padded list per trace)
    """
    if parameters is None:
        parameters = {}

    timestamp_key = parameters[
        constants.
        PARAMETER_CONSTANT_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in parameters else xes.DEFAULT_TIMESTAMP_KEY
    business_hours = parameters[
        "business_hours"] if "business_hours" in parameters else False
    worktiming = parameters["worktiming"] if "worktiming" in parameters else [7, 17]
    weekends = parameters["weekends"] if "weekends" in parameters else [6, 7]

    y_orig = []
    for trace in log:
        y_orig.append([])
        for index, event in enumerate(trace):
            if index >= max_len_trace:
                break
            # remaining time = distance from this event to the trace's last event
            timestamp_st = trace[index][timestamp_key]
            timestamp_et = trace[-1][timestamp_key]
            if business_hours:
                bh = BusinessHours(timestamp_st.replace(tzinfo=None),
                                   timestamp_et.replace(tzinfo=None),
                                   worktiming=worktiming,
                                   weekends=weekends)
                y_orig[-1].append(bh.getseconds())
            else:
                y_orig[-1].append(
                    (timestamp_et - timestamp_st).total_seconds())
        # BUGFIX: an empty trace previously raised IndexError while padding
        # (y_orig[-1][-1] on an empty list); empty traces are now padded with 0.
        pad_value = y_orig[-1][-1] if y_orig[-1] else 0
        while len(y_orig[-1]) < max_len_trace:
            y_orig[-1].append(pad_value)
    return y_orig
def statics(request):
    """Django view: computes log statistics and visualizations, renders 'statics.html'.

    NOTE(review): relies on many module-level globals (ADRESS, logname, logtime, logtran,
    logstart, logcompl, logreso, logid, inputname, Duration, Deviation, Waitingtime) that
    are presumably populated by an earlier upload/configuration step — confirm against
    the rest of the module.
    """
    logadr = ADRESS
    # convert the raw uploaded file into a pm4py event log
    log = infra.recieve_and_convert_log.convert_log(logadr, logname, logtime, logtran,
                                                    logstart, logcompl, logreso, logid,
                                                    inputname[-3:])
    ptree = infra.recieve_and_convert_log.get_processtree(log)
    # cached module-level values are used instead of recomputing from the log
    #duration = infra.recieve_and_convert_log.get_duration(log)
    duration = Duration
    #deviation = infra.recieve_and_convert_log.get_deviation(duration,log)
    deviation = Deviation
    #waitingtime = infra.recieve_and_convert_log.waitingtime(log)
    waitingtime = Waitingtime
    #frequency = infra.recieve_and_convert_log.get_waitinhour(log,Waitingtime,'n',Watichange)
    #frequency = Frequency
    # business-hours demo computed over two fixed epoch timestamps
    st = datetime.fromtimestamp(100000000)
    et = datetime.fromtimestamp(200000000)
    bh_object = BusinessHours(st, et)
    worked_time = bh_object.getseconds()
    #log_path = os.path.join("tests","input_data","receipt.xes")
    initialtrace = infra.recieve_and_convert_log.initialtrace(log)
    # KDE of case durations, rendered both linearly and with semi-log x axis
    x, y = case_statistics.get_kde_caseduration(
        log,
        parameters={
            constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: "time:timestamp"
        })
    gviz1 = graphs_visualizer.apply_plot(
        x, y, variant=graphs_visualizer.Variants.CASES)
    gviz2 = graphs_visualizer.apply_semilogx(
        x, y, variant=graphs_visualizer.Variants.CASES)
    graphs_visualizer.save(gviz1, "DES1/static/image1.gv.png")
    graphs_visualizer.save(gviz2, "DES1/static/image2.gv.png")
    # KDE over the date attribute of the events
    x, y = attributes_filter.get_kde_date_attribute(log, attribute="time:timestamp")
    gviz3 = graphs_visualizer.apply_plot(
        x, y, variant=graphs_visualizer.Variants.DATES)
    graphs_visualizer.save(gviz3, "DES1/static/image3.gv.png")
    '''
    x, y = attributes_filter.get_kde_numeric_attribute(log, "amount")
    gviz4 = graphs_visualizer.apply_plot(x, y, variant=graphs_visualizer.Variants.ATTRIBUTES)
    gviz5 = graphs_visualizer.apply_semilogx(x, y, variant=graphs_visualizer.Variants.ATTRIBUTES)
    graphs_visualizer.save(gviz4,"./static/image4.gv.png")
    graphs_visualizer.save(gviz5,"./static/image5.gv.png")
    '''
    # NOTE(review): infra.recieve_and_convert_log.statics(log) is re-evaluated once per
    # field below — consider computing it once and indexing the result.
    numtrace = infra.recieve_and_convert_log.statics(log)[0]
    numactivity = infra.recieve_and_convert_log.statics(log)[1]
    activitylist = infra.recieve_and_convert_log.statics(log)[2]
    timeinterval = infra.recieve_and_convert_log.statics(log)[3]
    meanthoughputtime = infra.recieve_and_convert_log.statics(log)[4][0]
    deviationthoughputtime = infra.recieve_and_convert_log.statics(log)[4][1]
    arrivalratio = infra.recieve_and_convert_log.statics(log)[5]
    dispersionratio = infra.recieve_and_convert_log.statics(log)[6]
    resourcedict = infra.recieve_and_convert_log.initialresource1(log)
    initialcapacity = infra.recieve_and_convert_log.computecapacity(log)
    initiallimit = infra.recieve_and_convert_log.initiallimit(log)[0]
    # merge the per-resource capacity and limit information into triples
    initialcaplim = []
    for i in range(len(initialcapacity)):
        initialcaplim.append(
            (initialcapacity[i][0], initialcapacity[i][1], initiallimit[i][1]))
    #print(intialcapacity,"line 205")
    Actresource = roles_discovery.apply(
        log, variant=None, parameters={rpd.Parameters.RESOURCE_KEY: logreso})
    list0 = []
    infra.recieve_and_convert_log.notdoact(ptree, list0)
    handover = infra.recieve_and_convert_log.getactivityresourcecount(
        log, list0, logname, logreso)[1]
    # round duration/deviation figures for display in the template
    for i, x in enumerate(duration):
        duration[i] = (x[0], round(x[1], 2), round(deviation[i][1], 2))
    for i, x in enumerate(deviation):
        deviation[i] = (x[0], round(x[1], 2))
    context = {'log': log, 'ptree': ptree, 'duration': duration, 'deviation': deviation,
               'worked_time': worked_time, 'numtrace': numtrace,
               'numactivity': numactivity, 'activitylist': activitylist,
               'timeinterval': timeinterval, 'meanthoughputtime': meanthoughputtime,
               'deviationthoughputtime': deviationthoughputtime,
               'arrivalratio': arrivalratio, 'dispersionratio': dispersionratio,
               'resourcedict': Actresource, 'handover': handover,
               "initialcaplim": initialcaplim, 'initialtrace': initialtrace}
    return render(request, 'statics.html', context)
def get_class_representation_by_trace_duration(log, target_trace_duration, timestamp_key="time:timestamp", parameters=None):
    """
    Get class representation by splitting traces according to trace duration

    Parameters
    ------------
    log
        Trace log
    target_trace_duration
        Target trace duration
    timestamp_key
        Timestamp key

    Returns
    ------------
    target
        Target part for decision tree learning
    classes
        Name of the classes, in order
    """
    if parameters is None:
        parameters = {}

    business_hours = parameters.get("business_hours", False)
    worktiming = parameters.get("worktiming", [7, 17])
    weekends = parameters.get("weekends", [6, 7])

    dictionary = {}
    target = []
    classes = []
    for trace in log:
        label = "LESSEQUAL"
        if len(trace) > 0 and timestamp_key in trace[0] and timestamp_key in trace[-1]:
            first_ts = trace[0][timestamp_key]
            last_ts = trace[-1][timestamp_key]
            if business_hours:
                bh = BusinessHours(first_ts.replace(tzinfo=None),
                                   last_ts.replace(tzinfo=None),
                                   worktiming=worktiming,
                                   weekends=weekends)
                diff = bh.getseconds()
            else:
                diff = (last_ts - first_ts).total_seconds()
            if diff > target_trace_duration:
                label = "GREATER"
        label = str(label)
        if label not in dictionary:
            # first time this class is seen: assign it the next index
            dictionary[label] = len(classes)
            classes.append(label)
        target.append(dictionary[label])

    return np.array(target), classes
def get_case_arrival_avg(
        log: EventLog,
        parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> float:
    """
    Gets the average time interlapsed between case starts

    Parameters
    --------------
    log
        Trace log
    parameters
        Parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> attribute of the log to be used as timestamp

    Returns
    --------------
    case_arrival_avg
        Average time interlapsed between case starts
    """
    if parameters is None:
        parameters = {}

    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG,
                              parameters=parameters)

    business_hours = exec_utils.get_param_value(Parameters.BUSINESS_HOURS, parameters, False)
    worktiming = exec_utils.get_param_value(Parameters.WORKTIMING, parameters, [7, 17])
    weekends = exec_utils.get_param_value(Parameters.WEEKENDS, parameters, [6, 7])
    workcalendar = exec_utils.get_param_value(
        Parameters.WORKCALENDAR, parameters,
        constants.DEFAULT_BUSINESS_HOURS_WORKCALENDAR)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                               DEFAULT_TIMESTAMP_KEY)

    # first timestamp of each non-empty trace, ascending
    start_times = sorted(trace[0][timestamp_key] for trace in log
                         if trace and timestamp_key in trace[0])

    gaps = []
    for previous, current in zip(start_times, start_times[1:]):
        if business_hours:
            bh = BusinessHours(previous.replace(tzinfo=None),
                               current.replace(tzinfo=None),
                               worktiming=worktiming,
                               weekends=weekends,
                               workcalendar=workcalendar)
            gaps.append(bh.getseconds())
        else:
            gaps.append((current - previous).total_seconds())

    return statistics.mean(gaps) if gaps else 0.0
def to_interval(log, parameters=None):
    """
    Converts a log to interval format (e.g. an event has two timestamps) from lifecycle format
    (an event has only a timestamp, and a transition lifecycle)

    Parameters
    -------------
    log
        Log (expressed in the lifecycle format)
    parameters
        Possible parameters of the method (activity, timestamp key, start timestamp key,
        transition ...)

    Returns
    -------------
    log
        Interval event log
    """
    if parameters is None:
        parameters = {}

    # resolve attribute keys, falling back to the XES defaults
    timestamp_key = parameters[
        constants.
        PARAMETER_CONSTANT_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in parameters else xes.DEFAULT_TIMESTAMP_KEY
    start_timestamp_key = parameters[
        constants.
        PARAMETER_CONSTANT_START_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY in parameters else xes.DEFAULT_START_TIMESTAMP_KEY
    transition_key = parameters[
        constants.
        PARAMETER_CONSTANT_TRANSITION_KEY] if constants.PARAMETER_CONSTANT_TRANSITION_KEY in parameters else xes.DEFAULT_TRANSITION_KEY
    activity_key = parameters[
        constants.
        PARAMETER_CONSTANT_ACTIVITY_KEY] if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY
    business_hours = parameters[
        "business_hours"] if "business_hours" in parameters else False
    worktiming = parameters["worktiming"] if "worktiming" in parameters else [7, 17]
    weekends = parameters["weekends"] if "weekends" in parameters else [6, 7]

    if log is not None and len(log) > 0:
        # if the log is already in interval format, return it unchanged
        if "PM4PY_TYPE" in log.attributes and log.attributes[
                "PM4PY_TYPE"] == "interval":
            return log
        if log[0] is not None and len(log[0]) > 0:
            first_event = log[0][0]
            if start_timestamp_key in first_event:
                return log

        new_log = EventLog()
        new_log.attributes["PM4PY_TYPE"] = "interval"

        for trace in log:
            new_trace = Trace()
            for attr in trace.attributes:
                new_trace.attributes[attr] = trace.attributes[attr]
            # pending "start" events per activity, waiting for their matching "complete"
            activities_start = {}
            for event in trace:
                activity = event[activity_key]
                transition = event[
                    transition_key] if transition_key in event else "complete"
                timestamp = event[timestamp_key]
                if transition.lower() == "start":
                    if activity not in activities_start:
                        activities_start[activity] = list()
                    activities_start[activity].append(event)
                elif transition.lower() == "complete":
                    start_event = None
                    # if no matching start was recorded, the interval collapses to zero length
                    start_timestamp = event[timestamp_key]
                    if activity in activities_start and len(
                            activities_start[activity]) > 0:
                        # FIFO pairing of starts with completes
                        start_event = activities_start[activity].pop(0)
                        start_timestamp = start_event[timestamp_key]
                    new_event = Event()
                    # copy all attributes except the timestamp and the transition
                    for attr in event:
                        if not attr == timestamp_key and not attr == transition_key:
                            new_event[attr] = event[attr]
                    if start_event is not None:
                        # keep the start event's attributes under a dedicated prefix
                        for attr in start_event:
                            if not attr == timestamp_key and not attr == transition_key:
                                new_event["@@startevent_" + attr] = start_event[attr]
                    new_event[start_timestamp_key] = start_timestamp
                    new_event[timestamp_key] = timestamp
                    new_event["@@duration"] = (
                        timestamp - start_timestamp).total_seconds()

                    if business_hours:
                        bh = BusinessHours(
                            start_timestamp.replace(tzinfo=None),
                            timestamp.replace(tzinfo=None),
                            worktiming=worktiming,
                            weekends=weekends)
                        new_event["@@approx_bh_duration"] = bh.getseconds()

                    new_trace.append(new_event)
            new_trace = sorting.sort_timestamp_trace(new_trace, start_timestamp_key)
            new_log.append(new_trace)
        return new_log

    return log
def assign_lead_cycle_time(log, parameters=None):
    """
    Assigns the lead and cycle time to an interval log

    Parameters
    -------------
    log
        Interval log
    parameters
        Parameters of the algorithm, including: start_timestamp_key, timestamp_key,
        worktiming, weekends
    """
    if parameters is None:
        parameters = {}

    start_timestamp_key = parameters[
        constants.
        PARAMETER_CONSTANT_START_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY in parameters else xes.DEFAULT_START_TIMESTAMP_KEY
    timestamp_key = parameters[
        constants.
        PARAMETER_CONSTANT_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in parameters else xes.DEFAULT_TIMESTAMP_KEY
    worktiming = parameters["worktiming"] if "worktiming" in parameters else [7, 17]
    weekends = parameters["weekends"] if "weekends" in parameters else [6, 7]

    # make sure the log is in interval format before computing intervals
    interval_log = to_interval(log, parameters=parameters)

    for trace in interval_log:
        approx_partial_lead_time = 0
        approx_partial_cycle_time = 0
        approx_wasted_time = 0
        # latest completion timestamp seen so far in the trace
        max_et = None
        max_et_seconds = 0
        for i in range(len(trace)):
            this_wasted_time = 0
            st = trace[i][start_timestamp_key]
            st_seconds = st.timestamp()
            et = trace[i][timestamp_key]
            et_seconds = et.timestamp()

            if max_et_seconds > 0 and st_seconds > max_et_seconds:
                # gap between the previous completion and this start counts as
                # unworked (wasted) time, measured in business hours
                bh_unworked = BusinessHours(max_et.replace(tzinfo=None),
                                            st.replace(tzinfo=None),
                                            worktiming=worktiming,
                                            weekends=weekends)
                unworked_sec = bh_unworked.getseconds()
                approx_partial_lead_time = approx_partial_lead_time + unworked_sec
                approx_wasted_time = approx_wasted_time + unworked_sec
                this_wasted_time = unworked_sec

            if st_seconds > max_et_seconds:
                # non-overlapping interval: count its whole business-hours duration
                bh = BusinessHours(st.replace(tzinfo=None),
                                   et.replace(tzinfo=None),
                                   worktiming=worktiming,
                                   weekends=weekends)
                approx_bh_duration = bh.getseconds()
                approx_partial_cycle_time = approx_partial_cycle_time + approx_bh_duration
                approx_partial_lead_time = approx_partial_lead_time + approx_bh_duration
            elif st_seconds < max_et_seconds and et_seconds > max_et_seconds:
                # partially overlapping interval: count only the part beyond the
                # previously reached completion timestamp
                bh = BusinessHours(max_et.replace(tzinfo=None),
                                   et.replace(tzinfo=None),
                                   worktiming=worktiming,
                                   weekends=weekends)
                approx_bh_duration = bh.getseconds()
                approx_partial_cycle_time = approx_partial_cycle_time + approx_bh_duration
                approx_partial_lead_time = approx_partial_lead_time + approx_bh_duration

            if et_seconds > max_et_seconds:
                max_et_seconds = et_seconds
                max_et = et

            ratio_cycle_lead_time = 1
            if approx_partial_lead_time > 0:
                ratio_cycle_lead_time = approx_partial_cycle_time / approx_partial_lead_time

            trace[i][
                "@@approx_bh_partial_cycle_time"] = approx_partial_cycle_time
            trace[i][
                "@@approx_bh_partial_lead_time"] = approx_partial_lead_time
            trace[i]["@@approx_bh_overall_wasted_time"] = approx_wasted_time
            trace[i]["@@approx_bh_this_wasted_time"] = this_wasted_time
            # NOTE(review): single '@' prefix (others use '@@') — kept as-is, since
            # downstream consumers may depend on this exact key
            trace[i][
                "@approx_bh_ratio_cycle_lead_time"] = ratio_cycle_lead_time
    return interval_log
def get_cases_description(log, parameters=None):
    """
    Get a description of traces present in the log

    Parameters
    -----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
        case_id_key -> Trace attribute in which the case ID is contained
        timestamp_key -> Column that identifies the timestamp
        enable_sort -> Enable sorting of traces
        sort_by_index ->         Sort the traces using this index:
            0 -> case ID
            1 -> start time
            2 -> end time
            3 -> difference
        sort_ascending -> Set sort direction (boolean; it true then the sort direction
        is ascending, otherwise descending)
        max_ret_cases -> Set the maximum number of returned traces

    Returns
    -----------
    ret
        Dictionary of traces associated to their start timestamp, their end timestamp
        and their duration
    """
    if parameters is None:
        parameters = {}

    case_id_key = parameters.get(PARAMETER_CONSTANT_CASEID_KEY, DEFAULT_TRACEID_KEY)
    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY, DEFAULT_TIMESTAMP_KEY)
    enable_sort = parameters.get("enable_sort", True)
    sort_by_index = parameters.get("sort_by_index", 0)
    sort_ascending = parameters.get("sort_ascending", True)
    max_ret_cases = parameters.get("max_ret_cases", None)
    business_hours = parameters.get("business_hours", False)
    worktiming = parameters.get("worktiming", [7, 17])
    weekends = parameters.get("weekends", [6, 7])

    rows = []
    for index, trace in enumerate(log):
        if not trace:
            continue
        if case_id_key in trace.attributes:
            case_id = trace.attributes[case_id_key]
        else:
            # fall back to a synthetic identifier when the case ID attribute is missing
            case_id = "EMPTY" + str(index)
        first_ts = trace[0][timestamp_key]
        last_ts = trace[-1][timestamp_key]
        if business_hours:
            bh = BusinessHours(first_ts.replace(tzinfo=None),
                               last_ts.replace(tzinfo=None),
                               worktiming=worktiming,
                               weekends=weekends)
            diff = bh.getseconds()
        else:
            diff = last_ts.timestamp() - first_ts.timestamp()
        rows.append([case_id, first_ts.timestamp(), last_ts.timestamp(), diff])

    if enable_sort:
        rows = sorted(rows,
                      key=lambda row: row[sort_by_index],
                      reverse=not sort_ascending)
    if max_ret_cases is not None:
        rows = rows[:min(len(rows), max_ret_cases)]

    return {
        str(row[0]): {
            "startTime": row[1],
            "endTime": row[2],
            "caseDuration": row[3]
        }
        for row in rows
    }
def apply(
        log: EventLog,
        parameters: Optional[Dict[Union[str, Parameters], Any]] = None
) -> Dict[str, float]:
    """
    Gets the sojourn time per activity on an event log object

    Parameters
    --------------
    dataframe
        Pandas dataframe
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY => activity key
        - Parameters.START_TIMESTAMP_KEY => start timestamp key
        - Parameters.TIMESTAMP_KEY => timestamp key
        - Parameters.BUSINESS_HOURS => calculates the difference of time based on the
          business hours, not the total time. Default: False
        - Parameters.WORKTIMING => work schedule of the company (provided as a list where
          the first number is the start of the work time, and the second number is the end
          of the work time), if business hours are enabled. Default: [7, 17]
        - Parameters.WEEKENDS => indexes of the days of the week that are weekend.
          Default: [6, 7]
        - Parameters.AGGREGATION_MEASURE => performance aggregation measure
          (sum, min, max, mean, median)

    Returns
    --------------
    soj_time_dict
        Sojourn time dictionary
    """
    if parameters is None:
        parameters = {}

    business_hours = exec_utils.get_param_value(Parameters.BUSINESS_HOURS, parameters, False)
    worktiming = exec_utils.get_param_value(Parameters.WORKTIMING, parameters, [7, 17])
    weekends = exec_utils.get_param_value(Parameters.WEEKENDS, parameters, [6, 7])
    workcalendar = exec_utils.get_param_value(
        Parameters.WORKCALENDAR, parameters,
        constants.DEFAULT_BUSINESS_HOURS_WORKCALENDAR)

    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG,
                              parameters=parameters)

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    start_timestamp_key = exec_utils.get_param_value(
        Parameters.START_TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    aggregation_measure = exec_utils.get_param_value(
        Parameters.AGGREGATION_MEASURE, parameters, "mean")

    # group the per-event durations by activity
    durations_dict = {}
    for trace in log:
        for event in trace:
            bucket = durations_dict.setdefault(event[activity_key], [])
            if business_hours:
                bh = BusinessHours(
                    event[start_timestamp_key].replace(tzinfo=None),
                    event[timestamp_key].replace(tzinfo=None),
                    worktiming=worktiming,
                    weekends=weekends,
                    workcalendar=workcalendar)
                bucket.append(bh.getseconds())
            else:
                bucket.append(event[timestamp_key].timestamp()
                              - event[start_timestamp_key].timestamp())

    # collapse each activity's duration list into a single aggregated number;
    # any unrecognized measure falls back to the mean, as before
    aggregators = {"median": median, "min": min, "max": max, "sum": sum}
    aggregate = aggregators.get(aggregation_measure, mean)
    for act in durations_dict:
        durations_dict[act] = aggregate(durations_dict[act])

    return durations_dict
def apply(
        log: EventLog,
        parameters: Optional[Dict[Any, Any]] = None) -> typing.TemporalProfile:
    """
    Gets the temporal profile from the log.

    Implements the approach described in:
    Stertz, Florian, Jürgen Mangler, and Stefanie Rinderle-Ma. "Temporal Conformance
    Checking at Runtime based on Time-infused Process Models." arXiv preprint
    arXiv:2008.07262 (2020).

    Parameters
    ----------
    log
        Event log
    parameters
        Parameters, including:
        - Parameters.ACTIVITY_KEY => the attribute to use as activity
        - Parameters.START_TIMESTAMP_KEY => the attribute to use as start timestamp
        - Parameters.TIMESTAMP_KEY => the attribute to use as timestamp
        - Parameters.BUSINESS_HOURS => calculates the difference of time based on the
          business hours, not the total time. Default: False
        - Parameters.WORKTIMING => work schedule of the company (provided as a list where
          the first number is the start of the work time, and the second number is the end
          of the work time), if business hours are enabled. Default: [7, 17]
        - Parameters.WEEKENDS => indexes of the days of the week that are weekend.
          Default: [6, 7]

    Returns
    -------
    temporal_profile
        Temporal profile of the log
    """
    if parameters is None:
        parameters = {}

    log = log_converter.apply(log,
                              variant=log_converter.Variants.TO_EVENT_LOG,
                              parameters=parameters)

    business_hours = exec_utils.get_param_value(Parameters.BUSINESS_HOURS, parameters,
                                                False)
    worktiming = exec_utils.get_param_value(Parameters.WORKTIMING, parameters, [7, 17])
    weekends = exec_utils.get_param_value(Parameters.WEEKENDS, parameters, [6, 7])
    workcalendar = exec_utils.get_param_value(
        Parameters.WORKCALENDAR, parameters,
        constants.DEFAULT_BUSINESS_HOURS_WORKCALENDAR)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    start_timestamp_key = exec_utils.get_param_value(
        Parameters.START_TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)

    # for every ordered pair of activities (i before j in the same trace), collect the
    # times elapsed between completion of i and start of j — only when j does not
    # start before i completes
    diff_time_recordings = {}
    for trace in log:
        for i in range(len(trace) - 1):
            act_i = trace[i][activity_key]
            time_i = trace[i][timestamp_key].timestamp()
            for j in range(i + 1, len(trace)):
                time_j = trace[j][start_timestamp_key].timestamp()
                if time_j >= time_i:
                    act_j = trace[j][activity_key]
                    if not (act_i, act_j) in diff_time_recordings:
                        diff_time_recordings[(act_i, act_j)] = []
                    if business_hours:
                        bh = BusinessHours(
                            trace[i][timestamp_key].replace(tzinfo=None),
                            trace[j][start_timestamp_key].replace(tzinfo=None),
                            worktiming=worktiming,
                            weekends=weekends,
                            workcalendar=workcalendar)
                        diff_time_recordings[(act_i, act_j)].append(bh.getseconds())
                    else:
                        diff_time_recordings[(act_i, act_j)].append(time_j - time_i)

    # aggregate to (mean, stdev); a single observation gets a standard deviation of 0
    temporal_profile = {}
    for ac in diff_time_recordings:
        if len(diff_time_recordings[ac]) > 1:
            temporal_profile[ac] = (mean(diff_time_recordings[ac]),
                                    stdev(diff_time_recordings[ac]))
        else:
            temporal_profile[ac] = (diff_time_recordings[ac][0], 0)

    return temporal_profile
case_dispersion_ratio #Peformance Spectrum from pm4py.statistics.performance_spectrum import algorithm as performance_spectrum ps = performance_spectrum.apply(log, ["register request", "decide"], parameters= {performance_spectrum.Parameters.ACTIVITY_KEY: "concept:name", performance_spectrum.Parameters.TIMESTAMP_KEY: "time:timestamp"}) ps #Business Hours from pm4py.util.business_hours import BusinessHours from datetime import datetime st = datetime.fromtimestamp(100000000) et = datetime.fromtimestamp(200000000) bh_object = BusinessHours(st, et) worked_time = bh_object.getseconds() print(worked_time) #Cycle Time and Waiting Time from pm4py.objects.log.util import interval_lifecycle enriched_log = interval_lifecycle.assign_lead_cycle_time(log) #error #display graphs import os from pm4py.objects.log.importer.xes import importer as xes_importer log_path = os.path.join("tests","input_data","receipt.xes") log = xes_importer.apply(log_path) from pm4py.util import constants
def train(log, parameters=None):
    """
    Train the prediction model (ElasticNet regression on remaining time)

    Parameters
    -----------
    log
        Event log
    parameters
        Possible parameters of the algorithm, including:
            constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> activity attribute
            constants.PARAMETER_CONSTANT_TIMESTAMP_KEY -> timestamp attribute
            "business_hours" -> compute the target in business hours (default: False)
            "worktiming", "weekends" -> business-hours configuration
            "y_orig" -> precomputed target values (list of lists; overrides computation)
            "str_tr_attr"/"str_ev_attr"/"num_tr_attr"/"num_ev_attr" -> feature selection

    Returns
    ------------
    model
        Trained model (dictionary with the regressor and the feature setup)
    """
    if parameters is None:
        parameters = {}
    # NOTE(review): mutates the caller's dict — presumably intentional, so that
    # downstream consumers sharing this dict see sorting disabled; confirm.
    parameters["enable_sort"] = False

    activity_key = parameters[
        constants.
        PARAMETER_CONSTANT_ACTIVITY_KEY] if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY
    timestamp_key = parameters[
        constants.
        PARAMETER_CONSTANT_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in parameters else xes.DEFAULT_TIMESTAMP_KEY
    business_hours = parameters[
        "business_hours"] if "business_hours" in parameters else False
    worktiming = parameters["worktiming"] if "worktiming" in parameters else [7, 17]
    weekends = parameters["weekends"] if "weekends" in parameters else [6, 7]
    y_orig = parameters["y_orig"] if "y_orig" in parameters else None

    log = sorting.sort_timestamp(log, timestamp_key)

    str_evsucc_attr = [activity_key]
    if "str_ev_attr" in parameters:
        # features explicitly provided by the caller
        str_tr_attr = parameters["str_tr_attr"] if "str_tr_attr" in parameters else []
        str_ev_attr = parameters["str_ev_attr"] if "str_ev_attr" in parameters else []
        num_tr_attr = parameters["num_tr_attr"] if "num_tr_attr" in parameters else []
        num_ev_attr = parameters["num_ev_attr"] if "num_ev_attr" in parameters else []
    else:
        # automatic feature selection from the log
        str_tr_attr, str_ev_attr, num_tr_attr, num_ev_attr = attributes_filter.select_attributes_from_log_for_tree(
            log)
        if activity_key not in str_ev_attr:
            str_ev_attr.append(activity_key)

    max_trace_length = max(len(x) for x in log)

    if max_trace_length == 1:
        # single-event traces: no prefixes to expand
        data, feature_names = get_log_representation.get_representation(
            log, str_tr_attr, str_ev_attr, num_tr_attr, num_ev_attr,
            str_evsucc_attr=str_evsucc_attr)
        ext_log = log
    else:
        # expand each trace into all of its prefixes
        ext_log, change_indexes = get_log_with_log_prefixes(log)
        data, feature_names = get_log_representation.get_representation(
            ext_log, str_tr_attr, str_ev_attr, num_tr_attr, num_ev_attr,
            str_evsucc_attr=str_evsucc_attr)

    if y_orig is not None:
        # flatten the precomputed per-trace targets
        remaining_time = [y for x in y_orig for y in x]
    else:
        remaining_time = []
        if business_hours:
            for trace in ext_log:
                if trace:
                    bh = BusinessHours(
                        trace[0][timestamp_key].replace(tzinfo=None),
                        trace[-1][timestamp_key].replace(tzinfo=None),
                        worktiming=worktiming,
                        weekends=weekends)
                    remaining_time.append(bh.getseconds())
                else:
                    remaining_time.append(0)
        else:
            for trace in ext_log:
                if trace:
                    remaining_time.append(
                        (trace[-1][timestamp_key] -
                         trace[0][timestamp_key]).total_seconds())
                else:
                    remaining_time.append(0)

    regr = ElasticNet(max_iter=10000, l1_ratio=0.7)
    # BUGFIX: removed stray debug `print(data)` that dumped the whole feature
    # matrix to stdout on every training run.
    regr.fit(data, remaining_time)

    return {
        "str_tr_attr": str_tr_attr,
        "str_ev_attr": str_ev_attr,
        "num_tr_attr": num_tr_attr,
        "num_ev_attr": num_ev_attr,
        "str_evsucc_attr": str_evsucc_attr,
        "feature_names": feature_names,
        "remaining_time": remaining_time,
        "regr": regr,
        "variant": "elasticnet"
    }
def to_interval(log, parameters=None):
    """
    Converts a log to interval format (e.g. an event has two timestamps) from lifecycle format
    (an event has only a timestamp, and a transition lifecycle).

    Each 'complete' event is matched to the earliest pending 'start' event of the same
    (activity, lifecycle instance); events with other transitions are dropped from the output.

    Parameters
    -------------
    log
        Log (expressed in the lifecycle format)
    parameters
        Possible parameters of the method (activity, timestamp key, start timestamp key,
        transition key, lifecycle instance key, business hours configuration)

    Returns
    -------------
    log
        Interval event log (or the input log unchanged if it is empty or already interval)
    """
    if parameters is None:
        parameters = {}

    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                               xes.DEFAULT_TIMESTAMP_KEY)
    start_timestamp_key = exec_utils.get_param_value(
        Parameters.START_TIMESTAMP_KEY, parameters,
        xes.DEFAULT_START_TIMESTAMP_KEY)
    transition_key = exec_utils.get_param_value(Parameters.TRANSITION_KEY,
                                                parameters,
                                                xes.DEFAULT_TRANSITION_KEY)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters, xes.DEFAULT_NAME_KEY)
    lifecycle_instance_key = exec_utils.get_param_value(
        Parameters.LIFECYCLE_INSTANCE_KEY, parameters,
        xes.DEFAULT_INSTANCE_KEY)
    business_hours = exec_utils.get_param_value(Parameters.BUSINESS_HOURS,
                                                parameters, False)
    worktiming = exec_utils.get_param_value(Parameters.WORKTIMING, parameters,
                                            [7, 17])
    weekends = exec_utils.get_param_value(Parameters.WEEKENDS, parameters,
                                          [6, 7])

    if log is not None and len(log) > 0:
        # a previous conversion marks the log attributes: nothing to do in that case
        if "PM4PY_TYPE" in log.attributes and log.attributes[
                "PM4PY_TYPE"] == "interval":
            return log
        if log[0] is not None and len(log[0]) > 0:
            first_event = log[0][0]
            # a start timestamp on the first event is taken as evidence that
            # the log is already in interval format
            if start_timestamp_key in first_event:
                return log

        new_log = EventLog(attributes=copy(log.attributes),
                           extensions=copy(log.extensions),
                           classifiers=copy(log.classifiers),
                           omni_present=copy(log.omni_present),
                           properties=copy(log.properties))
        new_log.attributes["PM4PY_TYPE"] = "interval"
        new_log.properties[
            constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY] = xes.DEFAULT_START_TIMESTAMP_KEY

        for trace in log:
            new_trace = Trace()
            for attr in trace.attributes:
                new_trace.attributes[attr] = trace.attributes[attr]
            # pending 'start' events per (activity, lifecycle instance), consumed FIFO
            activities_start = {}
            for event in trace:
                activity = event[activity_key]
                instance = event[
                    lifecycle_instance_key] if lifecycle_instance_key in event else None
                activity = (activity, instance)
                transition = event[
                    transition_key] if transition_key in event else "complete"
                timestamp = event[timestamp_key]
                if transition.lower() == "start":
                    if activity not in activities_start:
                        activities_start[activity] = list()
                    activities_start[activity].append(event)
                elif transition.lower() == "complete":
                    start_event = None
                    # default: no matching 'start' found -> zero-length interval
                    start_timestamp = event[timestamp_key]
                    if activity in activities_start and len(
                            activities_start[activity]) > 0:
                        start_event = activities_start[activity].pop(0)
                        start_timestamp = start_event[timestamp_key]
                    new_event = Event()
                    # copy all attributes of the complete event except timestamp/transition
                    for attr in event:
                        if not attr == timestamp_key and not attr == transition_key:
                            new_event[attr] = event[attr]
                    if start_event is not None:
                        # carry over the attributes of the matched start event, prefixed
                        for attr in start_event:
                            if not attr == timestamp_key and not attr == transition_key:
                                new_event["@@startevent_" + attr] = start_event[attr]
                    new_event[start_timestamp_key] = start_timestamp
                    new_event[timestamp_key] = timestamp
                    new_event["@@duration"] = (
                        timestamp - start_timestamp).total_seconds()
                    if business_hours:
                        # duration restricted to working hours (approximate)
                        bh = BusinessHours(
                            start_timestamp.replace(tzinfo=None),
                            timestamp.replace(tzinfo=None),
                            worktiming=worktiming,
                            weekends=weekends)
                        new_event["@@approx_bh_duration"] = bh.getseconds()
                    new_trace.append(new_event)
            new_trace = sorting.sort_timestamp_trace(new_trace,
                                                     start_timestamp_key)
            new_log.append(new_trace)
        return new_log

    return log
def assign_lead_cycle_time(log, parameters=None):
    """
    Assigns the lead and cycle time to an interval log.

    For each event, annotates (in business-hours seconds):
      - partial cycle time: time actually worked up to this point of the trace
      - partial lead time: worked time plus idle gaps between events
      - wasted time: accumulated idle gaps only

    Parameters
    -------------
    log
        Interval log (a lifecycle log is converted first via to_interval)
    parameters
        Parameters of the algorithm, including: start_timestamp_key,
        timestamp_key, worktiming, weekends
    """
    if parameters is None:
        parameters = {}

    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY,
                                               parameters,
                                               xes.DEFAULT_TIMESTAMP_KEY)
    start_timestamp_key = exec_utils.get_param_value(
        Parameters.START_TIMESTAMP_KEY, parameters,
        xes.DEFAULT_START_TIMESTAMP_KEY)
    worktiming = exec_utils.get_param_value(Parameters.WORKTIMING, parameters,
                                            [7, 17])
    weekends = exec_utils.get_param_value(Parameters.WEEKENDS, parameters,
                                          [6, 7])

    # guarantee interval format before annotating
    interval_log = to_interval(log, parameters=parameters)

    for trace in interval_log:
        # running totals over the trace (business-hours seconds)
        approx_partial_lead_time = 0
        approx_partial_cycle_time = 0
        approx_wasted_time = 0
        max_et = None            # latest event completion timestamp seen so far
        max_et_seconds = 0       # its epoch seconds (0 = no event processed yet)
        for i in range(len(trace)):
            this_wasted_time = 0
            st = trace[i][start_timestamp_key]
            st_seconds = st.timestamp()
            et = trace[i][timestamp_key]
            et_seconds = et.timestamp()

            if max_et_seconds > 0 and st_seconds > max_et_seconds:
                # gap between the previous completion and this start: idle (wasted) time;
                # counts toward lead time but not cycle time
                bh_unworked = BusinessHours(max_et.replace(tzinfo=None),
                                            st.replace(tzinfo=None),
                                            worktiming=worktiming,
                                            weekends=weekends)
                unworked_sec = bh_unworked.getseconds()
                approx_partial_lead_time = approx_partial_lead_time + unworked_sec
                approx_wasted_time = approx_wasted_time + unworked_sec
                this_wasted_time = unworked_sec

            if st_seconds > max_et_seconds:
                # no overlap with previous work: count the event's full duration
                bh = BusinessHours(st.replace(tzinfo=None),
                                   et.replace(tzinfo=None),
                                   worktiming=worktiming,
                                   weekends=weekends)
                approx_bh_duration = bh.getseconds()
                approx_partial_cycle_time = approx_partial_cycle_time + approx_bh_duration
                approx_partial_lead_time = approx_partial_lead_time + approx_bh_duration
            elif st_seconds < max_et_seconds and et_seconds > max_et_seconds:
                # partial overlap: only the portion after the previous completion is new work
                bh = BusinessHours(max_et.replace(tzinfo=None),
                                   et.replace(tzinfo=None),
                                   worktiming=worktiming,
                                   weekends=weekends)
                approx_bh_duration = bh.getseconds()
                approx_partial_cycle_time = approx_partial_cycle_time + approx_bh_duration
                approx_partial_lead_time = approx_partial_lead_time + approx_bh_duration

            if et_seconds > max_et_seconds:
                max_et_seconds = et_seconds
                max_et = et

            ratio_cycle_lead_time = 1
            if approx_partial_lead_time > 0:
                ratio_cycle_lead_time = approx_partial_cycle_time / approx_partial_lead_time

            trace[i][
                "@@approx_bh_partial_cycle_time"] = approx_partial_cycle_time
            trace[i][
                "@@approx_bh_partial_lead_time"] = approx_partial_lead_time
            trace[i]["@@approx_bh_overall_wasted_time"] = approx_wasted_time
            trace[i]["@@approx_bh_this_wasted_time"] = this_wasted_time
            # NOTE(review): single '@' prefix is inconsistent with the '@@' keys above,
            # but it is kept as-is because downstream consumers may read this exact key
            trace[i][
                "@approx_bh_ratio_cycle_lead_time"] = ratio_cycle_lead_time

    return interval_log
def single_element_statistics(log, net, initial_marking, aligned_traces,
                              variants_idx, activity_key="concept:name",
                              timestamp_key="time:timestamp",
                              ht_perf_method="last", parameters=None):
    """
    Get single Petrinet element statistics

    Parameters
    ------------
    log
        Log
    net
        Petri net
    initial_marking
        Initial marking
    aligned_traces
        Result of the token-based replay
    variants_idx
        Variants along with indexes of belonging traces
    activity_key
        Activity key (must be specified if different from concept:name)
    timestamp_key
        Timestamp key (must be specified if different from time:timestamp)
    ht_perf_method
        Method to use in order to annotate hidden transitions (performance value could be
        put on the last possible point (last) or in the first possible point (first)
    parameters
        Possible parameters of the algorithm

    Returns
    ------------
    statistics
        Petri net element statistics (frequency, unaggregated performance)
    """
    if parameters is None:
        parameters = {}

    business_hours = parameters[
        "business_hours"] if "business_hours" in parameters else False
    worktiming = parameters["worktiming"] if "worktiming" in parameters else [
        7, 17
    ]
    weekends = parameters["weekends"] if "weekends" in parameters else [6, 7]

    statistics = {}

    for variant in variants_idx:
        # the annotation is computed once per variant on a representative trace;
        # frequency counts are then scaled by the number of traces in the variant
        first_trace = log[variants_idx[variant][0]]
        act_trans = aligned_traces[variants_idx[variant]
                                   [0]]["activated_transitions"]
        annotations_places_trans, annotations_arcs = calculate_annotation_for_trace(
            first_trace, net, initial_marking, act_trans, activity_key,
            ht_perf_method=ht_perf_method)

        for el in annotations_places_trans:
            if el not in statistics:
                statistics[el] = {
                    "count": 0,
                    "performance": [],
                    "log_idx": [],
                    "no_of_times_enabled": 0,
                    "no_of_times_activated": 0
                }
            statistics[el]["count"] += annotations_places_trans[el][
                "count"] * len(variants_idx[variant])
            if "no_of_times_enabled" in annotations_places_trans[el]:
                statistics[el][
                    "no_of_times_enabled"] += annotations_places_trans[el][
                        "no_of_times_enabled"] * len(variants_idx[variant])
                statistics[el][
                    "no_of_times_activated"] += annotations_places_trans[el][
                        "no_of_times_activated"] * len(variants_idx[variant])
            if "performance" in annotations_places_trans[el]:
                # performance is re-evaluated on every trace of the variant,
                # since timestamps differ between traces of the same variant
                for trace_idx in variants_idx[variant]:
                    trace = log[trace_idx]
                    for perf_couple in annotations_places_trans[el][
                            "performance"]:
                        # perf_couple = (later event index, earlier event index)
                        if timestamp_key in trace[perf_couple[
                                0]] and timestamp_key in trace[perf_couple[1]]:
                            if business_hours:
                                bh = BusinessHours(
                                    trace[perf_couple[1]]
                                    [timestamp_key].replace(tzinfo=None),
                                    trace[perf_couple[0]]
                                    [timestamp_key].replace(tzinfo=None),
                                    worktiming=worktiming,
                                    weekends=weekends)
                                perf = bh.getseconds()
                            else:
                                perf = (trace[perf_couple[0]][timestamp_key] -
                                        trace[perf_couple[1]][timestamp_key]
                                        ).total_seconds()
                        else:
                            # missing timestamp on either event: fall back to 0
                            perf = 0.0
                        statistics[el]["performance"].append(perf)
                        statistics[el]["log_idx"].append(trace_idx)

        for el in annotations_arcs:
            if el not in statistics:
                statistics[el] = {"count": 0, "performance": []}
            statistics[el]["count"] += annotations_arcs[el]["count"] * len(
                variants_idx[variant])
            for trace_idx in variants_idx[variant]:
                trace = log[trace_idx]
                for perf_couple in annotations_arcs[el]["performance"]:
                    if timestamp_key in trace[perf_couple[
                            0]] and timestamp_key in trace[perf_couple[1]]:
                        if business_hours:
                            bh = BusinessHours(
                                trace[perf_couple[1]][timestamp_key].replace(
                                    tzinfo=None),
                                trace[perf_couple[0]][timestamp_key].replace(
                                    tzinfo=None),
                                worktiming=worktiming,
                                weekends=weekends)
                            perf = bh.getseconds()
                        else:
                            perf = (trace[perf_couple[0]][timestamp_key] -
                                    trace[perf_couple[1]][timestamp_key]
                                    ).total_seconds()
                    else:
                        perf = 0.0
                    statistics[el]["performance"].append(perf)

    return statistics
def performance_calculation_ocel_aggregation(ocel: OCEL, aggregation: Dict[str, Dict[Tuple[str, str], Set[Any]]],
                                             parameters: Optional[Dict[Any, Any]] = None) -> Dict[
    str, Dict[Tuple[str, str], List[float]]]:
    """
    Calculates the performance based on one of the following aggregations:
    - aggregate_ev_couples
    - aggregate_total_objects

    Parameters
    ----------------
    ocel
        Object-centric event log
    aggregation
        Aggregation calculated using one of the aforementioned methods
    parameters
        Parameters of the algorithm, including:
        - Parameters.EVENT_ID => the event identifier
        - Parameters.EVENT_TIMESTAMP => the timestamp
        - Parameters.BUSINESS_HOURS => enables/disables the business hours
        - Parameters.WORKTIMING => the work timing (default: [7, 17])
        - Parameters.WEEKENDS => the weekends (default: [6, 7])

    Returns
    ----------------
    edges_performance
        For each object type, a dictionary associating each activity couple
        with the sorted list of times elapsed between the two activities.
    """
    if parameters is None:
        parameters = {}

    event_id = exec_utils.get_param_value(Parameters.EVENT_ID, parameters, ocel.event_id_column)
    timestamp_key = exec_utils.get_param_value(Parameters.EVENT_TIMESTAMP, parameters, ocel.event_timestamp)

    # map each event identifier to its (first) timestamp
    grouped = ocel.events.groupby(event_id)[timestamp_key].apply(list).to_dict()
    ev_time = {ev: ts_list[0] for ev, ts_list in grouped.items()}

    business_hours = exec_utils.get_param_value(Parameters.BUSINESS_HOURS, parameters, False)
    worktiming = exec_utils.get_param_value(Parameters.WORKTIMING, parameters, [7, 17])
    weekends = exec_utils.get_param_value(Parameters.WEEKENDS, parameters, [6, 7])
    workcalendar = exec_utils.get_param_value(Parameters.WORKCALENDAR, parameters,
                                              constants.DEFAULT_BUSINESS_HOURS_WORKCALENDAR)

    def _elapsed(couple):
        # seconds between the two events of the couple (business or wall-clock time)
        if business_hours:
            bh = BusinessHours(ev_time[couple[0]],
                               ev_time[couple[1]],
                               worktiming=worktiming,
                               weekends=weekends,
                               workcalendar=workcalendar)
            return bh.getseconds()
        return ev_time[couple[1]].timestamp() - ev_time[couple[0]].timestamp()

    result = {}
    for obj_type in aggregation:
        # sorted() makes the output independent of the set iteration order
        result[obj_type] = {
            act_couple: sorted(_elapsed(el) for el in aggregation[obj_type][act_couple])
            for act_couple in aggregation[obj_type]
        }
    return result
def get_cases_description(log, parameters=None):
    """
    Get a description of traces present in the log_skeleton

    Parameters
    -----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
        Parameters.CASE_ID_KEY -> Trace attribute in which the case ID is contained
        Parameters.TIMESTAMP_KEY -> Column that identifies the timestamp
        Parameters.ENABLE_SORT -> Enable sorting of traces
        Parameters.SORT_BY_INDEX ->         Sort the traces using this index:
            0 -> case ID
            1 -> start time
            2 -> end time
            3 -> difference
        Parameters.SORT_ASCENDING -> Set sort direction (boolean; it true then the sort direction is ascending,
        otherwise descending)
        Parameters.MAX_RET_CASES -> Set the maximum number of returned traces

    Returns
    -----------
    ret
        Dictionary of traces associated to their start timestamp, their end timestamp and their duration
    """
    if parameters is None:
        parameters = {}

    case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, DEFAULT_TRACEID_KEY)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    enable_sort = exec_utils.get_param_value(Parameters.ENABLE_SORT, parameters, True)
    sort_by_index = exec_utils.get_param_value(Parameters.SORT_BY_INDEX, parameters, 0)
    sort_ascending = exec_utils.get_param_value(Parameters.SORT_ASCENDING, parameters, True)
    max_ret_cases = exec_utils.get_param_value(Parameters.MAX_RET_CASES, parameters, None)
    business_hours = exec_utils.get_param_value(Parameters.BUSINESS_HOURS, parameters, False)
    worktiming = exec_utils.get_param_value(Parameters.WORKTIMING, parameters, [7, 17])
    weekends = exec_utils.get_param_value(Parameters.WEEKENDS, parameters, [6, 7])

    # one row per non-empty trace: [case id, start epoch, end epoch, duration]
    rows = []
    for idx, trace in enumerate(log):
        if not trace:
            continue
        if case_id_key in trace.attributes:
            case_id = trace.attributes[case_id_key]
        else:
            # synthesize a unique identifier for traces lacking the case ID attribute
            case_id = "EMPTY" + str(idx)
        start_ts = trace[0][timestamp_key]
        end_ts = trace[-1][timestamp_key]
        if business_hours:
            bh = BusinessHours(start_ts.replace(tzinfo=None),
                               end_ts.replace(tzinfo=None),
                               worktiming=worktiming,
                               weekends=weekends)
            duration = bh.getseconds()
        else:
            duration = end_ts.timestamp() - start_ts.timestamp()
        rows.append([case_id, start_ts.timestamp(), end_ts.timestamp(), duration])

    if enable_sort:
        rows.sort(key=lambda row: row[sort_by_index], reverse=not sort_ascending)

    if max_ret_cases is not None:
        rows = rows[:max_ret_cases]

    return {
        str(row[0]): {"startTime": row[1], "endTime": row[2], "caseDuration": row[3]}
        for row in rows
    }
def apply(
        log: EventLog,
        temporal_profile: typing.TemporalProfile,
        parameters: Optional[Dict[Any, Any]] = None
) -> typing.TemporalProfileConformanceResults:
    """
    Checks the conformance of the log using the provided temporal profile.

    Implements the approach described in:
    Stertz, Florian, Jürgen Mangler, and Stefanie Rinderle-Ma. "Temporal Conformance Checking at Runtime
    based on Time-infused Process Models." arXiv preprint arXiv:2008.07262 (2020).

    Parameters
    ---------------
    log
        Event log
    temporal_profile
        Temporal profile
    parameters
        Parameters of the algorithm, including:
         - Parameters.ACTIVITY_KEY => the attribute to use as activity
         - Parameters.START_TIMESTAMP_KEY => the attribute to use as start timestamp
         - Parameters.TIMESTAMP_KEY => the attribute to use as timestamp
         - Parameters.ZETA => multiplier for the standard deviation
         - Parameters.BUSINESS_HOURS => calculates the difference of time based on the business hours,
           not the total time. Default: False
         - Parameters.WORKTIMING => work schedule of the company (default: [7, 17])
         - Parameters.WEEKENDS => indexes of the weekend days (default: [6, 7])

    Returns
    ---------------
    list_dev
        A list containing, for each trace, all the deviations. Each deviation is a tuple:
        (source activity, target activity, time passed, zeta value of the occurrence).
    """
    if parameters is None:
        parameters = {}

    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

    business_hours = exec_utils.get_param_value(Parameters.BUSINESS_HOURS, parameters, False)
    worktiming = exec_utils.get_param_value(Parameters.WORKTIMING, parameters, [7, 17])
    weekends = exec_utils.get_param_value(Parameters.WEEKENDS, parameters, [6, 7])
    workcalendar = exec_utils.get_param_value(Parameters.WORKCALENDAR, parameters,
                                              constants.DEFAULT_BUSINESS_HOURS_WORKCALENDAR)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                               xes_constants.DEFAULT_TIMESTAMP_KEY)
    start_timestamp_key = exec_utils.get_param_value(Parameters.START_TIMESTAMP_KEY, parameters,
                                                     xes_constants.DEFAULT_TIMESTAMP_KEY)
    zeta = exec_utils.get_param_value(Parameters.ZETA, parameters, 6.0)

    all_deviations = []
    for trace in log:
        trace_deviations = []
        n_events = len(trace)
        for i in range(n_events - 1):
            source_act = trace[i][activity_key]
            source_time = trace[i][timestamp_key].timestamp()
            for j in range(i + 1, n_events):
                target_time = trace[j][start_timestamp_key].timestamp()
                # only pairs where the target starts no earlier than the source completes
                if target_time < source_time:
                    continue
                target_act = trace[j][activity_key]
                if (source_act, target_act) not in temporal_profile:
                    continue
                if business_hours:
                    bh = BusinessHours(
                        trace[i][timestamp_key].replace(tzinfo=None),
                        trace[j][start_timestamp_key].replace(tzinfo=None),
                        worktiming=worktiming,
                        weekends=weekends,
                        workcalendar=workcalendar)
                    observed = bh.getseconds()
                else:
                    observed = target_time - source_time
                mean = temporal_profile[(source_act, target_act)][0]
                std = temporal_profile[(source_act, target_act)][1]
                # deviation when the observed time is outside [mean - zeta*std, mean + zeta*std]
                if observed < mean - zeta * std or observed > mean + zeta * std:
                    if std > 0:
                        num_std_devs = abs(observed - mean) / std
                    else:
                        # zero variance: any deviation is "infinitely" many std devs away
                        num_std_devs = sys.maxsize
                    trace_deviations.append(
                        (source_act, target_act, observed, num_std_devs))
        all_deviations.append(trace_deviations)
    return all_deviations