def apply(interval_log, parameters=None):
    """
    Counts, for each couple of activities, how many times two of their
    events overlap in time inside the same trace

    Parameters
    --------------
    interval_log
        Interval event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY => activity key
        - Parameters.START_TIMESTAMP_KEY => start timestamp
        - Parameters.TIMESTAMP_KEY => complete timestamp
        - Parameters.STRICT => when True, a couple is counted only if the
        intersection of the two intervals has length > 0 (intervals that
        merely touch in one instant are skipped). Default: False

    Returns
    --------------
    ret_dict
        Dictionary associating to a sorted couple of activities (tuple) the
        number of times they have been executed in parallel in the log
    """
    if parameters is None:
        parameters = {}

    interval_log = converter.apply(interval_log, parameters=parameters)

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters, xes_constants.DEFAULT_TIMESTAMP_KEY)
    start_timestamp_key = exec_utils.get_param_value(
        Parameters.START_TIMESTAMP_KEY, parameters, xes_constants.DEFAULT_TIMESTAMP_KEY)
    strict = exec_utils.get_param_value(Parameters.STRICT, parameters, False)

    ret_dict = {}
    for trace in interval_log:
        events = sorting.sort_timestamp_trace(trace, start_timestamp_key)
        n_events = len(events)
        for first_idx in range(n_events):
            first = events[first_idx]
            first_act = first[activity_key]
            first_start = first[start_timestamp_key]
            first_end = first[timestamp_key]
            for second_idx in range(first_idx + 1, n_events):
                second = events[second_idx]
                second_start = second[start_timestamp_key]
                second_end = second[timestamp_key]
                second_act = second[activity_key]
                overlap_start = max(first_start, second_start)
                overlap_end = min(first_end, second_end)
                if not overlap_start <= overlap_end:
                    # the trace was sorted on the start timestamp, so the
                    # scan for this first event stops at the first miss
                    break
                if strict and not overlap_start < overlap_end:
                    continue
                # sorting the couple avoids getting two entries for the
                # same set of concurrent activities
                pair = tuple(sorted((first_act, second_act)))
                ret_dict[pair] = ret_dict.get(pair, 0) + 1
    return ret_dict
def apply(interval_log, parameters=None):
    """
    Counts, for each couple of activities, how many times two of their
    events overlap in time inside the same trace

    Parameters
    --------------
    interval_log
        Interval event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY => activity key
        - Parameters.START_TIMESTAMP_KEY => start timestamp
        - Parameters.TIMESTAMP_KEY => complete timestamp

    Returns
    --------------
    ret_dict
        Dictionary associating to a couple of activities (tuple) the number
        of times they have been executed in parallel in the log. The couple
        keeps the order in which the two events appear in the trace sorted
        on the start timestamp, so (A, B) and (B, A) are distinct entries
    """
    if parameters is None:
        parameters = {}

    interval_log = converter.apply(interval_log, parameters=parameters)

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters, xes_constants.DEFAULT_TIMESTAMP_KEY)
    start_timestamp_key = exec_utils.get_param_value(
        Parameters.START_TIMESTAMP_KEY, parameters, xes_constants.DEFAULT_TIMESTAMP_KEY)

    ret_dict = {}
    for trace in interval_log:
        events = sorting.sort_timestamp_trace(trace, start_timestamp_key)
        n_events = len(events)
        for first_idx in range(n_events):
            first = events[first_idx]
            first_act = first[activity_key]
            first_start = first[start_timestamp_key]
            first_end = first[timestamp_key]
            for second_idx in range(first_idx + 1, n_events):
                second = events[second_idx]
                second_start = second[start_timestamp_key]
                second_end = second[timestamp_key]
                second_act = second[activity_key]
                if not max(first_start, second_start) <= min(first_end, second_end):
                    # the trace was sorted on the start timestamp, so the
                    # scan for this first event stops at the first miss
                    break
                pair = (first_act, second_act)
                ret_dict[pair] = ret_dict.get(pair, 0) + 1
    return ret_dict
def apply(
    interval_log: EventLog,
    parameters: Optional[Dict[Union[str, Parameters], Any]] = None
) -> Dict[Tuple[str, str], int]:
    """
    Counts, for each ordered couple of activities, how many times an event
    of the second activity starts at or after the completion of an event of
    the first activity inside the same trace

    Parameters
    --------------
    interval_log
        Interval event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY => activity key
        - Parameters.START_TIMESTAMP_KEY => start timestamp
        - Parameters.TIMESTAMP_KEY => complete timestamp
        - Parameters.KEEP_FIRST_FOLLOWING => when True, only the first
        following event found for each event is counted. Default: False

    Returns
    --------------
    ret_dict
        Dictionary associating to a couple of activities (tuple) the number
        of times the second one followed the first one in the log
    """
    if parameters is None:
        parameters = {}

    interval_log = converter.apply(
        interval_log, variant=converter.Variants.TO_EVENT_LOG, parameters=parameters)

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters, xes_constants.DEFAULT_TIMESTAMP_KEY)
    start_timestamp_key = exec_utils.get_param_value(
        Parameters.START_TIMESTAMP_KEY, parameters, xes_constants.DEFAULT_TIMESTAMP_KEY)
    keep_first_following = exec_utils.get_param_value(
        Parameters.KEEP_FIRST_FOLLOWING, parameters, False)

    ret_dict = {}
    for trace in interval_log:
        events = sorting.sort_timestamp_trace(trace, start_timestamp_key)
        n_events = len(events)
        for first_idx in range(n_events):
            first = events[first_idx]
            first_act = first[activity_key]
            first_end = first[timestamp_key]
            for second_idx in range(first_idx + 1, n_events):
                second = events[second_idx]
                second_start = second[start_timestamp_key]
                second_act = second[activity_key]
                if not first_end <= second_start:
                    # still overlapping with the first event: keep scanning
                    continue
                pair = (first_act, second_act)
                ret_dict[pair] = ret_dict.get(pair, 0) + 1
                if keep_first_following:
                    break
    return ret_dict
def to_interval(log, parameters=None):
    """
    Converts a log from lifecycle format (each event carries one timestamp
    and a lifecycle transition) to interval format (each event carries a
    start and a complete timestamp)

    Every "complete" event is paired with the oldest still-unmatched "start"
    event of the same activity in the trace; when there is none, the complete
    timestamp is used as start timestamp too. Events whose transition is
    neither "start" nor "complete" (case-insensitive) are ignored, and an
    event without the transition attribute is treated as "complete".

    Parameters
    -------------
    log
        Log (expressed in the lifecycle format)
    parameters
        Possible parameters of the method, including:
        - constants.PARAMETER_CONSTANT_TIMESTAMP_KEY => timestamp key
        - constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY => start timestamp key
        - constants.PARAMETER_CONSTANT_TRANSITION_KEY => lifecycle transition key
        - constants.PARAMETER_CONSTANT_ACTIVITY_KEY => activity key
        - "business_hours" => when true, also stores "@@approx_bh_duration"
        as computed by BusinessHours. Default: False
        - "worktiming" => forwarded to BusinessHours. Default: [7, 17]
        - "weekends" => forwarded to BusinessHours. Default: [6, 7]

    Returns
    -------------
    log
        Interval event log. The input log itself is returned when it is None
        or empty, when it is already marked as interval through its
        "PM4PY_TYPE" attribute, or when its first event already contains the
        start timestamp key
    """
    if parameters is None:
        parameters = {}

    timestamp_key = parameters.get(
        constants.PARAMETER_CONSTANT_TIMESTAMP_KEY, xes.DEFAULT_TIMESTAMP_KEY)
    start_timestamp_key = parameters.get(
        constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY, xes.DEFAULT_START_TIMESTAMP_KEY)
    transition_key = parameters.get(
        constants.PARAMETER_CONSTANT_TRANSITION_KEY, xes.DEFAULT_TRANSITION_KEY)
    activity_key = parameters.get(
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    business_hours = parameters.get("business_hours", False)
    worktiming = parameters.get("worktiming", [7, 17])
    weekends = parameters.get("weekends", [6, 7])

    # nothing to convert
    if log is None or len(log) == 0:
        return log

    # the log already declares itself as an interval log
    if "PM4PY_TYPE" in log.attributes:
        if log.attributes["PM4PY_TYPE"] == "interval":
            return log

    # the first event already has a start timestamp
    first_trace = log[0]
    if first_trace is not None and len(first_trace) > 0:
        if start_timestamp_key in first_trace[0]:
            return log

    # attributes that are rewritten on the interval event and therefore
    # must not be copied verbatim from the lifecycle events
    not_copied = (timestamp_key, transition_key)

    new_log = EventLog()
    new_log.attributes["PM4PY_TYPE"] = "interval"

    for trace in log:
        new_trace = Trace()
        for attr in trace.attributes:
            new_trace.attributes[attr] = trace.attributes[attr]

        # activity -> start events still waiting for their complete event
        pending_starts = {}
        for event in trace:
            activity = event[activity_key]
            transition = event[transition_key] if transition_key in event else "complete"
            timestamp = event[timestamp_key]
            lifecycle = transition.lower()

            if lifecycle == "start":
                pending_starts.setdefault(activity, []).append(event)
                continue
            if lifecycle != "complete":
                continue

            waiting = pending_starts.get(activity)
            if waiting:
                # first-in first-out matching of start and complete events
                start_event = waiting.pop(0)
                start_timestamp = start_event[timestamp_key]
            else:
                start_event = None
                start_timestamp = timestamp

            new_event = Event()
            for attr in event:
                if attr not in not_copied:
                    new_event[attr] = event[attr]
            if start_event is not None:
                for attr in start_event:
                    if attr not in not_copied:
                        new_event["@@startevent_" + attr] = start_event[attr]
            new_event[start_timestamp_key] = start_timestamp
            new_event[timestamp_key] = timestamp
            new_event["@@duration"] = (timestamp - start_timestamp).total_seconds()

            if business_hours:
                bh = BusinessHours(
                    start_timestamp.replace(tzinfo=None),
                    timestamp.replace(tzinfo=None),
                    worktiming=worktiming,
                    weekends=weekends)
                new_event["@@approx_bh_duration"] = bh.getseconds()

            new_trace.append(new_event)

        new_trace = sorting.sort_timestamp_trace(new_trace, start_timestamp_key)
        new_log.append(new_trace)

    return new_log
def to_interval(log, parameters=None):
    """
    Converts a log from lifecycle format (each event carries one timestamp
    and a lifecycle transition) to interval format (each event carries a
    start and a complete timestamp)

    Every "complete" event is paired with the oldest still-unmatched "start"
    event having the same activity and the same lifecycle instance in the
    trace; when there is none, the complete timestamp is used as start
    timestamp too. Events whose transition is neither "start" nor "complete"
    (case-insensitive) are ignored, and an event without the transition
    attribute is treated as "complete".

    Parameters
    -------------
    log
        Log (expressed in the lifecycle format)
    parameters
        Possible parameters of the method, including:
        - Parameters.TIMESTAMP_KEY => timestamp key
        - Parameters.START_TIMESTAMP_KEY => start timestamp key
        - Parameters.TRANSITION_KEY => lifecycle transition key
        - Parameters.ACTIVITY_KEY => activity key
        - Parameters.LIFECYCLE_INSTANCE_KEY => lifecycle instance key
        - Parameters.BUSINESS_HOURS => when true, also stores
        "@@approx_bh_duration" as computed by BusinessHours. Default: False
        - Parameters.WORKTIMING => forwarded to BusinessHours. Default: [7, 17]
        - Parameters.WEEKENDS => forwarded to BusinessHours. Default: [6, 7]

    Returns
    -------------
    log
        Interval event log. The input log itself is returned when it is None
        or empty, when it is already marked as interval through its
        "PM4PY_TYPE" attribute, or when its first event already contains the
        start timestamp key
    """
    if parameters is None:
        parameters = {}

    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters, xes.DEFAULT_TIMESTAMP_KEY)
    start_timestamp_key = exec_utils.get_param_value(
        Parameters.START_TIMESTAMP_KEY, parameters, xes.DEFAULT_START_TIMESTAMP_KEY)
    transition_key = exec_utils.get_param_value(
        Parameters.TRANSITION_KEY, parameters, xes.DEFAULT_TRANSITION_KEY)
    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)
    lifecycle_instance_key = exec_utils.get_param_value(
        Parameters.LIFECYCLE_INSTANCE_KEY, parameters, xes.DEFAULT_INSTANCE_KEY)
    business_hours = exec_utils.get_param_value(
        Parameters.BUSINESS_HOURS, parameters, False)
    worktiming = exec_utils.get_param_value(
        Parameters.WORKTIMING, parameters, [7, 17])
    weekends = exec_utils.get_param_value(
        Parameters.WEEKENDS, parameters, [6, 7])

    # nothing to convert
    if log is None or len(log) == 0:
        return log

    # the log already declares itself as an interval log
    if "PM4PY_TYPE" in log.attributes and log.attributes["PM4PY_TYPE"] == "interval":
        return log

    # the first event already has a start timestamp
    if log[0] is not None and len(log[0]) > 0:
        if start_timestamp_key in log[0][0]:
            return log

    new_log = EventLog(
        attributes=copy(log.attributes),
        extensions=copy(log.extensions),
        classifiers=copy(log.classifiers),
        omni_present=copy(log.omni_present),
        properties=copy(log.properties))
    new_log.attributes["PM4PY_TYPE"] = "interval"
    # Record the key under which the start timestamp is actually written on
    # the events below; advertising the default key here would point at a
    # missing attribute whenever a custom START_TIMESTAMP_KEY is supplied.
    new_log.properties[
        constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY] = start_timestamp_key

    # attributes that are rewritten on the interval event and therefore
    # must not be copied verbatim from the lifecycle events
    not_copied = (timestamp_key, transition_key)

    for trace in log:
        new_trace = Trace()
        for attr in trace.attributes:
            new_trace.attributes[attr] = trace.attributes[attr]

        # (activity, instance) -> start events still waiting for a complete
        activities_start = {}
        for event in trace:
            instance = event[lifecycle_instance_key] if lifecycle_instance_key in event else None
            activity = (event[activity_key], instance)
            transition = event[transition_key] if transition_key in event else "complete"
            timestamp = event[timestamp_key]
            lifecycle = transition.lower()

            if lifecycle == "start":
                activities_start.setdefault(activity, []).append(event)
                continue
            if lifecycle != "complete":
                continue

            waiting = activities_start.get(activity)
            if waiting:
                # first-in first-out matching of start and complete events
                start_event = waiting.pop(0)
                start_timestamp = start_event[timestamp_key]
            else:
                start_event = None
                start_timestamp = timestamp

            new_event = Event()
            for attr in event:
                if attr not in not_copied:
                    new_event[attr] = event[attr]
            if start_event is not None:
                for attr in start_event:
                    if attr not in not_copied:
                        new_event["@@startevent_" + attr] = start_event[attr]
            new_event[start_timestamp_key] = start_timestamp
            new_event[timestamp_key] = timestamp
            new_event["@@duration"] = (timestamp - start_timestamp).total_seconds()

            if business_hours:
                bh = BusinessHours(
                    start_timestamp.replace(tzinfo=None),
                    timestamp.replace(tzinfo=None),
                    worktiming=worktiming,
                    weekends=weekends)
                new_event["@@approx_bh_duration"] = bh.getseconds()

            new_trace.append(new_event)

        new_trace = sorting.sort_timestamp_trace(new_trace, start_timestamp_key)
        new_log.append(new_trace)

    return new_log