def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Explode an OCEL: every (event, object) relation becomes an event of its own,
    so that an event related to N objects is replaced by N events, each one
    related to a single object.

    The input log is deep-copied first and is left untouched. In the copy:
    - the event identifier of each relation row is rewritten as
      "<event id>_<object id>";
    - the events table is rebuilt from the relations table, without the
      object identifier and object type columns.

    Parameters
    -----------------
    ocel
        Object-centric event log
    parameters
        Possible parameters of the algorithm (none is read here)

    Returns
    -----------------
    ocel
        Exploded object-centric event log
    """
    if parameters is None:
        parameters = {}

    exploded = deepcopy(ocel)

    eid_col = exploded.event_id_column
    oid_col = exploded.object_id_column
    otype_col = exploded.object_type_column

    # One new event identifier per relation row: "<event id>_<object id>".
    rels = exploded.relations
    rels[eid_col] = rels[eid_col] + "_" + rels[oid_col]

    # The exploded events are the relation rows minus the object columns;
    # drop() hands back a new dataframe, so the relations keep those columns.
    exploded.events = rels.drop(columns=[oid_col, otype_col])

    return exploded
def apply(ocel: OCEL, values: Collection[Any], parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Filters the events of the object-centric event log on the values of an
    event attribute.

    The events table is replaced on a copy of the log (made with copy()), and
    that copy is then passed to filtering_utils.propagate_event_filtering.

    Parameters
    ----------------
    ocel
        Object-centric event log
    values
        Collection of attribute values to match
    parameters
        Parameters of the algorithm, including:
        - Parameters.ATTRIBUTE_KEY => the event attribute to filter on
          (default: ocel.event_activity)
        - Parameters.POSITIVE => True (default) keeps the events whose value is
          in the collection, False keeps the events whose value is not

    Returns
    ----------------
    ocel
        Filtered object-centric event log
    """
    if parameters is None:
        parameters = {}

    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, ocel.event_activity)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)

    filtered = copy(ocel)

    # A single membership mask, inverted when the filter is negative.
    matches = filtered.events[attribute_key].isin(values)
    keep_mask = matches if positive else ~matches
    filtered.events = filtered.events[keep_mask]

    return filtering_utils.propagate_event_filtering(filtered, parameters=parameters)
def filter_end_events_per_object_type(
        ocel: OCEL, object_type: str, parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Keeps only the events in which an object of the given object type ends its
    lifecycle (e.g. an event with activity "Pay Order" might terminate an order).

    For each object of the requested type, the event of its last row in the
    relations table is selected (groupby(...).last()).
    NOTE(review): this presumably relies on the relations table being sorted
    chronologically - confirm with the code that builds it.

    Parameters
    ------------------
    ocel
        Object-centric event log
    object_type
        Object type to consider
    parameters
        Parameters of the algorithm, including:
        - Parameters.EVENT_ID => the attribute working as event identifier
        - Parameters.OBJECT_ID => the attribute working as object identifier
        - Parameters.OBJECT_TYPE => the attribute working as object type

    Returns
    ------------------
    filtered_ocel
        Filtered object-centric event log
    """
    if parameters is None:
        parameters = {}

    event_id = exec_utils.get_param_value(Parameters.EVENT_ID, parameters, ocel.event_id_column)
    object_id = exec_utils.get_param_value(Parameters.OBJECT_ID, parameters, ocel.object_id_column)
    object_type_column = exec_utils.get_param_value(Parameters.OBJECT_TYPE, parameters, ocel.object_type_column)

    # Relations restricted to the requested object type.
    relations = ocel.relations
    of_type = relations[relations[object_type_column] == object_type]

    # Last relation row of every object -> identifiers of the end events.
    last_per_object = of_type.groupby(object_id).last()
    end_event_ids = set(last_per_object[event_id].unique())

    filtered = copy(ocel)
    is_end_event = filtered.events[event_id].isin(end_event_ids)
    filtered.events = filtered.events[is_end_event]

    return filtering_utils.propagate_event_filtering(filtered, parameters=parameters)
def sample_ocel_events(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Keeps a random sample of the events of an object-centric event log

    The distinct event identifiers are shuffled with random.shuffle (so the
    result follows the state of the global `random` generator) and the first
    ones are kept. The events table is replaced on a copy of the log, which is
    then passed to filtering_utils.propagate_event_filtering.

    Parameters
    ------------------
    ocel
        Object-centric event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.EVENT_ID => event identifier
        - Parameters.NUM_ENTITIES => maximum number of events to keep
          (default: 100); values below zero are treated as zero

    Returns
    ------------------
    sampled_ocel
        Sampled object-centric event log
    """
    if parameters is None:
        parameters = {}

    event_id_column = exec_utils.get_param_value(Parameters.EVENT_ID, parameters, ocel.event_id_column)
    num_entities = exec_utils.get_param_value(Parameters.NUM_ENTITIES, parameters, 100)

    events = list(ocel.events[event_id_column].unique())

    # Clamp the sample size to [0, len(events)]: a negative request would
    # otherwise turn into a negative slice bound and keep "all but the last k"
    # events instead of none.
    num_events = max(0, min(len(events), num_entities))

    random.shuffle(events)
    picked_events = events[:num_events]

    ocel = copy(ocel)
    ocel.events = ocel.events[ocel.events[event_id_column].isin(picked_events)]

    return filtering_utils.propagate_event_filtering(ocel, parameters=parameters)
def apply_timestamp(ocel: OCEL, min_timest: Union[datetime.datetime, str], max_timest: Union[datetime.datetime, str], parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Filters the object-centric event log keeping the events whose timestamp
    falls in the provided range (both extremes included)

    Both extremes are converted with get_dt_from_string before the comparison.
    The events table is replaced on a copy of the log, which is then passed to
    filtering_utils.propagate_event_filtering.

    Parameters
    -----------------
    ocel
        Object-centric event log
    min_timest
        Left extreme of the allowed timestamp interval (provided in the format: YYYY-mm-dd HH:MM:SS)
    max_timest
        Right extreme of the allowed timestamp interval (provided in the format: YYYY-mm-dd HH:MM:SS)
    parameters
        Parameters of the algorithm, including:
        - Parameters.TIMESTAMP_KEY => the attribute to use as timestamp
          (default: ocel.event_timestamp)

    Returns
    -----------------
    filtered_ocel
        Filtered object-centric event log
    """
    if parameters is None:
        parameters = {}

    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, ocel.event_timestamp)

    lower_bound = get_dt_from_string(min_timest)
    upper_bound = get_dt_from_string(max_timest)

    filtered = copy(ocel)

    # Closed interval: lower_bound <= timestamp <= upper_bound.
    stamps = filtered.events[timestamp_key]
    in_range = (stamps >= lower_bound) & (stamps <= upper_bound)
    filtered.events = filtered.events[in_range]

    return filtering_utils.propagate_event_filtering(filtered, parameters=parameters)
def propagate_object_filtering(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Propagates a filtering done at the object level to the remaining parts of
    the OCEL structure (relations first, then events)

    Only the relations that refer to an object still present in ocel.objects
    are kept; then only the events that still appear in the remaining
    relations are kept.

    The log is modified in place (its relations and events tables are
    reassigned) and the same object is returned.

    Parameters
    ----------------
    ocel
        Object-centric event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.EVENT_ID => the column to be used as event identifier
        - Parameters.OBJECT_ID => the column to be used as object identifier

    Returns
    ----------------
    ocel
        Object-centric event log with propagated filter
    """
    if parameters is None:
        parameters = {}

    event_id = exec_utils.get_param_value(Parameters.EVENT_ID, parameters, ocel.event_id_column)
    object_id = exec_utils.get_param_value(Parameters.OBJECT_ID, parameters, ocel.object_id_column)

    # Objects that survived the filter -> relations pointing to them.
    kept_objects = set(ocel.objects[object_id].unique())
    relation_mask = ocel.relations[object_id].isin(kept_objects)
    ocel.relations = ocel.relations[relation_mask]

    # Events still referenced by a surviving relation.
    kept_events = set(ocel.relations[event_id].unique())
    event_mask = ocel.events[event_id].isin(kept_events)
    ocel.events = ocel.events[event_mask]

    return ocel
def apply(ocel: OCEL, min_num_obj_type: Dict[str, int], parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Filters the events of the object-centric logs which are related to at least
    the specified amount of objects per type.

    An event is kept only if, for every object type listed in min_num_obj_type,
    the event has an entry for that type in the per-event counts returned by
    objects_ot_count.get_objects_ot_count, and that count is not below the
    required minimum. With an empty min_num_obj_type, every event present in
    those counts is kept.

    E.g. apply(ocel, {"order": 1, "element": 2})

    Would keep the following events:

      ocel:eid ocel:timestamp ocel:activity ocel:type:element ocel:type:order
    0       e1     1980-01-01  Create Order  [i4, i1, i3, i2]            [o1]
    1      e11     1981-01-01  Create Order          [i6, i5]            [o2]
    2      e14     1981-01-04  Create Order          [i8, i7]            [o3]

    Parameters
    ------------------
    ocel
        Object-centric event log
    min_num_obj_type
        Minimum number of objects per type
    parameters
        Parameters of the filter, including:
        - Parameters.EVENT_ID => the event identifier
        - Parameters.OBJECT_ID => the object identifier
        - Parameters.OBJECT_TYPE => the object type
        Only EVENT_ID is read here; the dictionary is forwarded unchanged to
        the counting and propagation helpers, which presumably read the
        others (verify against those helpers).

    Returns
    -----------------
    filtered_event_log
        Filtered object-centric event log
    """
    if parameters is None:
        parameters = {}

    event_id = exec_utils.get_param_value(Parameters.EVENT_ID, parameters, ocel.event_id_column)

    # Event identifier -> {object type -> number of related objects}.
    num_obj = objects_ot_count.get_objects_ot_count(ocel, parameters=parameters)

    # A type missing from an event's counts fails the requirement, whatever
    # the requested minimum is.
    filt_evs = {
        evid
        for evid, evobjs in num_obj.items()
        if all(
            ot in evobjs and evobjs[ot] >= min_count
            for ot, min_count in min_num_obj_type.items()
        )
    }

    ocel = copy(ocel)
    ocel.events = ocel.events[ocel.events[event_id].isin(filt_evs)]

    return filtering_utils.propagate_event_filtering(ocel, parameters=parameters)