def apply(ocel: OCEL, correspondence_dict: Dict[str, Collection[str]],
          parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Filters an object-centric event log keeping only the specified object
    types with the specified activity set (filters out the rest).

    Parameters
    ----------------
    ocel
        Object-centric event log
    correspondence_dict
        Dictionary containing, for every object type of interest, a
        collection of allowed activities. Example:

        {"order": ["Create Order"], "element": ["Create Order", "Create Delivery"]}

        Keeps only the object types "order" and "element".
        For the "order" object type, only the activity "Create Order" is kept.
        For the "element" object type, only the activities "Create Order"
        and "Create Delivery" are kept.
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY => the activity key
        - Parameters.OBJECT_TYPE => the object type column
        - Parameters.TEMP_SEPARATOR => the separator placed between activity
          and object type when building the internal matching key
          (default: "@#@#")

    Returns
    -----------------
    filtered_ocel
        Filtered object-centric event log
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters, ocel.event_activity)
    object_type_column = exec_utils.get_param_value(
        Parameters.OBJECT_TYPE, parameters, ocel.object_type_column)
    temp_separator = exec_utils.get_param_value(Parameters.TEMP_SEPARATOR,
                                                parameters, "@#@#")

    ocel = copy(ocel)

    # One "activity<sep>object type" key per allowed combination.
    allowed_keys = {
        act + temp_separator + ot
        for ot, activities in correspondence_dict.items()
        for act in activities
    }

    # copy() is shallow, so the relations frame may still be the caller's
    # object: build the matching key as a standalone Series instead of
    # writing a temporary column into that frame.
    relations = ocel.relations
    row_keys = (relations[activity_key] + temp_separator
                + relations[object_type_column])
    ocel.relations = relations[row_keys.isin(allowed_keys)]

    return filtering_utils.propagate_relations_filtering(
        ocel, parameters=parameters)
def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None):
    """
    Lazily iterates over the events of an object-centric event log

    Parameters
    ----------------
    ocel
        Object-centric event log
    parameters
        Parameters of the method, including:
        - Parameters.OCEL_TYPE_PREFIX => the prefix of the object type
          columns passed to ocel.get_extended_table (default:
          ocel_constants.DEFAULT_OBJECT_TYPE_PREFIX_EXTENDED)

    Returns
    ----------------
    yielded event
        One dictionary per row of the extended table, restricted to the
        entries whose value is a list or is not null.
    """
    if parameters is None:
        parameters = {}

    ot_prefix = exec_utils.get_param_value(
        Parameters.OCEL_TYPE_PREFIX, parameters,
        ocel_constants.DEFAULT_OBJECT_TYPE_PREFIX_EXTENDED)

    ext_table = ocel.get_extended_table(ot_prefix)

    for _, row in ext_table.iterrows():
        event = {}
        for name, value in dict(row).items():
            # Lists are tested first: pd.isna on a list is element-wise
            # and has no single truth value.
            if isinstance(value, list) or not pd.isna(value):
                event[name] = value
        yield event
def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Explodes an OCEL: an event related to N objects is replaced by N events,
    each one related to a single object.

    The work is done on a deep copy of the log. Every relation row gets the
    identifier "<event id>_<object id>", and the events table is rebuilt
    from the relations table without the object id and object type columns.

    Parameters
    -----------------
    ocel
        Object-centric event log
    parameters
        Possible parameters of the algorithm (none is read at the moment)

    Returns
    -----------------
    ocel
        Exploded object-centric event log
    """
    if parameters is None:
        parameters = {}

    exploded = deepcopy(ocel)
    ev_col = exploded.event_id_column
    obj_col = exploded.object_id_column

    relations = exploded.relations
    relations[ev_col] = relations[ev_col] + "_" + relations[obj_col]

    events = relations.copy()
    del events[obj_col]
    del events[exploded.object_type_column]
    exploded.events = events

    return exploded
def from_traditional_pandas(
        df: pd.DataFrame,
        parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Converts a traditional event log, stored in a dataframe, to an OCEL

    Parameters
    -----------------
    df
        Pandas dataframe
    parameters
        Parameters of the algorithm, including:
        - Parameters.TARGET_OBJECT_TYPE => the name of the object type to
          which the cases should be mapped (default: "OTYPE")
        - Parameters.ACTIVITY_KEY => the attribute to use as activity
        - Parameters.TIMESTAMP_KEY => the attribute to use as timestamp
        - Parameters.CASE_ID_KEY => the attribute to use as case identifier
        - Parameters.CASE_ATTRIBUTE_PREFIX => the prefix identifying the
          attributes at the case level

    Returns
    -----------------
    ocel
        OCEL (equivalent to the provided event log)
    """
    if parameters is None:
        parameters = {}

    target_object_type = exec_utils.get_param_value(
        Parameters.TARGET_OBJECT_TYPE, parameters, "OTYPE")
    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    case_id_key = exec_utils.get_param_value(
        Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)
    case_attribute_prefix = exec_utils.get_param_value(
        Parameters.CASE_ATTRIBUTE_PREFIX, parameters,
        constants.CASE_ATTRIBUTE_PREFIX)

    events = __get_events_dataframe(df, activity_key, timestamp_key,
                                    case_id_key, case_attribute_prefix)
    objects = __get_objects_dataframe(df, case_id_key, case_attribute_prefix,
                                      target_object_type)
    # The relations are derived while the events still carry the object id.
    relations = __get_relations_from_events(events, target_object_type)

    del events[ocel_constants.DEFAULT_OBJECT_ID]

    # Both tables are ordered by timestamp, ties broken by event id.
    sort_keys = [
        ocel_constants.DEFAULT_EVENT_TIMESTAMP,
        ocel_constants.DEFAULT_EVENT_ID,
    ]
    events = events.sort_values(sort_keys)
    relations = relations.sort_values(sort_keys)

    return OCEL(events=events, objects=objects, relations=relations)
def apply(ocel: OCEL, values: Collection[Any],
          parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Filters the objects of an object-centric event log on the values of one
    of their attributes, then propagates the filter to the rest of the log

    Parameters
    ----------------
    ocel
        Object-centric event log
    values
        Collection of values
    parameters
        Parameters of the algorithm, including:
        - Parameters.ATTRIBUTE_KEY => the attribute that should be filtered
          (default: the object type column of the log)
        - Parameters.POSITIVE => decides if the values should be kept
          (positive=True, default) or removed (positive=False)

    Returns
    ----------------
    ocel
        Filtered object-centric event log
    """
    if parameters is None:
        parameters = {}

    attribute_key = exec_utils.get_param_value(
        Parameters.ATTRIBUTE_KEY, parameters, ocel.object_type_column)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters,
                                          True)

    ocel = copy(ocel)

    keep_mask = ocel.objects[attribute_key].isin(values)
    if not positive:
        # Negative filter: keep the objects NOT having one of the values.
        keep_mask = ~keep_mask
    ocel.objects = ocel.objects[keep_mask]

    return filtering_utils.propagate_object_filtering(ocel,
                                                      parameters=parameters)
def propagate_event_filtering(
        ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Propagates a filtering done at the event level to the remaining parts of
    the OCEL structure: relations are restricted to the surviving events,
    and objects to the ones still referenced by a relation.

    The tables are reassigned on the provided OCEL object itself (no copy
    is made here), which is also the object returned.

    Parameters
    ----------------
    ocel
        Object-centric event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.EVENT_ID => the column to be used as event identifier
        - Parameters.OBJECT_ID => the column to be used as object identifier

    Returns
    ----------------
    ocel
        Object-centric event log with propagated filter
    """
    if parameters is None:
        parameters = {}

    event_id = exec_utils.get_param_value(Parameters.EVENT_ID, parameters,
                                          ocel.event_id_column)
    object_id = exec_utils.get_param_value(Parameters.OBJECT_ID, parameters,
                                           ocel.object_id_column)

    surviving_events = set(ocel.events[event_id].unique())
    relations = ocel.relations
    ocel.relations = relations[relations[event_id].isin(surviving_events)]

    # Computed on the already-restricted relations.
    surviving_objects = set(ocel.relations[object_id].unique())
    objects = ocel.objects
    ocel.objects = objects[objects[object_id].isin(surviving_objects)]

    return ocel
def filter_end_events_per_object_type(
        ocel: OCEL,
        object_type: str,
        parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Keeps the events in which an object of the given object type terminates
    its lifecycle (e.g. an event with activity "Pay Order" might terminate
    an order). For every object of the type, the event is taken from the
    last entry of its group in the relations table (in table order).

    Parameters
    ------------------
    ocel
        Object-centric event log
    object_type
        Object type to consider
    parameters
        Parameters of the algorithm, including:
        - Parameters.EVENT_ID => the attribute working as event identifier
        - Parameters.OBJECT_ID => the attribute working as object identifier
        - Parameters.OBJECT_TYPE => the attribute working as object type

    Returns
    ------------------
    filtered_ocel
        Filtered object-centric event log
    """
    if parameters is None:
        parameters = {}

    event_id = exec_utils.get_param_value(Parameters.EVENT_ID, parameters,
                                          ocel.event_id_column)
    object_id = exec_utils.get_param_value(Parameters.OBJECT_ID, parameters,
                                           ocel.object_id_column)
    object_type_column = exec_utils.get_param_value(
        Parameters.OBJECT_TYPE, parameters, ocel.object_type_column)

    relations = ocel.relations
    of_type = relations[relations[object_type_column] == object_type]
    last_per_object = of_type.groupby(object_id).last()
    end_event_ids = set(last_per_object[event_id].unique())

    ocel = copy(ocel)
    ocel.events = ocel.events[ocel.events[event_id].isin(end_event_ids)]

    return filtering_utils.propagate_event_filtering(ocel,
                                                     parameters=parameters)
def sample_ocel_events(
        ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Keeps a random sample of the events of an object-centric event log

    Parameters
    ------------------
    ocel
        Object-centric event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.EVENT_ID => event identifier
        - Parameters.NUM_ENTITIES => maximum number of events to keep
          (default: 100). Values below zero are treated as zero.

    Returns
    ------------------
    sampled_ocel
        Sampled object-centric event log
    """
    if parameters is None:
        parameters = {}

    event_id_column = exec_utils.get_param_value(Parameters.EVENT_ID,
                                                 parameters,
                                                 ocel.event_id_column)
    num_entities = exec_utils.get_param_value(Parameters.NUM_ENTITIES,
                                              parameters, 100)

    events = list(ocel.events[event_id_column].unique())
    # Clamp to [0, len(events)]: a negative size would otherwise turn the
    # slice below into "everything but the last k events".
    num_events = max(0, min(len(events), num_entities))

    random.shuffle(events)
    picked_events = events[:num_events]

    ocel = copy(ocel)
    ocel.events = ocel.events[ocel.events[event_id_column].isin(
        picked_events)]

    return filtering_utils.propagate_event_filtering(ocel,
                                                     parameters=parameters)
def apply_timestamp(ocel: OCEL,
                    min_timest: Union[datetime.datetime, str],
                    max_timest: Union[datetime.datetime, str],
                    parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Filters the object-centric event log keeping the events whose timestamp
    falls in the provided range (both extremes included)

    Parameters
    -----------------
    ocel
        Object-centric event log
    min_timest
        Left extreme of the allowed timestamp interval (provided in the
        format: YYYY-mm-dd HH:MM:SS)
    max_timest
        Right extreme of the allowed timestamp interval (provided in the
        format: YYYY-mm-dd HH:MM:SS)
    parameters
        Parameters of the algorithm, including:
        - Parameters.TIMESTAMP_KEY => the attribute to use as timestamp

    Returns
    -----------------
    filtered_ocel
        Filtered object-centric event log
    """
    if parameters is None:
        parameters = {}

    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY,
                                               parameters,
                                               ocel.event_timestamp)

    # Both extremes go through get_dt_from_string before the comparison.
    lower_bound = get_dt_from_string(min_timest)
    upper_bound = get_dt_from_string(max_timest)

    ocel = copy(ocel)
    timestamps = ocel.events[timestamp_key]
    in_range = (timestamps >= lower_bound) & (timestamps <= upper_bound)
    ocel.events = ocel.events[in_range]

    return filtering_utils.propagate_event_filtering(ocel,
                                                     parameters=parameters)
def apply(file_path: str,
          parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Imports an object-centric event log from a JSON-OCEL file, using the
    default JSON backend of Python

    Parameters
    -----------------
    file_path
        Path to the JSON-OCEL file
    parameters
        Parameters of the algorithm, including:
        - Parameters.EVENT_ID => name of the event identifier column
        - Parameters.EVENT_ACTIVITY => name of the activity column
        - Parameters.EVENT_TIMESTAMP => name of the timestamp column
        - Parameters.OBJECT_ID => name of the object identifier column
        - Parameters.OBJECT_TYPE => name of the object type column
        - Parameters.INTERNAL_INDEX => name of the temporary column used to
          keep the file order among rows sharing the same timestamp

    Returns
    ------------------
    ocel
        Object-centric event log
    """
    if parameters is None:
        parameters = {}

    # The context manager releases the file handle even if parsing fails.
    with open(file_path, "r") as json_file:
        ocel = json.load(json_file)

    events = []
    relations = []
    objects = []

    event_id = exec_utils.get_param_value(Parameters.EVENT_ID, parameters,
                                          constants.DEFAULT_EVENT_ID)
    event_activity = exec_utils.get_param_value(
        Parameters.EVENT_ACTIVITY, parameters,
        constants.DEFAULT_EVENT_ACTIVITY)
    event_timestamp = exec_utils.get_param_value(
        Parameters.EVENT_TIMESTAMP, parameters,
        constants.DEFAULT_EVENT_TIMESTAMP)
    object_id = exec_utils.get_param_value(Parameters.OBJECT_ID, parameters,
                                           constants.DEFAULT_OBJECT_ID)
    object_type = exec_utils.get_param_value(Parameters.OBJECT_TYPE,
                                             parameters,
                                             constants.DEFAULT_OBJECT_TYPE)
    internal_index = exec_utils.get_param_value(
        Parameters.INTERNAL_INDEX, parameters,
        constants.DEFAULT_INTERNAL_INDEX)

    parser = dt_parsing.parser.get()

    # Object id -> object type, needed to type the event-to-object relations.
    types_dict = {}
    for obj_id, obj in ocel[constants.OCEL_OBJECTS_KEY].items():
        obj_type = obj[constants.DEFAULT_OBJECT_TYPE]
        types_dict[obj_id] = obj_type
        obj_row = {object_id: obj_id, object_type: obj_type}
        # Object attributes become columns of the objects table.
        obj_row.update(obj[constants.OCEL_OVMAP_KEY])
        objects.append(obj_row)

    for ev_id, ev in ocel[constants.OCEL_EVENTS_KEY].items():
        activity = ev[constants.DEFAULT_EVENT_ACTIVITY]
        # Parsed once per event and shared by all the relations of the event.
        timestamp = parser.apply(ev[constants.DEFAULT_EVENT_TIMESTAMP])

        ev_row = {
            event_id: ev_id,
            event_timestamp: timestamp,
            event_activity: activity
        }
        # Event attributes become columns of the events table.
        ev_row.update(ev[constants.OCEL_VMAP_KEY])

        for obj in ev[constants.OCEL_OMAP_KEY]:
            relations.append({
                event_id: ev_id,
                event_activity: activity,
                event_timestamp: timestamp,
                object_id: obj,
                object_type: types_dict[obj]
            })
        events.append(ev_row)

    events = pd.DataFrame(events)
    objects = pd.DataFrame(objects)
    relations = pd.DataFrame(relations)

    # Sort by timestamp, using the insertion position as tie-breaker.
    events[internal_index] = events.index
    relations[internal_index] = relations.index

    events = events.sort_values([event_timestamp, internal_index])
    relations = relations.sort_values([event_timestamp, internal_index])

    del events[internal_index]
    del relations[internal_index]

    log_globals = {
        constants.OCEL_GLOBAL_LOG: ocel[constants.OCEL_GLOBAL_LOG],
        constants.OCEL_GLOBAL_EVENT: ocel[constants.OCEL_GLOBAL_EVENT],
        constants.OCEL_GLOBAL_OBJECT: ocel[constants.OCEL_GLOBAL_OBJECT],
    }

    return OCEL(events, objects, relations, log_globals)
def apply(ocel: OCEL, min_num_obj_type: Dict[str, int],
          parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Filters the events of the object-centric logs which are related to at
    least the specified amount of objects per type.

    E.g. apply(ocel, {"order": 1, "element": 2})

    Would keep the following events:

      ocel:eid ocel:timestamp ocel:activity ocel:type:element ocel:type:order
    0       e1     1980-01-01  Create Order  [i4, i1, i3, i2]            [o1]
    1      e11     1981-01-01  Create Order          [i6, i5]            [o2]
    2      e14     1981-01-04  Create Order          [i8, i7]            [o3]

    Parameters
    ------------------
    ocel
        Object-centric event log
    min_num_obj_type
        Minimum number of objects per type
    parameters
        Parameters of the filter, including:
        - Parameters.EVENT_ID => the event identifier
        The dictionary is also forwarded unchanged to
        objects_ot_count.get_objects_ot_count and to the propagation of the
        filter.

    Returns
    -----------------
    filtered_event_log
        Filtered object-centric event log
    """
    if parameters is None:
        parameters = {}

    event_id = exec_utils.get_param_value(Parameters.EVENT_ID, parameters,
                                          ocel.event_id_column)

    # Used below as a mapping: event id -> {object type: count}.
    num_obj = objects_ot_count.get_objects_ot_count(ocel,
                                                    parameters=parameters)

    filt_evs = set()
    for evid, evobjs in num_obj.items():
        # A type that is missing for the event never satisfies its minimum.
        if all(ot in evobjs and evobjs[ot] >= minimum
               for ot, minimum in min_num_obj_type.items()):
            filt_evs.add(evid)

    ocel = copy(ocel)
    ocel.events = ocel.events[ocel.events[event_id].isin(filt_evs)]

    return filtering_utils.propagate_event_filtering(ocel,
                                                     parameters=parameters)
def get_ocel_from_extended_table(
        df: pd.DataFrame,
        objects_df: Optional[pd.DataFrame] = None,
        parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Builds an OCEL from an "extended table": a dataframe with one row per
    event, in which every column whose name starts with the object type
    prefix holds the objects of that type related to the event.

    Parameters
    -----------------
    df
        Extended table (one row per event)
    objects_df
        Optional dataframe of the objects. When missing, it is built from
        the objects found in the object type columns (object type and
        object identifier only).
    parameters
        Parameters of the algorithm, including:
        - Parameters.OBJECT_TYPE_PREFIX => prefix of the object type columns
        - Parameters.EVENT_ACTIVITY => the activity column
        - Parameters.EVENT_ID => the event identifier column
        - Parameters.EVENT_TIMESTAMP => the timestamp column
        - Parameters.OBJECT_ID => name of the object identifier column
        - Parameters.OBJECT_TYPE => name of the object type column
        - Parameters.INTERNAL_INDEX => name of the temporary column used to
          keep the original order among rows sharing the same timestamp

    Returns
    -----------------
    ocel
        Object-centric event log
    """
    if parameters is None:
        parameters = {}

    object_type_prefix = exec_utils.get_param_value(
        Parameters.OBJECT_TYPE_PREFIX, parameters,
        constants.DEFAULT_OBJECT_TYPE_PREFIX_EXTENDED)
    event_activity = exec_utils.get_param_value(
        Parameters.EVENT_ACTIVITY, parameters,
        constants.DEFAULT_EVENT_ACTIVITY)
    event_id = exec_utils.get_param_value(Parameters.EVENT_ID, parameters,
                                          constants.DEFAULT_EVENT_ID)
    event_timestamp = exec_utils.get_param_value(
        Parameters.EVENT_TIMESTAMP, parameters,
        constants.DEFAULT_EVENT_TIMESTAMP)
    object_id_column = exec_utils.get_param_value(
        Parameters.OBJECT_ID, parameters, constants.DEFAULT_OBJECT_ID)
    object_type_column = exec_utils.get_param_value(
        Parameters.OBJECT_TYPE, parameters, constants.DEFAULT_OBJECT_TYPE)
    internal_index = exec_utils.get_param_value(
        Parameters.INTERNAL_INDEX, parameters,
        constants.DEFAULT_INTERNAL_INDEX)

    # Ordered, duplicate-free lists: recent pandas versions reject a set
    # as column indexer, and a list keeps the column order deterministic.
    object_type_columns = list(dict.fromkeys(
        x for x in df.columns if x.startswith(object_type_prefix)))
    non_object_type_columns = list(dict.fromkeys(
        x for x in df.columns if not x.startswith(object_type_prefix)))
    meaningful_columns = list(dict.fromkeys(
        object_type_columns + [event_activity, event_id, event_timestamp]))

    # Object type name = column name without the prefix (computed once).
    prefix_length = len(object_type_prefix)
    type_names = {ot: ot[prefix_length:] for ot in object_type_columns}

    # Parse the timestamps once, so that events and relations carry exactly
    # the same (datetime) values.
    timestamps = pd.to_datetime(df[event_timestamp])

    df_red = df[meaningful_columns].copy()
    df_red[event_timestamp] = timestamps
    stream = df_red.to_dict("records")

    relations = []
    # A dict per column is used as an insertion-ordered set of object ids.
    objects = {ot: {} for ot in object_type_columns}

    for ev in stream:
        for ot in object_type_columns:
            ot_name = type_names[ot]
            seen = objects[ot]
            # parse_list is defined elsewhere; its result is iterated as the
            # collection of object identifiers of the cell.
            for obj in parse_list(ev[ot]):
                seen[obj] = None
                relations.append({
                    event_id: ev[event_id],
                    event_activity: ev[event_activity],
                    event_timestamp: ev[event_timestamp],
                    object_id_column: obj,
                    object_type_column: ot_name
                })

    # Explicit columns keep the sorting below working with no relations.
    relations = pd.DataFrame(relations, columns=[
        event_id, event_activity, event_timestamp, object_id_column,
        object_type_column
    ])

    if objects_df is None:
        objects_df = pd.DataFrame(
            [{
                object_type_column: type_names[ot],
                object_id_column: obj
            } for ot, seen in objects.items() for obj in seen],
            columns=[object_type_column, object_id_column])

    # Copy, so that the assignments below never write into a slice of the
    # caller's dataframe.
    events_df = df[non_object_type_columns].copy()
    events_df[event_timestamp] = timestamps

    # Sort by timestamp, using the original position as tie-breaker.
    events_df[internal_index] = events_df.index
    relations[internal_index] = relations.index

    events_df = events_df.sort_values([event_timestamp, internal_index])
    relations = relations.sort_values([event_timestamp, internal_index])

    del events_df[internal_index]
    del relations[internal_index]

    return OCEL(events_df, objects_df, relations)
def apply(file_path: str,
          parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Imports an object-centric event log from a XML-OCEL file, using LXML

    Parameters
    -----------------
    file_path
        Path to the XML-OCEL file
    parameters
        Parameters of the algorithm, including:
        - Parameters.EVENT_ID => name of the event identifier column
        - Parameters.EVENT_ACTIVITY => name of the activity column
        - Parameters.EVENT_TIMESTAMP => name of the timestamp column
        - Parameters.OBJECT_ID => name of the object identifier column
        - Parameters.OBJECT_TYPE => name of the object type column
        - Parameters.INTERNAL_INDEX => name of the temporary column used to
          keep the file order among rows sharing the same timestamp

    Returns
    ------------------
    ocel
        Object-centric event log
    """
    if parameters is None:
        parameters = {}

    events = []
    relations = []
    objects = []
    obj_type_dict = {}

    event_id = exec_utils.get_param_value(Parameters.EVENT_ID, parameters,
                                          constants.DEFAULT_EVENT_ID)
    event_activity = exec_utils.get_param_value(
        Parameters.EVENT_ACTIVITY, parameters,
        constants.DEFAULT_EVENT_ACTIVITY)
    event_timestamp = exec_utils.get_param_value(
        Parameters.EVENT_TIMESTAMP, parameters,
        constants.DEFAULT_EVENT_TIMESTAMP)
    object_id = exec_utils.get_param_value(Parameters.OBJECT_ID, parameters,
                                           constants.DEFAULT_OBJECT_ID)
    object_type = exec_utils.get_param_value(Parameters.OBJECT_TYPE,
                                             parameters,
                                             constants.DEFAULT_OBJECT_TYPE)
    internal_index = exec_utils.get_param_value(
        Parameters.INTERNAL_INDEX, parameters,
        constants.DEFAULT_INTERNAL_INDEX)

    date_parser = dt_parsing.parser.get()

    parser = etree.XMLParser(remove_comments=True)
    tree = objectify.parse(file_path, parser=parser)
    root = tree.getroot()

    for child in root:
        section = child.tag.lower()
        if section.endswith("events"):
            for event in child:
                eve_id = None
                eve_activity = None
                eve_timestamp = None
                eve_omap = []
                eve_vmap = {}
                for child2 in event:
                    key = child2.get("key")
                    if key == "id":
                        eve_id = child2.get("value")
                    elif key == "timestamp":
                        eve_timestamp = parse_xml(child2.get("value"),
                                                  child2.tag.lower(),
                                                  date_parser)
                    elif key == "activity":
                        eve_activity = child2.get("value")
                    elif key == "omap":
                        for child3 in child2:
                            eve_omap.append(child3.get("value"))
                    elif key == "vmap":
                        for child3 in child2:
                            eve_vmap[child3.get("key")] = parse_xml(
                                child3.get("value"), child3.tag.lower(),
                                date_parser)

                event_dict = {
                    event_id: eve_id,
                    event_activity: eve_activity,
                    event_timestamp: eve_timestamp
                }
                # Event attributes become columns of the events table.
                event_dict.update(eve_vmap)
                events.append(event_dict)

                # The object type is filled in after the whole file has
                # been read (see the loop over the relations below).
                for obj in eve_omap:
                    relations.append({
                        event_id: eve_id,
                        event_activity: eve_activity,
                        event_timestamp: eve_timestamp,
                        object_id: obj
                    })
        elif section.endswith("objects"):
            for obj_node in child:
                obj_id = None
                obj_type = None
                obj_ovmap = {}
                for child2 in obj_node:
                    key = child2.get("key")
                    if key == "id":
                        obj_id = child2.get("value")
                    elif key == "type":
                        obj_type = child2.get("value")
                    elif key == "ovmap":
                        for child3 in child2:
                            obj_ovmap[child3.get("key")] = parse_xml(
                                child3.get("value"), child3.tag.lower(),
                                date_parser)
                # NOTE(review): the object attributes are stored as one
                # nested dictionary under constants.OCEL_OVMAP_KEY, whereas
                # the event attributes above are flattened into columns -
                # confirm whether the objects should be flattened as well.
                objects.append({
                    object_id: obj_id,
                    object_type: obj_type,
                    constants.OCEL_OVMAP_KEY: obj_ovmap
                })
                obj_type_dict[obj_id] = obj_type

    # Every object type is known now, so each relation can be typed.
    for rel in relations:
        rel[object_type] = obj_type_dict[rel[object_id]]

    events = pd.DataFrame(events)
    objects = pd.DataFrame(objects)
    relations = pd.DataFrame(relations)

    # Sort by timestamp, using the insertion position as tie-breaker.
    events[internal_index] = events.index
    relations[internal_index] = relations.index

    events = events.sort_values([event_timestamp, internal_index])
    relations = relations.sort_values([event_timestamp, internal_index])

    del events[internal_index]
    del relations[internal_index]

    # No global section is read from the XML file by this importer.
    log_globals = {}

    return OCEL(events, objects, relations, log_globals)
def from_traditional_log(
        log: EventLog,
        parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Transforms an EventLog to an OCEL: every case becomes one object of the
    target object type, and every event is related to the object of its
    case.

    Parameters
    -----------------
    log
        Event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.TARGET_OBJECT_TYPE => the name of the object type to
          which the cases should be mapped (default: "OTYPE")
        - Parameters.ACTIVITY_KEY => the attribute to use as activity
        - Parameters.TIMESTAMP_KEY => the attribute to use as timestamp
        - Parameters.CASE_ID_KEY => the attribute to use as case identifier

    Returns
    -----------------
    ocel
        OCEL (equivalent to the provided event log)
    """
    if parameters is None:
        parameters = {}

    log = log_converter.apply(log,
                              variant=log_converter.Variants.TO_EVENT_LOG,
                              parameters=parameters)

    target_object_type = exec_utils.get_param_value(
        Parameters.TARGET_OBJECT_TYPE, parameters, "OTYPE")
    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    case_id_key = exec_utils.get_param_value(
        Parameters.CASE_ID_KEY, parameters,
        xes_constants.DEFAULT_TRACEID_KEY)

    events = []
    objects = []
    relations = []

    # Running counter over the whole log, used (as a string) as event id.
    ev_count = 0

    for trace in log:
        case_id = trace.attributes[case_id_key]
        obj = {
            ocel_constants.DEFAULT_OBJECT_ID: case_id,
            ocel_constants.DEFAULT_OBJECT_TYPE: target_object_type
        }
        # The remaining case attributes become attributes of the object.
        for attr in trace.attributes:
            if attr != case_id_key:
                obj[attr] = trace.attributes[attr]
        objects.append(obj)

        for ev in trace:
            ev_count = ev_count + 1
            ev_identifier = str(ev_count)
            activity = ev[activity_key]
            timestamp = ev[timestamp_key]

            eve = {
                ocel_constants.DEFAULT_EVENT_ID: ev_identifier,
                ocel_constants.DEFAULT_EVENT_ACTIVITY: activity,
                ocel_constants.DEFAULT_EVENT_TIMESTAMP: timestamp
            }
            # Copy the other event attributes. The exclusion is on the
            # attribute NAMES (keys): activity and timestamp are already
            # stored under the OCEL columns above.
            for attr in ev:
                if attr not in (activity_key, timestamp_key):
                    eve[attr] = ev[attr]
            events.append(eve)

            relations.append({
                ocel_constants.DEFAULT_EVENT_ID: ev_identifier,
                ocel_constants.DEFAULT_EVENT_ACTIVITY: activity,
                ocel_constants.DEFAULT_EVENT_TIMESTAMP: timestamp,
                ocel_constants.DEFAULT_OBJECT_ID: case_id,
                ocel_constants.DEFAULT_OBJECT_TYPE: target_object_type
            })

    events = pd.DataFrame(events)
    objects = pd.DataFrame(objects)
    relations = pd.DataFrame(relations)

    return OCEL(events=events, objects=objects, relations=relations)
def from_interleavings(
        df1: pd.DataFrame,
        df2: pd.DataFrame,
        interleavings: pd.DataFrame,
        parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Transforms a couple of dataframes, along with the interleavings between
    them, to an OCEL

    Parameters
    -----------------
    df1
        First of the two dataframes
    df2
        Second of the two dataframes
    interleavings
        Interleavings dataframe (left untouched: the function works on a
        copy of the columns it needs)
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY => the attribute to use as activity
        - Parameters.TIMESTAMP_KEY => the attribute to use as timestamp
        - Parameters.CASE_ID_KEY => the attribute to use as case identifier
        - Parameters.CASE_ATTRIBUTE_PREFIX => the prefix identifying the
          attributes at the case level
        - Parameters.TARGET_OBJECT_TYPE => the name of the object type to
          which the cases of the first log should be mapped
        - Parameters.TARGET_OBJECT_TYPE_2 => the name of the object type to
          which the cases of the second log should be mapped
        - Parameters.LEFT_INDEX => the index column of the events of the
          first dataframe, in the interleavings dataframe
        - Parameters.RIGHT_INDEX => the index column of the events of the
          second dataframe, in the interleavings dataframe
        - Parameters.DIRECTION => the direction of the interleavings
          (LR or RL)

    Returns
    -----------------
    ocel
        OCEL (equivalent to the provided event log)
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    case_id_key = exec_utils.get_param_value(
        Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)
    case_attribute_prefix = exec_utils.get_param_value(
        Parameters.CASE_ATTRIBUTE_PREFIX, parameters,
        constants.CASE_ATTRIBUTE_PREFIX)
    target_object_type = exec_utils.get_param_value(
        Parameters.TARGET_OBJECT_TYPE, parameters, "OTYPE")
    target_object_type_2 = exec_utils.get_param_value(
        Parameters.TARGET_OBJECT_TYPE_2, parameters, "OTYPE2")
    left_index = exec_utils.get_param_value(Parameters.LEFT_INDEX,
                                            parameters, "@@left_index")
    right_index = exec_utils.get_param_value(Parameters.RIGHT_INDEX,
                                             parameters, "@@right_index")
    direction = exec_utils.get_param_value(Parameters.DIRECTION, parameters,
                                           "@@direction")

    # A list (not a set): recent pandas versions reject a set as column
    # indexer, and a list gives a deterministic column order.
    minimal_columns = [
        ocel_constants.DEFAULT_EVENT_ID,
        ocel_constants.DEFAULT_OBJECT_ID,
        ocel_constants.DEFAULT_OBJECT_TYPE,
    ]

    events1 = __get_events_dataframe(df1, activity_key, timestamp_key,
                                     case_id_key, case_attribute_prefix,
                                     events_prefix="E1_")
    objects1 = __get_objects_dataframe(df1, case_id_key,
                                       case_attribute_prefix,
                                       target_object_type)
    relations1 = __get_relations_from_events(events1, target_object_type)
    relations1_minimal = relations1[minimal_columns]

    events2 = __get_events_dataframe(df2, activity_key, timestamp_key,
                                     case_id_key, case_attribute_prefix,
                                     events_prefix="E2_")
    objects2 = __get_objects_dataframe(df2, case_id_key,
                                       case_attribute_prefix,
                                       target_object_type_2)
    relations2 = __get_relations_from_events(events2, target_object_type_2)
    relations2_minimal = relations2[minimal_columns]

    # Work on a private copy: rewriting the index columns on the caller's
    # dataframe would make a second call fail on astype(int).
    interleavings = interleavings[[left_index, right_index,
                                   direction]].copy()
    # Same "E1_" / "E2_" prefixes passed as events_prefix above.
    interleavings[left_index] = "E1_" + interleavings[left_index].astype(
        int).astype(str)
    interleavings[right_index] = "E2_" + interleavings[right_index].astype(
        int).astype(str)

    interleavings_lr = interleavings[interleavings[direction] == "LR"][[
        left_index, right_index
    ]]
    interleavings_rl = interleavings[interleavings[direction] == "RL"][[
        left_index, right_index
    ]]

    # LR: events of the first log get related to the object of the
    # interleaved event of the second log.
    relations3 = events1.merge(interleavings_lr,
                               left_on=ocel_constants.DEFAULT_EVENT_ID,
                               right_on=left_index)
    relations3 = relations3.merge(relations2_minimal,
                                  left_on=right_index,
                                  right_on=ocel_constants.DEFAULT_EVENT_ID,
                                  suffixes=('', '_@#@#RIGHT'))
    relations3[ocel_constants.DEFAULT_OBJECT_ID] = relations3[
        ocel_constants.DEFAULT_OBJECT_ID + '_@#@#RIGHT']
    relations3[ocel_constants.DEFAULT_OBJECT_TYPE] = target_object_type_2

    # RL: events of the second log get related to the object of the
    # interleaved event of the first log.
    relations4 = events2.merge(interleavings_rl,
                               left_on=ocel_constants.DEFAULT_EVENT_ID,
                               right_on=right_index)
    relations4 = relations4.merge(relations1_minimal,
                                  left_on=left_index,
                                  right_on=ocel_constants.DEFAULT_EVENT_ID,
                                  suffixes=('', '_@#@#LEFT'))
    relations4[ocel_constants.DEFAULT_OBJECT_ID] = relations4[
        ocel_constants.DEFAULT_OBJECT_ID + '_@#@#LEFT']
    relations4[ocel_constants.DEFAULT_OBJECT_TYPE] = target_object_type

    # The object id was only needed on the events to build the relations.
    del events1[ocel_constants.DEFAULT_OBJECT_ID]
    del events2[ocel_constants.DEFAULT_OBJECT_ID]

    events = pd.concat([events1, events2])
    objects = pd.concat([objects1, objects2])
    relations = pd.concat([relations1, relations2, relations3, relations4])

    sort_keys = [
        ocel_constants.DEFAULT_EVENT_TIMESTAMP,
        ocel_constants.DEFAULT_EVENT_ID,
    ]
    events = events.sort_values(sort_keys)
    relations = relations.sort_values(sort_keys)

    return OCEL(events=events, objects=objects, relations=relations)