Exemple #1
0
def apply(ocel: OCEL,
          correspondence_dict: Dict[str, Collection[str]],
          parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Filters an object-centric event log keeping only the specified object types
    with the specified activity set (filters out the rest).

    Parameters
    ----------------
    ocel
        Object-centric event log
    correspondence_dict
        Dictionary containing, for every object type of interest, a
        collection of allowed activities.  Example:

        {"order": ["Create Order"], "element": ["Create Order", "Create Delivery"]}

        Keeps only the object types "order" and "element".
        For the "order" object type, only the activity "Create Order" is kept.
        For the "element" object type, only the activities "Create Order" and "Create Delivery" are kept.
    parameters
        Parameters of the algorithm, including:
            - Parameters.ACTIVITY_KEY => the activity key
            - Parameters.OBJECT_TYPE => the object type column
            - Parameters.TEMP_SEPARATOR => the separator placed between
              activity and object type when building the comparison key
              (default: "@#@#")

        Parameters.TEMP_COLUMN is no longer needed: the comparison key is
        never stored inside the relations table.

    Returns
    -----------------
    filtered_ocel
        Filtered object-centric event log
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters, ocel.event_activity)
    object_type_column = exec_utils.get_param_value(Parameters.OBJECT_TYPE,
                                                    parameters,
                                                    ocel.object_type_column)
    temp_separator = exec_utils.get_param_value(Parameters.TEMP_SEPARATOR,
                                                parameters, "@#@#")

    # Every allowed (activity, object type) couple, encoded as one string.
    allowed_pairs = {
        act + temp_separator + ot
        for ot, acts in correspondence_dict.items()
        for act in acts
    }

    ocel = copy(ocel)

    # The key is computed as a standalone Series.  Writing it as a temporary
    # column into ocel.relations would also modify the caller's table
    # whenever copy() shares the underlying dataframe with the input log.
    pair_key = (ocel.relations[activity_key] + temp_separator +
                ocel.relations[object_type_column])
    ocel.relations = ocel.relations[pair_key.isin(allowed_pairs)]

    return filtering_utils.propagate_relations_filtering(ocel,
                                                         parameters=parameters)
Exemple #2
0
def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None):
    """
    Lazily iterates over the events of an object-centric event log.

    Parameters
    ----------------
    ocel
        OCEL
    parameters
        Parameters of the method, including:
        - Parameters.OCEL_TYPE_PREFIX => the prefix of the object types in the OCEL (default: ocel:type)

    Returns
    ----------------
    yielded event
        One dictionary per row of the extended table.  Entries whose value
        is missing (according to pd.isna) are left out; list values are
        always kept.
    """
    if parameters is None:
        parameters = {}

    ot_prefix = exec_utils.get_param_value(
        Parameters.OCEL_TYPE_PREFIX, parameters,
        ocel_constants.DEFAULT_OBJECT_TYPE_PREFIX_EXTENDED)

    for _, row in ocel.get_extended_table(ot_prefix).iterrows():
        event = {}
        for key, value in dict(row).items():
            # Lists are tested first: pd.isna on a list would not return a
            # single boolean.
            if isinstance(value, list) or not pd.isna(value):
                event[key] = value
        yield event
Exemple #3
0
def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Explodes an OCEL: every (event, object) relation becomes an event of its
    own, so an event associated to N objects is "split" into N events.

    The new event identifier is "<event id>_<object id>".  The events table
    of the result is rebuilt from the relations table (minus the object
    identifier and object type columns).

    Parameters
    -----------------
    ocel
        Object-centric event log (left untouched; a deep copy is modified)
    parameters
        Possible parameters of the algorithm (currently none is read)

    Returns
    -----------------
    ocel
        Exploded object-centric event log
    """
    if parameters is None:
        parameters = {}

    exploded = deepcopy(ocel)
    eid_col = exploded.event_id_column
    oid_col = exploded.object_id_column
    otype_col = exploded.object_type_column

    relations = exploded.relations
    relations[eid_col] = relations[eid_col] + "_" + relations[oid_col]

    # drop() returns a new dataframe, so events and relations stay independent.
    exploded.events = relations.drop(columns=[oid_col, otype_col])

    return exploded
Exemple #4
0
def from_traditional_pandas(df: pd.DataFrame,
                            parameters: Optional[Dict[Any,
                                                      Any]] = None) -> OCEL:
    """
    Builds an OCEL out of a traditional (case-based) Pandas dataframe.

    Parameters
    -----------------
    df
        Pandas dataframe
    parameters
        Parameters of the algorithm, including:
        - Parameters.TARGET_OBJECT_TYPE => the name of the object type to which the cases should be mapped
          (default: "OTYPE")
        - Parameters.ACTIVITY_KEY => the attribute to use as activity
        - Parameters.TIMESTAMP_KEY => the attribute to use as timestamp
        - Parameters.CASE_ID_KEY => the attribute to use as case identifier
        - Parameters.CASE_ATTRIBUTE_PREFIX => the prefix identifying the attributes at the case level

    Returns
    -----------------
    ocel
        OCEL (equivalent to the provided event log), with events and
        relations sorted by timestamp and then by event identifier
    """
    if parameters is None:
        parameters = {}

    case_attribute_prefix = exec_utils.get_param_value(
        Parameters.CASE_ATTRIBUTE_PREFIX, parameters,
        constants.CASE_ATTRIBUTE_PREFIX)
    case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY,
                                             parameters,
                                             constants.CASE_CONCEPT_NAME)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    target_object_type = exec_utils.get_param_value(
        Parameters.TARGET_OBJECT_TYPE, parameters, "OTYPE")

    events = __get_events_dataframe(df, activity_key, timestamp_key,
                                    case_id_key, case_attribute_prefix)
    objects = __get_objects_dataframe(df, case_id_key, case_attribute_prefix,
                                      target_object_type)
    # The relations are derived while the events table still carries the
    # object identifier; that column is removed from the events right after.
    relations = __get_relations_from_events(events, target_object_type)
    del events[ocel_constants.DEFAULT_OBJECT_ID]

    sort_columns = [
        ocel_constants.DEFAULT_EVENT_TIMESTAMP,
        ocel_constants.DEFAULT_EVENT_ID,
    ]
    events = events.sort_values(sort_columns)
    relations = relations.sort_values(sort_columns)

    return OCEL(events=events, objects=objects, relations=relations)
Exemple #5
0
def apply(ocel: OCEL,
          values: Collection[Any],
          parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Filters the objects of an object-centric event log on the values of one
    of their attributes, then propagates the filter to the rest of the log.

    Parameters
    ----------------
    ocel
        Object-centric event log
    values
        Collection of values
    parameters
        Parameters of the algorithm, including:
        - Parameters.ATTRIBUTE_KEY => the attribute that should be filtered
          (default: the object type column of the log)
        - Parameters.POSITIVE => decides if the values should be kept (positive=True, default)
          or removed (positive=False)

    Returns
    ----------------
    ocel
        Filtered object-centric event log
    """
    if parameters is None:
        parameters = {}

    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters,
                                          True)
    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY,
                                               parameters,
                                               ocel.object_type_column)

    filtered = copy(ocel)
    matches = filtered.objects[attribute_key].isin(values)
    # Negative filtering simply inverts the row selection.
    selection = matches if positive else ~matches
    filtered.objects = filtered.objects[selection]

    return filtering_utils.propagate_object_filtering(filtered,
                                                      parameters=parameters)
Exemple #6
0
def propagate_event_filtering(ocel: OCEL,
                              parameters: Optional[Dict[Any,
                                                        Any]] = None) -> OCEL:
    """
    Propagates a filtering done on the events table to the remaining parts
    of the OCEL structure: relations pointing to removed events are dropped,
    then objects that no longer appear in any relation are dropped.

    The provided log is modified in place (its relations and objects
    attributes are reassigned) and returned.

    Parameters
    ----------------
    ocel
        Object-centric event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.EVENT_ID => the column to be used as event identifier
        - Parameters.OBJECT_ID => the column to be used as object identifier

    Returns
    ----------------
    ocel
        Object-centric event log with propagated filter
    """
    if parameters is None:
        parameters = {}

    object_id = exec_utils.get_param_value(Parameters.OBJECT_ID, parameters,
                                           ocel.object_id_column)
    event_id = exec_utils.get_param_value(Parameters.EVENT_ID, parameters,
                                          ocel.event_id_column)

    # Step 1: relations survive only if their event survived.
    surviving_events = set(ocel.events[event_id].unique())
    relations_mask = ocel.relations[event_id].isin(surviving_events)
    ocel.relations = ocel.relations[relations_mask]

    # Step 2: objects survive only if still referenced by a relation.
    surviving_objects = set(ocel.relations[object_id].unique())
    objects_mask = ocel.objects[object_id].isin(surviving_objects)
    ocel.objects = ocel.objects[objects_mask]

    return ocel
Exemple #7
0
def filter_end_events_per_object_type(
        ocel: OCEL,
        object_type: str,
        parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Keeps only the events in which an object of the given object type
    terminates its lifecycle.
    (E.g. an event with activity "Pay Order" might terminate an order).

    The end event of an object is the last event identifier found for it
    in the relations table, following the row order of that table.

    Parameters
    ------------------
    ocel
        Object-centric event log
    object_type
        Object type to consider
    parameters
        Parameters of the algorithm, including:
        - Parameters.EVENT_ID => the attribute working as event identifier
        - Parameters.OBJECT_ID => the attribute working as object identifier
        - Parameters.OBJECT_TYPE => the attribute working as object type

    Returns
    ------------------
    filtered_ocel
        Filtered object-centric event log
    """
    if parameters is None:
        parameters = {}

    object_type_column = exec_utils.get_param_value(Parameters.OBJECT_TYPE,
                                                    parameters,
                                                    ocel.object_type_column)
    object_id = exec_utils.get_param_value(Parameters.OBJECT_ID, parameters,
                                           ocel.object_id_column)
    event_id = exec_utils.get_param_value(Parameters.EVENT_ID, parameters,
                                          ocel.event_id_column)

    relations = ocel.relations
    of_requested_type = relations[relations[object_type_column] == object_type]
    # NOTE(review): presumably the relations are sorted chronologically, so
    # that "last" means "latest" - confirm against the importers.
    last_per_object = of_requested_type.groupby(object_id).last()
    end_event_ids = set(last_per_object[event_id].unique())

    filtered = copy(ocel)
    filtered.events = filtered.events[filtered.events[event_id].isin(
        end_event_ids)]

    return filtering_utils.propagate_event_filtering(filtered,
                                                     parameters=parameters)
Exemple #8
0
def sample_ocel_events(ocel: OCEL,
                       parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Keeps a random sample of the events of an object-centric event log

    Parameters
    ------------------
    ocel
        Object-centric event log
    parameters
        Parameters of the algorithm, including:
            - Parameters.EVENT_ID => event identifier
            - Parameters.NUM_ENTITIES => number of events to keep
              (default: 100; capped at the number of distinct events)

    Returns
    ------------------
    sampled_ocel
        Sampled object-centric event log
    """
    if parameters is None:
        parameters = {}

    num_entities = exec_utils.get_param_value(Parameters.NUM_ENTITIES,
                                              parameters, 100)
    event_id_column = exec_utils.get_param_value(Parameters.EVENT_ID,
                                                 parameters,
                                                 ocel.event_id_column)

    candidate_ids = list(ocel.events[event_id_column].unique())
    sample_size = min(len(candidate_ids), num_entities)

    # Shuffle in place, then take the head of the shuffled list.
    random.shuffle(candidate_ids)
    sampled_ids = candidate_ids[:sample_size]

    sampled = copy(ocel)
    keep_mask = sampled.events[event_id_column].isin(sampled_ids)
    sampled.events = sampled.events[keep_mask]

    return filtering_utils.propagate_event_filtering(sampled,
                                                     parameters=parameters)
Exemple #9
0
def apply_timestamp(ocel: OCEL,
                    min_timest: Union[datetime.datetime, str],
                    max_timest: Union[datetime.datetime, str],
                    parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Filters the object-centric event log keeping the events whose timestamp
    falls in the provided range (both extremes included)

    Parameters
    -----------------
    ocel
        Object-centric event log
    min_timest
        Left extreme of the allowed timestamp interval (provided in the format: YYYY-mm-dd HH:MM:SS)
    max_timest
        Right extreme of the allowed timestamp interval (provided in the format: YYYY-mm-dd HH:MM:SS)
    parameters
        Parameters of the algorithm, including:
        - Parameters.TIMESTAMP_KEY => the attribute to use as timestamp

    Returns
    -----------------
    filtered_ocel
        Filtered object-centric event log
    """
    if parameters is None:
        parameters = {}

    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY,
                                               parameters,
                                               ocel.event_timestamp)
    lower_bound = get_dt_from_string(min_timest)
    upper_bound = get_dt_from_string(max_timest)

    filtered = copy(ocel)
    timestamps = filtered.events[timestamp_key]
    in_range = (timestamps >= lower_bound) & (timestamps <= upper_bound)
    filtered.events = filtered.events[in_range]

    return filtering_utils.propagate_event_filtering(filtered,
                                                     parameters=parameters)
Exemple #10
0
def apply(file_path: str, parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Imports an object-centric event log from a JSON-OCEL file, using the default JSON backend of Python

    Parameters
    -----------------
    file_path
        Path to the JSON-OCEL file
    parameters
        Parameters of the algorithm, including:
        - Parameters.EVENT_ID => name of the event identifier column
        - Parameters.EVENT_ACTIVITY => name of the activity column
        - Parameters.EVENT_TIMESTAMP => name of the timestamp column
        - Parameters.OBJECT_ID => name of the object identifier column
        - Parameters.OBJECT_TYPE => name of the object type column
        - Parameters.INTERNAL_INDEX => name of the temporary column used to
          keep the file order among rows sharing the same timestamp

    Returns
    ------------------
    ocel
        Object-centric event log

    Raises
    ------------------
    KeyError
        If a mandatory section or field is missing from the file, or if an
        event refers to an object that is not declared in the objects section
    """
    if parameters is None:
        parameters = {}

    # The context manager closes the file handle even if parsing fails.
    with open(file_path, "r") as json_file:
        ocel = json.load(json_file)

    events = []
    relations = []
    objects = []

    event_id = exec_utils.get_param_value(Parameters.EVENT_ID, parameters,
                                          constants.DEFAULT_EVENT_ID)
    event_activity = exec_utils.get_param_value(
        Parameters.EVENT_ACTIVITY, parameters,
        constants.DEFAULT_EVENT_ACTIVITY)
    event_timestamp = exec_utils.get_param_value(
        Parameters.EVENT_TIMESTAMP, parameters,
        constants.DEFAULT_EVENT_TIMESTAMP)
    object_id = exec_utils.get_param_value(Parameters.OBJECT_ID, parameters,
                                           constants.DEFAULT_OBJECT_ID)
    object_type = exec_utils.get_param_value(Parameters.OBJECT_TYPE,
                                             parameters,
                                             constants.DEFAULT_OBJECT_TYPE)
    internal_index = exec_utils.get_param_value(
        Parameters.INTERNAL_INDEX, parameters,
        constants.DEFAULT_INTERNAL_INDEX)

    parser = dt_parsing.parser.get()

    # Objects come first: the relations need the type of every object.
    types_dict = {}
    for obj_id, obj in ocel[constants.OCEL_OBJECTS_KEY].items():
        obj_type = obj[constants.DEFAULT_OBJECT_TYPE]
        types_dict[obj_id] = obj_type
        dct = {object_id: obj_id, object_type: obj_type}
        dct.update(obj[constants.OCEL_OVMAP_KEY])
        objects.append(dct)

    for ev_id, ev in ocel[constants.OCEL_EVENTS_KEY].items():
        # Parsed once per event and shared with all of its relations.
        timestamp = parser.apply(ev[constants.DEFAULT_EVENT_TIMESTAMP])
        activity = ev[constants.DEFAULT_EVENT_ACTIVITY]

        dct = {
            event_id: ev_id,
            event_timestamp: timestamp,
            event_activity: activity
        }
        dct.update(ev[constants.OCEL_VMAP_KEY])
        events.append(dct)

        for obj in ev[constants.OCEL_OMAP_KEY]:
            relations.append({
                event_id: ev_id,
                event_activity: activity,
                event_timestamp: timestamp,
                object_id: obj,
                object_type: types_dict[obj]
            })

    events = pd.DataFrame(events)
    objects = pd.DataFrame(objects)
    # Explicit columns keep the sort below working when no event is related
    # to any object (an empty list would give a dataframe without columns).
    relations = pd.DataFrame(relations,
                             columns=[
                                 event_id, event_activity, event_timestamp,
                                 object_id, object_type
                             ])

    # The temporary index makes the sort stable with respect to file order.
    events[internal_index] = events.index
    relations[internal_index] = relations.index

    events = events.sort_values([event_timestamp, internal_index])
    relations = relations.sort_values([event_timestamp, internal_index])

    del events[internal_index]
    del relations[internal_index]

    # Named so that the builtin globals() is not shadowed.
    global_attributes = {
        constants.OCEL_GLOBAL_LOG: ocel[constants.OCEL_GLOBAL_LOG],
        constants.OCEL_GLOBAL_EVENT: ocel[constants.OCEL_GLOBAL_EVENT],
        constants.OCEL_GLOBAL_OBJECT: ocel[constants.OCEL_GLOBAL_OBJECT],
    }

    return OCEL(events, objects, relations, global_attributes)
Exemple #11
0
def apply(ocel: OCEL,
          min_num_obj_type: Dict[str, int],
          parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Filters the events of the object-centric logs which are related to at least
    the specified amount of objects per type.

    E.g. apply(ocel, {"order": 1, "element": 2})

    Would keep the following events:

      ocel:eid ocel:timestamp ocel:activity ocel:type:element ocel:type:order
    0       e1     1980-01-01  Create Order  [i4, i1, i3, i2]            [o1]
    1      e11     1981-01-01  Create Order          [i6, i5]            [o2]
    2      e14     1981-01-04  Create Order          [i8, i7]            [o3]

    Parameters
    ------------------
    ocel
        Object-centric event log
    min_num_obj_type
        Minimum number of objects per type.  An event is kept only if, for
        every listed type, it has a count and that count reaches the minimum.
    parameters
        Parameters of the filter, including:
        - Parameters.EVENT_ID => the event identifier

        The dictionary is also forwarded unchanged to the object counting
        and to the filter propagation.

    Returns
    -----------------
    filtered_event_log
        Filtered object-centric event log
    """
    if parameters is None:
        parameters = {}

    event_id = exec_utils.get_param_value(Parameters.EVENT_ID, parameters,
                                          ocel.event_id_column)

    num_obj = objects_ot_count.get_objects_ot_count(ocel,
                                                    parameters=parameters)

    filt_evs = set()
    for evid, evobjs in num_obj.items():
        # A type missing from the counts of the event fails the requirement.
        if all(ot in evobjs and evobjs[ot] >= minimum
               for ot, minimum in min_num_obj_type.items()):
            filt_evs.add(evid)

    ocel = copy(ocel)
    ocel.events = ocel.events[ocel.events[event_id].isin(filt_evs)]

    return filtering_utils.propagate_event_filtering(ocel,
                                                     parameters=parameters)
Exemple #12
0
def get_ocel_from_extended_table(
        df: pd.DataFrame,
        objects_df: Optional[pd.DataFrame] = None,
        parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Builds an OCEL from an "extended table": a dataframe with one row per
    event, where each column whose name starts with the object type prefix
    holds the objects of that type related to the event.

    Parameters
    -----------------
    df
        Extended table (not modified)
    objects_df
        Optional dataframe of the objects.  When omitted, it is built from
        the objects met in the table (object type and identifier only).
    parameters
        Parameters of the algorithm, including:
        - Parameters.OBJECT_TYPE_PREFIX => prefix of the object type columns
        - Parameters.EVENT_ACTIVITY => name of the activity column
        - Parameters.EVENT_ID => name of the event identifier column
        - Parameters.EVENT_TIMESTAMP => name of the timestamp column
        - Parameters.OBJECT_ID => name of the object identifier column
        - Parameters.OBJECT_TYPE => name of the object type column
        - Parameters.INTERNAL_INDEX => name of the temporary column used to
          keep the table order among rows sharing the same timestamp

    Returns
    -----------------
    ocel
        Object-centric event log
    """
    if parameters is None:
        parameters = {}

    object_type_prefix = exec_utils.get_param_value(
        Parameters.OBJECT_TYPE_PREFIX, parameters,
        constants.DEFAULT_OBJECT_TYPE_PREFIX_EXTENDED)
    event_activity = exec_utils.get_param_value(
        Parameters.EVENT_ACTIVITY, parameters,
        constants.DEFAULT_EVENT_ACTIVITY)
    event_id = exec_utils.get_param_value(Parameters.EVENT_ID, parameters,
                                          constants.DEFAULT_EVENT_ID)
    event_timestamp = exec_utils.get_param_value(
        Parameters.EVENT_TIMESTAMP, parameters,
        constants.DEFAULT_EVENT_TIMESTAMP)
    object_id_column = exec_utils.get_param_value(Parameters.OBJECT_ID,
                                                  parameters,
                                                  constants.DEFAULT_OBJECT_ID)
    object_type_column = exec_utils.get_param_value(
        Parameters.OBJECT_TYPE, parameters, constants.DEFAULT_OBJECT_TYPE)
    internal_index = exec_utils.get_param_value(
        Parameters.INTERNAL_INDEX, parameters,
        constants.DEFAULT_INTERNAL_INDEX)

    # Lists in the column order of the table, not sets: recent pandas
    # versions reject a set used as an indexer, and a list keeps the order
    # of the output columns and rows reproducible.
    object_type_columns = [
        x for x in df.columns if x.startswith(object_type_prefix)
    ]
    non_object_type_columns = [
        x for x in df.columns if not x.startswith(object_type_prefix)
    ]
    meaningful_columns = list(
        dict.fromkeys(object_type_columns +
                      [event_activity, event_id, event_timestamp]))

    # Object type name = column name without the prefix.  Slicing is used
    # because splitting on the prefix breaks when the prefix text occurs
    # again inside the type name.
    prefix_length = len(object_type_prefix)
    type_names = {ot: ot[prefix_length:] for ot in object_type_columns}

    stream = df[meaningful_columns].to_dict("records")
    relations = []
    # Insertion-ordered dictionaries act as ordered sets of object ids.
    objects = {ot: {} for ot in object_type_columns}

    for ev in stream:
        for ot in object_type_columns:
            seen = objects[ot]
            for obj in parse_list(ev[ot]):
                seen[obj] = None
                relations.append({
                    event_id: ev[event_id],
                    event_activity: ev[event_activity],
                    event_timestamp: ev[event_timestamp],
                    object_id_column: obj,
                    object_type_column: type_names[ot]
                })

    # Explicit columns keep the sort below working with zero relations.
    # NOTE(review): unlike the events table, the relation timestamps are
    # not passed through pd.to_datetime - confirm whether that is intended.
    relations = pd.DataFrame(relations,
                             columns=[
                                 event_id, event_activity, event_timestamp,
                                 object_id_column, object_type_column
                             ])

    if objects_df is None:
        object_rows = [{
            object_type_column: type_names[ot],
            object_id_column: obj
        } for ot, ids in objects.items() for obj in ids]
        objects_df = pd.DataFrame(
            object_rows, columns=[object_type_column, object_id_column])

    del objects

    # copy(): the columns assigned below must not be written into a slice
    # of the caller's dataframe.
    df = df[non_object_type_columns].copy()
    df[event_timestamp] = pd.to_datetime(df[event_timestamp])

    # The temporary index makes the sort stable with respect to table order.
    df[internal_index] = df.index
    relations[internal_index] = relations.index

    df = df.sort_values([event_timestamp, internal_index])
    relations = relations.sort_values([event_timestamp, internal_index])

    del df[internal_index]
    del relations[internal_index]

    return OCEL(df, objects_df, relations)
Exemple #13
0
def apply(file_path: str, parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Imports an object-centric event log from a XML-OCEL file, using LXML

    Parameters
    -----------------
    file_path
        Path to the XML-OCEL file
    parameters
        Parameters of the algorithm, including:
        - Parameters.EVENT_ID => name of the event identifier column
        - Parameters.EVENT_ACTIVITY => name of the activity column
        - Parameters.EVENT_TIMESTAMP => name of the timestamp column
        - Parameters.OBJECT_ID => name of the object identifier column
        - Parameters.OBJECT_TYPE => name of the object type column
        - Parameters.INTERNAL_INDEX => name of the temporary column used to
          keep the file order among rows sharing the same timestamp

    Returns
    ------------------
    ocel
        Object-centric event log.  The attributes of each object are stored
        as one dictionary in the column named constants.OCEL_OVMAP_KEY; the
        globals of the returned log are empty.
    """
    if parameters is None:
        parameters = {}

    event_rows = []
    relation_rows = []
    object_rows = []
    type_by_object = {}

    event_id = exec_utils.get_param_value(Parameters.EVENT_ID, parameters,
                                          constants.DEFAULT_EVENT_ID)
    event_activity = exec_utils.get_param_value(
        Parameters.EVENT_ACTIVITY, parameters,
        constants.DEFAULT_EVENT_ACTIVITY)
    event_timestamp = exec_utils.get_param_value(
        Parameters.EVENT_TIMESTAMP, parameters,
        constants.DEFAULT_EVENT_TIMESTAMP)
    object_id = exec_utils.get_param_value(Parameters.OBJECT_ID, parameters,
                                           constants.DEFAULT_OBJECT_ID)
    object_type = exec_utils.get_param_value(Parameters.OBJECT_TYPE,
                                             parameters,
                                             constants.DEFAULT_OBJECT_TYPE)
    internal_index = exec_utils.get_param_value(
        Parameters.INTERNAL_INDEX, parameters,
        constants.DEFAULT_INTERNAL_INDEX)

    date_parser = dt_parsing.parser.get()

    xml_parser = etree.XMLParser(remove_comments=True)
    root = objectify.parse(file_path, parser=xml_parser).getroot()

    for section in root:
        section_tag = section.tag.lower()

        if section_tag.endswith("events"):
            for event_node in section:
                eve_id = None
                eve_activity = None
                eve_timestamp = None
                eve_omap = []
                eve_vmap = {}

                for field in event_node:
                    field_key = field.get("key")
                    if field_key == "id":
                        eve_id = field.get("value")
                    elif field_key == "timestamp":
                        eve_timestamp = parse_xml(field.get("value"),
                                                  field.tag.lower(),
                                                  date_parser)
                    elif field_key == "activity":
                        eve_activity = field.get("value")
                    elif field_key == "omap":
                        # Related objects: one child per object identifier.
                        for ref in field:
                            eve_omap.append(ref.get("value"))
                    elif field_key == "vmap":
                        # Event attributes, typed after the tag of the child.
                        for attribute in field:
                            eve_vmap[attribute.get("key")] = parse_xml(
                                attribute.get("value"), attribute.tag.lower(),
                                date_parser)

                event_row = {
                    event_id: eve_id,
                    event_activity: eve_activity,
                    event_timestamp: eve_timestamp
                }
                event_row.update(eve_vmap)
                event_rows.append(event_row)

                # The object type is filled in once all objects are read.
                for related_object in eve_omap:
                    relation_rows.append({
                        event_id: eve_id,
                        event_activity: eve_activity,
                        event_timestamp: eve_timestamp,
                        object_id: related_object
                    })

        elif section_tag.endswith("objects"):
            for object_node in section:
                obj_id = None
                obj_type = None
                obj_ovmap = {}

                for field in object_node:
                    field_key = field.get("key")
                    if field_key == "id":
                        obj_id = field.get("value")
                    elif field_key == "type":
                        obj_type = field.get("value")
                    elif field_key == "ovmap":
                        for attribute in field:
                            obj_ovmap[attribute.get("key")] = parse_xml(
                                attribute.get("value"), attribute.tag.lower(),
                                date_parser)

                object_rows.append({
                    object_id: obj_id,
                    object_type: obj_type,
                    constants.OCEL_OVMAP_KEY: obj_ovmap
                })
                type_by_object[obj_id] = obj_type

    # Second pass: every relation gets the type of its object.
    for relation in relation_rows:
        relation[object_type] = type_by_object[relation[object_id]]

    events = pd.DataFrame(event_rows)
    objects = pd.DataFrame(object_rows)
    relations = pd.DataFrame(relation_rows)

    # The temporary index makes the sort stable with respect to file order.
    events[internal_index] = events.index
    relations[internal_index] = relations.index

    sort_columns = [event_timestamp, internal_index]
    events = events.sort_values(sort_columns)
    relations = relations.sort_values(sort_columns)

    del events[internal_index]
    del relations[internal_index]

    return OCEL(events, objects, relations, {})
Exemple #14
0
def from_traditional_log(log: EventLog,
                         parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Transforms an EventLog to an OCEL

    Every trace becomes one object of the target object type (carrying the
    trace attributes), and every event is related to the object of its trace.
    Event identifiers are progressive numbers (as strings), starting from "1".

    Parameters
    -----------------
    log
        Event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.TARGET_OBJECT_TYPE => the name of the object type to which the cases should be mapped
          (default: "OTYPE")
        - Parameters.ACTIVITY_KEY => the attribute to use as activity
        - Parameters.TIMESTAMP_KEY => the attribute to use as timestamp
        - Parameters.CASE_ID_KEY => the attribute to use as case identifier

    Returns
    -----------------
    ocel
        OCEL (equivalent to the provided event log)
    """
    if parameters is None:
        parameters = {}

    log = log_converter.apply(log,
                              variant=log_converter.Variants.TO_EVENT_LOG,
                              parameters=parameters)

    target_object_type = exec_utils.get_param_value(
        Parameters.TARGET_OBJECT_TYPE, parameters, "OTYPE")
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY,
                                             parameters,
                                             xes_constants.DEFAULT_TRACEID_KEY)

    events = []
    objects = []
    relations = []

    # Already stored under the OCEL column names, so not copied again.
    reserved_event_keys = (activity_key, timestamp_key)

    ev_count = 0
    for trace in log:
        case_id = trace.attributes[case_id_key]
        obj = {
            ocel_constants.DEFAULT_OBJECT_ID: case_id,
            ocel_constants.DEFAULT_OBJECT_TYPE: target_object_type
        }
        for attr in trace.attributes:
            if attr != case_id_key:
                obj[attr] = trace.attributes[attr]
        objects.append(obj)

        for ev in trace:
            ev_count = ev_count + 1
            ev_identifier = str(ev_count)
            activity = ev[activity_key]
            timestamp = ev[timestamp_key]
            eve = {
                ocel_constants.DEFAULT_EVENT_ID: ev_identifier,
                ocel_constants.DEFAULT_EVENT_ACTIVITY: activity,
                ocel_constants.DEFAULT_EVENT_TIMESTAMP: timestamp
            }
            for attr in ev:
                # Compare against the attribute *keys*.  Comparing against
                # the activity/timestamp values let both attributes through
                # and dropped any attribute named like the current activity.
                if attr not in reserved_event_keys:
                    eve[attr] = ev[attr]
            events.append(eve)
            relations.append({
                ocel_constants.DEFAULT_EVENT_ID: ev_identifier,
                ocel_constants.DEFAULT_EVENT_ACTIVITY: activity,
                ocel_constants.DEFAULT_EVENT_TIMESTAMP: timestamp,
                ocel_constants.DEFAULT_OBJECT_ID: case_id,
                ocel_constants.DEFAULT_OBJECT_TYPE: target_object_type
            })

    events = pd.DataFrame(events)
    objects = pd.DataFrame(objects)
    relations = pd.DataFrame(relations)

    return OCEL(events=events, objects=objects, relations=relations)
Exemple #15
0
def from_interleavings(df1: pd.DataFrame,
                       df2: pd.DataFrame,
                       interleavings: pd.DataFrame,
                       parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Transforms a couple of dataframes, along with the interleavings between them, to an OCEL

    The cases of each dataframe become objects (of two distinct object types),
    the events of both dataframes are concatenated, and every interleaving
    adds a cross relation between an event of one dataframe and the object
    (case) of the connected event in the other dataframe.

    The provided interleavings dataframe is NOT modified: the function works
    on an internal copy.

    Parameters
    -----------------
    df1
        First of the two dataframes
    df2
        Second of the two dataframes
    interleavings
        Interleavings dataframe. The left/right index columns must contain
        values convertible to integers.
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY => the attribute to use as activity
        - Parameters.TIMESTAMP_KEY => the attribute to use as timestamp
        - Parameters.CASE_ID_KEY => the attribute to use as case identifier
        - Parameters.CASE_ATTRIBUTE_PREFIX => the prefix identifying the attributes at the case level
        - Parameters.TARGET_OBJECT_TYPE => the name of the object type to which the cases of the first log should be mapped
        - Parameters.TARGET_OBJECT_TYPE_2 => the name of the object type to which the cases of the second log should be mapped
        - Parameters.LEFT_INDEX => the index column of the events of the first dataframe, in the interleavings dataframe
        - Parameters.RIGHT_INDEX => the index column of the events of the second dataframe, in the interleavings
                                    dataframe.
        - Parameters.DIRECTION => the column of the interleavings dataframe holding the direction of each
                                  interleaving (its values are compared against "LR" and "RL")

    Returns
    -----------------
    ocel
        OCEL containing the events, objects and relations extracted from both
        dataframes, plus the relations induced by the interleavings
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY,
                                             parameters,
                                             constants.CASE_CONCEPT_NAME)
    case_attribute_prefix = exec_utils.get_param_value(
        Parameters.CASE_ATTRIBUTE_PREFIX, parameters,
        constants.CASE_ATTRIBUTE_PREFIX)
    target_object_type = exec_utils.get_param_value(
        Parameters.TARGET_OBJECT_TYPE, parameters, "OTYPE")
    target_object_type_2 = exec_utils.get_param_value(
        Parameters.TARGET_OBJECT_TYPE_2, parameters, "OTYPE2")
    left_index = exec_utils.get_param_value(Parameters.LEFT_INDEX, parameters,
                                            "@@left_index")
    right_index = exec_utils.get_param_value(Parameters.RIGHT_INDEX,
                                             parameters, "@@right_index")
    direction = exec_utils.get_param_value(Parameters.DIRECTION, parameters,
                                           "@@direction")

    # Columns needed to resolve "event -> related object" on the other side.
    # A list (not a set) is required: pandas >= 2.0 rejects set indexers, and
    # a list keeps the column order deterministic.
    minimal_columns = [
        ocel_constants.DEFAULT_EVENT_ID, ocel_constants.DEFAULT_OBJECT_ID,
        ocel_constants.DEFAULT_OBJECT_TYPE
    ]

    events1 = __get_events_dataframe(df1,
                                     activity_key,
                                     timestamp_key,
                                     case_id_key,
                                     case_attribute_prefix,
                                     events_prefix="E1_")
    objects1 = __get_objects_dataframe(df1, case_id_key, case_attribute_prefix,
                                       target_object_type)
    relations1 = __get_relations_from_events(events1, target_object_type)
    relations1_minimal = relations1[minimal_columns]

    events2 = __get_events_dataframe(df2,
                                     activity_key,
                                     timestamp_key,
                                     case_id_key,
                                     case_attribute_prefix,
                                     events_prefix="E2_")
    objects2 = __get_objects_dataframe(df2, case_id_key, case_attribute_prefix,
                                       target_object_type_2)
    relations2 = __get_relations_from_events(events2, target_object_type_2)
    relations2_minimal = relations2[minimal_columns]

    # Work on a copy so the caller's dataframe is left untouched (rewriting
    # the index columns in place would also make a second call on the same
    # dataframe fail in the integer cast below).
    interleavings = interleavings.copy()

    # Rewrite the index columns using the same "E1_"/"E2_" prefixes passed to
    # __get_events_dataframe, so they can be joined against the event IDs.
    # NOTE(review): presumably the event IDs are "<prefix><integer index>" -
    # confirm against __get_events_dataframe.
    interleavings[left_index] = "E1_" + interleavings[left_index].astype(
        int).astype(str)
    interleavings[right_index] = "E2_" + interleavings[right_index].astype(
        int).astype(str)
    interleavings_lr = interleavings[interleavings[direction] == "LR"][[
        left_index, right_index
    ]]
    interleavings_rl = interleavings[interleavings[direction] == "RL"][[
        left_index, right_index
    ]]

    # LR interleavings: relate each event of the first dataframe to the
    # object of the connected event of the second dataframe.
    relations3 = events1.merge(interleavings_lr,
                               left_on=ocel_constants.DEFAULT_EVENT_ID,
                               right_on=left_index)
    relations3 = relations3.merge(relations2_minimal,
                                  left_on=right_index,
                                  right_on=ocel_constants.DEFAULT_EVENT_ID,
                                  suffixes=('', '_@#@#RIGHT'))
    relations3[ocel_constants.DEFAULT_OBJECT_ID] = relations3[
        ocel_constants.DEFAULT_OBJECT_ID + '_@#@#RIGHT']
    relations3[ocel_constants.DEFAULT_OBJECT_TYPE] = target_object_type_2

    # RL interleavings: relate each event of the second dataframe to the
    # object of the connected event of the first dataframe.
    relations4 = events2.merge(interleavings_rl,
                               left_on=ocel_constants.DEFAULT_EVENT_ID,
                               right_on=right_index)
    relations4 = relations4.merge(relations1_minimal,
                                  left_on=left_index,
                                  right_on=ocel_constants.DEFAULT_EVENT_ID,
                                  suffixes=('', '_@#@#LEFT'))
    relations4[ocel_constants.DEFAULT_OBJECT_ID] = relations4[
        ocel_constants.DEFAULT_OBJECT_ID + '_@#@#LEFT']
    relations4[ocel_constants.DEFAULT_OBJECT_TYPE] = target_object_type

    # The object ID column was only needed on the events dataframes to build
    # the relations above; drop it before assembling the events table.
    del events1[ocel_constants.DEFAULT_OBJECT_ID]
    del events2[ocel_constants.DEFAULT_OBJECT_ID]

    events = pd.concat([events1, events2])
    objects = pd.concat([objects1, objects2])
    relations = pd.concat([relations1, relations2, relations3, relations4])

    sort_columns = [
        ocel_constants.DEFAULT_EVENT_TIMESTAMP, ocel_constants.DEFAULT_EVENT_ID
    ]
    events = events.sort_values(sort_columns)
    relations = relations.sort_values(sort_columns)

    return OCEL(events=events, objects=objects, relations=relations)