Example #1
0
def project(log: EventLog, cut: Cut, activity_key: str) -> List[EventLog]:
    """
    Project a log on the parts of a loop cut

    Parameters
    -------------
    log
        Event log to project
    cut
        Loop cut: the first element is the "do" group of activities, the remaining
        elements are the "redo" groups
    activity_key
        Event attribute holding the activity name

    Returns
    -------------
    logs
        List starting with the log of the "do" part, followed by the "redo" logs
        (as maintained by _append_trace_to_redo_log)
    """
    do_part = cut[0]
    redo_parts = cut[1:]
    do_log = EventLog()
    redo_logs = [EventLog() for _ in redo_parts]
    for trace in log:
        do_segment = Trace()
        redo_segment = Trace()
        for event in trace:
            if event[activity_key] in do_part:
                do_segment.append(event)
                # a "do" event closes the "redo" segment collected so far
                if len(redo_segment) > 0:
                    redo_logs = _append_trace_to_redo_log(redo_segment, redo_logs, redo_parts, activity_key)
                    redo_segment = Trace()
                continue
            redo_segment.append(event)
            # a "redo" event closes the "do" segment collected so far
            if len(do_segment) > 0:
                do_log.append(do_segment)
                do_segment = Trace()
        if len(redo_segment) > 0:
            redo_logs = _append_trace_to_redo_log(redo_segment, redo_logs, redo_parts, activity_key)
        # the trailing "do" segment is stored even when it is empty
        do_log.append(do_segment)
    return [do_log, *redo_logs]
Example #2
0
def detect(log: EventLog, start_activities, act_key: str) -> Optional[EventLog]:
    """
    Split each trace in front of every non-initial event whose activity is a start activity

    Parameters
    -------------
    log
        Event log
    start_activities
        Collection of start activities
    act_key
        Event attribute holding the activity name

    Returns
    -------------
    proj
        Log of the resulting segments if at least one trace was split
        (i.e. it has more traces than the input log), None otherwise
    """
    split_log = EventLog()
    for trace in log:
        segment_start = 0
        # position 0 is skipped: a trace is never split in front of its first event
        for position in range(1, len(trace)):
            if trace[position][act_key] in start_activities:
                split_log.append(Trace(trace[segment_start:position]))
                segment_start = position
        split_log.append(Trace(trace[segment_start:len(trace)]))
    if len(split_log) > len(log):
        return split_log
    return None
Example #3
0
def form_log_from_dictio_couple(first_cases_repr,
                                second_cases_repr,
                                enable_multiplier=False):
    """
    Form a log from a couple of case representations, to use for
    root cause analysis. Every case becomes a trace with a single event.

    Parameters
    -------------
    first_cases_repr
        First cases representation (indexable collection of event dictionaries)
    second_cases_repr
        Second cases representation (indexable collection of event dictionaries)
    enable_multiplier
        Enable balancing of classes: the smaller class is repeated
        int(larger size / smaller size) times. Balancing is skipped
        when one of the two classes is empty.

    Returns
    ------------
    log
        Trace log object
    """
    size_first = len(first_cases_repr)
    size_second = len(second_cases_repr)

    multiplier_first = 1
    multiplier_second = 1
    # the size guard avoids a ZeroDivisionError when one class has no cases
    if enable_multiplier and size_first > 0 and size_second > 0:
        multiplier_first = int(max(float(size_second) / float(size_first), 1))
        multiplier_second = int(max(float(size_first) / float(size_second), 1))

    log = EventLog()
    classes = ((first_cases_repr, size_first, multiplier_first),
               (second_cases_repr, size_second, multiplier_second))
    for cases_repr, size, multiplier in classes:
        for _ in range(multiplier):
            for i in range(size):
                trace = Trace()
                trace.append(Event(cases_repr[i]))
                log.append(trace)

    return log
Example #4
0
def get_log_with_log_prefixes(log, parameters=None):
    """
    Builds an extended log made of the prefixes of the cases of the original log

    For every event of a case, two copies are stored in order: the prefix
    before the event is added and the prefix after the event is added.

    Parameters
    --------------
    log
        Original log
    parameters
        Possible parameters of the algorithm (currently not read)

    Returns
    -------------
    all_prefixes_log
        Log with all the prefixes
    change_indexes
        One list per case, repeating (once per event of the case) the index of
        the last entry of the extended log after the case has been processed
    """
    all_prefixes_log = EventLog()
    change_indexes = []

    for trace in log:
        prefix = Trace()
        for event in trace:
            prefix_before = deepcopy(prefix)
            prefix.append(event)
            all_prefixes_log.append(prefix_before)
            all_prefixes_log.append(deepcopy(prefix))
        last_index = len(all_prefixes_log) - 1
        change_indexes.append([last_index] * len(trace))

    return all_prefixes_log, change_indexes
Example #5
0
def sort_timestamp_trace(trace,
                         timestamp_key=xes.DEFAULT_TIMESTAMP_KEY,
                         reverse_sort=False):
    """
    Returns a new trace with the events sorted on the timestamp key

    Parameters
    -----------
    trace
        Trace
    timestamp_key
        Timestamp key
    reverse_sort
        If true, the events are sorted in descending order
        (the default order is ascending)

    Returns
    -----------
    trace
        New sorted trace, carrying the attributes of the original trace
    """
    def _timestamp_of(event):
        return event[timestamp_key]

    ordered_events = list(trace._list)
    ordered_events.sort(key=_timestamp_of, reverse=reverse_sort)
    return Trace(ordered_events, attributes=trace.attributes)
Example #6
0
def apply_tree_variants(variants, parameters=None):
    """
    Discovers a process tree from a dictionary of variants: one trace per
    variant is built and the resulting log is passed to apply_tree

    Parameters
    ----------
    variants
        Variants (only the keys of the dictionary are read)
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    ----------
    process_tree
        Process tree
    """
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)

    log = EventLog()
    for variant in variants.keys():
        trace = Trace()
        for label in variants_util.get_activities_from_variant(variant):
            trace.append(Event({activity_key: label}))
        log.append(trace)

    return apply_tree(log, parameters=parameters)
Example #7
0
def get_prefixes_from_log(log: EventLog, length: int) -> EventLog:
    """
    Cuts the traces of a log to a maximum length

    Parameters
    ----------------
    log
        Event log
    length
        Maximum number of events kept for each trace

    Returns
    ----------------
    prefix_log
        Log containing the prefixes (log-level metadata is carried over):
        - a trace with at most "length" events is included as-is (same object)
        - a longer trace is replaced by a new trace with its first "length"
          events and the same attributes
    """
    prefix_log = EventLog(list(),
                          attributes=log.attributes,
                          extensions=log.extensions,
                          classifiers=log.classifiers,
                          omni_present=log.omni_present,
                          properties=log.properties)
    for trace in log:
        if len(trace) > length:
            shortened = Trace(attributes=trace.attributes)
            for position in range(length):
                shortened.append(trace[position])
            trace = shortened
        prefix_log.append(trace)
    return prefix_log
Example #8
0
def list_of_str_to_trace(activities: List[str]) -> Trace:
    """
    Builds a trace with one event per activity name, stored under "concept:name"
    """
    trace = Trace()
    for label in activities:
        trace.append(Event({"concept:name": label}))
    return trace
Example #9
0
def apply(df, parameters=None):
    """
    Convert a dataframe into a log containing 1 case per variant (only control-flow
    perspective is considered)

    Parameters
    -------------
    df
        Dataframe
    parameters
        Parameters of the algorithm, including:
            pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute used to store
            the activity in the events (default: xes.DEFAULT_NAME_KEY)

    Returns
    -------------
    log
        Event log
    """
    from pm4py.statistics.traces.pandas import case_statistics

    if parameters is None:
        parameters = {}
    variant_stats = case_statistics.get_variant_statistics(df, parameters=parameters)
    activity_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)

    log = EventLog()
    for stat in variant_stats:
        trace = Trace()
        # the variant is a comma-separated string of activities
        for label in stat['variant'].split(","):
            trace.append(Event({activity_key: label}))
        log.append(trace)
    return log
Example #10
0
def variant_to_trace(variant, parameters=None):
    """
    Converts a variant into a trace with one event per activity

    Parameters
    -------------
    variant
        Variant, expressed either as a tuple/list of activities or as a string
        of activities joined by the variant delimiter. Subclasses of these
        types are accepted; any other type yields an empty trace.
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute used to store the activity
            Parameters.PARAMETER_VARIANT_DELIMITER -> delimiter of a string variant

    Returns
    -------------
    trace
        Trace object
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    variant_delimiter = exec_utils.get_param_value(
        Parameters.PARAMETER_VARIANT_DELIMITER, parameters,
        constants.DEFAULT_VARIANT_SEP)

    from pm4py.objects.log.obj import Trace, Event

    # isinstance (instead of an exact type comparison) lets subclasses such as
    # named tuples through, instead of silently producing an empty trace
    if isinstance(variant, str):
        activities = variant.split(variant_delimiter)
    elif isinstance(variant, (tuple, list)):
        activities = variant
    else:
        activities = ()

    trace = Trace()
    for act in activities:
        trace.append(Event({activity_key: act}))

    return trace
Example #11
0
def apply(tree: ProcessTree, parameters : Optional[Dict[Union[str, Parameters], Any]] = None) -> EventLog:
    """
    Performs an extensive playout of the process tree

    Parameters
    -------------
    tree
        Process tree
    parameters
        Possible parameters, including:
        - Parameters.MIN_TRACE_LENGTH => minimum length of a trace (default: 1)
        - Parameters.MAX_TRACE_LENGTH => maximum length of a trace (default: the minimum trace length
        computed on the tree by bottomup_discovery.get_min_trace_length)
        - Parameters.MAX_LOOP_OCC => maximum number of occurrences for a loop (default: int(MAX_TRACE_LENGTH / 2))
        - Parameters.ACTIVITY_KEY => activity key
        - Parameters.MAX_LIMIT_NUM_TRACES => maximum number to the limit of traces; the playout shall stop when the number is reached (default: 100000)
        - Parameters.RETURN_SET_STRINGS => if true, the traces computed for the root of the tree are returned
        as they are, without building an event log (default: False)
    Returns
    -------------
    log
        Event log (or the raw playout traces when Parameters.RETURN_SET_STRINGS is enabled)
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    # memory saving: a single Event object is allocated for each activity and shared among all the traces.
    # the activities of the process tree are read from its footprints
    fp_tree = fp_discovery.apply(tree, parameters=parameters)
    shared_events = {label: Event({activity_key: label}) for label in fp_tree["activities"]}

    min_allowed_trace_length = bottomup_discovery.get_min_trace_length(tree, parameters=parameters)
    min_trace_length = exec_utils.get_param_value(Parameters.MIN_TRACE_LENGTH, parameters, 1)
    max_trace_length = exec_utils.get_param_value(Parameters.MAX_TRACE_LENGTH, parameters, min_allowed_trace_length)
    max_loop_occ = exec_utils.get_param_value(Parameters.MAX_LOOP_OCC, parameters, int(max_trace_length / 2))
    max_limit_num_traces = exec_utils.get_param_value(Parameters.MAX_LIMIT_NUM_TRACES, parameters, 100000)
    return_set_strings = exec_utils.get_param_value(Parameters.RETURN_SET_STRINGS, parameters, False)

    bottomup = bottomup_discovery.get_bottomup_nodes(tree, parameters=parameters)
    min_rem_dict = bottomup_discovery.get_min_rem_dict(tree, parameters=parameters)
    max_rem_dict = bottomup_discovery.get_max_rem_dict(tree, parameters=parameters)

    # the nodes are visited in the order given by get_bottomup_nodes; every call fills playout_dictio
    playout_dictio = {}
    for node in bottomup:
        get_playout(node, playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict,
                    max_rem_dict, max_limit_num_traces)
    tree_playout_traces = playout_dictio[tree][TRACES]

    if return_set_strings:
        return tree_playout_traces

    log = EventLog()
    for labels in tree_playout_traces:
        trace = Trace()
        for label in labels:
            trace.append(shared_events[label])
        log.append(trace)

    return log
def apply(df, parameters=None):
    """
    Convert a dataframe into a log containing N case per variant (only control-flow
    perspective is considered)

    The same Trace object is appended to the log once for every case of the variant.

    Parameters
    -------------
    df
        Dataframe
    parameters
        Parameters of the algorithm, including:
            RETURN_VARIANTS -> if true, also returns the positions of each variant in the log
            pm4_constants.PARAMETER_CONSTANT_CASEID_KEY -> key of the variant statistics
            holding the number of cases (default: pm4_constants.CASE_CONCEPT_NAME)
            pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute used to store
            the activity in the events (default: xes.DEFAULT_NAME_KEY)

    Returns
    -------------
    log
        Event log
    all_variants_log
        (Only if RETURN_VARIANTS is enabled) dictionary associating each variant
        to the list of indexes of its traces in the log
    """
    from pm4py.statistics.traces.pandas import case_statistics

    if parameters is None:
        parameters = {}

    return_variants = parameters.get(RETURN_VARIANTS, False)
    case_glue = parameters.get(pm4_constants.PARAMETER_CONSTANT_CASEID_KEY,
                               pm4_constants.CASE_CONCEPT_NAME)
    activity_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                                  xes.DEFAULT_NAME_KEY)

    variant_stats = case_statistics.get_variant_statistics(
        df, parameters=parameters)

    log = EventLog()
    all_variants_log = {}
    for stat in variant_stats:
        variant_name = stat['variant']
        labels = variant_name.split(",")
        occurrences = stat[case_glue]
        trace = Trace()
        for label in labels:
            trace.append(Event({activity_key: label}))
        positions = []
        all_variants_log[variant_name] = positions
        for _ in range(occurrences):
            log.append(trace)
            positions.append(len(log) - 1)

    if return_variants:
        return log, all_variants_log

    return log
Example #13
0
def apply(log: EventLog, act1: str, act2: str, parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> EventLog:
    """
    Given an event log, filters all the subtraces going from an event with activity "act1" to an event with
    activity "act2"

    A subtrace is opened by the first "act1" event met while no subtrace is open, and it is closed (and added
    to the result) by the next "act2" event. Every event of the trace is examined, including the last one.
    A subtrace that is still open at the end of the trace is discarded.

    Parameters
    ----------------
    log
        Event log
    act1
        First activity
    act2
        Second activity
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY => activity key

    Returns
    ----------------
    filtered_log
        Log with all the subtraces going from "act1" to "act2"
    """
    if parameters is None:
        parameters = {}

    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)

    filtered_log = EventLog(attributes=log.attributes, extensions=log.extensions, omni_present=log.omni_present,
                            classifiers=log.classifiers, properties=log.properties)

    for trace in log:
        # subtrace currently being collected (None when no "act1" is pending)
        filt_trace = None

        # iterate over ALL the events: stopping before the last one would lose
        # every subtrace whose closing "act2" is the final event of the trace
        for event in trace:
            activity = event[activity_key]
            if filt_trace is None:
                if activity == act1:
                    filt_trace = Trace(attributes=trace.attributes)
                    filt_trace.append(event)
            elif activity == act2:
                filt_trace.append(event)
                filtered_log.append(filt_trace)
                filt_trace = None
            else:
                filt_trace.append(event)

    return filtered_log
Example #14
0
def filter_log_by_attributes_threshold(log,
                                       attributes,
                                       variants,
                                       vc,
                                       threshold,
                                       attribute_key=xes.DEFAULT_NAME_KEY):
    """
    Keep only the events whose attribute value has a number of occurrences that reaches the threshold
    (or, when filtering on the default name key, whose value belongs to the first variant)

    Parameters
    ----------
    log
        Log
    attributes
        Dictionary of attributes associated with their count
    variants
        Dictionary with variant as the key and the list of traces as the value
    vc
        List of variant names along with their count (the first entry identifies the first variant)
    threshold
        Cutting threshold (remove attributes which number of occurrences is below the threshold)
    attribute_key
        (If specified) Specify the activity key in the log (default concept:name)

    Returns
    ----------
    filtered_log
        Filtered log (traces left without events are dropped)
    """
    filtered_log = EventLog(list(),
                            attributes=log.attributes,
                            extensions=log.extensions,
                            classifiers=log.classifiers,
                            omni_present=log.omni_present,
                            properties=log.properties)
    # attribute values found in the first trace of the first variant
    first_variant_trace = variants[vc[0][0]][0]
    first_variant_values = [
        event[attribute_key] for event in first_variant_trace
        if attribute_key in event
    ]
    # the first variant is protected only when filtering on the default name key
    protect_first_variant = attribute_key == xes.DEFAULT_NAME_KEY

    for trace in log:
        kept = Trace()
        for event in trace:
            if attribute_key not in event:
                continue
            value = event[attribute_key]
            if value not in attributes:
                continue
            if (protect_first_variant and value in first_variant_values
                ) or attributes[value] >= threshold:
                kept.append(event)
        if len(kept) > 0:
            for name in trace.attributes:
                kept.attributes[name] = trace.attributes[name]
            filtered_log.append(kept)
    return filtered_log
Example #15
0
def acyclic_net_variants(net,
                         initial_marking,
                         final_marking,
                         activity_key=xes_util.DEFAULT_NAME_KEY):
    """
    Given an acyclic accepting Petri net, initial and final marking extracts a set of variants (in form of traces)
    replayable on the net.
    Warning: this function is based on a marking exploration. If the accepting Petri net contains loops, the method
    will not work properly as it stops the search if a specific marking has already been encountered.

    Parameters
    ----------
    net
        An acyclic workflow net
    initial_marking
        The initial marking of the net
    final_marking
        The final marking of the net
    activity_key
        Activity key to use in the events of the returned traces

    Returns
    -------
    variants
        List of variants - in the form of Trace objects - obtainable executing the net
    """
    # every state is a couple (marking, labels of the visible transitions fired so far)
    pending = {(initial_marking, ())}
    explored = set()
    label_sequences = set()
    while pending:
        state = pending.pop()
        marking, partial_trace = state
        for transition in semantics.enabled_transitions(net, marking):
            label = transition.label
            # transitions without label do not contribute to the trace
            extended_trace = partial_trace if label is None else partial_trace + (label, )
            reached_marking = semantics.execute(transition, net, marking)
            if reached_marking == final_marking:
                label_sequences.add(extended_trace)
                continue
            successor = (reached_marking, extended_trace)
            # a state is scheduled only if it has not been explored and differs from the current one
            if successor not in explored and state != successor:
                pending.add(successor)
        explored.add(state)

    trace_variants = []
    for labels in label_sequences:
        trace = Trace()
        for activity_label in labels:
            trace.append(Event({activity_key: activity_label}))
        trace_variants.append(trace)
    return trace_variants
Example #16
0
def filter_log_traces_attr(log, values, parameters=None):
    """
    Filter log by keeping only traces that has/has not events with an attribute value that belongs to the provided
    values list

    Parameters
    -----------
    log
        Trace log
    values
        Allowed attributes
    parameters
        Parameters of the algorithm, including:
            Parameters.ATTRIBUTE_KEY -> Attribute of the events to check (default: DEFAULT_NAME_KEY)
            Parameters.POSITIVE -> If true, keeps the traces having at least one matching event;
            otherwise keeps the traces without any matching event (default: True)

    Returns
    -----------
    filtered_log
        Filtered log, containing the kept traces themselves (not copies).
        Traces without events are never included.
    """

    # CODE SAVING FROM FILTERS

    if parameters is None:
        parameters = {}

    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)

    filtered_log = EventLog()
    for trace in log:
        # stop scanning at the first matching event
        found = any(attribute_key in event and event[attribute_key] in values for event in trace)
        keep = found if positive else not found

        # a discarded trace needs no placeholder: the attribute-carrying empty
        # trace built in its place was never added to the result
        if keep and len(trace) > 0:
            filtered_log.append(trace)
    return filtered_log
def apply(log, values, parameters=None):
    """
    Filter log by keeping only traces that has/has not events with an attribute value that belongs to the provided
    values list

    Parameters
    -----------
    log
        Trace log
    values
        Allowed attributes
    parameters
        Parameters of the algorithm, including:
            Parameters.ATTRIBUTE_KEY -> Attribute of the events to check (default: DEFAULT_NAME_KEY)
            Parameters.POSITIVE -> If true, keeps the traces having at least one matching event;
            otherwise keeps the traces without any matching event (default: True)

    Returns
    -----------
    filtered_log
        Filtered log, carrying the log-level metadata of the original log and containing
        the kept traces themselves (not copies). Traces without events are never included.
    """
    if parameters is None:
        parameters = {}

    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)

    filtered_log = EventLog(list(), attributes=log.attributes, extensions=log.extensions, classifiers=log.classifiers,
                            omni_present=log.omni_present, properties=log.properties)
    for trace in log:
        # stop scanning at the first matching event
        found = any(attribute_key in event and event[attribute_key] in values for event in trace)
        keep = found if positive else not found

        # a discarded trace needs no placeholder: the attribute-carrying empty
        # trace built in its place was never added to the result
        if keep and len(trace) > 0:
            filtered_log.append(trace)
    return filtered_log
Example #18
0
def apply_trace_attributes(log, list_of_values, parameters=None):
    """
    Filter log by keeping only traces that has/has not certain case attribute value that belongs to the provided
    values list

    Parameters
    -----------
    log
        Trace log
    list_of_values
        Allowed attribute values (if it's numerical value, [] is needed to make it a list)
    parameters
        Parameters of the algorithm, including:
            Parameters.ATTRIBUTE_KEY -> Attribute of the case to check (default: DEFAULT_NAME_KEY)
            Parameters.POSITIVE -> If true, keeps the traces whose attribute value is allowed;
            otherwise keeps the other traces (default: True)

    Returns
    -----------
    filtered_log
        Filtered log, containing the kept traces themselves (not copies).
        Traces without events are never included.
    """
    if parameters is None:
        parameters = {}

    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY,
                                               parameters, DEFAULT_NAME_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters,
                                          True)

    filtered_log = EventLog()
    for trace in log:
        found = (attribute_key in trace.attributes
                 and trace.attributes[attribute_key] in list_of_values)
        keep = found if positive else not found

        # a discarded trace needs no placeholder: the attribute-carrying empty
        # trace built in its place was never added to the result
        if keep and len(trace) > 0:
            filtered_log.append(trace)
    return filtered_log
Example #19
0
 def read_trace(self) -> Trace:
     """
     Builds the trace at the current position (self.i) and moves to the next one

     The events are read from self.activities and self.timestamps, in the
     range starting at self.c_ind[self.i] and spanning self.c_counts[self.i]
     entries; the case identifier is self.c_unq[self.i].

     Returns
     -------------
     trace
         The trace, or None when all the self.no_traces traces have been read
     """
     if not self.i < self.no_traces:
         return None

     position = self.i
     first = self.c_ind[position]
     stop = first + self.c_counts[position]
     trace = Trace(
         attributes={xes_constants.DEFAULT_TRACEID_KEY: self.c_unq[position]})
     for row in range(first, stop):
         trace.append(
             Event({
                 xes_constants.DEFAULT_NAME_KEY: self.activities[row],
                 xes_constants.DEFAULT_TIMESTAMP_KEY: self.timestamps[row]
             }))
     self.i = position + 1
     return trace
Example #20
0
def parse_event_log_string(
        traces: Collection[str],
        sep: str = ",",
        activity_key: str = xes_constants.DEFAULT_NAME_KEY,
        timestamp_key: str = xes_constants.DEFAULT_TIMESTAMP_KEY,
        case_id_key: str = xes_constants.DEFAULT_TRACEID_KEY) -> EventLog:
    """
    Parse a collection of traces expressed as strings
    (e.g., ["A,B,C,D", "A,C,B,D", "A,D"])
    to an event log

    The case identifier of a trace is its position in the collection (as a string).
    The events get artificial timestamps: consecutive seconds, starting
    from the epoch value 10000000, increasing through the whole log.

    Parameters
    ------------------
    traces
        Collection of traces expressed as strings
    sep
        Separator used to split the activities of a string trace
    activity_key
        The attribute that should be used as activity
    timestamp_key
        The attribute that should be used as timestamp
    case_id_key
        The attribute that should be used as case identifier

    Returns
    -----------------
    log
        Event log
    """
    log = EventLog()
    next_timestamp = 10000000
    for case_index, trace_string in enumerate(traces):
        labels = trace_string.split(sep)
        parsed_trace = Trace()
        parsed_trace.attributes[case_id_key] = str(case_index)
        for label in labels:
            parsed_trace.append(
                Event({
                    activity_key: label,
                    timestamp_key: datetime.datetime.fromtimestamp(next_timestamp)
                }))
            next_timestamp += 1
        log.append(parsed_trace)
    return log
Example #21
0
def project(log, groups, activity_key):
    """
    Assigns each trace to the group covering most of its events and projects it on that group

    Parameters
    ----------
    log
        original log
    groups
        list of activity groups
    activity_key
        key to use in the event to derive the activity name

    Returns
    -------
        list of logs, one per group; every trace contributes to exactly one of them
        (ties are resolved in favour of the group with the highest index)
    """
    # refactored to support both IM and IMf
    logs = [EventLog() for _ in groups]
    for trace in log:
        tallies = [
            sum(1 for event in trace if event[activity_key] in group)
            for group in groups
        ]
        ranking = sorted(enumerate(tallies), key=lambda pair: (pair[1], pair[0]), reverse=True)
        winner = ranking[0][0]
        projected = Trace()
        for event in trace:
            if event[activity_key] in groups[winner]:
                projected.append(event)
        logs[winner].append(projected)
    return logs
Example #22
0
def execute_script():
    """
    Builds a log with 10000 deep copies of the trace A, B, C, D and prints its length
    """
    template = Trace()
    for label in ("A", "B", "C", "D"):
        event = Event()
        event["concept:name"] = label
        template.append(event)

    log = EventLog()
    for _ in range(10000):
        log.append(deepcopy(template))
    print(len(log))
Example #23
0
def generate_log(pt0, no_traces=100):
    """
    Generate a log out of a process tree

    Parameters
    ------------
    pt0
        Process tree (a deep copy is used, the argument is not modified here)
    no_traces
        Number of traces to generate

    Returns
    ------------
    log
        Trace log object
    """
    # different taus must give different ID in log generation,
    # so the copy of the tree is wrapped in the GenerationTree class
    # instead of using the default process tree class
    tree = GenerationTree(deepcopy(pt0))
    log = EventLog()

    # the events get consecutive timestamps (one second apart) through the
    # whole log, starting from this epoch value
    curr_timestamp = 10000000

    for case_number in range(no_traces):
        labels = pt_util.project_execution_sequence_to_labels(execute(tree))
        trace = Trace()
        trace.attributes[xes.DEFAULT_NAME_KEY] = str(case_number)
        for label in labels:
            trace.append(
                Event({
                    xes.DEFAULT_NAME_KEY: label,
                    xes.DEFAULT_TIMESTAMP_KEY: datetime.datetime.fromtimestamp(curr_timestamp)
                }))
            curr_timestamp += 1
        log.append(trace)

    return log
Example #24
0
def apply(
    tree: ProcessTree,
    parameters: Optional[Dict[Union[str, Parameters],
                              Any]] = None) -> EventLog:
    """
    Gets the top-bottom playout of a process tree

    Parameters
    ---------------
    tree
        Process tree
    parameters
        Parameters of the algorithm, including:
            - Parameters.ACTIVITY_KEY: activity key
            - Parameters.NO_TRACES: number of traces that should be returned (default: 1000)

    Returns
    ---------------
    log
        Event log
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    no_traces = exec_utils.get_param_value(Parameters.NO_TRACES, parameters,
                                           1000)

    log = EventLog()
    for sequence in get_num_ex_sequences(tree, no_traces):
        trace = Trace()
        for element in sequence:
            # elements without a label do not produce events
            if element.label is None:
                continue
            trace.append(Event({activity_key: element.label}))
        log.append(trace)

    return log
Example #25
0
def keep_only_one_attribute_per_event(log, attribute_key):
    """
    Builds a new log whose events carry only one attribute of the original events

    Parameters
    ---------------
    log
        Event log (if None, an empty log is returned)
    attribute_key
        Attribute key (read from every event of the log)

    Returns
    ---------------
    new_log
        New event log; trace-level and log-level attributes are not copied
    """
    new_log = EventLog()
    if log is None:
        return new_log

    for trace in log:
        reduced_trace = Trace()
        for event in trace:
            reduced_trace.append(Event({attribute_key: event[attribute_key]}))
        new_log.append(reduced_trace)

    return new_log
Example #26
0
def form_fake_log(prefixes_keys, activity_key=xes_util.DEFAULT_NAME_KEY):
    """
    Form fake log for replay (putting each prefix as separate trace to align)

    Parameters
    ----------
    prefixes_keys
        Keys of the prefixes (to form a log with a given order); each key is a
        string of activities joined by constants.DEFAULT_VARIANT_SEP
    activity_key
        Activity key (must be provided if different from concept:name)

    Returns
    ----------
    fake_log
        Log with one trace per prefix, in the order of prefixes_keys
    """
    fake_log = EventLog()
    for prefix in prefixes_keys:
        trace = Trace()
        for label in prefix.split(constants.DEFAULT_VARIANT_SEP):
            trace.append(Event({activity_key: label}))
        fake_log.append(trace)
    return fake_log
Example #27
0
def check_is_fitting(*args, activity_key=xes_constants.DEFAULT_NAME_KEY):
    """
    Checks if a trace object is fit against a process model

    Parameters
    -----------------
    args
        Positional arguments: the first one is the trace object (trace / variant),
        the remaining ones describe the model (process tree, Petri net, BPMN, ...)
    activity_key
        Activity key (optional)

    Returns
    -----------------
    is_fit
        Boolean value (True if the trace fits; False if the trace does not).
        None is returned if the converted model is neither a process tree
        nor a Petri net tuple.
    """
    from pm4py.util import variants_util
    from pm4py.convert import convert_to_process_tree, convert_to_petri_net

    trace = args[0]
    model = args[1:]

    try:
        model = convert_to_process_tree(*model)
    except Exception:
        # the model cannot be expressed as a process tree, let's see if at least it can be expressed as a Petri net.
        # only Exception is caught, so that KeyboardInterrupt / SystemExit are not swallowed by the fallback
        model = convert_to_petri_net(*model)

    if not isinstance(trace, Trace):
        # the trace has been provided as a variant: build the corresponding Trace object
        activities = variants_util.get_activities_from_variant(trace)
        trace = Trace()
        for act in activities:
            trace.append(Event({activity_key: act}))

    if isinstance(model, ProcessTree):
        return __check_is_fit_process_tree(trace, model, activity_key=activity_key)
    elif isinstance(model, tuple) and isinstance(model[0], PetriNet):
        return __check_is_fit_petri_net(trace, model[0], model[1], model[2], activity_key=activity_key)
Example #28
0
def project(log, groups, activity_key):
    """
    Projects the log on the activity groups of a presumed sequence cut

    Parameters
    ----------
    log
        original log
    groups
        list of activity sets to be used in projection (activities can only appear in one group)
    activity_key
        key to use in the event to derive the activity name

    Returns
    -------
        list of logs, one per group and in the same order as the groups; every trace of
        the original log contributes exactly one (possibly empty) trace to each of them.
    """
    # refactored to support both IM and IMf
    logs = [EventLog() for _ in groups]
    for t in log:
        start = 0
        seen_activities = set()
        for idx in range(len(groups)):
            group = groups[idx]
            # the events in [start, end) are the segment of the trace associated to this group
            end = find_split_point(t, group, start, seen_activities, activity_key)
            projected = Trace()
            for pos in range(start, end):
                event = t[pos]
                # events of the segment belonging to other groups are dropped
                if event[activity_key] in group:
                    projected.append(event)
            logs[idx].append(projected)
            start = end
            seen_activities = seen_activities.union(set(group))
    return logs
def from_dicts_to_trace(event_dicts, trace_info_dict):
    """
    Builds a trace from the dictionary representation of its events

    Parameters
    ----------
    event_dicts
        Iterable of objects, each one converted through from_dict_to_event
    trace_info_dict
        Object passed as the attributes of the trace

    Returns
    -------
    trace
        Trace object containing the converted events
    """
    converted_events = []
    for event_dict in event_dicts:
        converted_events.append(from_dict_to_event(event_dict))
    return Trace(converted_events, attributes=trace_info_dict)
Example #30
0
def __approximate_alignment_on_sequence(pt: ProcessTree, trace: Trace, a_sets: Dict[ProcessTree, Set[str]],
                                        sa_sets: Dict[ProcessTree, Set[str]], ea_sets: Dict[ProcessTree, Set[str]],
                                        tau_flags: Dict[ProcessTree, bool], tl: int, th: int,
                                        parameters=None):
    """
    Approximates the alignment of a trace on a process tree having a sequence operator as root

    The events of the trace are distributed among the children of the sequence by means of a
    0/1 linear program (built here as dense rows and solved by __ilp_solve). Each resulting
    sub-trace is then aligned on the corresponding child through
    __approximate_alignment_for_trace, and the partial results are concatenated following the
    order of the children.

    Parameters
    -------------
    pt
        Process tree (the operator must be Operator.SEQUENCE and there must be at least one child)
    trace
        Trace to align (must contain at least one event)
    a_sets
        For each subtree, set against which the activity of an event lying strictly inside a
        sub-trace is checked (presumably all the activities of the subtree - to be confirmed)
    sa_sets
        For each subtree, set of the start activities
    ea_sets
        For each subtree, set of the end activities
    tau_flags
        For each subtree, flag that makes it free to assign no event to the subtree
    tl
        Forwarded unchanged to __approximate_alignment_for_trace
    th
        Forwarded unchanged to __approximate_alignment_for_trace
    parameters
        Parameters of the algorithm (Parameters.ACTIVITY_KEY is read here; the dictionary is
        also forwarded to the calls on the children)

    Returns
    -------------
    res
        Concatenation of the results obtained on the children, or None if the result on
        one of the children is None
    """
    if parameters is None:
        parameters = {}
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, DEFAULT_NAME_KEY)

    assert pt.operator == Operator.SEQUENCE
    assert len(pt.children) > 0
    assert len(trace) > 0

    n_events = len(trace)
    n_children = len(pt.children)

    # each of the following dictionaries maps to the index of the variable inside all_variables

    # x_i_j = 1 <=> assigns activity i to subtree j
    x__variables = {}

    # s_i_j = 1 <=> activity i is a start activity in the current sub-trace assigned to subtree j
    s__variables = {}

    # e_i_j = 1 <=> activity i is an end activity in the current sub-trace assigned to subtree j
    e__variables = {}

    # auxiliary u_j <=> u_j=1 if an activity is assigned to subtree j
    u__variables = {}

    # v_i_j = 1 <=> activity i is neither a start nor end-activity in the current sub-trace assigned to subtree j
    v__variables = {}

    s__costs = {}
    e__costs = {}
    u__costs = {}
    v__costs = {}

    all_variables = []

    for i, a in enumerate(trace):
        x__variables[i] = {}
        s__variables[i] = {}
        s__costs[i] = {}
        e__variables[i] = {}
        e__costs[i] = {}
        v__variables[i] = {}
        v__costs[i] = {}

        for j, subtree in enumerate(pt.children):
            all_variables.append('x_' + str(i) + '_' + str(j))
            x__variables[i][j] = len(all_variables) - 1
            all_variables.append('s_' + str(i) + '_' + str(j))
            s__variables[i][j] = len(all_variables) - 1
            all_variables.append('e_' + str(i) + '_' + str(j))
            e__variables[i][j] = len(all_variables) - 1
            all_variables.append('v_' + str(i) + '_' + str(j))
            v__variables[i][j] = len(all_variables) - 1
            # a unit cost is paid whenever the activity is not in the set expected for its role
            s__costs[i][j] = 0 if a[activity_key] in sa_sets[subtree] else 1
            e__costs[i][j] = 0 if a[activity_key] in ea_sets[subtree] else 1
            v__costs[i][j] = 0 if a[activity_key] in a_sets[subtree] else 1

    for j in range(n_children):
        all_variables.append('u_' + str(j))
        u__variables[j] = len(all_variables) - 1
        # define costs to not assign anything to subtree j
        if tau_flags[pt.children[j]]:
            u__costs[j] = 0
        elif sa_sets[pt.children[j]] & ea_sets[pt.children[j]]:
            # intersection of start-activities and end-activities is not empty
            u__costs[j] = 1
        else:
            # intersection of start-activities and end-activities is empty
            u__costs[j] = 2

    # from here on no variable is added, so the length of each row is fixed
    n_vars = len(all_variables)

    # objective function
    c = [0] * n_vars
    for i in range(n_events):
        for j in range(n_children):
            c[v__variables[i][j]] = v__costs[i][j]
            c[s__variables[i][j]] = s__costs[i][j]
            c[e__variables[i][j]] = e__costs[i][j]
    for j in range(n_children):
        # the cost of an empty subtree is (1 - u_j) * u__costs[j]; the constant part is dropped
        c[u__variables[j]] = -u__costs[j]

    # rows / right-hand sides of the inequalities (Aub * p <= bub) and of the equalities (Aeq * p = beq)
    Aub = []
    bub = []
    Aeq = []
    beq = []

    # every activity is assigned to one subtree
    for i in range(n_events):
        r = [0] * n_vars
        for j in range(n_children):
            r[x__variables[i][j]] = 1
        Aeq.append(r)
        beq.append(1)

    for j in range(n_children):
        r1 = [0] * n_vars
        r2 = [0] * n_vars
        # first activity is start activity
        r1[x__variables[0][j]] = 1
        r1[s__variables[0][j]] = -1
        # last activity is an end activity
        r2[x__variables[n_events - 1][j]] = 1
        r2[e__variables[n_events - 1][j]] = -1
        Aub.append(r1)
        Aub.append(r2)
        bub.append(0)
        bub.append(0)

    # define s_i_j variables
    for i in range(1, n_events):
        for j in range(n_children):
            r1 = [0] * n_vars
            r2 = [0] * n_vars
            r3 = [0] * n_vars
            # s_i_j >= x_i_j - x_(i-1)_j
            r1[s__variables[i][j]] = -1
            r1[x__variables[i][j]] = 1
            r1[x__variables[i - 1][j]] = -1
            # s_i_j <= x_i_j
            r2[s__variables[i][j]] = 1
            r2[x__variables[i][j]] = -1
            # s_i_j <= 1 - x_(i-1)_j
            r3[s__variables[i][j]] = 1
            r3[x__variables[i - 1][j]] = 1
            Aub.append(r1)
            Aub.append(r2)
            Aub.append(r3)
            bub.append(0)
            bub.append(0)
            bub.append(1)

    for i in range(n_events):
        # activity can be only for one subtree a start-activity
        r = [0] * n_vars
        for j in range(n_children):
            r[s__variables[i][j]] = 1
        Aub.append(r)
        bub.append(1)

    # define e_i_j variables
    for i in range(n_events - 1):
        for j in range(n_children):
            r1 = [0] * n_vars
            r2 = [0] * n_vars
            r3 = [0] * n_vars
            # e_i_j >= x_i_j - x_(i+1)_j
            r1[e__variables[i][j]] = -1
            r1[x__variables[i][j]] = 1
            r1[x__variables[i + 1][j]] = -1
            # e_i_j <= x_i_j
            r2[e__variables[i][j]] = 1
            r2[x__variables[i][j]] = -1
            # e_i_j <= 1 - x_(i+1)_j
            r3[e__variables[i][j]] = 1
            r3[x__variables[i + 1][j]] = 1
            Aub.append(r1)
            Aub.append(r2)
            Aub.append(r3)
            bub.append(0)
            bub.append(0)
            bub.append(1)
    for i in range(n_events):
        # activity can be only for one subtree an end-activity
        r = [0] * n_vars
        for j in range(n_children):
            r[e__variables[i][j]] = 1
        Aub.append(r)
        bub.append(1)

    # constraint - preserving sequence when assigning activities to subtrees
    for i in range(n_events - 1):
        for j in range(n_children):
            r = [0] * n_vars
            # if activity i goes to subtree j, activity i+1 goes to subtree j or to a later one
            for k in range(j, n_children):
                r[x__variables[i + 1][k]] = -1
            r[x__variables[i][j]] = 1
            Aub.append(r)
            bub.append(0)

    # define u_j variables
    for j in range(n_children):
        for i in range(n_events):
            # u_j >= x_i_j
            r = [0] * n_vars
            r[u__variables[j]] = -1
            r[x__variables[i][j]] = 1
            Aub.append(r)
            bub.append(0)
        r1 = [0] * n_vars
        r2 = [0] * n_vars
        r1[u__variables[j]] = 1
        r2[u__variables[j]] = 1

        # u_j <= sum of the s_i_j and u_j <= sum of the e_i_j
        for i in range(n_events):
            r1[s__variables[i][j]] = -1
            r2[e__variables[i][j]] = -1
        Aub.append(r1)
        Aub.append(r2)
        bub.append(0)
        bub.append(0)

    # define v_i_j variables
    # (all the children are considered: stopping at the first two would leave the v variables of the
    # further subtrees unconstrained, and would fail with a KeyError on a sequence having a single child)
    for i in range(n_events):
        for j in range(n_children):
            r1 = [0] * n_vars
            r2 = [0] * n_vars
            r3 = [0] * n_vars
            r4 = [0] * n_vars

            # v_i_j >= x_i_j - s_i_j - e_i_j
            r1[v__variables[i][j]] = -1
            r1[s__variables[i][j]] = -1
            r1[e__variables[i][j]] = -1
            r1[x__variables[i][j]] = 1

            # v_i_j <= x_i_j
            r2[v__variables[i][j]] = 1
            r2[x__variables[i][j]] = -1

            # v_i_j <= 1 - e_i_j
            r3[v__variables[i][j]] = 1
            r3[e__variables[i][j]] = 1

            # v_i_j <= 1 - s_i_j
            r4[v__variables[i][j]] = 1
            r4[s__variables[i][j]] = 1

            Aub.append(r1)
            Aub.append(r2)
            Aub.append(r3)
            Aub.append(r4)
            bub.append(0)
            bub.append(0)
            bub.append(1)
            bub.append(1)

    # every variable is bounded between 0 and 1
    for idx in range(n_vars):
        r = [0] * n_vars
        r[idx] = -1
        Aub.append(r)
        bub.append(0)
        r = [0] * n_vars
        r[idx] = 1
        Aub.append(r)
        bub.append(1)

    points = __ilp_solve(c, Aub, bub, Aeq, beq)

    # x_assigned[i][j] is True <=> the solution assigns activity i to subtree j
    x_assigned = {}
    for i, columns in x__variables.items():
        x_assigned[i] = {j: bool(points[var_idx] == 1) for j, var_idx in columns.items()}

    alignments_to_calculate: List[Tuple[ProcessTree, Trace]] = []
    for j, subtree in enumerate(pt.children):
        sub_trace = Trace()
        for i in range(n_events):
            if x_assigned[i][j]:
                sub_trace.append(trace[i])
        alignments_to_calculate.append((subtree, sub_trace))
    # calculate and compose alignments
    res = []
    for subtree, sub_trace in alignments_to_calculate:
        align_result = __approximate_alignment_for_trace(subtree, a_sets, sa_sets, ea_sets, tau_flags, sub_trace, tl,
                                                         th,
                                                         parameters=parameters)
        if align_result is None:
            # the alignment did not terminate correctly
            return None
        res.extend(align_result)

    return res