Example #1
0
def variant_to_trace(variant, parameters=None):
    """
    Converts a variant into a trace containing one event per activity

    Parameters
    -------------
    variant
        Variant, expressed either as a tuple/list of activities or as a
        string in which the activities are joined by the variant delimiter
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute under which the activity is
            stored in each event (default: xes_constants.DEFAULT_NAME_KEY)
            Parameters.PARAMETER_VARIANT_DELIMITER -> delimiter used to split
            a string variant (default: constants.DEFAULT_VARIANT_SEP)

    Returns
    -------------
    trace
        Trace object; it is empty when the variant is neither a
        tuple/list nor a string
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    variant_delimiter = exec_utils.get_param_value(
        Parameters.PARAMETER_VARIANT_DELIMITER, parameters,
        constants.DEFAULT_VARIANT_SEP)

    from pm4py.objects.log.obj import Trace, Event

    # isinstance (instead of an exact type comparison) also accepts
    # subclasses of tuple, list and str
    if isinstance(variant, (tuple, list)):
        activities = variant
    elif isinstance(variant, str):
        activities = variant.split(variant_delimiter)
    else:
        # unsupported representation: an empty trace is returned
        activities = ()

    trace = Trace()
    for act in activities:
        trace.append(Event({activity_key: act}))

    return trace
Example #2
0
def form_log_from_dictio_couple(first_cases_repr,
                                second_cases_repr,
                                enable_multiplier=False):
    """
    Form a log from a couple of case representations, to use for
    root cause analysis. Each case representation becomes a trace
    containing a single event built from that representation.

    Parameters
    -------------
    first_cases_repr
        First cases representation (accessed by integer index)
    second_cases_repr
        Second cases representation (accessed by integer index)
    enable_multiplier
        Enable balancing of classes: the smaller collection is repeated
        int(larger size / smaller size) times. When one of the two
        collections is empty no balancing is possible and each
        collection is inserted once.

    Returns
    ------------
    log
        Event log object, containing first the traces of the first
        collection and then the traces of the second collection
    """
    log = EventLog()

    multiplier_first = 1
    multiplier_second = 1
    # the ratio is defined only when both collections are non-empty
    # (this avoids a division by zero)
    if enable_multiplier and len(first_cases_repr) > 0 and len(second_cases_repr) > 0:
        multiplier_first = int(
            max(
                float(len(second_cases_repr)) / float(len(first_cases_repr)),
                1))
        multiplier_second = int(
            max(
                float(len(first_cases_repr)) / float(len(second_cases_repr)),
                1))

    for cases_repr, multiplier in ((first_cases_repr, multiplier_first),
                                   (second_cases_repr, multiplier_second)):
        for _ in range(multiplier):
            # access by index, so any integer-indexable container is accepted
            for i in range(len(cases_repr)):
                trace = Trace()
                trace.append(Event(cases_repr[i]))
                log.append(trace)

    return log
Example #3
0
def apply_tree_variants(variants, parameters=None):
    """
    Discovers a process tree from a dictionary of variants: a log having
    one trace per variant is built and passed to apply_tree

    Parameters
    ----------
    variants
        Variants (only the keys of the dictionary are used)
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
            (default: xes_constants.DEFAULT_NAME_KEY)

    Returns
    ----------
    process_tree
        Result of apply_tree on the log built from the variants
    """
    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)

    variants_log = EventLog()
    for variant in variants.keys():
        variant_trace = Trace()
        for activity in variants_util.get_activities_from_variant(variant):
            variant_trace.append(Event({activity_key: activity}))
        variants_log.append(variant_trace)

    return apply_tree(variants_log, parameters=parameters)
Example #4
0
def list_of_str_to_trace(activities: List[str]) -> Trace:
    """
    Builds a trace having one event per given activity, in the same order;
    each activity is stored under the "concept:name" attribute of its event
    """
    trace = Trace()
    for label in activities:
        trace.append(Event({"concept:name": label}))
    return trace
Example #5
0
def apply(df, parameters=None):
    """
    Converts a dataframe into a log that contains exactly one trace for each
    variant (only the control-flow perspective is kept)

    Parameters
    -------------
    df
        Dataframe
    parameters
        Parameters of the algorithm, including:
            pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute under
            which the activity is stored in the events
            (default: xes.DEFAULT_NAME_KEY)

    Returns
    -------------
    log
        Event log
    """
    from pm4py.statistics.traces.pandas import case_statistics

    parameters = {} if parameters is None else parameters
    variant_stats = case_statistics.get_variant_statistics(df, parameters=parameters)
    activity_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                                  xes.DEFAULT_NAME_KEY)

    log = EventLog()
    for variant_entry in variant_stats:
        # the variant is a string of activities separated by commas
        variant_trace = Trace()
        for activity in variant_entry['variant'].split(","):
            variant_trace.append(Event({activity_key: activity}))
        log.append(variant_trace)
    return log
Example #6
0
def apply(log, parameters=None):
    """
    Converts the given object to an event stream

    A pandas dataframe (when pandas is available) is turned into an
    EventStream of its records; an EventLog is flattened through
    __transform_event_log_to_event_stream; any other object is returned
    unchanged.

    Parameters
    ----------
    log
        Event log, pandas dataframe or another object
    parameters
        Parameters of the algorithm, including:
            Parameters.STREAM_POST_PROCESSING -> applies __postprocess_stream
            to the records of a dataframe (default: False)
            Parameters.CASE_ATTRIBUTE_PREFIX -> prefix of the case attributes
            (default: 'case:')
            Parameters.DEEP_COPY -> enables deepcopy, to avoid references
            between input and output objects (default: True)
            Parameters.INCLUDE_CASE_ATTRIBUTES -> includes the case
            attributes in the events (default: True)
            Parameters.COMPRESS -> applies __compress to the records of a
            dataframe (default: True)

    Returns
    -------
    stream
        Event stream (or the input object, if it is neither a dataframe
        nor an event log)
    """
    # pkgutil.find_loader is deprecated since Python 3.12;
    # importlib.util.find_spec is the supported way to test availability
    import importlib.util

    if parameters is None:
        parameters = {}

    stream_post_processing = exec_utils.get_param_value(
        Parameters.STREAM_POST_PROCESSING, parameters, False)
    case_pref = exec_utils.get_param_value(Parameters.CASE_ATTRIBUTE_PREFIX,
                                           parameters, 'case:')
    enable_deepcopy = exec_utils.get_param_value(Parameters.DEEP_COPY,
                                                 parameters, True)
    include_case_attributes = exec_utils.get_param_value(
        Parameters.INCLUDE_CASE_ATTRIBUTES, parameters, True)
    compress = exec_utils.get_param_value(Parameters.COMPRESS, parameters,
                                          True)

    if importlib.util.find_spec("pandas") is not None:
        import pandas
        if isinstance(log, pandas.DataFrame):
            # the extensions are detected on the dataframe, before it is
            # replaced by the stream
            extensions = __detect_extensions(log)
            list_events = pandas_utils.to_dict_records(log)
            if stream_post_processing:
                list_events = __postprocess_stream(list_events)
            if compress:
                list_events = __compress(list_events)
            list_events = [Event(record) for record in list_events]
            log = log_instance.EventStream(list_events,
                                           attributes={'origin': 'csv'})
            for ex in extensions:
                log.extensions[ex.name] = {
                    xes_constants.KEY_PREFIX: ex.prefix,
                    xes_constants.KEY_URI: ex.uri
                }
    if isinstance(log, EventLog):
        return __transform_event_log_to_event_stream(
            log,
            include_case_attributes=include_case_attributes,
            case_attribute_prefix=case_pref,
            enable_deepcopy=enable_deepcopy)
    return log
Example #7
0
def apply(tree: ProcessTree, parameters : Optional[Dict[Union[str, Parameters], Any]] = None) -> EventLog:
    """
    Performs an extensive playout of the process tree

    Parameters
    -------------
    tree
        Process tree
    parameters
        Possible parameters, including:
        - Parameters.MIN_TRACE_LENGTH => minimum length of a trace (default: 1)
        - Parameters.MAX_TRACE_LENGTH => maximum length of a trace (default: the value returned by
          bottomup_discovery.get_min_trace_length for the tree)
        - Parameters.MAX_LOOP_OCC => maximum number of occurrences for a loop (default: int(MAX_TRACE_LENGTH / 2))
        - Parameters.ACTIVITY_KEY => activity key (default: xes_constants.DEFAULT_NAME_KEY)
        - Parameters.MAX_LIMIT_NUM_TRACES => maximum number to the limit of traces; the playout shall stop when the number is reached (default: 100000)
        - Parameters.RETURN_SET_STRINGS => if set, the traces computed by the playout are returned as they are,
          without building an event log (default: False)

    Returns
    -------------
    log
        Event log (or the raw traces of the playout, when Parameters.RETURN_SET_STRINGS is set)
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    # memory saving: a single Event object is allocated for each activity and reused in all the traces of the
    # returned log. The activities of the tree are read from its footprints
    fp_tree = fp_discovery.apply(tree, parameters=parameters)
    event_of_activity = {}
    for act in fp_tree["activities"]:
        event_of_activity[act] = Event({activity_key: act})

    min_allowed_trace_length = bottomup_discovery.get_min_trace_length(tree, parameters=parameters)
    min_trace_length = exec_utils.get_param_value(Parameters.MIN_TRACE_LENGTH, parameters, 1)
    max_trace_length = exec_utils.get_param_value(Parameters.MAX_TRACE_LENGTH, parameters, min_allowed_trace_length)
    max_loop_occ = exec_utils.get_param_value(Parameters.MAX_LOOP_OCC, parameters, int(max_trace_length / 2))
    max_limit_num_traces = exec_utils.get_param_value(Parameters.MAX_LIMIT_NUM_TRACES, parameters, 100000)
    return_set_strings = exec_utils.get_param_value(Parameters.RETURN_SET_STRINGS, parameters, False)

    bottomup = bottomup_discovery.get_bottomup_nodes(tree, parameters=parameters)
    min_rem_dict = bottomup_discovery.get_min_rem_dict(tree, parameters=parameters)
    max_rem_dict = bottomup_discovery.get_max_rem_dict(tree, parameters=parameters)

    # the playout of each node is stored in the dictionary, following the bottom-up order of the nodes
    playout_dictio = {}
    for node in bottomup:
        get_playout(node, playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict,
                    max_rem_dict, max_limit_num_traces)
    tree_playout_traces = playout_dictio[tree][TRACES]

    if return_set_strings:
        return tree_playout_traces

    log = EventLog()
    for activities_sequence in tree_playout_traces:
        trace = Trace()
        for act in activities_sequence:
            trace.append(event_of_activity[act])
        log.append(trace)

    return log
Example #8
0
def execute_script():
    """
    Builds a trace with the four activities A, B, C, D, inserts 10000 deep
    copies of it in an event log and prints the number of traces of the log
    """
    template_trace = Trace()
    for label in ("A", "B", "C", "D"):
        event = Event()
        event["concept:name"] = label
        template_trace.append(event)

    log = EventLog()
    for _ in range(10000):
        # each inserted trace is an independent copy of the template
        log.append(deepcopy(template_trace))
    print(len(log))
def apply(df, parameters=None):
    """
    Converts a dataframe into a log that contains, for each variant, as many
    traces as the count of the variant (only the control-flow perspective
    is kept)

    Parameters
    -------------
    df
        Dataframe
    parameters
        Parameters of the algorithm, including:
            RETURN_VARIANTS -> also returns, for each variant, the
            positions of its traces inside the log (default: False)
            pm4_constants.PARAMETER_CONSTANT_CASEID_KEY -> key of the
            variant statistics holding the count of the variant
            (default: pm4_constants.CASE_CONCEPT_NAME)
            pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute under
            which the activity is stored in the events
            (default: xes.DEFAULT_NAME_KEY)

    Returns
    -------------
    log
        Event log (when RETURN_VARIANTS is set, a tuple made of the log and
        of the dictionary associating each variant to the indexes of its
        traces in the log)
    """
    from pm4py.statistics.traces.pandas import case_statistics

    if parameters is None:
        parameters = {}

    return_variants = parameters.get(RETURN_VARIANTS, False)
    case_glue = parameters.get(pm4_constants.PARAMETER_CONSTANT_CASEID_KEY,
                               pm4_constants.CASE_CONCEPT_NAME)
    activity_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                                  xes.DEFAULT_NAME_KEY)

    variant_stats = case_statistics.get_variant_statistics(
        df, parameters=parameters)

    log = EventLog()
    all_variants_log = {}
    for variant_entry in variant_stats:
        variant_name = variant_entry['variant']
        activities = variant_name.split(",")
        variant_count = variant_entry[case_glue]

        trace = Trace()
        for activity in activities:
            trace.append(Event({activity_key: activity}))

        trace_indexes = []
        all_variants_log[variant_name] = trace_indexes
        # the very same Trace object is inserted once per case of the variant
        for _ in range(variant_count):
            log.append(trace)
            trace_indexes.append(len(log) - 1)

    if return_variants:
        return log, all_variants_log

    return log
Example #10
0
def insert_artificial_start_end(log: EventLog, parameters: Optional[Dict[Any, Any]] = None) -> EventLog:
    """
    Adds, in place, an artificial start event at the beginning and an artificial end event
    at the end of every trace of the event log

    When the first (resp. last) event of a non-empty trace carries the timestamp attribute,
    the artificial start (resp. end) event gets that timestamp minus (resp. plus) one second;
    otherwise the artificial event has no timestamp.

    Parameters
    -------------------
    log
        Event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY: the activity
        - Parameters.TIMESTAMP_KEY: the timestamp
        - Parameters.PARAM_ARTIFICIAL_START_ACTIVITY: the name of the artificial start activity
        - Parameters.PARAM_ARTIFICIAL_END_ACTIVITY: the name of the artificial end activity

    Returns
    ------------------
    log
        Enriched log (the same object received as input)
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, xes_constants.DEFAULT_TIMESTAMP_KEY)

    artificial_start_activity = exec_utils.get_param_value(Parameters.PARAM_ARTIFICIAL_START_ACTIVITY, parameters,
                                                           constants.DEFAULT_ARTIFICIAL_START_ACTIVITY)
    artificial_end_activity = exec_utils.get_param_value(Parameters.PARAM_ARTIFICIAL_END_ACTIVITY, parameters,
                                                         constants.DEFAULT_ARTIFICIAL_END_ACTIVITY)

    for trace in log:
        artificial_start = Event({activity_key: artificial_start_activity})
        artificial_end = Event({activity_key: artificial_end_activity})
        if trace:
            # the boundary events are read before the trace is modified
            first_event = trace[0]
            if timestamp_key in first_event:
                artificial_start[timestamp_key] = first_event[timestamp_key] - datetime.timedelta(seconds=1)
            last_event = trace[-1]
            if timestamp_key in last_event:
                artificial_end[timestamp_key] = last_event[timestamp_key] + datetime.timedelta(seconds=1)
        trace.insert(0, artificial_start)
        trace.append(artificial_end)

    return log
Example #11
0
def acyclic_net_variants(net,
                         initial_marking,
                         final_marking,
                         activity_key=xes_util.DEFAULT_NAME_KEY):
    """
    Extracts, from an acyclic accepting Petri net, the variants (in form of traces) that can be
    replayed on the net between the initial and the final marking.
    Warning: the function explores the markings of the net and does not explore again a pair
    (marking, partial trace) that has already been encountered. If the accepting Petri net contains
    loops, the method will not work properly.

    Parameters
    ----------
    :param net: An acyclic workflow net
    :param initial_marking: The initial marking of the net.
    :param final_marking: The final marking of the net.
    :param activity_key: activity key to use

    Returns
    -------
    :return: variants: :class:`list` List of variants - in the form of Trace objects - obtainable executing the net

    """
    to_explore = {(initial_marking, ())}
    explored = set()
    label_sequences = set()
    while to_explore:
        curr_pair = to_explore.pop()
        curr_marking, curr_labels = curr_pair
        for transition in semantics.enabled_transitions(net, curr_marking):
            # invisible transitions (no label) do not extend the partial trace
            if transition.label is None:
                next_labels = curr_labels
            else:
                next_labels = curr_labels + (transition.label, )
            next_marking = semantics.execute(transition, net, curr_marking)

            if next_marking == final_marking:
                label_sequences.add(next_labels)
                continue

            # the pair is scheduled only if it has not been explored yet and differs from the current one
            next_pair = (next_marking, next_labels)
            if next_pair not in explored and curr_pair != next_pair:
                to_explore.add(next_pair)
        explored.add(curr_pair)

    trace_variants = []
    for labels in label_sequences:
        trace = Trace()
        for activity_label in labels:
            trace.append(Event({activity_key: activity_label}))
        trace_variants.append(trace)
    return trace_variants
Example #12
0
 def read_trace(self) -> Trace:
     """
     Reads the next trace and advances the internal position

     The events of the trace are taken from self.activities and
     self.timestamps, in the index range starting at self.c_ind[self.i]
     and spanning self.c_counts[self.i] positions; the case identifier
     is self.c_unq[self.i].

     Returns
     -------------
     trace
         Trace object, or None when all the traces have been read
     """
     if not self.i < self.no_traces:
         return None

     case_id = self.c_unq[self.i]
     start = self.c_ind[self.i]
     stop = start + self.c_counts[self.i]

     trace = Trace(attributes={xes_constants.DEFAULT_TRACEID_KEY: case_id})
     for pos in range(start, stop):
         trace.append(
             Event({
                 xes_constants.DEFAULT_NAME_KEY: self.activities[pos],
                 xes_constants.DEFAULT_TIMESTAMP_KEY: self.timestamps[pos]
             }))

     self.i += 1
     return trace
Example #13
0
def parse_event_log_string(
        traces: Collection[str],
        sep: str = ",",
        activity_key: str = xes_constants.DEFAULT_NAME_KEY,
        timestamp_key: str = xes_constants.DEFAULT_TIMESTAMP_KEY,
        case_id_key: str = xes_constants.DEFAULT_TRACEID_KEY) -> EventLog:
    """
    Builds an event log from a collection of traces expressed as strings
    (e.g., ["A,B,C,D", "A,C,B,D", "A,D"])

    The case identifier of each trace is its position in the collection
    (as a string). The timestamps are artificial: they are obtained from a
    counter that starts at 10000000 and grows by one for each event of the
    log.

    Parameters
    ------------------
    traces
        Collection of traces expressed as strings
    sep
        Separator used to split the activities of a string trace
    activity_key
        The attribute that should be used as activity
    timestamp_key
        The attribute that should be used as timestamp
    case_id_key
        The attribute that should be used as case identifier

    Returns
    -----------------
    log
        Event log
    """
    log = EventLog()
    next_timestamp = 10000000
    for case_index, trace_string in enumerate(traces):
        activities = trace_string.split(sep)
        parsed_trace = Trace()
        parsed_trace.attributes[case_id_key] = str(case_index)
        for activity in activities:
            parsed_trace.append(
                Event({
                    activity_key: activity,
                    timestamp_key: datetime.datetime.fromtimestamp(next_timestamp)
                }))
            # the counter is shared by all the traces of the log
            next_timestamp += 1
        log.append(parsed_trace)
    return log
Example #14
0
def generate_log(pt0, no_traces=100):
    """
    Generates a log by executing a process tree several times

    Parameters
    ------------
    pt0
        Process tree (a deep copy is used, the given object is not modified
        by this function)
    no_traces
        Number of traces to generate

    Returns
    ------------
    log
        Event log object
    """
    # different taus must get different IDs during the generation of the
    # log, so the copy of the tree is wrapped in a GenerationTree instead
    # of using the default process tree class
    generation_tree = GenerationTree(deepcopy(pt0))
    log = EventLog()

    # every event gets a timestamp obtained from a counter that grows by
    # one at each event
    curr_timestamp = 10000000

    for trace_index in range(no_traces):
        ex_seq = execute(generation_tree)
        labels = pt_util.project_execution_sequence_to_labels(ex_seq)

        trace = Trace()
        trace.attributes[xes.DEFAULT_NAME_KEY] = str(trace_index)
        for label in labels:
            trace.append(
                Event({
                    xes.DEFAULT_NAME_KEY: label,
                    xes.DEFAULT_TIMESTAMP_KEY: datetime.datetime.fromtimestamp(curr_timestamp)
                }))
            curr_timestamp += 1

        log.append(trace)

    return log
Example #15
0
def apply(
    tree: ProcessTree,
    parameters: Optional[Dict[Union[str, Parameters],
                              Any]] = None) -> EventLog:
    """
    Computes the top-bottom playout of a process tree

    Parameters
    ---------------
    tree
        Process tree
    parameters
        Parameters of the algorithm, including:
            - Parameters.ACTIVITY_KEY: activity key
              (default: xes_constants.DEFAULT_NAME_KEY)
            - Parameters.NO_TRACES: number of traces that should be returned
              (default: 1000)

    Returns
    ---------------
    log
        Event log having one trace per execution sequence
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    no_traces = exec_utils.get_param_value(
        Parameters.NO_TRACES, parameters, 1000)

    log = EventLog()
    for sequence in get_num_ex_sequences(tree, no_traces):
        trace = Trace()
        for element in sequence:
            # elements without a label do not produce any event
            if element.label is None:
                continue
            trace.append(Event({activity_key: element.label}))
        log.append(trace)

    return log
Example #16
0
def keep_only_one_attribute_per_event(log, attribute_key):
    """
    Builds a new log in which every event keeps only the given attribute

    Parameters
    ---------------
    log
        Event log (if None, an empty log is returned)
    attribute_key
        Attribute key (read from every event of the log)

    Returns
    ---------------
    new_log
        New event log, with the same traces and events of the original
        one, restricted to the given attribute
    """
    new_log = EventLog()
    if log is None:
        return new_log

    for trace in log:
        reduced_trace = Trace()
        for event in trace:
            reduced_trace.append(Event({attribute_key: event[attribute_key]}))
        new_log.append(reduced_trace)

    return new_log
Example #17
0
def form_fake_log(prefixes_keys, activity_key=xes_util.DEFAULT_NAME_KEY):
    """
    Forms a fake log for the replay, in which every prefix becomes a
    separate trace to align

    Parameters
    ----------
    prefixes_keys
        Keys of the prefixes (the traces of the log follow their order);
        each key is split using constants.DEFAULT_VARIANT_SEP
    activity_key
        Activity key (must be provided if different from concept:name)

    Returns
    ----------
    fake_log
        Event log having one trace per prefix
    """
    fake_log = EventLog()
    for prefix in prefixes_keys:
        prefix_trace = Trace()
        for activity in prefix.split(constants.DEFAULT_VARIANT_SEP):
            prefix_trace.append(Event({activity_key: activity}))
        fake_log.append(prefix_trace)
    return fake_log
Example #18
0
def check_is_fitting(*args, activity_key=xes_constants.DEFAULT_NAME_KEY):
    """
    Checks if a trace object is fit against a process model

    The positional arguments are the trace followed by the objects that
    describe the model.

    Parameters
    -----------------
    trace
        Trace object (trace / variant); a value that is not a Trace is
        treated as a variant and converted to a trace
    model
        Model (process tree, Petri net, BPMN, ...)
    activity_key
        Activity key (optional)

    Returns
    -----------------
    is_fit
        Boolean value (True if the trace fits; False if the trace does not).
        None is returned if the converted model is neither a process tree
        nor a tuple starting with a Petri net.
    """
    from pm4py.util import variants_util
    from pm4py.convert import convert_to_process_tree, convert_to_petri_net

    trace = args[0]
    model = args[1:]

    try:
        model = convert_to_process_tree(*model)
    except Exception:
        # the model cannot be expressed as a process tree: try to express it as a Petri net.
        # Exception (and not a bare except) is caught, so that KeyboardInterrupt and SystemExit
        # are not swallowed
        model = convert_to_petri_net(*model)

    if not isinstance(trace, Trace):
        activities = variants_util.get_activities_from_variant(trace)
        trace = Trace()
        for act in activities:
            trace.append(Event({activity_key: act}))

    if isinstance(model, ProcessTree):
        return __check_is_fit_process_tree(trace, model, activity_key=activity_key)
    elif isinstance(model, tuple) and isinstance(model[0], PetriNet):
        return __check_is_fit_petri_net(trace, model[0], model[1], model[2], activity_key=activity_key)

    # unsupported model type after the conversion
    return None
Example #19
0
def import_from_context(context, num_traces, parameters=None):
    """
    Import a XES log from an iterparse context

    Parameters
    --------------
    context
        Iterparse context (iterable of (event, element) pairs)
    num_traces
        Number of traces of the XES log (used as total of the progress bar)
    parameters
        Parameters of the algorithm, including:
            Parameters.MAX_TRACES -> maximum number of traces to import (default: sys.maxsize)
            Parameters.TIMESTAMP_SORT -> sorts the log by timestamp (default: False)
            Parameters.TIMESTAMP_KEY -> attribute used when the sort is enabled
            (default: xes_constants.DEFAULT_TIMESTAMP_KEY)
            Parameters.REVERSE_SORT -> direction of the sort (default: False)
            Parameters.SHOW_PROGRESS_BAR -> shows a progress bar, if tqdm is available (default: True)

    Returns
    --------------
    log
        Event log

    Raises
    --------------
    SyntaxError
        If the structure of the file is not valid (nested <event>/<trace> tags, more than one <log> tag,
        elements found outside of the <log> tag, or no <log> tag at all)
    """
    # pkgutil.find_loader is deprecated since Python 3.12; importlib.util.find_spec is the supported way
    # to test if an optional package is available
    import importlib.util

    if parameters is None:
        parameters = {}

    max_no_traces_to_import = exec_utils.get_param_value(Parameters.MAX_TRACES, parameters, sys.maxsize)
    timestamp_sort = exec_utils.get_param_value(Parameters.TIMESTAMP_SORT, parameters, False)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                               xes_constants.DEFAULT_TIMESTAMP_KEY)
    reverse_sort = exec_utils.get_param_value(Parameters.REVERSE_SORT, parameters, False)
    show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)

    date_parser = dt_parser.get()
    progress = None
    if importlib.util.find_spec("tqdm") is not None and show_progress_bar:
        from tqdm.auto import tqdm
        progress = tqdm(total=num_traces, desc="parsing log, completed traces :: ")

    log = None
    trace = None
    event = None

    # associates each open XML element to the container in which its child attributes are stored
    tree = {}

    for tree_event, elem in context:
        if tree_event == _EVENT_START:  # starting to read
            parent = tree[elem.getparent()] if elem.getparent() in tree else None

            if elem.tag.endswith(xes_constants.TAG_STRING):
                if parent is not None:
                    tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY),
                                             elem.get(xes_constants.KEY_VALUE), tree)
                continue

            elif elem.tag.endswith(xes_constants.TAG_DATE):
                try:
                    dt = date_parser.apply(elem.get(xes_constants.KEY_VALUE))
                    tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY), dt, tree)
                except TypeError:
                    logging.info("failed to parse date: " + str(elem.get(xes_constants.KEY_VALUE)))
                except ValueError:
                    logging.info("failed to parse date: " + str(elem.get(xes_constants.KEY_VALUE)))
                continue

            elif elem.tag.endswith(xes_constants.TAG_EVENT):
                if event is not None:
                    raise SyntaxError('file contains <event> in another <event> tag')
                event = Event()
                tree[elem] = event
                continue

            elif elem.tag.endswith(xes_constants.TAG_TRACE):
                # without this check, len(log) would fail with a TypeError when no <log> tag has been opened
                if log is None:
                    raise SyntaxError('trace found outside of <log> tag')
                if len(log) >= max_no_traces_to_import:
                    break
                if trace is not None:
                    raise SyntaxError('file contains <trace> in another <trace> tag')
                trace = Trace()
                tree[elem] = trace.attributes
                continue

            elif elem.tag.endswith(xes_constants.TAG_FLOAT):
                if parent is not None:
                    try:
                        val = float(elem.get(xes_constants.KEY_VALUE))
                        tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY), val, tree)
                    except ValueError:
                        logging.info("failed to parse float: " + str(elem.get(xes_constants.KEY_VALUE)))
                continue

            elif elem.tag.endswith(xes_constants.TAG_INT):
                if parent is not None:
                    try:
                        val = int(elem.get(xes_constants.KEY_VALUE))
                        tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY), val, tree)
                    except ValueError:
                        logging.info("failed to parse int: " + str(elem.get(xes_constants.KEY_VALUE)))
                continue

            elif elem.tag.endswith(xes_constants.TAG_BOOLEAN):
                if parent is not None:
                    try:
                        val0 = elem.get(xes_constants.KEY_VALUE)
                        # only the (case-insensitive) string "true" is read as True
                        val = False
                        if str(val0).lower() == "true":
                            val = True
                        tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY), val, tree)
                    except ValueError:
                        logging.info("failed to parse boolean: " + str(elem.get(xes_constants.KEY_VALUE)))
                continue

            elif elem.tag.endswith(xes_constants.TAG_LIST):
                if parent is not None:
                    # lists have no value, hence we put None as a value
                    tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY), None, tree)
                continue

            elif elem.tag.endswith(xes_constants.TAG_ID):
                if parent is not None:
                    tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY),
                                             elem.get(xes_constants.KEY_VALUE), tree)
                continue

            elif elem.tag.endswith(xes_constants.TAG_EXTENSION):
                if log is None:
                    raise SyntaxError('extension found outside of <log> tag')
                if elem.get(xes_constants.KEY_NAME) is not None and elem.get(
                        xes_constants.KEY_PREFIX) is not None and elem.get(xes_constants.KEY_URI) is not None:
                    log.extensions[elem.get(xes_constants.KEY_NAME)] = {
                        xes_constants.KEY_PREFIX: elem.get(xes_constants.KEY_PREFIX),
                        xes_constants.KEY_URI: elem.get(xes_constants.KEY_URI)}
                continue

            elif elem.tag.endswith(xes_constants.TAG_GLOBAL):
                if log is None:
                    raise SyntaxError('global found outside of <log> tag')
                if elem.get(xes_constants.KEY_SCOPE) is not None:
                    log.omni_present[elem.get(xes_constants.KEY_SCOPE)] = {}
                    tree[elem] = log.omni_present[elem.get(xes_constants.KEY_SCOPE)]
                continue

            elif elem.tag.endswith(xes_constants.TAG_CLASSIFIER):
                if log is None:
                    raise SyntaxError('classifier found outside of <log> tag')
                if elem.get(xes_constants.KEY_KEYS) is not None:
                    classifier_value = elem.get(xes_constants.KEY_KEYS)
                    # keys enclosed in single quotes are split on the quotes, otherwise on whitespace
                    if "'" in classifier_value:
                        log.classifiers[elem.get(xes_constants.KEY_NAME)] = [x for x in classifier_value.split("'")
                                                                             if x.strip()]
                    else:
                        log.classifiers[elem.get(xes_constants.KEY_NAME)] = classifier_value.split()
                continue

            elif elem.tag.endswith(xes_constants.TAG_LOG):
                if log is not None:
                    raise SyntaxError('file contains > 1 <log> tags')
                log = EventLog()
                tree[elem] = log.attributes
                continue

        elif tree_event == _EVENT_END:
            # the element is closed: it is removed from the containers and its content is released
            if elem in tree:
                del tree[elem]
            elem.clear()
            if elem.getprevious() is not None:
                try:
                    del elem.getparent()[0]
                except TypeError:
                    pass

            if elem.tag.endswith(xes_constants.TAG_EVENT):
                if trace is not None:
                    trace.append(event)
                    event = None
                continue

            elif elem.tag.endswith(xes_constants.TAG_TRACE):
                log.append(trace)

                if progress is not None:
                    progress.update()

                trace = None
                continue

            elif elem.tag.endswith(xes_constants.TAG_LOG):
                continue

    # gracefully close progress bar
    if progress is not None:
        progress.close()
    del context, progress

    # without this check, the statements below would fail on None when the context contains no <log> tag
    if log is None:
        raise SyntaxError('file does not contain a <log> tag')

    if timestamp_sort:
        log = sorting.sort_timestamp(log, timestamp_key=timestamp_key, reverse_sort=reverse_sort)

    # sets the activity key as default classifier in the log's properties
    log.properties[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_constants.DEFAULT_NAME_KEY
    log.properties[constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = xes_constants.DEFAULT_NAME_KEY
    # sets the default timestamp key
    log.properties[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] = xes_constants.DEFAULT_TIMESTAMP_KEY
    # sets the default resource key
    log.properties[constants.PARAMETER_CONSTANT_RESOURCE_KEY] = xes_constants.DEFAULT_RESOURCE_KEY
    # sets the default transition key
    log.properties[constants.PARAMETER_CONSTANT_TRANSITION_KEY] = xes_constants.DEFAULT_TRANSITION_KEY
    # sets the default group key
    log.properties[constants.PARAMETER_CONSTANT_GROUP_KEY] = xes_constants.DEFAULT_GROUP_KEY

    return log
Example #20
0
def import_log_from_file_object(f,
                                encoding,
                                file_size=sys.maxsize,
                                parameters=None):
    """
    Import a log object from a (XML) file object, scanning it line by line

    Parameters
    -----------
    f
        file object; it must support seek() and yield bytes when iterated
        (every line is decoded with the provided encoding)
    encoding
        Encoding used to decode each line
    file_size
        Size of the file (measured on disk)
    parameters
        Parameters of the algorithm, including
            Parameters.TIMESTAMP_SORT -> Specify if we should sort log by timestamp
            Parameters.TIMESTAMP_KEY -> If sort is enabled, then sort the log by using this key
            Parameters.REVERSE_SORT -> Specify in which direction the log should be sorted
            Parameters.MAX_TRACES -> Specify the maximum number of traces to import from the log (read in order in the XML file)
            Parameters.MAX_BYTES -> Maximum number of bytes to read
            Parameters.SKIP_BYTES -> Number of bytes to skip
            Parameters.SET_ATTRIBUTES_TO_READ -> Names of the attributes that should be parsed. If not specified,
                                                then, all the attributes are parsed.

    Returns
    -----------
    log
        Log file
    """
    values_dict = {}
    date_parser = dt_parser.get()

    set_attributes_to_read = exec_utils.get_param_value(
        Parameters.SET_ATTRIBUTES_TO_READ, parameters, None)
    max_no_traces_to_import = exec_utils.get_param_value(
        Parameters.MAX_TRACES, parameters, sys.maxsize)
    timestamp_sort = exec_utils.get_param_value(Parameters.TIMESTAMP_SORT,
                                                parameters, False)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    reverse_sort = exec_utils.get_param_value(Parameters.REVERSE_SORT,
                                              parameters, False)

    # the offset is a number of bytes, so the neutral default is 0
    skip_bytes = exec_utils.get_param_value(Parameters.SKIP_BYTES, parameters,
                                            0)
    max_bytes_to_read = exec_utils.get_param_value(Parameters.MAX_BYTES,
                                                   parameters, sys.maxsize)

    # when the file is bigger than the allowed amount of bytes, only its tail is read
    if file_size > max_bytes_to_read:
        skip_bytes = file_size - max_bytes_to_read

    log = EventLog()
    tracecount = 0
    trace = None
    event = None

    f.seek(skip_bytes)

    for line in f:
        # stop as soon as the requested number of traces has been collected; checking here
        # (instead of "tracecount > max" after the append) avoids importing one trace too many
        # and also honours a limit of 0
        if tracecount >= max_no_traces_to_import:
            break

        # an attribute line such as <string key="k" value="v"/> splits into exactly 5 pieces
        content = line.decode(encoding).split("\"")
        tag = content[0].split("<")[-1]

        if trace is None:
            if tag.startswith("trace"):
                trace = Trace()
        elif event is not None:
            # inside an event: either one of its attributes or its closing tag
            if len(content) == 5:
                key, value = read_attribute_key_value(
                    tag, content, date_parser, values_dict,
                    set_attributes_to_read)
                if value is not None:
                    event[key] = value
            elif tag.startswith("/event"):
                trace.append(event)
                event = None
        elif tag.startswith("event"):
            event = Event()
        elif len(content) == 5:
            # attribute at the trace level
            key, value = read_attribute_key_value(
                tag, content, date_parser, values_dict,
                set_attributes_to_read)
            if value is not None:
                trace.attributes[key] = value
        elif tag.startswith("/trace"):
            log.append(trace)
            tracecount += 1
            trace = None

    if timestamp_sort:
        log = sorting.sort_timestamp(log,
                                     timestamp_key=timestamp_key,
                                     reverse_sort=reverse_sort)

    # sets the activity key as default classifier in the log's properties
    log.properties[
        constants.
        PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_constants.DEFAULT_NAME_KEY
    log.properties[
        constants.
        PARAMETER_CONSTANT_ATTRIBUTE_KEY] = xes_constants.DEFAULT_NAME_KEY
    # sets the default timestamp key
    log.properties[
        constants.
        PARAMETER_CONSTANT_TIMESTAMP_KEY] = xes_constants.DEFAULT_TIMESTAMP_KEY
    # sets the default resource key
    log.properties[
        constants.
        PARAMETER_CONSTANT_RESOURCE_KEY] = xes_constants.DEFAULT_RESOURCE_KEY
    # sets the default transition key
    log.properties[
        constants.
        PARAMETER_CONSTANT_TRANSITION_KEY] = xes_constants.DEFAULT_TRANSITION_KEY
    # sets the default group key
    log.properties[
        constants.
        PARAMETER_CONSTANT_GROUP_KEY] = xes_constants.DEFAULT_GROUP_KEY

    return log
Example #21
0
def to_interval(log, parameters=None):
    """
    Turns a lifecycle log (each event has one timestamp and a lifecycle transition)
    into an interval log (each event has a start and a completion timestamp)

    Parameters
    -------------
    log
        Log (expressed in the lifecycle format)
    parameters
        Optional dictionary of parameters: the timestamp, start timestamp, transition and activity key constants,
        plus "business_hours", "worktiming" and "weekends"

    Returns
    -------------
    log
        Interval event log; the input log is returned unchanged when it is None, empty,
        or already recognised as an interval log
    """
    if parameters is None:
        parameters = {}

    timestamp_key = parameters.get(
        constants.PARAMETER_CONSTANT_TIMESTAMP_KEY, xes.DEFAULT_TIMESTAMP_KEY)
    start_timestamp_key = parameters.get(
        constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY,
        xes.DEFAULT_START_TIMESTAMP_KEY)
    transition_key = parameters.get(
        constants.PARAMETER_CONSTANT_TRANSITION_KEY,
        xes.DEFAULT_TRANSITION_KEY)
    activity_key = parameters.get(constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                                  xes.DEFAULT_NAME_KEY)
    business_hours = parameters.get("business_hours", False)
    worktiming = parameters.get("worktiming", [7, 17])
    weekends = parameters.get("weekends", [6, 7])

    # nothing to convert
    if log is None or len(log) == 0:
        return log

    # the log declares itself as an interval log
    if "PM4PY_TYPE" in log.attributes and log.attributes[
            "PM4PY_TYPE"] == "interval":
        return log

    # the first event already carries a start timestamp
    first_trace = log[0]
    if first_trace is not None and len(first_trace) > 0:
        if start_timestamp_key in first_trace[0]:
            return log

    # keys that are rebuilt explicitly and therefore not copied from the source events
    rebuilt_keys = (timestamp_key, transition_key)

    interval_log = EventLog()
    interval_log.attributes["PM4PY_TYPE"] = "interval"

    for trace in log:
        interval_trace = Trace()
        for name in trace.attributes:
            interval_trace.attributes[name] = trace.attributes[name]

        # start events still waiting for their completion, per activity (FIFO)
        pending_starts = {}
        for event in trace:
            activity = event[activity_key]
            if transition_key in event:
                transition = event[transition_key]
            else:
                transition = "complete"
            timestamp = event[timestamp_key]
            lifecycle = transition.lower()

            if lifecycle == "start":
                pending_starts.setdefault(activity, []).append(event)
                continue
            if lifecycle != "complete":
                # other lifecycle transitions are ignored
                continue

            # without a matching start event, the interval collapses on the completion timestamp
            start_event = None
            start_timestamp = timestamp
            queue = pending_starts.get(activity)
            if queue:
                start_event = queue.pop(0)
                start_timestamp = start_event[timestamp_key]

            interval_event = Event()
            for name in event:
                if name not in rebuilt_keys:
                    interval_event[name] = event[name]
            if start_event is not None:
                for name in start_event:
                    if name not in rebuilt_keys:
                        interval_event["@@startevent_" +
                                       name] = start_event[name]
            interval_event[start_timestamp_key] = start_timestamp
            interval_event[timestamp_key] = timestamp
            elapsed = timestamp - start_timestamp
            interval_event["@@duration"] = elapsed.total_seconds()

            if business_hours:
                bh = BusinessHours(start_timestamp.replace(tzinfo=None),
                                   timestamp.replace(tzinfo=None),
                                   worktiming=worktiming,
                                   weekends=weekends)
                interval_event["@@approx_bh_duration"] = bh.getseconds()

            interval_trace.append(interval_event)

        interval_trace = sorting.sort_timestamp_trace(interval_trace,
                                                      start_timestamp_key)
        interval_log.append(interval_trace)

    return interval_log
def from_dict_to_event(event_dict):
    """
    Builds an Event out of a dictionary

    When the dictionary has a "time:timestamp" entry, its value is run through the
    date parser first; the parsed value is also written back into event_dict.

    Parameters
    -------------
    event_dict
        Dictionary of the attributes of the event

    Returns
    -------------
    event
        Event object
    """
    ts_field = "time:timestamp"
    if ts_field in event_dict:
        parser = dt_parser.get()
        event_dict[ts_field] = parser.apply(event_dict[ts_field])
    return Event(event_dict)
Example #23
0
def apply(
    dfg: Dict[Tuple[str, str], int],
    start_activities: Dict[str, int],
    end_activities: Dict[str, int],
    parameters: Optional[Dict[Union[str, Parameters], Any]] = None
) -> Union[EventLog, Dict[Tuple[str, str], int]]:
    """
    Plays out a DFG, collecting the traces yielded by get_traces (most likely ones first)

    Parameters
    ---------------
    dfg
        *Complete* DFG
    start_activities
        Start activities
    end_activities
        End activities
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY => the activity key of the simulated log
        - Parameters.TIMESTAMP_KEY => the timestamp key of the simulated log
        - Parameters.MAX_NO_VARIANTS => the maximum number of variants generated by the method (default: 3000)
        - Parameters.MIN_WEIGHTED_PROBABILITY => the minimum overall weighted probability that makes the method stop
                                                (default: 1)
        - Parameters.MAX_NO_OCC_PER_ACTIVITY => the maximum number of occurrences per activity in the traces of the log
                                                (default: 2)
        - Parameters.INTERRUPT_SIMULATION_WHEN_DFG_COMPLETE => stops the simulation as soon as the DFG of the
                                                    simulated log covers the keys of the original DFG
                                                    (default: False)
        - Parameters.ADD_TRACE_IF_TAKES_NEW_ELS_TO_DFG => keeps a simulated trace only if it brings new start
                                                    activities, end activities or DFG edges (default: False)
        - Parameters.RETURN_VARIANTS => returns the traces as variants with a likely number of occurrences
        - Parameters.MAX_EXECUTION_TIME => maximum number of seconds spent collecting traces (default: sys.maxsize)

    Returns
    ---------------
    simulated_log
        Simulated log (or, when Parameters.RETURN_VARIANTS is set, a list of
        {"variant": ..., "count": ...} dictionaries)
    """
    if parameters is None:
        parameters = {}

    def read_param(key, default):
        return exec_utils.get_param_value(key, parameters, default)

    timestamp_key = read_param(Parameters.TIMESTAMP_KEY,
                               xes_constants.DEFAULT_TIMESTAMP_KEY)
    activity_key = read_param(Parameters.ACTIVITY_KEY,
                              xes_constants.DEFAULT_NAME_KEY)
    max_no_variants = read_param(Parameters.MAX_NO_VARIANTS, 3000)
    min_weighted_probability = read_param(
        Parameters.MIN_WEIGHTED_PROBABILITY, 1.0)
    interrupt_simulation_when_dfg_complete = read_param(
        Parameters.INTERRUPT_SIMULATION_WHEN_DFG_COMPLETE, False)
    add_trace_if_takes_new_els_to_dfg = read_param(
        Parameters.ADD_TRACE_IF_TAKES_NEW_ELS_TO_DFG, False)
    return_variants = read_param(Parameters.RETURN_VARIANTS, False)
    max_execution_time = read_param(Parameters.MAX_EXECUTION_TIME,
                                    sys.maxsize)

    # start activities, end activities and edges seen so far in the simulated traces
    seen_sa = set()
    seen_ea = set()
    seen_dfg = set()
    dfg_covered = False
    overall_probability = 0.0

    # entries are (-probability, trace), so that an ascending sort puts the likeliest first
    final_traces = []

    started_at = time.time()
    for tr, p in get_traces(dfg,
                            start_activities,
                            end_activities,
                            parameters=parameters):
        if interrupt_simulation_when_dfg_complete and dfg_covered:
            break
        within_budget = (len(final_traces) < max_no_variants
                         and overall_probability <= min_weighted_probability)
        if not within_budget:
            break
        if (time.time() - started_at) > max_execution_time:
            break

        overall_probability += p

        # what this trace would add to the simulated behavior
        new_sa = {tr[0]} - seen_sa
        new_ea = {tr[-1]} - seen_ea
        new_dfg = set(zip(tr, tr[1:])) - seen_dfg
        if add_trace_if_takes_new_els_to_dfg and not (new_sa or new_ea
                                                      or new_dfg):
            # the trace brings no new start activity, end activity or edge: skip it
            continue

        seen_sa |= new_sa
        seen_ea |= new_ea
        seen_dfg |= new_dfg

        # what is still missing with respect to the original DFG
        missing_sa = set(start_activities) - seen_sa
        missing_ea = set(end_activities) - seen_ea
        missing_dfg = set(dfg) - seen_dfg
        dfg_covered = not (missing_sa or missing_ea or missing_dfg)

        final_traces.append((-p, tr))
        if interrupt_simulation_when_dfg_complete and dfg_covered:
            break

    # the generation order is only approximately by probability: enforce a strict ordering
    final_traces.sort()

    if return_variants:
        # returns the variants instead of the log
        return [{
            "variant": constants.DEFAULT_VARIANT_SEP.join(activities),
            "count": math.ceil(-neg_p * max_no_variants)
        } for neg_p, activities in final_traces]

    event_log = EventLog()
    # every event gets a timestamp one second after the previous one, starting from this offset
    curr_timestamp = 10000000
    for index, (neg_p, activities) in enumerate(final_traces):
        log_trace = Trace(
            attributes={
                xes_constants.DEFAULT_TRACEID_KEY: str(index),
                "probability": -neg_p
            })
        for act in activities:
            when = datetime.datetime.fromtimestamp(curr_timestamp)
            log_trace.append(Event({activity_key: act, timestamp_key: when}))
            curr_timestamp += 1
        event_log.append(log_trace)
    return event_log
    def run(self):
        """
        Runs the thread

        Starting from the initial marking, repeatedly picks an enabled transition,
        computes its waiting and execution time, fires it and (for labelled transitions)
        appends an event to self.list_cases[self.id]. Places are guarded by semaphores
        acquired with the remaining time of the thread as timeout; waiting intervals are
        recorded in the interval trees of transitions and places. At the end, the elapsed
        time between the first and the last event is appended to self.cases_ex_time and
        self.terminated_correctly is set when some time is still remaining.
        """
        # optional diagnostics helper, started before the simulation
        if self.enable_diagnostics:
            diagnostics = SimulationDiagnostics(self)
            diagnostics.start()

        # imported locally, only when the thread actually runs
        from intervaltree import IntervalTree, Interval

        logging.basicConfig()
        logger = logging.getLogger(__name__)
        logger.setLevel(logging.DEBUG)

        # local aliases of the state of the thread (sink is unpacked but not used below)
        net, im, fm, smap, source, sink, start_time = self.net, self.im, self.fm, self.map, self.source, self.sink, self.start_time
        places_interval_trees = self.places_interval_trees
        transitions_interval_trees = self.transitions_interval_trees
        cases_ex_time = self.cases_ex_time

        # simulated clock
        current_time = start_time

        self.internal_thread_start_time = time()
        rem_time = self.get_rem_time()

        # places whose semaphore has been acquired by this thread
        # (entries are never removed, even when the semaphore is released inside the loop)
        acquired_places = set()
        acquired = source.semaphore.acquire(timeout=rem_time)
        if acquired:
            acquired_places.add(source)
        # recorded regardless of whether the semaphore was acquired
        source.assigned_time.append(current_time)

        current_marking = im
        et = enabled_transitions(net, current_marking)

        first_event = None
        last_event = None

        # NOTE(review): the loop goes on while the final marking is not covered OR no transition
        # was enabled at the previous check; in the latter case pick_transition receives an
        # empty list - confirm how it behaves
        while not fm <= current_marking or len(et) == 0:
            et = list(enabled_transitions(net, current_marking))
            ct = stochastic_utils.pick_transition(et, smap)

            # draw again until a non-negative value is obtained
            # (transitions without an entry in the map get 0.0)
            simulated_execution_plus_waiting_time = -1
            while simulated_execution_plus_waiting_time < 0:
                simulated_execution_plus_waiting_time = smap[ct].get_value(
                ) if ct in smap else 0.0

            # establish how much time we need to wait before firing the transition
            # (it depends on the input places tokens)
            # NOTE(review): the loop below iterates over ct.out_arcs / arc.target,
            # i.e. the places reached by the outgoing arcs - confirm against the comment above
            waiting_time = 0
            for arc in ct.out_arcs:
                place = arc.target
                # value of the semaphore counter before trying to acquire it (private attribute)
                sem_value = int(place.semaphore._value)
                rem_time = self.get_rem_time()
                acquired = place.semaphore.acquire(timeout=rem_time)
                if acquired:
                    acquired_places.add(place)
                rem_time = self.get_rem_time()
                if rem_time == 0:
                    break
                # the semaphore was exhausted before the acquire: the thread had to wait,
                # so the oldest assigned time of the place contributes to the waiting time
                if sem_value == 0:
                    waiting_time = max(
                        waiting_time,
                        place.assigned_time.pop(0) -
                        current_time) if place.assigned_time else waiting_time

            # timeout: give back the semaphores and stop the simulation
            # NOTE(review): the same places are released once more after the loop
            # (places_to_free includes acquired_places) - confirm the double release is intended
            if rem_time == 0:
                for place in acquired_places:
                    place.semaphore.release()
                break

            # if the waiting time is greater than 0, add an interval to the interval tree denoting
            # the waiting times for the given transition
            if waiting_time > 0:
                transitions_interval_trees[ct].add(
                    Interval(current_time, current_time + waiting_time))

            # get the actual execution time of the transition as a difference between simulated_execution_plus_waiting_time
            # and the waiting time
            execution_time = max(
                simulated_execution_plus_waiting_time - waiting_time, 0)

            # increase the timing based on the waiting time and the execution time of the transition
            current_time = current_time + waiting_time + execution_time

            # the output places receive the new time; the list is kept sorted
            for arc in ct.out_arcs:
                place = arc.target
                place.assigned_time.append(current_time)
                place.assigned_time = sorted(place.assigned_time)

            current_marking = weak_execute(ct, current_marking)

            # only labelled transitions produce an event
            if ct.label is not None:
                eve = Event({
                    xes_constants.DEFAULT_NAME_KEY:
                    ct.label,
                    xes_constants.DEFAULT_TIMESTAMP_KEY:
                    datetime.datetime.fromtimestamp(current_time)
                })
                last_event = eve
                if first_event is None:
                    first_event = last_event
                self.list_cases[self.id].append(eve)

            # input places: record for how long the place has been occupied, then release it
            for arc in ct.in_arcs:
                place = arc.source
                p_ex_time = place.assigned_time.pop(0)
                if current_time - p_ex_time > 0:
                    places_interval_trees[place].add(
                        Interval(p_ex_time, current_time))
                place.assigned_time.append(current_time)
                place.assigned_time = sorted(place.assigned_time)
                place.semaphore.release()

            # sleep before starting next iteration
            sleep((waiting_time + execution_time) / self.small_scale_factor)

        # execution time of the case: difference between the last and the first event (0 without events)
        if first_event is not None and last_event is not None:
            cases_ex_time.append(
                last_event[xes_constants.DEFAULT_TIMESTAMP_KEY].timestamp() -
                first_event[xes_constants.DEFAULT_TIMESTAMP_KEY].timestamp())
        else:
            cases_ex_time.append(0)

        # release the places of the reached marking along with every place acquired during the run
        places_to_free = set(current_marking).union(acquired_places)

        for place in places_to_free:
            place.semaphore.release()

        # the run counts as correct when some time is still left
        rem_time = self.get_rem_time()
        if rem_time > 0:
            self.terminated_correctly = True
            if self.enable_diagnostics:
                logger.info(
                    str(time()) + " terminated successfully thread ID " +
                    str(self.id))

        # (the inner enable_diagnostics check repeats the outer one)
        if self.enable_diagnostics:
            if rem_time == 0:
                if self.enable_diagnostics:
                    logger.info(
                        str(time()) + " terminated for timeout thread ID " +
                        str(self.id))

        # flag the diagnostics helper as closed
        if self.enable_diagnostics:
            diagnostics.diagn_open = False
Example #25
0
def to_interval(log, parameters=None):
    """
    Turns a lifecycle log (each event has one timestamp and a lifecycle transition)
    into an interval log (each event has a start and a completion timestamp)

    Start and complete events are paired per (activity, lifecycle instance).

    Parameters
    -------------
    log
        Log (expressed in the lifecycle format)
    parameters
        Possible parameters of the method (activity, timestamp key, start timestamp key, transition,
        lifecycle instance key, business hours settings ...)

    Returns
    -------------
    log
        Interval event log; the input log is returned unchanged when it is None, empty,
        or already recognised as an interval log
    """
    if parameters is None:
        parameters = {}

    def read_param(key, default):
        return exec_utils.get_param_value(key, parameters, default)

    timestamp_key = read_param(Parameters.TIMESTAMP_KEY,
                               xes.DEFAULT_TIMESTAMP_KEY)
    start_timestamp_key = read_param(Parameters.START_TIMESTAMP_KEY,
                                     xes.DEFAULT_START_TIMESTAMP_KEY)
    transition_key = read_param(Parameters.TRANSITION_KEY,
                                xes.DEFAULT_TRANSITION_KEY)
    activity_key = read_param(Parameters.ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    lifecycle_instance_key = read_param(Parameters.LIFECYCLE_INSTANCE_KEY,
                                        xes.DEFAULT_INSTANCE_KEY)
    business_hours = read_param(Parameters.BUSINESS_HOURS, False)
    worktiming = read_param(Parameters.WORKTIMING, [7, 17])
    weekends = read_param(Parameters.WEEKENDS, [6, 7])

    # nothing to convert
    if log is None or len(log) == 0:
        return log

    # the log declares itself as an interval log
    if "PM4PY_TYPE" in log.attributes and log.attributes[
            "PM4PY_TYPE"] == "interval":
        return log

    # the first event already carries a start timestamp
    first_trace = log[0]
    if first_trace is not None and len(first_trace) > 0:
        if start_timestamp_key in first_trace[0]:
            return log

    # the log-level information is carried over through shallow copies
    interval_log = EventLog(attributes=copy(log.attributes),
                            extensions=copy(log.extensions),
                            classifiers=copy(log.classifiers),
                            omni_present=copy(log.omni_present),
                            properties=copy(log.properties))
    interval_log.attributes["PM4PY_TYPE"] = "interval"
    interval_log.properties[
        constants.
        PARAMETER_CONSTANT_START_TIMESTAMP_KEY] = xes.DEFAULT_START_TIMESTAMP_KEY

    # keys that are rebuilt explicitly and therefore not copied from the source events
    rebuilt_keys = (timestamp_key, transition_key)

    for trace in log:
        interval_trace = Trace()
        for name in trace.attributes:
            interval_trace.attributes[name] = trace.attributes[name]

        # start events still waiting for their completion, per (activity, instance) (FIFO)
        pending_starts = {}
        for event in trace:
            activity = event[activity_key]
            if lifecycle_instance_key in event:
                instance = event[lifecycle_instance_key]
            else:
                instance = None
            pairing_key = (activity, instance)
            if transition_key in event:
                transition = event[transition_key]
            else:
                transition = "complete"
            timestamp = event[timestamp_key]
            lifecycle = transition.lower()

            if lifecycle == "start":
                pending_starts.setdefault(pairing_key, []).append(event)
                continue
            if lifecycle != "complete":
                # other lifecycle transitions are ignored
                continue

            # without a matching start event, the interval collapses on the completion timestamp
            start_event = None
            start_timestamp = timestamp
            queue = pending_starts.get(pairing_key)
            if queue:
                start_event = queue.pop(0)
                start_timestamp = start_event[timestamp_key]

            interval_event = Event()
            for name in event:
                if name not in rebuilt_keys:
                    interval_event[name] = event[name]
            if start_event is not None:
                for name in start_event:
                    if name not in rebuilt_keys:
                        interval_event["@@startevent_" +
                                       name] = start_event[name]
            interval_event[start_timestamp_key] = start_timestamp
            interval_event[timestamp_key] = timestamp
            elapsed = timestamp - start_timestamp
            interval_event["@@duration"] = elapsed.total_seconds()

            if business_hours:
                bh = BusinessHours(start_timestamp.replace(tzinfo=None),
                                   timestamp.replace(tzinfo=None),
                                   worktiming=worktiming,
                                   weekends=weekends)
                interval_event["@@approx_bh_duration"] = bh.getseconds()

            interval_trace.append(interval_event)

        interval_trace = sorting.sort_timestamp_trace(interval_trace,
                                                      start_timestamp_key)
        interval_log.append(interval_trace)

    return interval_log
Example #26
0
def to_lifecycle(log, parameters=None):
    """
    Turns an interval log (each event has a start and a completion timestamp)
    into a lifecycle log (each event has one timestamp and a lifecycle transition)

    Every interval event is split into a "start" and a "complete" event.

    Parameters
    -------------
    log
        Log (expressed in the interval format)
    parameters
        Optional dictionary of parameters: the timestamp, start timestamp and transition key constants

    Returns
    -------------
    log
        Lifecycle event log; the input log is returned unchanged when it is None, empty,
        or already recognised as a lifecycle log
    """
    if parameters is None:
        parameters = {}

    timestamp_key = parameters.get(
        constants.PARAMETER_CONSTANT_TIMESTAMP_KEY, xes.DEFAULT_TIMESTAMP_KEY)
    start_timestamp_key = parameters.get(
        constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY,
        xes.DEFAULT_START_TIMESTAMP_KEY)
    transition_key = parameters.get(
        constants.PARAMETER_CONSTANT_TRANSITION_KEY,
        xes.DEFAULT_TRANSITION_KEY)

    # nothing to convert
    if log is None or len(log) == 0:
        return log

    # the log declares itself as a lifecycle log
    if "PM4PY_TYPE" in log.attributes and log.attributes[
            "PM4PY_TYPE"] == "lifecycle":
        return log

    # the first event already carries a lifecycle transition
    first_trace = log[0]
    if first_trace is not None and len(first_trace) > 0:
        if transition_key in first_trace[0]:
            return log

    def chronological_order(ev):
        # timestamp first; ties are broken by the originating event, then start before complete
        return ev[timestamp_key], ev["@@origin_ev_idx"], ev["@@custom_lif_id"]

    # keys that are rebuilt explicitly and therefore not copied from the source events
    rebuilt_keys = (timestamp_key, start_timestamp_key)

    lifecycle_log = EventLog()
    lifecycle_log.attributes["PM4PY_TYPE"] = "lifecycle"

    for trace in log:
        lifecycle_trace = Trace()
        for name in trace.attributes:
            lifecycle_trace.attributes[name] = trace.attributes[name]

        split_events = []
        for index, event in enumerate(trace):
            start_ev = Event()
            complete_ev = Event()
            for name in event:
                if name not in rebuilt_keys:
                    start_ev[name] = event[name]
                    complete_ev[name] = event[name]

            start_ev[timestamp_key] = event[start_timestamp_key]
            start_ev[transition_key] = "start"
            start_ev["@@custom_lif_id"] = 0
            start_ev["@@origin_ev_idx"] = index

            complete_ev[timestamp_key] = event[timestamp_key]
            complete_ev[transition_key] = "complete"
            complete_ev["@@custom_lif_id"] = 1
            complete_ev["@@origin_ev_idx"] = index

            split_events += [start_ev, complete_ev]

        split_events.sort(key=chronological_order)
        for ev in split_events:
            lifecycle_trace.append(ev)
        lifecycle_log.append(lifecycle_trace)

    return lifecycle_log
Example #27
0
    def read_event(self):
        """
        Advances the parse iterator until an accepted event has been read

        Pairs (action, element) are consumed from self.context. Attribute
        elements met while a trace or an event is open are stored through
        parse_attribute. When an event element is closed and
        self.acceptance_condition accepts it, the attributes of the current
        trace are copied into the event (their keys prefixed with
        constants.CASE_ATTRIBUTE_PREFIX) and the event is returned.

        Anything raised by next(self.context) is propagated to the caller.

        Returns
        ------------
        event
            The next accepted event, or None once the closing tag of the log
            has been processed
        """
        tree = self.tree
        while True:
            action, node = next(self.context)

            if action == _EVENT_START:
                parent_node = node.getparent()
                parent = tree[parent_node] if parent_node in tree else None
                tag = node.tag

                if tag.endswith(xes_constants.TAG_TRACE):
                    # the attributes of the new trace become the store of the
                    # attribute elements nested inside the trace element
                    self.trace = Trace()
                    tree[node] = self.trace.attributes
                    self.reading_trace = True

                elif tag.endswith(xes_constants.TAG_EVENT):
                    # the new event itself is the store of its attributes
                    self.event = Event()
                    tree[node] = self.event
                    self.reading_event = True

                elif self.reading_event or self.reading_trace:
                    attr_key = node.get(xes_constants.KEY_KEY)
                    raw_value = node.get(xes_constants.KEY_VALUE)

                    if tag.endswith(xes_constants.TAG_STRING):
                        if parent is not None:
                            tree = parse_attribute(node, parent, attr_key,
                                                   raw_value, tree)

                    elif tag.endswith(xes_constants.TAG_DATE):
                        # unlike the other attribute types, dates are parsed
                        # and stored without checking the parent first
                        try:
                            parsed_date = self.date_parser.apply(raw_value)
                            tree = parse_attribute(node, parent, attr_key,
                                                   parsed_date, tree)
                        except (TypeError, ValueError):
                            logging.info("failed to parse date: " +
                                         str(raw_value))

                    elif tag.endswith(xes_constants.TAG_FLOAT):
                        if parent is not None:
                            try:
                                number = float(raw_value)
                                tree = parse_attribute(node, parent, attr_key,
                                                       number, tree)
                            except ValueError:
                                logging.info("failed to parse float: " +
                                             str(raw_value))

                    elif tag.endswith(xes_constants.TAG_INT):
                        if parent is not None:
                            try:
                                number = int(raw_value)
                                tree = parse_attribute(node, parent, attr_key,
                                                       number, tree)
                            except ValueError:
                                logging.info("failed to parse int: " +
                                             str(raw_value))

                    elif tag.endswith(xes_constants.TAG_BOOLEAN):
                        if parent is not None:
                            try:
                                # only the (case-insensitive) text "true" is
                                # read as True, anything else as False
                                flag = str(raw_value).lower() == "true"
                                tree = parse_attribute(node, parent, attr_key,
                                                       flag, tree)
                            except ValueError:
                                logging.info("failed to parse boolean: " +
                                             str(raw_value))

                    elif tag.endswith(xes_constants.TAG_LIST):
                        if parent is not None:
                            # lists have no value, hence None is stored
                            tree = parse_attribute(node, parent, attr_key,
                                                   None, tree)

                    elif tag.endswith(xes_constants.TAG_ID):
                        if parent is not None:
                            tree = parse_attribute(node, parent, attr_key,
                                                   raw_value, tree)

            elif action == _EVENT_END:
                # the element is complete: forget its store and release it
                if node in tree:
                    del tree[node]
                node.clear()
                if node.getprevious() is not None:
                    try:
                        del node.getparent()[0]
                    except TypeError:
                        pass

                tag = node.tag
                if tag.endswith(xes_constants.TAG_EVENT):
                    self.reading_event = False
                    if self.acceptance_condition(self.event):
                        case_attributes = self.trace.attributes
                        for name in case_attributes:
                            prefixed = constants.CASE_ATTRIBUTE_PREFIX + name
                            self.event[prefixed] = case_attributes[name]
                        return self.event

                elif tag.endswith(xes_constants.TAG_TRACE):
                    self.reading_trace = False

                elif tag.endswith(xes_constants.TAG_LOG):
                    # end of the log: nothing else to read
                    self.reading_log = False
                    return None
Example #28
0
def preprocess_log(log, activities=None, parameters=None):
    """
    Preprocess a log to enable correlation mining

    Parameters
    --------------
    log
        Log object. A pandas DataFrame is reduced to the activity, timestamp
        and start timestamp columns before being converted to an event stream
    activities
        (if provided) list of activities of the log
    parameters
        Parameters of the algorithm (the dictionary provided by the caller is
        not modified), including:
        - Parameters.ACTIVITY_KEY
        - Parameters.TIMESTAMP_KEY
        - Parameters.START_TIMESTAMP_KEY
        - Parameters.INDEX_KEY

    Returns
    --------------
    transf_stream
        List of events (activity, numeric timestamps obtained with
        .timestamp(), position in the converted stream) sorted by
        start timestamp, timestamp and position
    activities_grouped
        Dictionary associating each activity to its events, kept in the
        order of transf_stream
    activities
        List of activities of the log
    """
    # work on a private copy: the conversion options set below must not leak
    # into the dictionary owned by the caller
    parameters = {} if parameters is None else dict(parameters)

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    start_timestamp_key = exec_utils.get_param_value(
        Parameters.START_TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    index_key = exec_utils.get_param_value(Parameters.INDEX_KEY, parameters,
                                           DEFAULT_INDEX_KEY)

    if isinstance(log, pd.DataFrame):
        # keep only the needed columns before conversion; a set is used
        # because the timestamp and start timestamp keys may coincide
        log = log[list({activity_key, timestamp_key, start_timestamp_key})]

    parameters["deepcopy"] = False
    parameters["include_case_attributes"] = False
    log = converter.apply(log,
                          variant=converter.TO_EVENT_STREAM,
                          parameters=parameters)

    transf_stream = sorted(
        (Event({
            activity_key: ev[activity_key],
            timestamp_key: ev[timestamp_key].timestamp(),
            start_timestamp_key: ev[start_timestamp_key].timestamp(),
            index_key: idx
        }) for idx, ev in enumerate(log)),
        key=lambda x: (x[start_timestamp_key], x[timestamp_key], x[index_key]))

    if activities is None:
        activities = sorted({x[activity_key] for x in transf_stream})

    # single pass over the stream instead of one full scan per activity;
    # events whose activity is not listed are left out
    activities_grouped = {act: [] for act in activities}
    for ev in transf_stream:
        bucket = activities_grouped.get(ev[activity_key])
        if bucket is not None:
            bucket.append(ev)

    return transf_stream, activities_grouped, activities
Example #29
0
def apply(frequency_dfg: Dict[Tuple[str, str], int],
          start_activities: Dict[str, int],
          end_activities: Dict[str, int],
          parameters: Optional[Dict[Any, Any]] = None) -> EventLog:
    """
    Simulates a log with the transition probabilities provided by the frequency DFG,
    and the time deltas provided by the performance DFG

    Parameters
    ---------------
    frequency_dfg
        Frequency DFG (not modified: the function works on a copy)
    start_activities
        Start activities
    end_activities
        End activities
    parameters
        Parameters of the algorithm, including:
        - Parameters.NUM_TRACES: the number of traces of the simulated log (default: 1000)
        - Parameters.ACTIVITY_KEY: the activity key to be used in the simulated log
        - Parameters.TIMESTAMP_KEY: the timestamp key to be used in the simulated log
        - Parameters.CASE_ID_KEY: the case identifier key to be used in the simulated log
        - Parameters.CASE_ARRIVAL_RATE: the distance (in seconds) between the start of two consecutive cases (default: 1)
        - Parameters.PERFORMANCE_DFG: (mandatory) the performance DFG that is used for the time deltas.
          Each value is either a number or a dictionary with a "mean" entry.
        - Parameters.PARAM_ARTIFICIAL_START_ACTIVITY: name of the artificial start activity
        - Parameters.PARAM_ARTIFICIAL_END_ACTIVITY: name of the artificial end activity

    Returns
    ---------------
    simulated_log
        Simulated log

    Raises
    ---------------
    Exception
        If Parameters.PERFORMANCE_DFG is not provided
    """
    if parameters is None:
        parameters = {}

    num_traces = exec_utils.get_param_value(Parameters.NUM_TRACES, parameters,
                                            1000)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY,
                                             parameters,
                                             xes_constants.DEFAULT_TRACEID_KEY)
    case_arrival_rate = exec_utils.get_param_value(
        Parameters.CASE_ARRIVAL_RATE, parameters, 1)

    performance_dfg = exec_utils.get_param_value(Parameters.PERFORMANCE_DFG,
                                                 parameters, None)
    # validate before the first write into the performance DFG, otherwise a
    # missing parameter surfaces as an item assignment error on None
    if performance_dfg is None:
        raise Exception(
            "performance DFG simulation requires the Parameters.PERFORMANCE_DFG ('performance_dfg') parameter specification."
        )

    # shallow copies: the artificial arcs added below must not end up in the
    # dictionaries owned by the caller
    performance_dfg = copy(performance_dfg)
    frequency_dfg = copy(frequency_dfg)

    artificial_start_activity = exec_utils.get_param_value(
        Parameters.PARAM_ARTIFICIAL_START_ACTIVITY, parameters,
        constants.DEFAULT_ARTIFICIAL_START_ACTIVITY)
    artificial_end_activity = exec_utils.get_param_value(
        Parameters.PARAM_ARTIFICIAL_END_ACTIVITY, parameters,
        constants.DEFAULT_ARTIFICIAL_END_ACTIVITY)

    # connect the artificial start/end activities with zero time delta
    for sa in start_activities:
        frequency_dfg[(artificial_start_activity, sa)] = start_activities[sa]
        performance_dfg[(artificial_start_activity, sa)] = 0

    for ea in end_activities:
        frequency_dfg[(ea, artificial_end_activity)] = end_activities[ea]
        performance_dfg[(ea, artificial_end_activity)] = 0

    # source activity -> {target activity: frequency of the arc}
    choices = {}
    for (source, target), frequency in frequency_dfg.items():
        choices.setdefault(source, {})[target] = frequency

    log = EventLog()
    # base timestamp (seconds, as consumed by datetime.fromtimestamp) from
    # which the case start times are laid out
    curr_st = 10000000

    for i in range(num_traces):
        curr_st += case_arrival_rate
        curr_t = curr_st
        trace = Trace(attributes={case_id_key: str(i)})
        log.append(trace)
        curr_act = artificial_start_activity
        while True:
            next_act = dict_based_choice(choices[curr_act])
            if next_act == artificial_end_activity or next_act is None:
                break
            perf = performance_dfg[(curr_act, next_act)]
            if isinstance(perf, dict):
                perf = perf["mean"]
            # a zero delta is kept as is, otherwise the delta is drawn by
            # exponential() (presumably with perf as mean - TODO confirm)
            perf = 0 if perf == 0 else exponential(perf)
            curr_t += perf
            curr_act = next_act
            eve = Event({
                activity_key: curr_act,
                timestamp_key: datetime.fromtimestamp(curr_t)
            })
            trace.append(eve)

    return log