Esempio n. 1
0
def insert_time_from_previous(log, parameters=None):
    """
    Inserts the time from the previous event, both in normal and business hours

    Parameters
    -------------
    log
        Event log
    parameters
        Parameters of the algorithm (timestamp key, worktiming, weekends)

    Returns
    -------------
    enriched_log
        Enriched log (with the time passed from the previous event)
    """
    if parameters is None:
        parameters = {}

    # attribute used as event timestamp (XES default when not provided)
    timestamp_key = parameters[
        constants.
        PARAMETER_CONSTANT_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in parameters else xes.DEFAULT_TIMESTAMP_KEY
    # default work shift 07:00-17:00; weekend days Saturday (6) and Sunday (7)
    worktiming = parameters["worktiming"] if "worktiming" in parameters else [
        7, 17
    ]
    weekends = parameters["weekends"] if "weekends" in parameters else [6, 7]

    # idiomatic type check (was: `not type(log) is EventLog`)
    if not isinstance(log, EventLog):
        log = log_converter.apply(log)

    log = sorting.sort_timestamp_log(log, timestamp_key)

    for trace in log:
        if trace:
            # the first event has, by definition, no predecessor
            trace[0]["@@passed_time_from_previous"] = 0
            trace[0]["@@approx_bh_passed_time_from_previous"] = 0

            i = 1
            while i < len(trace):
                # wall-clock seconds since the previous event
                trace[i]["@@passed_time_from_previous"] = (
                    trace[i][timestamp_key] -
                    trace[i - 1][timestamp_key]).total_seconds()
                # business-hours seconds; tzinfo is stripped since
                # BusinessHours works on naive datetimes
                bh = BusinessHours(
                    trace[i - 1][timestamp_key].replace(tzinfo=None),
                    trace[i][timestamp_key].replace(tzinfo=None),
                    worktiming=worktiming,
                    weekends=weekends)
                trace[i][
                    "@@approx_bh_passed_time_from_previous"] = bh.getseconds()
                i = i + 1

    return log
Esempio n. 2
0
def get_case_dispersion_avg(log, parameters=None):
    """
    Gets the average time elapsed between the end of consecutive cases

    Parameters
    --------------
    log
        Trace log
    parameters
        Parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> attribute of the log to be used as timestamp

    Returns
    --------------
    case_dispersion_avg
        Average time elapsed between case ends
    """
    if parameters is None:
        parameters = {}
    # business-hours settings (default shift 07:00-17:00, weekend Sat/Sun)
    business_hours = exec_utils.get_param_value(Parameters.BUSINESS_HOURS,
                                                parameters, False)
    worktiming = exec_utils.get_param_value(Parameters.WORKTIMING, parameters,
                                            [7, 17])
    weekends = exec_utils.get_param_value(Parameters.WEEKENDS, parameters,
                                          [6, 7])

    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY,
                                               parameters,
                                               DEFAULT_TIMESTAMP_KEY)

    # completion timestamp of each case; the membership test is done on the
    # LAST event, since that is the event actually read (the previous check
    # on trace[0] could let the trace[-1] access raise a KeyError)
    case_end_time = [
        trace[-1][timestamp_key] for trace in log
        if trace and timestamp_key in trace[-1]
    ]
    case_end_time = sorted(case_end_time)

    case_diff_end_time = []
    for i in range(len(case_end_time) - 1):
        if business_hours:
            # difference restricted to working time (naive datetimes required)
            bh = BusinessHours(case_end_time[i].replace(tzinfo=None),
                               case_end_time[i + 1].replace(tzinfo=None),
                               worktiming=worktiming,
                               weekends=weekends)
            case_diff_end_time.append(bh.getseconds())
        else:
            case_diff_end_time.append(
                (case_end_time[i + 1] - case_end_time[i]).total_seconds())

    if case_diff_end_time:
        return statistics.mean(case_diff_end_time)

    # fewer than two case end times: no dispersion can be computed
    return 0.0
Esempio n. 3
0
def get_case_arrival_avg(log, parameters=None):
    """
    Gets the average time elapsed between the start of consecutive cases

    Parameters
    --------------
    log
        Trace log
    parameters
        Parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> attribute of the log to be used as timestamp

    Returns
    --------------
    case_arrival_avg
        Average time elapsed between case starts
    """
    if parameters is None:
        parameters = {}
    # business-hours settings (default shift 07:00-17:00, weekend Sat/Sun)
    business_hours = parameters[
        "business_hours"] if "business_hours" in parameters else False
    worktiming = parameters["worktiming"] if "worktiming" in parameters else [
        7, 17
    ]
    weekends = parameters["weekends"] if "weekends" in parameters else [6, 7]

    timestamp_key = parameters[
        PARAMETER_CONSTANT_TIMESTAMP_KEY] if PARAMETER_CONSTANT_TIMESTAMP_KEY in parameters else DEFAULT_TIMESTAMP_KEY

    # start timestamp of each non-empty case carrying the timestamp attribute
    case_start_time = [
        trace[0][timestamp_key] for trace in log
        if trace and timestamp_key in trace[0]
    ]
    case_start_time = sorted(case_start_time)

    case_diff_start_time = []
    for i in range(len(case_start_time) - 1):
        if business_hours:
            # difference restricted to working time (naive datetimes required)
            bh = BusinessHours(case_start_time[i].replace(tzinfo=None),
                               case_start_time[i + 1].replace(tzinfo=None),
                               worktiming=worktiming,
                               weekends=weekends)
            case_diff_start_time.append(bh.getseconds())
        else:
            case_diff_start_time.append(
                (case_start_time[i + 1] - case_start_time[i]).total_seconds())

    if case_diff_start_time:
        # the function computes an AVERAGE (see name/docstring); the previous
        # code returned statistics.median, inconsistent with the other
        # implementation of this function which uses statistics.mean
        return statistics.mean(case_diff_start_time)

    # fewer than two case start times: no arrival rate can be computed
    return 0.0
Esempio n. 4
0
def get_remaining_time_from_log(log, max_len_trace=100000, parameters=None):
    """
    Gets the remaining time for the instances given a log and a trace index

    Parameters
    ------------
    log
        Log
    max_len_trace
        Maximum number of events per trace for which the remaining time is
        computed (each output row is padded to this length)
    parameters
        Parameters of the algorithm

    Returns
    ------------
    list
        List (one entry per trace) of lists of remaining times
    """
    if parameters is None:
        parameters = {}
    timestamp_key = parameters[
        constants.
        PARAMETER_CONSTANT_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in parameters else xes.DEFAULT_TIMESTAMP_KEY
    business_hours = parameters[
        "business_hours"] if "business_hours" in parameters else False
    worktiming = parameters["worktiming"] if "worktiming" in parameters else [
        7, 17
    ]
    weekends = parameters["weekends"] if "weekends" in parameters else [6, 7]
    y_orig = []
    for trace in log:
        y_orig.append([])
        for index, event in enumerate(trace):
            if index >= max_len_trace:
                break
            # remaining time = distance from the current event to the last
            # event of the trace
            timestamp_st = trace[index][timestamp_key]
            timestamp_et = trace[-1][timestamp_key]
            if business_hours:
                bh = BusinessHours(timestamp_st.replace(tzinfo=None),
                                   timestamp_et.replace(tzinfo=None),
                                   worktiming=worktiming,
                                   weekends=weekends)
                y_orig[-1].append(bh.getseconds())
            else:
                y_orig[-1].append(
                    (timestamp_et - timestamp_st).total_seconds())
        # pad each row to max_len_trace by repeating its last value; empty
        # traces are padded with 0 (the previous code raised IndexError on
        # an empty trace)
        pad_value = y_orig[-1][-1] if y_orig[-1] else 0
        while len(y_orig[-1]) < max_len_trace:
            y_orig[-1].append(pad_value)
    return y_orig
Esempio n. 5
0
def execute_script():
    """Demonstrate BusinessHours with and without a country work calendar."""
    start_ts = 100000000
    end_ts = 110000000
    start_dt = datetime.datetime.fromtimestamp(start_ts)
    end_dt = datetime.datetime.fromtimestamp(end_ts)
    # total elapsed seconds, regardless of working time
    print(end_ts - start_ts)
    # default business hours: all the days of the week except Saturday and
    # Sunday are working days (two daily shifts, 07:00-12:30 and 13:00-17:00)
    simple_bh = BusinessHours(start_dt, end_dt,
                              worktiming=[[7, 12.5], [13, 17]])
    print(simple_bh.getseconds())
    # business hours computed against a proper work calendar (Italian
    # holidays excluded from the working time)
    calendar_bh = BusinessHours(start_dt,
                                end_dt,
                                worktiming=[[7, 12.25], [13.25, 17]],
                                workcalendar=Italy())
    print(calendar_bh.getseconds())
Esempio n. 6
0
def get_case_arrival_avg(
        log: EventLog,
        parameters: Optional[Dict[Union[str, Parameters],
                                  Any]] = None) -> float:
    """
    Gets the average time elapsed between the start of consecutive cases

    Parameters
    --------------
    log
        Trace log
    parameters
        Parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> attribute of the log to be used as timestamp

    Returns
    --------------
    case_arrival_avg
        Average time elapsed between case starts
    """
    if parameters is None:
        parameters = {}

    log = log_converter.apply(log,
                              variant=log_converter.Variants.TO_EVENT_LOG,
                              parameters=parameters)

    # business-hours settings (default shift 07:00-17:00, weekend Sat/Sun)
    business_hours = exec_utils.get_param_value(Parameters.BUSINESS_HOURS,
                                                parameters, False)
    worktiming = exec_utils.get_param_value(Parameters.WORKTIMING, parameters,
                                            [7, 17])
    weekends = exec_utils.get_param_value(Parameters.WEEKENDS, parameters,
                                          [6, 7])
    workcalendar = exec_utils.get_param_value(
        Parameters.WORKCALENDAR, parameters,
        constants.DEFAULT_BUSINESS_HOURS_WORKCALENDAR)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY,
                                               parameters,
                                               DEFAULT_TIMESTAMP_KEY)

    # start timestamp of every non-empty case, in ascending order
    start_times = sorted(trace[0][timestamp_key] for trace in log
                         if trace and timestamp_key in trace[0])

    inter_arrival_times = []
    for previous, current in zip(start_times, start_times[1:]):
        if business_hours:
            # difference restricted to working time (naive datetimes required)
            bh = BusinessHours(previous.replace(tzinfo=None),
                               current.replace(tzinfo=None),
                               worktiming=worktiming,
                               weekends=weekends,
                               workcalendar=workcalendar)
            inter_arrival_times.append(bh.getseconds())
        else:
            inter_arrival_times.append((current - previous).total_seconds())

    if inter_arrival_times:
        return statistics.mean(inter_arrival_times)

    # fewer than two case starts: no arrival rate can be computed
    return 0.0
Esempio n. 7
0
def performance(
    log: Union[EventLog, EventStream],
    parameters: Optional[Dict[Union[str, Parameters], Any]] = None
) -> Dict[Tuple[str, str], float]:
    """
    Measure performance between couples of attributes in the DFG graph

    Parameters
    ----------
    log
        Log
    parameters
        Possible parameters passed to the algorithms:
            aggregationMeasure -> performance aggregation measure (min, max, mean, median)
            activity_key -> Attribute to use as activity
            timestamp_key -> Attribute to use as timestamp
        - Parameters.BUSINESS_HOURS => calculates the difference of time based on the business hours, not the total time.
                                        Default: False
        - Parameters.WORKTIMING => work schedule of the company (provided as a list where the first number is the start
            of the work time, and the second number is the end of the work time), if business hours are enabled
                                        Default: [7, 17] (work shift from 07:00 to 17:00)
        - Parameters.WEEKENDS => indexes of the days of the week that are weekend
                                        Default: [6, 7] (weekends are Saturday and Sunday)
    Returns
    -------
    dfg
        DFG graph annotated with the aggregated performance per couple
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_util.DEFAULT_NAME_KEY)
    start_timestamp_key = exec_utils.get_param_value(
        Parameters.START_TIMESTAMP_KEY, parameters,
        xes_util.DEFAULT_TIMESTAMP_KEY)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY,
                                               parameters,
                                               xes_util.DEFAULT_TIMESTAMP_KEY)
    aggregation_measure = exec_utils.get_param_value(
        Parameters.AGGREGATION_MEASURE, parameters, "mean")

    business_hours = exec_utils.get_param_value(Parameters.BUSINESS_HOURS,
                                                parameters, False)
    worktiming = exec_utils.get_param_value(Parameters.WORKTIMING, parameters,
                                            [7, 17])
    weekends = exec_utils.get_param_value(Parameters.WEEKENDS, parameters,
                                          [6, 7])
    workcalendar = exec_utils.get_param_value(
        Parameters.WORKCALENDAR, parameters,
        constants.DEFAULT_BUSINESS_HOURS_WORKCALENDAR)

    # gather, for every directly-follows couple of activities, the list of
    # (clamped non-negative) durations observed between the two events
    durations_per_couple = {}
    for trace in log:
        for i in range(1, len(trace)):
            predecessor = trace[i - 1]
            successor = trace[i]
            couple = (predecessor[activity_key], successor[activity_key])
            if business_hours:
                # business-hours difference (naive datetimes required)
                seconds = BusinessHours(
                    predecessor[timestamp_key].replace(tzinfo=None),
                    successor[start_timestamp_key].replace(tzinfo=None),
                    worktiming=worktiming,
                    weekends=weekends,
                    workcalendar=workcalendar).getseconds()
            else:
                seconds = (successor[start_timestamp_key] -
                           predecessor[timestamp_key]).total_seconds()
            durations_per_couple.setdefault(couple, []).append(max(0, seconds))

    # aggregate the collected durations per couple
    ret = Counter()
    for couple, values in durations_per_couple.items():
        if aggregation_measure == "median":
            ret[couple] = median(values)
        elif aggregation_measure == "min":
            ret[couple] = min(values)
        elif aggregation_measure == "max":
            ret[couple] = max(values)
        elif aggregation_measure == "stdev":
            ret[couple] = stdev(values) if len(values) > 1 else 0
        elif aggregation_measure == "sum":
            ret[couple] = sum(values)
        elif aggregation_measure == "raw_values":
            ret[couple] = values
        elif aggregation_measure == "all":
            ret[couple] = {
                "median": median(values),
                "min": min(values),
                "max": max(values),
                "stdev": stdev(values) if len(values) > 1 else 0,
                "sum": sum(values),
                "mean": mean(values)
            }
        else:
            # any unrecognized measure (including the default "mean")
            # falls back to the mean
            ret[couple] = mean(values)

    return ret
Esempio n. 8
0
def to_interval(log, parameters=None):
    """
    Converts a log to interval format (e.g. an event has two timestamps)
    from lifecycle format (an event has only a timestamp, and a transition lifecycle)

    Parameters
    -------------
    log
        Log (expressed in the lifecycle format)
    parameters
        Possible parameters of the method (activity, timestamp key, start timestamp key, transition ...)

    Returns
    -------------
    log
        Interval event log
    """
    if parameters is None:
        parameters = {}

    # keys used to read events; fall back to the XES defaults when not given
    timestamp_key = parameters[
        constants.
        PARAMETER_CONSTANT_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in parameters else xes.DEFAULT_TIMESTAMP_KEY
    start_timestamp_key = parameters[
        constants.
        PARAMETER_CONSTANT_START_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY in parameters else xes.DEFAULT_START_TIMESTAMP_KEY
    transition_key = parameters[
        constants.
        PARAMETER_CONSTANT_TRANSITION_KEY] if constants.PARAMETER_CONSTANT_TRANSITION_KEY in parameters else xes.DEFAULT_TRANSITION_KEY
    activity_key = parameters[
        constants.
        PARAMETER_CONSTANT_ACTIVITY_KEY] if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY
    # business-hours enrichment settings (default shift 07:00-17:00,
    # weekend on Saturday/Sunday)
    business_hours = parameters[
        "business_hours"] if "business_hours" in parameters else False
    worktiming = parameters["worktiming"] if "worktiming" in parameters else [
        7, 17
    ]
    weekends = parameters["weekends"] if "weekends" in parameters else [6, 7]

    if log is not None and len(log) > 0:
        # already converted: return unchanged
        if "PM4PY_TYPE" in log.attributes and log.attributes[
                "PM4PY_TYPE"] == "interval":
            return log
        # heuristic: if the first event already carries a start timestamp,
        # the log is assumed to be interval already
        if log[0] is not None and len(log[0]) > 0:
            first_event = log[0][0]
            if start_timestamp_key in first_event:
                return log

        new_log = EventLog()
        new_log.attributes["PM4PY_TYPE"] = "interval"

        for trace in log:
            new_trace = Trace()
            for attr in trace.attributes:
                new_trace.attributes[attr] = trace.attributes[attr]
            # pending "start" events per activity, consumed FIFO when the
            # matching "complete" event arrives
            activities_start = {}
            for event in trace:
                activity = event[activity_key]
                # events without a transition attribute count as "complete"
                transition = event[
                    transition_key] if transition_key in event else "complete"
                timestamp = event[timestamp_key]
                if transition.lower() == "start":
                    if activity not in activities_start:
                        activities_start[activity] = list()
                    activities_start[activity].append(event)
                elif transition.lower() == "complete":
                    start_event = None
                    # with no matching start event, the interval is
                    # zero-length (start timestamp = complete timestamp)
                    start_timestamp = event[timestamp_key]
                    if activity in activities_start and len(
                            activities_start[activity]) > 0:
                        start_event = activities_start[activity].pop(0)
                        start_timestamp = start_event[timestamp_key]
                    new_event = Event()
                    # copy the complete event's attributes, except timestamp
                    # and transition which are re-assigned below
                    for attr in event:
                        if not attr == timestamp_key and not attr == transition_key:
                            new_event[attr] = event[attr]
                    if start_event is not None:
                        # carry the start event's attributes under a prefix
                        for attr in start_event:
                            if not attr == timestamp_key and not attr == transition_key:
                                new_event["@@startevent_" +
                                          attr] = start_event[attr]
                    new_event[start_timestamp_key] = start_timestamp
                    new_event[timestamp_key] = timestamp
                    new_event["@@duration"] = (
                        timestamp - start_timestamp).total_seconds()

                    if business_hours:
                        # approximate duration restricted to business hours
                        bh = BusinessHours(
                            start_timestamp.replace(tzinfo=None),
                            timestamp.replace(tzinfo=None),
                            worktiming=worktiming,
                            weekends=weekends)
                        new_event["@@approx_bh_duration"] = bh.getseconds()

                    new_trace.append(new_event)
            new_trace = sorting.sort_timestamp_trace(new_trace,
                                                     start_timestamp_key)
            new_log.append(new_trace)
        return new_log

    # empty or None log: returned as-is
    return log
Esempio n. 9
0
def assign_lead_cycle_time(log, parameters=None):
    """
    Assigns the lead and cycle time to an interval log

    As implemented below, cycle time accumulates only business-hours time in
    which some activity was being worked on, while lead time additionally
    includes the unworked (waiting) gaps between activities.

    Parameters
    -------------
    log
        Interval log
    parameters
        Parameters of the algorithm, including: start_timestamp_key, timestamp_key, worktiming, weekends
    """
    if parameters is None:
        parameters = {}

    # keys used to read events; fall back to the XES defaults when not given
    start_timestamp_key = parameters[
        constants.
        PARAMETER_CONSTANT_START_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY in parameters else xes.DEFAULT_START_TIMESTAMP_KEY
    timestamp_key = parameters[
        constants.
        PARAMETER_CONSTANT_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in parameters else xes.DEFAULT_TIMESTAMP_KEY
    # default work shift 07:00-17:00; weekend days Saturday (6) and Sunday (7)
    worktiming = parameters["worktiming"] if "worktiming" in parameters else [
        7, 17
    ]
    weekends = parameters["weekends"] if "weekends" in parameters else [6, 7]

    # ensure the log is in interval format before enriching it
    interval_log = to_interval(log, parameters=parameters)

    for trace in interval_log:
        approx_partial_lead_time = 0
        approx_partial_cycle_time = 0
        approx_wasted_time = 0
        # max_et / max_et_seconds track the latest completion seen so far
        max_et = None
        max_et_seconds = 0
        for i in range(len(trace)):
            this_wasted_time = 0
            st = trace[i][start_timestamp_key]
            st_seconds = st.timestamp()
            et = trace[i][timestamp_key]
            et_seconds = et.timestamp()

            if max_et_seconds > 0 and st_seconds > max_et_seconds:
                # gap between the previous completion and this start:
                # unworked business time, counted for lead time only
                bh_unworked = BusinessHours(max_et.replace(tzinfo=None),
                                            st.replace(tzinfo=None),
                                            worktiming=worktiming,
                                            weekends=weekends)
                unworked_sec = bh_unworked.getseconds()
                approx_partial_lead_time = approx_partial_lead_time + unworked_sec
                approx_wasted_time = approx_wasted_time + unworked_sec
                this_wasted_time = unworked_sec

            if st_seconds > max_et_seconds:
                # the event starts after all previously seen work: its whole
                # business-hours duration counts for both cycle and lead time
                bh = BusinessHours(st.replace(tzinfo=None),
                                   et.replace(tzinfo=None),
                                   worktiming=worktiming,
                                   weekends=weekends)
                approx_bh_duration = bh.getseconds()

                approx_partial_cycle_time = approx_partial_cycle_time + approx_bh_duration
                approx_partial_lead_time = approx_partial_lead_time + approx_bh_duration
            elif st_seconds < max_et_seconds and et_seconds > max_et_seconds:
                # the event overlaps previously seen work: only the
                # non-overlapping tail (after max_et) is counted
                bh = BusinessHours(max_et.replace(tzinfo=None),
                                   et.replace(tzinfo=None),
                                   worktiming=worktiming,
                                   weekends=weekends)
                approx_bh_duration = bh.getseconds()

                approx_partial_cycle_time = approx_partial_cycle_time + approx_bh_duration
                approx_partial_lead_time = approx_partial_lead_time + approx_bh_duration

            if et_seconds > max_et_seconds:
                max_et_seconds = et_seconds
                max_et = et

            # ratio defaults to 1 when no lead time has been accumulated yet
            ratio_cycle_lead_time = 1
            if approx_partial_lead_time > 0:
                ratio_cycle_lead_time = approx_partial_cycle_time / approx_partial_lead_time

            trace[i][
                "@@approx_bh_partial_cycle_time"] = approx_partial_cycle_time
            trace[i][
                "@@approx_bh_partial_lead_time"] = approx_partial_lead_time
            trace[i]["@@approx_bh_overall_wasted_time"] = approx_wasted_time
            trace[i]["@@approx_bh_this_wasted_time"] = this_wasted_time
            # NOTE(review): single "@" here while all other keys use "@@" —
            # looks like a typo, but callers may rely on it; confirm before
            # changing
            trace[i][
                "@approx_bh_ratio_cycle_lead_time"] = ratio_cycle_lead_time

    return interval_log
Esempio n. 10
0
# Case dispersion: average time between the ends of consecutive cases
# NOTE(review): `log` is referenced here but only (re)loaded at the bottom of
# this snippet — presumably a notebook where cells ran in a different order
case_dispersion_ratio = case_arrival.get_case_dispersion_avg(log, parameters={    case_arrival.Parameters.TIMESTAMP_KEY: "time:timestamp"})
case_dispersion_ratio


# Performance Spectrum: timings of paths between the given list of activities
from pm4py.statistics.performance_spectrum import algorithm as performance_spectrum
ps = performance_spectrum.apply(log, ["register request", "decide"], parameters= {performance_spectrum.Parameters.ACTIVITY_KEY: "concept:name", performance_spectrum.Parameters.TIMESTAMP_KEY: "time:timestamp"})
ps

# Business Hours: seconds of working time between two datetimes
from pm4py.util.business_hours import BusinessHours
from datetime import datetime

st = datetime.fromtimestamp(100000000)
et = datetime.fromtimestamp(200000000)
bh_object = BusinessHours(st, et)
worked_time = bh_object.getseconds()
print(worked_time)

# Cycle Time and Waiting Time enrichment of the log
from pm4py.objects.log.util import interval_lifecycle
enriched_log = interval_lifecycle.assign_lead_cycle_time(log)
# NOTE(review): original author marked this call with "#error" — it failed
# when this snippet was written; verify against the loaded log


# load the example log used above
import os
from pm4py.objects.log.importer.xes import importer as xes_importer
log_path = os.path.join("tests","input_data","receipt.xes")
log = xes_importer.apply(log_path)
Esempio n. 11
0
def train(log, parameters=None):
    """
    Train the prediction model (elastic-net regression of the remaining time
    on a feature encoding of trace prefixes)

    Parameters
    -----------
    log
        Event log
    parameters
        Possible parameters of the algorithm

    Returns
    ------------
    model
        Trained model (dictionary carrying the regressor and the
        attribute/feature configuration needed to apply it)
    """
    if parameters is None:
        parameters = {}

    # sorting is done once below; disable it for downstream consumers
    parameters["enable_sort"] = False
    activity_key = parameters[
        constants.
        PARAMETER_CONSTANT_ACTIVITY_KEY] if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY
    timestamp_key = parameters[
        constants.
        PARAMETER_CONSTANT_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in parameters else xes.DEFAULT_TIMESTAMP_KEY
    # business-hours settings (default shift 07:00-17:00, weekend Sat/Sun)
    business_hours = parameters[
        "business_hours"] if "business_hours" in parameters else False
    worktiming = parameters["worktiming"] if "worktiming" in parameters else [
        7, 17
    ]
    weekends = parameters["weekends"] if "weekends" in parameters else [6, 7]

    # optionally precomputed target values (remaining time per prefix)
    y_orig = parameters["y_orig"] if "y_orig" in parameters else None

    log = sorting.sort_timestamp(log, timestamp_key)

    str_evsucc_attr = [activity_key]
    if "str_ev_attr" in parameters:
        # the caller explicitly selected which attributes to encode
        str_tr_attr = parameters[
            "str_tr_attr"] if "str_tr_attr" in parameters else []
        str_ev_attr = parameters[
            "str_ev_attr"] if "str_ev_attr" in parameters else []
        num_tr_attr = parameters[
            "num_tr_attr"] if "num_tr_attr" in parameters else []
        num_ev_attr = parameters[
            "num_ev_attr"] if "num_ev_attr" in parameters else []
    else:
        # automatic attribute selection from the log
        str_tr_attr, str_ev_attr, num_tr_attr, num_ev_attr = attributes_filter.select_attributes_from_log_for_tree(
            log)
        if activity_key not in str_ev_attr:
            str_ev_attr.append(activity_key)

    max_trace_length = max(len(x) for x in log)

    if max_trace_length == 1:
        # single-event traces: no prefixes needed, encode the log directly
        data, feature_names = get_log_representation.get_representation(
            log,
            str_tr_attr,
            str_ev_attr,
            num_tr_attr,
            num_ev_attr,
            str_evsucc_attr=str_evsucc_attr)
        ext_log = log
    else:
        # extend the log with every prefix of each trace, so the model can
        # predict remaining time at any point of a running case
        ext_log, change_indexes = get_log_with_log_prefixes(log)
        data, feature_names = get_log_representation.get_representation(
            ext_log,
            str_tr_attr,
            str_ev_attr,
            num_tr_attr,
            num_ev_attr,
            str_evsucc_attr=str_evsucc_attr)

    if y_orig is not None:
        # flatten the provided per-trace target lists into one vector
        remaining_time = [y for x in y_orig for y in x]
    else:
        if business_hours:
            # target: business-hours duration from first to last event
            remaining_time = []
            for trace in ext_log:
                if trace:
                    timestamp_et = trace[-1][timestamp_key]
                    timestamp_st = trace[0][timestamp_key]

                    bh = BusinessHours(timestamp_st.replace(tzinfo=None),
                                       timestamp_et.replace(tzinfo=None),
                                       worktiming=worktiming,
                                       weekends=weekends)
                    remaining_time.append(bh.getseconds())
                else:
                    remaining_time.append(0)
        else:
            # target: wall-clock duration from first to last event
            remaining_time = []
            for trace in ext_log:
                if trace:
                    remaining_time.append(
                        (trace[-1][timestamp_key] -
                         trace[0][timestamp_key]).total_seconds())
                else:
                    remaining_time.append(0)
    regr = ElasticNet(max_iter=10000, l1_ratio=0.7)
    # NOTE(review): debug print left in — consider removing/logging instead
    print(data)
    regr.fit(data, remaining_time)

    return {
        "str_tr_attr": str_tr_attr,
        "str_ev_attr": str_ev_attr,
        "num_tr_attr": num_tr_attr,
        "num_ev_attr": num_ev_attr,
        "str_evsucc_attr": str_evsucc_attr,
        "feature_names": feature_names,
        "remaining_time": remaining_time,
        "regr": regr,
        "variant": "elasticnet"
    }
Esempio n. 12
0
def to_interval(log, parameters=None):
    """
    Converts a log to interval format (e.g. an event has two timestamps)
    from lifecycle format (an event has only a timestamp, and a transition lifecycle)

    Parameters
    -------------
    log
        Log (expressed in the lifecycle format)
    parameters
        Possible parameters of the method (activity, timestamp key, start timestamp key, transition ...)

    Returns
    -------------
    log
        Interval event log
    """
    if parameters is None:
        parameters = {}

    # keys used to read events; fall back to the XES defaults when not given
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY,
                                               parameters,
                                               xes.DEFAULT_TIMESTAMP_KEY)
    start_timestamp_key = exec_utils.get_param_value(
        Parameters.START_TIMESTAMP_KEY, parameters,
        xes.DEFAULT_START_TIMESTAMP_KEY)
    transition_key = exec_utils.get_param_value(Parameters.TRANSITION_KEY,
                                                parameters,
                                                xes.DEFAULT_TRANSITION_KEY)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters, xes.DEFAULT_NAME_KEY)
    lifecycle_instance_key = exec_utils.get_param_value(
        Parameters.LIFECYCLE_INSTANCE_KEY, parameters,
        xes.DEFAULT_INSTANCE_KEY)
    # business-hours enrichment settings (default shift 07:00-17:00,
    # weekend on Saturday/Sunday)
    business_hours = exec_utils.get_param_value(Parameters.BUSINESS_HOURS,
                                                parameters, False)
    worktiming = exec_utils.get_param_value(Parameters.WORKTIMING, parameters,
                                            [7, 17])
    weekends = exec_utils.get_param_value(Parameters.WEEKENDS, parameters,
                                          [6, 7])

    if log is not None and len(log) > 0:
        # already converted: return unchanged
        if "PM4PY_TYPE" in log.attributes and log.attributes[
                "PM4PY_TYPE"] == "interval":
            return log
        # heuristic: if the first event already carries a start timestamp,
        # the log is assumed to be interval already
        if log[0] is not None and len(log[0]) > 0:
            first_event = log[0][0]
            if start_timestamp_key in first_event:
                return log

        # preserve the original log's metadata on the converted log
        new_log = EventLog(attributes=copy(log.attributes),
                           extensions=copy(log.extensions),
                           classifiers=copy(log.classifiers),
                           omni_present=copy(log.omni_present),
                           properties=copy(log.properties))
        new_log.attributes["PM4PY_TYPE"] = "interval"
        new_log.properties[
            constants.
            PARAMETER_CONSTANT_START_TIMESTAMP_KEY] = xes.DEFAULT_START_TIMESTAMP_KEY

        for trace in log:
            new_trace = Trace()
            for attr in trace.attributes:
                new_trace.attributes[attr] = trace.attributes[attr]
            # pending "start" events per (activity, instance) pair, consumed
            # FIFO when the matching "complete" event arrives
            activities_start = {}
            for event in trace:
                activity = event[activity_key]
                # the lifecycle instance disambiguates concurrent executions
                # of the same activity
                instance = event[
                    lifecycle_instance_key] if lifecycle_instance_key in event else None
                activity = (activity, instance)
                # events without a transition attribute count as "complete"
                transition = event[
                    transition_key] if transition_key in event else "complete"
                timestamp = event[timestamp_key]
                if transition.lower() == "start":
                    if activity not in activities_start:
                        activities_start[activity] = list()
                    activities_start[activity].append(event)
                elif transition.lower() == "complete":
                    start_event = None
                    # with no matching start event, the interval is
                    # zero-length (start timestamp = complete timestamp)
                    start_timestamp = event[timestamp_key]
                    if activity in activities_start and len(
                            activities_start[activity]) > 0:
                        start_event = activities_start[activity].pop(0)
                        start_timestamp = start_event[timestamp_key]
                    new_event = Event()
                    # copy the complete event's attributes, except timestamp
                    # and transition which are re-assigned below
                    for attr in event:
                        if not attr == timestamp_key and not attr == transition_key:
                            new_event[attr] = event[attr]
                    if start_event is not None:
                        # carry the start event's attributes under a prefix
                        for attr in start_event:
                            if not attr == timestamp_key and not attr == transition_key:
                                new_event["@@startevent_" +
                                          attr] = start_event[attr]
                    new_event[start_timestamp_key] = start_timestamp
                    new_event[timestamp_key] = timestamp
                    new_event["@@duration"] = (
                        timestamp - start_timestamp).total_seconds()

                    if business_hours:
                        # approximate duration restricted to business hours
                        bh = BusinessHours(
                            start_timestamp.replace(tzinfo=None),
                            timestamp.replace(tzinfo=None),
                            worktiming=worktiming,
                            weekends=weekends)
                        new_event["@@approx_bh_duration"] = bh.getseconds()

                    new_trace.append(new_event)
            new_trace = sorting.sort_timestamp_trace(new_trace,
                                                     start_timestamp_key)
            new_log.append(new_trace)
        return new_log

    # empty or None log: returned as-is
    return log
Esempio n. 13
0
def assign_lead_cycle_time(log, parameters=None):
    """
    Assigns the lead and cycle time to an interval log

    For each event of each trace, the following attributes are written
    (time values are business-hours seconds computed via BusinessHours):
    - @@approx_bh_partial_cycle_time: cumulative worked time up to and
      including this event;
    - @@approx_bh_partial_lead_time: cumulative worked time plus the
      unworked gaps between intervals up to this event;
    - @@approx_bh_overall_wasted_time: cumulative unworked (gap) time;
    - @@approx_bh_this_wasted_time: unworked time introduced by this event;
    - @approx_bh_ratio_cycle_lead_time: partial cycle time / partial lead
      time (1 while the lead time is still 0).
      NOTE(review): this last key uses a single '@' while every other key
      uses the '@@' prefix; looks inconsistent, but renaming it would break
      consumers of the attribute -- confirm before changing.

    Parameters
    -------------
    log
        Interval log
    parameters
        Parameters of the algorithm, including: start_timestamp_key, timestamp_key, worktiming, weekends

    Returns
    -------------
    interval_log
        Interval log enriched with the attributes listed above
    """
    if parameters is None:
        parameters = {}

    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY,
                                               parameters,
                                               xes.DEFAULT_TIMESTAMP_KEY)
    start_timestamp_key = exec_utils.get_param_value(
        Parameters.START_TIMESTAMP_KEY, parameters,
        xes.DEFAULT_START_TIMESTAMP_KEY)
    worktiming = exec_utils.get_param_value(Parameters.WORKTIMING, parameters,
                                            [7, 17])
    weekends = exec_utils.get_param_value(Parameters.WEEKENDS, parameters,
                                          [6, 7])

    # Convert the log to interval format (start + complete timestamp on the
    # same event) before measuring durations.
    interval_log = to_interval(log, parameters=parameters)

    for trace in interval_log:
        approx_partial_lead_time = 0
        approx_partial_cycle_time = 0
        approx_wasted_time = 0
        # max_et / max_et_seconds track the latest completion timestamp seen
        # so far in the trace (0 / None until the first event is processed).
        max_et = None
        max_et_seconds = 0
        for i in range(len(trace)):
            this_wasted_time = 0
            st = trace[i][start_timestamp_key]
            st_seconds = st.timestamp()
            et = trace[i][timestamp_key]
            et_seconds = et.timestamp()

            # Gap between the end of all previously covered work and this
            # event's start: counts towards lead and wasted time, not cycle.
            if max_et_seconds > 0 and st_seconds > max_et_seconds:
                bh_unworked = BusinessHours(max_et.replace(tzinfo=None),
                                            st.replace(tzinfo=None),
                                            worktiming=worktiming,
                                            weekends=weekends)
                unworked_sec = bh_unworked.getseconds()
                approx_partial_lead_time = approx_partial_lead_time + unworked_sec
                approx_wasted_time = approx_wasted_time + unworked_sec
                this_wasted_time = unworked_sec

            if st_seconds > max_et_seconds:
                # Event lies entirely after the covered span: its whole
                # business-hours duration counts as both cycle and lead time.
                bh = BusinessHours(st.replace(tzinfo=None),
                                   et.replace(tzinfo=None),
                                   worktiming=worktiming,
                                   weekends=weekends)
                approx_bh_duration = bh.getseconds()

                approx_partial_cycle_time = approx_partial_cycle_time + approx_bh_duration
                approx_partial_lead_time = approx_partial_lead_time + approx_bh_duration
            elif st_seconds < max_et_seconds and et_seconds > max_et_seconds:
                # Event overlaps the covered span: only the part extending
                # past the previous maximum completion is added.
                bh = BusinessHours(max_et.replace(tzinfo=None),
                                   et.replace(tzinfo=None),
                                   worktiming=worktiming,
                                   weekends=weekends)
                approx_bh_duration = bh.getseconds()

                approx_partial_cycle_time = approx_partial_cycle_time + approx_bh_duration
                approx_partial_lead_time = approx_partial_lead_time + approx_bh_duration

            # Extend the covered span when this event completes later than
            # anything seen before.
            if et_seconds > max_et_seconds:
                max_et_seconds = et_seconds
                max_et = et

            ratio_cycle_lead_time = 1
            if approx_partial_lead_time > 0:
                ratio_cycle_lead_time = approx_partial_cycle_time / approx_partial_lead_time

            trace[i][
                "@@approx_bh_partial_cycle_time"] = approx_partial_cycle_time
            trace[i][
                "@@approx_bh_partial_lead_time"] = approx_partial_lead_time
            trace[i]["@@approx_bh_overall_wasted_time"] = approx_wasted_time
            trace[i]["@@approx_bh_this_wasted_time"] = this_wasted_time
            trace[i][
                "@approx_bh_ratio_cycle_lead_time"] = ratio_cycle_lead_time

    return interval_log
Esempio n. 14
0
def apply(
        log: EventLog,
        parameters: Optional[Dict[Any, Any]] = None) -> typing.TemporalProfile:
    """
    Discovers the temporal profile of an event log.

    Implements the approach described in:
    Stertz, Florian, Jürgen Mangler, and Stefanie Rinderle-Ma. "Temporal Conformance Checking at Runtime based on Time-infused Process Models." arXiv preprint arXiv:2008.07262 (2020).


    Parameters
    ----------
    log
        Event log
    parameters
        Parameters, including:
        - Parameters.ACTIVITY_KEY => the attribute to use as activity
        - Parameters.START_TIMESTAMP_KEY => the attribute to use as start timestamp
        - Parameters.TIMESTAMP_KEY => the attribute to use as timestamp
        - Parameters.BUSINESS_HOURS => calculates the difference of time based on the business hours, not the total time.
                                        Default: False
        - Parameters.WORKTIMING => work schedule of the company (provided as a list where the first number is the start
            of the work time, and the second number is the end of the work time), if business hours are enabled
                                        Default: [7, 17] (work shift from 07:00 to 17:00)
        - Parameters.WEEKENDS => indexes of the days of the week that are weekend
                                        Default: [6, 7] (weekends are Saturday and Sunday)

    Returns
    -------
    temporal_profile
        Dictionary associating each couple of activities, in order of
        eventual succession, with the (mean, standard deviation) of the
        observed time distances between them.
    """
    if parameters is None:
        parameters = {}

    log = log_converter.apply(log,
                              variant=log_converter.Variants.TO_EVENT_LOG,
                              parameters=parameters)

    business_hours = exec_utils.get_param_value(Parameters.BUSINESS_HOURS,
                                                parameters, False)
    worktiming = exec_utils.get_param_value(Parameters.WORKTIMING, parameters,
                                            [7, 17])
    weekends = exec_utils.get_param_value(Parameters.WEEKENDS, parameters,
                                          [6, 7])
    workcalendar = exec_utils.get_param_value(
        Parameters.WORKCALENDAR, parameters,
        constants.DEFAULT_BUSINESS_HOURS_WORKCALENDAR)

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    start_timestamp_key = exec_utils.get_param_value(
        Parameters.START_TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)

    # Collected time distances per couple of activities.
    observations = {}

    for trace in log:
        n_events = len(trace)
        for i in range(n_events - 1):
            source_act = trace[i][activity_key]
            source_completion = trace[i][timestamp_key]
            for j in range(i + 1, n_events):
                target_start = trace[j][start_timestamp_key]
                # Only eventually-following couples count: the target must
                # not start before the source completes.
                if target_start.timestamp() < source_completion.timestamp():
                    continue
                couple = (source_act, trace[j][activity_key])
                if business_hours:
                    bh = BusinessHours(
                        source_completion.replace(tzinfo=None),
                        target_start.replace(tzinfo=None),
                        worktiming=worktiming,
                        weekends=weekends,
                        workcalendar=workcalendar)
                    diff = bh.getseconds()
                else:
                    diff = target_start.timestamp() - source_completion.timestamp()
                observations.setdefault(couple, []).append(diff)

    temporal_profile = {}
    for couple, samples in observations.items():
        if len(samples) > 1:
            temporal_profile[couple] = (mean(samples), stdev(samples))
        else:
            # A single observation: the standard deviation cannot be
            # estimated, so 0 is recorded.
            temporal_profile[couple] = (samples[0], 0)

    return temporal_profile
Esempio n. 15
0
def apply(
    log: EventLog,
    parameters: Optional[Dict[Union[str, Parameters], Any]] = None
) -> Dict[str, float]:
    """
    Gets the sojourn time per activity on an event log object

    Parameters
    --------------
    log
        Event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY => activity key
        - Parameters.START_TIMESTAMP_KEY => start timestamp key
        - Parameters.TIMESTAMP_KEY => timestamp key
        - Parameters.BUSINESS_HOURS => calculates the difference of time based on the business hours, not the total time.
                                        Default: False
        - Parameters.WORKTIMING => work schedule of the company (provided as a list where the first number is the start
            of the work time, and the second number is the end of the work time), if business hours are enabled
                                        Default: [7, 17] (work shift from 07:00 to 17:00)
        - Parameters.WEEKENDS => indexes of the days of the week that are weekend
                                        Default: [6, 7] (weekends are Saturday and Sunday)
        - Parameters.AGGREGATION_MEASURE => performance aggregation measure (sum, min, max, mean, median)

    Returns
    --------------
    soj_time_dict
        Dictionary associating each activity with the aggregated sojourn
        time of its events (difference between completion and start).
    """
    if parameters is None:
        parameters = {}

    business_hours = exec_utils.get_param_value(Parameters.BUSINESS_HOURS,
                                                parameters, False)
    worktiming = exec_utils.get_param_value(Parameters.WORKTIMING, parameters,
                                            [7, 17])
    weekends = exec_utils.get_param_value(Parameters.WEEKENDS, parameters,
                                          [6, 7])
    workcalendar = exec_utils.get_param_value(
        Parameters.WORKCALENDAR, parameters,
        constants.DEFAULT_BUSINESS_HOURS_WORKCALENDAR)

    log = log_converter.apply(log,
                              variant=log_converter.Variants.TO_EVENT_LOG,
                              parameters=parameters)

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    start_timestamp_key = exec_utils.get_param_value(
        Parameters.START_TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    aggregation_measure = exec_utils.get_param_value(
        Parameters.AGGREGATION_MEASURE, parameters, "mean")

    # Collect, per activity, the duration of each of its events.
    durations_dict = {}
    for trace in log:
        for event in trace:
            samples = durations_dict.setdefault(event[activity_key], [])
            if business_hours:
                bh = BusinessHours(
                    event[start_timestamp_key].replace(tzinfo=None),
                    event[timestamp_key].replace(tzinfo=None),
                    worktiming=worktiming,
                    weekends=weekends,
                    workcalendar=workcalendar)
                samples.append(bh.getseconds())
            else:
                samples.append(event[timestamp_key].timestamp() -
                               event[start_timestamp_key].timestamp())

    # Dispatch the aggregation by name; any unrecognized measure falls back
    # to the mean (same contract as before).
    aggregators = {"median": median, "min": min, "max": max, "sum": sum}
    aggregate = aggregators.get(aggregation_measure, mean)
    for activity in durations_dict:
        durations_dict[activity] = aggregate(durations_dict[activity])

    return durations_dict
def get_class_representation_by_trace_duration(log,
                                               target_trace_duration,
                                               timestamp_key="time:timestamp",
                                               parameters=None):
    """
    Get class representation by splitting traces according to trace duration

    Each trace is labelled "GREATER" when its duration (last event timestamp
    minus first event timestamp, optionally in business hours) exceeds
    target_trace_duration, and "LESSEQUAL" otherwise (including traces where
    the duration cannot be computed).

    Parameters
    ------------
    log
        Trace log
    target_trace_duration
        Target trace duration
    timestamp_key
        Timestamp key

    Returns
    ------------
    target
        Target part for decision tree learning (numpy array of class indexes)
    classes
        Name of the classes, in order of first appearance
    """
    if parameters is None:
        parameters = {}

    business_hours = parameters.get("business_hours", False)
    worktiming = parameters.get("worktiming", [7, 17])
    weekends = parameters.get("weekends", [6, 7])

    class_to_index = {}
    classes = []
    target = []

    for trace in log:
        label = "LESSEQUAL"
        if trace and timestamp_key in trace[0] and timestamp_key in trace[-1]:
            start_ts = trace[0][timestamp_key]
            end_ts = trace[-1][timestamp_key]
            if business_hours:
                bh = BusinessHours(start_ts.replace(tzinfo=None),
                                   end_ts.replace(tzinfo=None),
                                   worktiming=worktiming,
                                   weekends=weekends)
                diff = bh.getseconds()
            else:
                diff = (end_ts - start_ts).total_seconds()
            if diff > target_trace_duration:
                label = "GREATER"
        label = str(label)
        # Register the class the first time it is seen, keeping insertion order.
        if label not in class_to_index:
            class_to_index[label] = len(classes)
            classes.append(label)
        target.append(class_to_index[label])

    return np.array(target), classes
Esempio n. 17
0
def statics(request):
    """
    Django view rendering the 'statics' page: converts the configured event
    log, computes log-level statistics and KDE graphs (saved as PNG images
    under DES1/static/), and passes everything to the 'statics.html' template.

    NOTE(review): Duration / Deviation / Waitingtime and the log* identifiers
    are module-level globals (the direct computations are commented out
    below); confirm they are initialized before this view is hit.
    """
    logadr = ADRESS
    log = infra.recieve_and_convert_log.convert_log(logadr, logname, logtime,
                                                    logtran, logstart,
                                                    logcompl, logreso, logid,
                                                    inputname[-3:])
    ptree = infra.recieve_and_convert_log.get_processtree(log)
    #duration = infra.recieve_and_convert_log.get_duration(log)
    duration = Duration
    #deviation = infra.recieve_and_convert_log.get_deviation(duration,log)
    deviation = Deviation
    #waitingtime = infra.recieve_and_convert_log.waitingtime(log)
    waitingtime = Waitingtime
    #frequency = infra.recieve_and_convert_log.get_waitinhour(log,Waitingtime,'n',Watichange)
    #frequency = Frequency

    # Business-hours seconds between two fixed instants, shown on the page.
    st = datetime.fromtimestamp(100000000)
    et = datetime.fromtimestamp(200000000)
    bh_object = BusinessHours(st, et)
    worked_time = bh_object.getseconds()

    #log_path = os.path.join("tests","input_data","receipt.xes")
    initialtrace = infra.recieve_and_convert_log.initialtrace(log)
    x, y = case_statistics.get_kde_caseduration(
        log,
        parameters={
            constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: "time:timestamp"
        })
    gviz1 = graphs_visualizer.apply_plot(
        x, y, variant=graphs_visualizer.Variants.CASES)
    gviz2 = graphs_visualizer.apply_semilogx(
        x, y, variant=graphs_visualizer.Variants.CASES)
    graphs_visualizer.save(gviz1, "DES1/static/image1.gv.png")
    graphs_visualizer.save(gviz2, "DES1/static/image2.gv.png")

    x, y = attributes_filter.get_kde_date_attribute(log,
                                                    attribute="time:timestamp")
    gviz3 = graphs_visualizer.apply_plot(
        x, y, variant=graphs_visualizer.Variants.DATES)
    graphs_visualizer.save(gviz3, "DES1/static/image3.gv.png")
    '''
    x, y = attributes_filter.get_kde_numeric_attribute(log, "amount")
    gviz4 = graphs_visualizer.apply_plot(x, y, variant=graphs_visualizer.Variants.ATTRIBUTES)
    gviz5 = graphs_visualizer.apply_semilogx(x, y, variant=graphs_visualizer.Variants.ATTRIBUTES)
    graphs_visualizer.save(gviz4,"./static/image4.gv.png")
    graphs_visualizer.save(gviz5,"./static/image5.gv.png")
    '''
    # Compute the log statistics ONCE: the previous version re-ran
    # infra.recieve_and_convert_log.statics(log) seven times (once per
    # field), re-scanning the whole log each time.
    log_stats = infra.recieve_and_convert_log.statics(log)
    numtrace = log_stats[0]
    numactivity = log_stats[1]
    activitylist = log_stats[2]
    timeinterval = log_stats[3]
    meanthoughputtime = log_stats[4][0]
    deviationthoughputtime = log_stats[4][1]
    arrivalratio = log_stats[5]
    dispersionratio = log_stats[6]
    resourcedict = infra.recieve_and_convert_log.initialresource1(log)
    initialcapacity = infra.recieve_and_convert_log.computecapacity(log)
    initiallimit = infra.recieve_and_convert_log.initiallimit(log)[0]
    initialcaplim = []
    for i in range(len(initialcapacity)):
        initialcaplim.append(
            (initialcapacity[i][0], initialcapacity[i][1], initiallimit[i][1]))
    #print(intialcapacity,"line 205")
    Actresource = roles_discovery.apply(
        log, variant=None, parameters={rpd.Parameters.RESOURCE_KEY: logreso})
    list0 = []
    infra.recieve_and_convert_log.notdoact(ptree, list0)
    handover = infra.recieve_and_convert_log.getactivityresourcecount(
        log, list0, logname, logreso)[1]

    # Round durations/deviations to two decimals for display.
    for i, x in enumerate(duration):
        duration[i] = (x[0], round(x[1], 2), round(deviation[i][1], 2))
    for i, x in enumerate(deviation):
        deviation[i] = (x[0], round(x[1], 2))
    context = {'log':log,'ptree':ptree,'duration':duration,'deviation':deviation,\
    'worked_time':worked_time,'numtrace':numtrace,'numactivity':numactivity,'activitylist':activitylist,\
    'timeinterval':timeinterval,'meanthoughputtime':meanthoughputtime,\
    'deviationthoughputtime':deviationthoughputtime,'arrivalratio':arrivalratio,\
    'dispersionratio':dispersionratio,'resourcedict':Actresource,'handover':handover,"initialcaplim":initialcaplim,'initialtrace':initialtrace}
    return render(request, 'statics.html', context)
Esempio n. 18
0
def performance_calculation_ocel_aggregation(ocel: OCEL, aggregation: Dict[str, Dict[Tuple[str, str], Set[Any]]],
                                             parameters: Optional[Dict[Any, Any]] = None) -> Dict[
    str, Dict[Tuple[str, str], List[float]]]:
    """
    Calculates the performance based on one of the following aggregations:
    - aggregate_ev_couples
    - aggregate_total_objects

    Parameters
    ----------------
    ocel
        Object-centric event log
    aggregation
        Aggregation calculated using one of the aforementioned methods
    parameters
        Parameters of the algorithm, including:
        - Parameters.EVENT_ID => the event identifier
        - Parameters.EVENT_TIMESTAMP => the timestamp
        - Parameters.BUSINESS_HOURS => enables/disables the business hours
        - Parameters.WORKTIMING => the work timing (default: [7, 17])
        - Parameters.WEEKENDS => the weekends (default: [6, 7])

    Returns
    ----------------
    edges_performance
        For each object type, associate a dictionary where to each activity
        couple all the (sorted) times between the activities are recorded.
    """
    if parameters is None:
        parameters = {}

    event_id = exec_utils.get_param_value(Parameters.EVENT_ID, parameters, ocel.event_id_column)
    timestamp_key = exec_utils.get_param_value(Parameters.EVENT_TIMESTAMP, parameters, ocel.event_timestamp)
    # One timestamp per event identifier (the first recorded one is kept).
    timestamps = ocel.events.groupby(event_id)[timestamp_key].apply(list).to_dict()
    timestamps = {ev: ts_list[0] for ev, ts_list in timestamps.items()}

    business_hours = exec_utils.get_param_value(Parameters.BUSINESS_HOURS, parameters, False)
    worktiming = exec_utils.get_param_value(Parameters.WORKTIMING, parameters, [7, 17])
    weekends = exec_utils.get_param_value(Parameters.WEEKENDS, parameters, [6, 7])
    workcalendar = exec_utils.get_param_value(Parameters.WORKCALENDAR, parameters, constants.DEFAULT_BUSINESS_HOURS_WORKCALENDAR)

    ret = {}
    for object_type, couples in aggregation.items():
        per_couple = {}
        for act_couple, elements in couples.items():
            values = []
            for el in elements:
                # el[0] / el[1] are the identifiers of the source and target events.
                if business_hours:
                    bh = BusinessHours(timestamps[el[0]],
                                       timestamps[el[1]],
                                       worktiming=worktiming,
                                       weekends=weekends,
                                       workcalendar=workcalendar)
                    values.append(bh.getseconds())
                else:
                    values.append(timestamps[el[1]].timestamp() - timestamps[el[0]].timestamp())
            per_couple[act_couple] = sorted(values)
        ret[object_type] = per_couple

    return ret
Esempio n. 19
0
def _event_pair_performance(trace, perf_couple, timestamp_key, business_hours,
                            worktiming, weekends):
    """
    Returns the seconds passed between the two events of a performance couple.

    perf_couple holds two event indexes inside the trace; the difference is
    taken as trace[perf_couple[0]] - trace[perf_couple[1]] (in business hours
    when business_hours is enabled). Returns 0.0 when either event lacks the
    timestamp attribute.
    """
    if timestamp_key in trace[perf_couple[0]] and timestamp_key in trace[
            perf_couple[1]]:
        if business_hours:
            bh = BusinessHours(
                trace[perf_couple[1]][timestamp_key].replace(tzinfo=None),
                trace[perf_couple[0]][timestamp_key].replace(tzinfo=None),
                worktiming=worktiming,
                weekends=weekends)
            return bh.getseconds()
        return (trace[perf_couple[0]][timestamp_key] -
                trace[perf_couple[1]][timestamp_key]).total_seconds()
    return 0.0


def single_element_statistics(log,
                              net,
                              initial_marking,
                              aligned_traces,
                              variants_idx,
                              activity_key="concept:name",
                              timestamp_key="time:timestamp",
                              ht_perf_method="last",
                              parameters=None):
    """
    Get single Petrinet element statistics

    Parameters
    ------------
    log
        Log
    net
        Petri net
    initial_marking
        Initial marking
    aligned_traces
        Result of the token-based replay
    variants_idx
        Variants along with indexes of belonging traces
    activity_key
        Activity key (must be specified if different from concept:name)
    timestamp_key
        Timestamp key (must be specified if different from time:timestamp)
    ht_perf_method
        Method to use in order to annotate hidden transitions (performance value could be put on the last possible
        point (last) or in the first possible point (first)
    parameters
        Possible parameters of the algorithm (business_hours, worktiming, weekends)

    Returns
    ------------
    statistics
        Petri net element statistics (frequency, unaggregated performance)
    """
    if parameters is None:
        parameters = {}

    business_hours = parameters[
        "business_hours"] if "business_hours" in parameters else False
    worktiming = parameters["worktiming"] if "worktiming" in parameters else [
        7, 17
    ]
    weekends = parameters["weekends"] if "weekends" in parameters else [6, 7]

    statistics = {}

    for variant in variants_idx:
        # The annotation structure only depends on the variant, so it is
        # computed once on the variant's first trace and the counts are
        # multiplied by the number of traces belonging to the variant.
        first_trace = log[variants_idx[variant][0]]
        act_trans = aligned_traces[variants_idx[variant]
                                   [0]]["activated_transitions"]
        annotations_places_trans, annotations_arcs = calculate_annotation_for_trace(
            first_trace,
            net,
            initial_marking,
            act_trans,
            activity_key,
            ht_perf_method=ht_perf_method)

        for el in annotations_places_trans:
            if el not in statistics:
                statistics[el] = {
                    "count": 0,
                    "performance": [],
                    "log_idx": [],
                    "no_of_times_enabled": 0,
                    "no_of_times_activated": 0
                }
            statistics[el]["count"] += annotations_places_trans[el][
                "count"] * len(variants_idx[variant])
            if "no_of_times_enabled" in annotations_places_trans[el]:
                statistics[el][
                    "no_of_times_enabled"] += annotations_places_trans[el][
                        "no_of_times_enabled"] * len(variants_idx[variant])
                statistics[el][
                    "no_of_times_activated"] += annotations_places_trans[el][
                        "no_of_times_activated"] * len(variants_idx[variant])

            if "performance" in annotations_places_trans[el]:
                # Performance is measured on every trace of the variant.
                for trace_idx in variants_idx[variant]:
                    trace = log[trace_idx]
                    for perf_couple in annotations_places_trans[el][
                            "performance"]:
                        perf = _event_pair_performance(
                            trace, perf_couple, timestamp_key, business_hours,
                            worktiming, weekends)
                        statistics[el]["performance"].append(perf)
                        statistics[el]["log_idx"].append(trace_idx)
        for el in annotations_arcs:
            if el not in statistics:
                statistics[el] = {"count": 0, "performance": []}
            statistics[el]["count"] += annotations_arcs[el]["count"] * len(
                variants_idx[variant])
            for trace_idx in variants_idx[variant]:
                trace = log[trace_idx]
                for perf_couple in annotations_arcs[el]["performance"]:
                    statistics[el]["performance"].append(
                        _event_pair_performance(trace, perf_couple,
                                                timestamp_key, business_hours,
                                                worktiming, weekends))

    return statistics
Esempio n. 20
0
def apply(
    log: EventLog,
    temporal_profile: typing.TemporalProfile,
    parameters: Optional[Dict[Any, Any]] = None
) -> typing.TemporalProfileConformanceResults:
    """
    Checks the conformance of the log using the provided temporal profile.

    Implements the approach described in:
    Stertz, Florian, Jürgen Mangler, and Stefanie Rinderle-Ma. "Temporal Conformance Checking at Runtime based on Time-infused Process Models." arXiv preprint arXiv:2008.07262 (2020).


    Parameters
    ---------------
    log
        Event log
    temporal_profile
        Temporal profile
    parameters
        Parameters of the algorithm, including:
         - Parameters.ACTIVITY_KEY => the attribute to use as activity
         - Parameters.START_TIMESTAMP_KEY => the attribute to use as start timestamp
         - Parameters.TIMESTAMP_KEY => the attribute to use as timestamp
         - Parameters.ZETA => multiplier for the standard deviation
         - Parameters.BUSINESS_HOURS => calculates the difference of time based on the business hours, not the total time.
                                        Default: False
         - Parameters.WORKTIMING => work schedule of the company (provided as a list where the first number is the start
            of the work time, and the second number is the end of the work time), if business hours are enabled
                                        Default: [7, 17] (work shift from 07:00 to 17:00)
         - Parameters.WEEKENDS => indexes of the days of the week that are weekend
                                        Default: [6, 7] (weekends are Saturday and Sunday)

    Returns
    ---------------
    list_dev
        A list containing, for each trace, all the deviations.
        Each deviation is a tuple with four elements:
        - 1) The source activity of the recorded deviation
        - 2) The target activity of the recorded deviation
        - 3) The time passed between the occurrence of the source activity and the target activity
        - 4) The value of (time passed - mean)/std for this occurrence (zeta).
    """
    if parameters is None:
        parameters = {}

    log = log_converter.apply(log,
                              variant=log_converter.Variants.TO_EVENT_LOG,
                              parameters=parameters)

    business_hours = exec_utils.get_param_value(Parameters.BUSINESS_HOURS,
                                                parameters, False)
    worktiming = exec_utils.get_param_value(Parameters.WORKTIMING, parameters,
                                            [7, 17])
    weekends = exec_utils.get_param_value(Parameters.WEEKENDS, parameters,
                                          [6, 7])
    workcalendar = exec_utils.get_param_value(
        Parameters.WORKCALENDAR, parameters,
        constants.DEFAULT_BUSINESS_HOURS_WORKCALENDAR)

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(
        Parameters.TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    start_timestamp_key = exec_utils.get_param_value(
        Parameters.START_TIMESTAMP_KEY, parameters,
        xes_constants.DEFAULT_TIMESTAMP_KEY)
    zeta = exec_utils.get_param_value(Parameters.ZETA, parameters, 6.0)

    ret = []

    for trace in log:
        deviations = []
        n_events = len(trace)
        for i in range(n_events - 1):
            source_act = trace[i][activity_key]
            source_completion = trace[i][timestamp_key]
            for j in range(i + 1, n_events):
                target_start = trace[j][start_timestamp_key]
                # Only eventually-following couples are checked.
                if target_start.timestamp() < source_completion.timestamp():
                    continue
                couple = (source_act, trace[j][activity_key])
                if couple not in temporal_profile:
                    continue
                if business_hours:
                    bh = BusinessHours(
                        source_completion.replace(tzinfo=None),
                        target_start.replace(tzinfo=None),
                        worktiming=worktiming,
                        weekends=weekends,
                        workcalendar=workcalendar)
                    this_diff = bh.getseconds()
                else:
                    this_diff = target_start.timestamp() - source_completion.timestamp()
                exp_mean = temporal_profile[couple][0]
                exp_std = temporal_profile[couple][1]
                # A deviation is recorded when the observed distance falls
                # outside the [mean - zeta*std, mean + zeta*std] band.
                if this_diff < exp_mean - zeta * exp_std or this_diff > exp_mean + zeta * exp_std:
                    this_zeta = abs(
                        this_diff -
                        exp_mean) / exp_std if exp_std > 0 else sys.maxsize
                    deviations.append(
                        (couple[0], couple[1], this_diff, this_zeta))

        ret.append(deviations)

    return ret
Esempio n. 21
0
def get_cases_description(log, parameters=None):
    """
    Get a description of traces present in the log_skeleton

    Parameters
    -----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
        Parameters.CASE_ID_KEY -> Trace attribute in which the case ID is contained
        Parameters.TIMESTAMP_KEY -> Column that identifies the timestamp
        Parameters.ENABLE_SORT -> Enable sorting of traces
        Parameters.SORT_BY_INDEX ->         Sort the traces using this index:
            0 -> case ID
            1 -> start time
            2 -> end time
            3 -> difference
        Parameters.SORT_ASCENDING -> Set sort direction (boolean; it true then the sort direction is ascending, otherwise
        descending)
        Parameters.MAX_RET_CASES -> Set the maximum number of returned traces

    Returns
    -----------
    ret
        Dictionary of traces associated to their start timestamp, their end timestamp and their duration
    """

    parameters = {} if parameters is None else parameters

    # Resolve all algorithm parameters up front.
    case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, DEFAULT_TRACEID_KEY)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    enable_sort = exec_utils.get_param_value(Parameters.ENABLE_SORT, parameters, True)
    sort_by_index = exec_utils.get_param_value(Parameters.SORT_BY_INDEX, parameters, 0)
    sort_ascending = exec_utils.get_param_value(Parameters.SORT_ASCENDING, parameters, True)
    max_ret_cases = exec_utils.get_param_value(Parameters.MAX_RET_CASES, parameters, None)
    business_hours = exec_utils.get_param_value(Parameters.BUSINESS_HOURS, parameters, False)
    worktiming = exec_utils.get_param_value(Parameters.WORKTIMING, parameters, [7, 17])
    weekends = exec_utils.get_param_value(Parameters.WEEKENDS, parameters, [6, 7])

    rows = []

    for idx, current_trace in enumerate(log):
        # Skip empty traces entirely.
        if not current_trace:
            continue
        if case_id_key in current_trace.attributes:
            cid = current_trace.attributes[case_id_key]
        else:
            cid = "EMPTY" + str(idx)
        start_ts = current_trace[0][timestamp_key]
        end_ts = current_trace[-1][timestamp_key]
        if business_hours:
            # Duration measured in business seconds only (naive datetimes required).
            calc = BusinessHours(start_ts.replace(tzinfo=None), end_ts.replace(tzinfo=None),
                                 worktiming=worktiming, weekends=weekends)
            duration = calc.getseconds()
        else:
            duration = end_ts.timestamp() - start_ts.timestamp()
        rows.append([cid, start_ts.timestamp(), end_ts.timestamp(), duration])

    if enable_sort:
        rows.sort(key=lambda row: row[sort_by_index], reverse=not sort_ascending)

    if max_ret_cases is not None:
        rows = rows[:min(len(rows), max_ret_cases)]

    # Map each case ID to its start/end timestamps and duration.
    return {str(row[0]): {"startTime": row[1], "endTime": row[2], "caseDuration": row[3]} for row in rows}
Esempio n. 22
0
def get_cases_description(log, parameters=None):
    """
    Get a description of traces present in the log

    Parameters
    -----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
        case_id_key -> Trace attribute in which the case ID is contained
        timestamp_key -> Column that identifies the timestamp
        enable_sort -> Enable sorting of traces
        sort_by_index ->         Sort the traces using this index:
            0 -> case ID
            1 -> start time
            2 -> end time
            3 -> difference
        sort_ascending -> Set sort direction (boolean; it true then the sort direction is ascending, otherwise
        descending)
        max_ret_cases -> Set the maximum number of returned traces

    Returns
    -----------
    ret
        Dictionary of traces associated to their start timestamp, their end timestamp and their duration
    """

    if parameters is None:
        parameters = {}

    # Resolve parameters with their defaults (mapping .get is equivalent to
    # the explicit membership check used elsewhere in this file).
    case_id_key = parameters.get(PARAMETER_CONSTANT_CASEID_KEY, DEFAULT_TRACEID_KEY)
    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY, DEFAULT_TIMESTAMP_KEY)
    enable_sort = parameters.get("enable_sort", True)
    sort_by_index = parameters.get("sort_by_index", 0)
    sort_ascending = parameters.get("sort_ascending", True)
    max_ret_cases = parameters.get("max_ret_cases", None)
    business_hours = parameters.get("business_hours", False)
    worktiming = parameters.get("worktiming", [7, 17])
    weekends = parameters.get("weekends", [6, 7])

    records = []

    for trace_index, tr in enumerate(log):
        # Empty traces carry no timestamps and are skipped.
        if not tr:
            continue
        if case_id_key in tr.attributes:
            case_id = tr.attributes[case_id_key]
        else:
            case_id = "EMPTY" + str(trace_index)
        first_ts = tr[0][timestamp_key]
        last_ts = tr[-1][timestamp_key]
        if business_hours:
            # Business-hours duration needs naive datetimes.
            bh_calc = BusinessHours(first_ts.replace(tzinfo=None),
                                    last_ts.replace(tzinfo=None),
                                    worktiming=worktiming,
                                    weekends=weekends)
            case_duration = bh_calc.getseconds()
        else:
            case_duration = last_ts.timestamp() - first_ts.timestamp()
        records.append([case_id, first_ts.timestamp(), last_ts.timestamp(), case_duration])

    if enable_sort:
        records.sort(key=lambda rec: rec[sort_by_index], reverse=not sort_ascending)

    if max_ret_cases is not None:
        records = records[:min(len(records), max_ret_cases)]

    result = {}
    for case_id, start_time, end_time, case_duration in records:
        result[str(case_id)] = {
            "startTime": start_time,
            "endTime": end_time,
            "caseDuration": case_duration
        }

    return result