def event_trace_sampling(log, k):
    """
    Sample k unique activities and keep only the traces whose events all
    belong to the sampled activity set.

    Parameters
    ------------
    log
        Event log
    k
        Number of unique activities to sample

    Returns
    ------------
    new_log
        Event log containing only fully-covered traces
    """
    new_log = EventLog()
    unique_ev = unique_activities(log)
    # random.sample requires a sequence (set support was removed in 3.11);
    # a set also gives O(1) membership tests in the loop below
    sampled = set(random.sample(list(unique_ev), k))
    for trace in log:
        # keep the trace only if every event's activity was sampled
        if all(event['concept:name'] in sampled for event in trace):
            new_log.append(trace)
    print(f'length of sampled log is: {len(new_log)}')
    return new_log
Ejemplo n.º 2
0
def filter_on_case_performance(log, inf_perf, sup_perf, parameters=None):
    """
    Keep only the traces whose case performance lies within the given bounds.

    Parameters
    ------------
    log
        Log
    inf_perf
        Lower bound on the performance
    sup_perf
        Upper bound on the performance
    parameters
        Parameters of the algorithm

    Returns
    -----------
    filtered_log
        Filtered log
    """
    parameters = {} if parameters is None else parameters
    ts_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                        DEFAULT_TIMESTAMP_KEY)
    kept = []
    for case in log:
        if satisfy_perf(case, inf_perf, sup_perf, ts_key):
            kept.append(case)
    return EventLog(kept)
Ejemplo n.º 3
0
def filter_cases_exceeding_specified_acti_performance(log,
                                                      transition_performance,
                                                      activity, lower_bound):
    """
    Keep only the cases whose duration of the target activity exceeds the
    given lower bound.

    Parameters
    ------------
    log
        Event log
    transition_performance
        Dictionary where each transition label is associated to performance measures
    activity
        Target activity (of the filter)
    lower_bound
        Lower bound on the activity duration

    Returns
    ------------
    filtered_log
        Filtered log
    """
    kept_idxs = get_idx_exceeding_specified_acti_performance(
        log, transition_performance, activity, lower_bound)
    return EventLog([log[idx] for idx in kept_idxs])
Ejemplo n.º 4
0
def apply(log, admitted_start_activities, parameters=None):
    """
    Keep only the traces that begin with one of the admitted start activities.

    Parameters
    -----------
    log
        Event log
    admitted_start_activities
        Admitted start activities
    parameters
        Algorithm parameters

    Returns
    -----------
    filtered_log
        Filtered log
    """
    parameters = {} if parameters is None else parameters
    act_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                         parameters, DEFAULT_NAME_KEY)

    kept = []
    for case in log:
        # empty traces have no first event and are dropped
        if len(case) > 0 and case[0][act_key] in admitted_start_activities:
            kept.append(case)

    return EventLog(kept)
Ejemplo n.º 5
0
    def createEventLog(self, original_log, simplifiedlog, event_attributes,
                       life_cycle, all_life_cycle, sensitive_attributes,
                       time_accuracy):
        """
        Rebuild an event log from the original one, keeping only the cases
        present in ``simplifiedlog`` and, within each kept case, only the
        events whose simplified representation appears in that case's
        simplified trace.

        :param original_log: full input EventLog (not modified; a deep copy is)
        :param simplifiedlog: dict keyed by case id; each value contains the
            simplified "trace" that kept events must appear in
        :param event_attributes: event attributes used by create_trace
        :param life_cycle: life-cycle values considered by create_trace
        :param all_life_cycle: whether all life-cycle values are considered
        :param sensitive_attributes: sensitive attributes handled by create_trace
        :param time_accuracy: timestamp accuracy used by create_trace
        :return: a new EventLog carrying the original log's classifiers
        """
        time_prefix = 'time:timestamp'
        life_cycle_prefix = ['lifecycle:transition']
        # indexes of cases that are removed entirely afterwards
        deleteLog = []
        log = copy.deepcopy(original_log)
        for i in range(0, len(log)):
            caseId = log[i].attributes["concept:name"]
            # a case absent from the simplified log is dropped wholesale
            if caseId not in simplifiedlog.keys():
                deleteLog.append(i)
                continue
            trace = simplifiedlog[caseId]["trace"]
            del_list = []
            # simplified per-event representation of this case
            # (assumes create_trace yields one entry per event, in order —
            # TODO confirm against create_trace)
            simple_trace, sens = self.create_trace(log[i], event_attributes,
                                                   life_cycle, all_life_cycle,
                                                   life_cycle_prefix,
                                                   time_prefix,
                                                   sensitive_attributes,
                                                   time_accuracy, 0, 0)
            j = 0
            while j < len(log[i]):
                # collect events whose simplified form did not survive
                if (simple_trace[j] not in trace):
                    del_list.append(log[i][j])
                j += 1
            for x in del_list:
                log[i]._list.remove(x)

        # remove dropped cases from the highest index down so that earlier
        # indexes remain valid while removing
        for i in sorted(deleteLog, reverse=True):
            log._list.remove(log[i])

        log2 = EventLog([trace for trace in log],
                        classifiers=original_log.classifiers)

        return log2
def apply(log, admitted_end_activities, parameters=None):
    """
    Keep only the traces that finish with one of the admitted end activities.

    Parameters
    -----------
    log
        Log
    admitted_end_activities
        Admitted end activities
    parameters
        Algorithm parameters

    Returns
    -----------
    filtered_log
        Filtered log
    """
    parameters = {} if parameters is None else parameters
    attribute_key = parameters.get(PARAMETER_CONSTANT_ACTIVITY_KEY,
                                   DEFAULT_NAME_KEY)

    kept = EventLog()
    for case in log:
        # empty traces have no last event and are dropped
        if case and case[-1][attribute_key] in admitted_end_activities:
            kept.append(case)
    return kept
def variant_filter(log, num_variants=1000):
    """
    Sample a number of variants at random and keep only the traces that
    belong to the sampled variants.

    Parameters
    ------------
    log
        Event log
    num_variants
        Number of variants to sample (default 1000, matching the previous
        hard-coded value); capped at the number of available variants so
        random.sample cannot raise

    Returns
    ------------
    new_log
        Event log restricted to the sampled variants
    """
    new_log = EventLog()
    variant_list = get.get_variants(log)
    variant_list_count = case_statistics.get_variant_statistics(log)
    # cap the sample size so logs with fewer variants do not raise ValueError
    sampled = random.sample(variant_list_count,
                            min(num_variants, len(variant_list_count)))
    sampled_variants = {v['variant'] for v in sampled}
    # iterate in the statistics order to preserve the original append order
    for entry in variant_list_count:
        variant = entry['variant']
        if variant in sampled_variants:
            for trace in variant_list[variant]:
                new_log.append(trace)

    return new_log
Ejemplo n.º 8
0
def apply(log, values, parameters=None):
    """
    Keep (or drop) the traces that contain at least one event whose attribute
    value belongs to the provided values list.

    Parameters
    -----------
    log
        Trace log
    values
        Allowed attribute values
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> Attribute identifying the activity in the log
            Parameters.POSITIVE -> Indicate if matching traces should be kept/removed

    Returns
    -----------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}

    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)

    filtered_log = EventLog()
    for trace in log:
        has_match = any(attribute_key in event and event[attribute_key] in values
                        for event in trace)

        if (has_match and positive) or (not has_match and not positive):
            new_trace = trace
        else:
            # rejected traces keep their case attributes but lose all events,
            # so the length check below discards them
            new_trace = Trace()
            for attr_name, attr_value in trace.attributes.items():
                new_trace.attributes[attr_name] = attr_value

        if len(new_trace) > 0:
            filtered_log.append(new_trace)
    return filtered_log
Ejemplo n.º 9
0
def filter_log_by_attributes_threshold(log,
                                       attributes,
                                       variants,
                                       vc,
                                       threshold,
                                       attribute_key="concept:name"):
    """
    Keep only the events whose attribute occurs at least `threshold` times
    overall, or whose attribute appears in the most frequent variant.

    Parameters
    ----------
    log
        Log
    attributes
        Dictionary of attributes associated with their count
    variants
        Dictionary with variant as the key and the list of traces as the value
    vc
        List of variant names along with their count
    threshold
        Cutting threshold (remove attributes occurring fewer times)
    attribute_key
        Activity key in the log (default concept:name)

    Returns
    ----------
    filtered_log
        Filtered log
    """
    filtered_log = EventLog()
    # attribute values of the first (most frequent) variant are always kept
    first_variant_values = [
        ev[attribute_key] for ev in variants[vc[0][0]][0] if attribute_key in ev
    ]
    for case in log:
        new_trace = Trace()
        for event in case:
            if attribute_key not in event:
                continue
            value = event[attribute_key]
            if value not in attributes:
                continue
            if value in first_variant_values or attributes[value] >= threshold:
                new_trace.append(event)
        if len(new_trace) > 0:
            # surviving traces carry over all their case attributes
            for attr in case.attributes:
                new_trace.attributes[attr] = case.attributes[attr]
            filtered_log.append(new_trace)
    return filtered_log
def apply(log, parameters=None):
    """
    Apply PCA + DBSCAN clustering to a feature representation of the log
    built from the concept:name event attribute.

    Parameters
    -----------
    log
        Trace log
    parameters
        Parameters of the algorithm, including:
            pca_components -> Number of the components for the PCA (default 3)
            dbscan_eps -> EPS value for the DBScan clustering (default 0.3)

    Returns
    -----------
    log_list
        A list containing, for each cluster, a different log
    """
    parameters = {} if parameters is None else parameters

    pca_components = parameters.get("pca_components", 3)
    dbscan_eps = parameters.get("dbscan_eps", 0.3)

    data, feature_names = get_.get_representation(log,
                                                  str_ev_attr=['concept:name'],
                                                  str_tr_attr=[],
                                                  num_ev_attr=[],
                                                  num_tr_attr=[],
                                                  str_evsucc_attr=[])

    pca = PCA(n_components=pca_components)
    pca.fit(data)
    projected = pca.transform(data)

    labels = DBSCAN(eps=dbscan_eps).fit(projected).labels_

    log_list = []
    # maps each cluster label to its position in log_list, in order of
    # first appearance
    cluster_index = {}
    for i, case in enumerate(log):
        label = labels[i]
        if label not in cluster_index:
            cluster_index[label] = len(cluster_index)
            log_list.append(EventLog())
        clone = Trace(case)
        for attribute, value in case.attributes.items():
            clone.attributes[attribute] = value
        log_list[cluster_index[label]].append(clone)

    return log_list
def execute_script():
    """Compare Petri-net element usage between the first and last 500 traces
    of the receipt log and view the frequency-decorated net."""
    log_path = os.path.join("..", "tests", "input_data", "receipt.xes")
    log = sorting.sort_timestamp(xes_importer.apply(log_path))
    net, im, fm = inductive_miner.apply(log)
    first_part = EventLog(log[:500])
    last_part = EventLog(log[len(log) - 500:])
    statistics = element_usage_comparison.compare_element_usage_two_logs(
        net, im, fm, first_part, last_part)
    gviz = pn_vis.apply(
        net,
        im,
        fm,
        variant=pn_vis.Variants.FREQUENCY,
        aggregated_statistics=statistics,
        parameters={pn_vis.Variants.FREQUENCY.value.Parameters.FORMAT: "svg"})
    pn_vis.view(gviz)
Ejemplo n.º 12
0
def merge_log(path, cate, iter):
    """
    Import and merge a grid of XES logs named ``log_1_<i>_<j>.xes``.

    Parameters
    ------------
    path
        Directory containing the logs
    cate
        Number of categories (outer index i, 1-based)
    iter
        Number of iterations per category (inner index j, 1-based); the name
        shadows the builtin but is kept for interface compatibility

    Returns
    ------------
    loglist, mergedlog
        The list of individual logs and a single merged EventLog
    """
    import os  # local import: portable path joining

    loglist = []
    mergedlog = EventLog()

    for i in range(1, cate + 1):
        for j in range(1, iter + 1):
            # os.path.join replaces the original hard-coded Windows '\\'
            # separator, making the function portable
            log = xes_importer.apply(
                os.path.join(path, 'log_1_' + str(i) + '_' + str(j) + ".xes"))
            # every trace of this log gets the same flattened grid id
            case_id = str(iter * (i - 1) + j)
            for trace in log:
                trace.attributes["concept:name"] = case_id
                trace.attributes["index"] = case_id
            loglist.append(log)

    for log in loglist:
        for trace in log:
            mergedlog.append(trace)

    return loglist, mergedlog
Ejemplo n.º 13
0
def apply_trace_attributes(log, list_of_values, parameters=None):
    """
    Keep (or drop) the traces whose case attribute value belongs to the
    provided values list.

    Parameters
    -----------
    log
        Trace log
    list_of_values
        Allowed attribute values (wrap numerical values in a list)
    parameters
        Parameters of the algorithm, including:
            Parameters.ATTRIBUTE_KEY -> Case attribute to inspect
            Parameters.POSITIVE -> Indicate if matching traces should be kept/removed

    Returns
    -----------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}

    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)

    filtered_log = EventLog()
    for trace in log:
        matches = (attribute_key in trace.attributes
                   and trace.attributes[attribute_key] in list_of_values)

        if (matches and positive) or (not matches and not positive):
            new_trace = trace
        else:
            # rejected traces keep only their case attributes (no events),
            # so the length check below discards them
            new_trace = Trace()
            for attr in trace.attributes:
                new_trace.attributes[attr] = trace.attributes[attr]

        if len(new_trace) > 0:
            filtered_log.append(new_trace)
    return filtered_log
Ejemplo n.º 14
0
def project(log: EventLog, cut: Cut, activity_key: str) -> List[EventLog]:
    """
    Project a log onto the do/redo partitions of a loop cut.

    Parameters
    ------------
    log
        Event log
    cut
        Loop cut; cut[0] is the "do" activity set, the rest are "redo" sets
    activity_key
        Event attribute identifying the activity

    Returns
    ------------
    logs
        [do_log, redo_log_1, ...]: sub-logs built from the maximal runs of
        do/redo activities in each trace
    """
    do = cut[0]
    redo = cut[1:]
    do_log = EventLog()
    # BUG FIX: the original used `[EventLog()] * len(redo)`, which aliases a
    # SINGLE EventLog len(redo) times, so every redo partition would share
    # (and accumulate) the same traces; build distinct instances instead.
    redo_logs = [EventLog() for _ in redo]
    for t in log:
        do_trace = Trace()
        redo_trace = Trace()
        for e in t:
            if e[activity_key] in do:
                do_trace.append(e)
                # a do-activity closes any pending redo run
                if len(redo_trace) > 0:
                    redo_logs = _append_trace_to_redo_log(redo_trace, redo_logs, redo, activity_key)
                    redo_trace = Trace()
            else:
                redo_trace.append(e)
                # a redo-activity closes any pending do run
                if len(do_trace) > 0:
                    do_log.append(do_trace)
                    do_trace = Trace()
        if len(redo_trace) > 0:
            redo_logs = _append_trace_to_redo_log(redo_trace, redo_logs, redo, activity_key)
        # the final (possibly empty) do run is always appended
        do_log.append(do_trace)
    logs = [do_log]
    logs.extend(redo_logs)
    return logs
Ejemplo n.º 15
0
def apply_numeric(log, int1, int2, parameters=None):
    """
    Apply a filter on cases (numerical filter): keep the cases containing at
    least one event whose attribute value lies inside [int1, int2] (or
    outside, if "positive" is False).

    Parameters
    --------------
    log
        Log
    int1
        Lower bound of the interval
    int2
        Upper bound of the interval
    parameters
        Possible parameters of the algorithm

    Returns
    --------------
    filtered_log
        Filtered event log
    """
    if parameters is None:
        parameters = {}

    attribute_key = parameters[
        PARAMETER_CONSTANT_ATTRIBUTE_KEY] if PARAMETER_CONSTANT_ATTRIBUTE_KEY in parameters else DEFAULT_NAME_KEY
    case_key = parameters[
        PARAMETER_CONSTANT_CASEID_KEY] if PARAMETER_CONSTANT_CASEID_KEY in parameters else xes.DEFAULT_TRACEID_KEY
    # stream_filter_key is helpful to filter on cases containing an event with an attribute
    # in the specified value set, but such events shall have an activity in particular.
    stream_filter_key1 = parameters["stream_filter_key1"] if "stream_filter_key1" in parameters else None
    stream_filter_value1 = parameters["stream_filter_value1"] if "stream_filter_value1" in parameters else None
    stream_filter_key2 = parameters["stream_filter_key2"] if "stream_filter_key2" in parameters else None
    stream_filter_value2 = parameters["stream_filter_value2"] if "stream_filter_value2" in parameters else None

    positive = parameters["positive"] if "positive" in parameters else True

    stream = log_conv_fact.apply(log, variant=log_conv_fact.TO_EVENT_STREAM)
    if stream_filter_key1 is not None:
        stream = EventStream(
            list(filter(lambda x: stream_filter_key1 in x and x[stream_filter_key1] == stream_filter_value1, stream)))
    if stream_filter_key2 is not None:
        stream = EventStream(
            list(filter(lambda x: stream_filter_key2 in x and x[stream_filter_key2] == stream_filter_value2, stream)))

    if positive:
        stream = EventStream(list(filter(lambda x: attribute_key in x and int1 <= x[attribute_key] <= int2, stream)))
    else:
        stream = EventStream(
            list(filter(lambda x: attribute_key in x and (x[attribute_key] < int1 or x[attribute_key] > int2), stream)))

    all_cases_ids = set(x["case:" + case_key] for x in stream)

    filtered_log = EventLog()

    for case in log:
        if case.attributes[case_key] in all_cases_ids:
            filtered_log.append(case)

    # BUG FIX: the original fell off the end without returning the result
    return filtered_log
Ejemplo n.º 16
0
def execute_script():
    """Build one 4-event trace (A, B, C, D) and append 10000 deep copies of
    it to an event log, then print the log size."""
    L = EventLog()
    t = Trace()
    for activity in ("A", "B", "C", "D"):
        ev = Event()
        ev["concept:name"] = activity
        t.append(ev)
    # deepcopy so each appended trace is an independent object
    for _ in range(10000):
        L.append(deepcopy(t))
    print(len(L))
Ejemplo n.º 17
0
def replay_prediction(replay_job: Job, training_initial_job: Job,
                      trace_id) -> list:
    """Build a log of growing prefixes of one trace and POST it to the
        runtime replay_prediction endpoint, simulating the log as time passes.
        :param trace_id: index of the trace to replay within the training log
        :param replay_job: replay job (its case_id/event_number are updated and saved)
        :param training_initial_job: job whose id is sent as the training job
        :return: list with the string of the HTTP response (or of the exception)
    """

    split = replay_job.split
    log = get_log(split.train_log)
    requests_list = list()
    eventlog = EventLog()
    trace = log[int(trace_id)]
    # carry the log-level attributes over to the replay log
    for key in log.attributes.keys():
        eventlog.attributes[key] = log.attributes[key]
    # one trace per growing prefix of the selected case; index starts at 0,
    # so the first appended prefix is empty — presumably intended by the
    # runtime endpoint, TODO confirm
    for index in range(len(trace)):
        new_trace = Trace(trace[0:index])
        for key in trace.attributes:
            new_trace.attributes[key] = trace.attributes[key]
        eventlog.append(new_trace)
    replay_job.case_id = trace_id
    replay_job.event_number = len(trace)
    replay_job.save()
    try:
        # NOTE(review): informational message logged at ERROR level —
        # probably should be logger.info; left unchanged here
        logger.error("Sending request for replay_prediction task.")
        r = requests.post(
            url="http://127.0.0.1:8000/runtime/replay_prediction/",
            data=export_log_as_string(eventlog),
            params={
                'jobId': replay_job.id,
                'training_job': training_initial_job.id
            },
            headers={
                'Content-Type': 'text/plain',
                'charset': 'UTF-8'
            })
        requests_list.append(str(r))
    except Exception as e:
        # best-effort: the exception text is returned instead of the response
        requests_list.append(str(e))
        logger.warning(str(e))

    return requests_list
Ejemplo n.º 18
0
def apply(log, admitted_start_activities, parameters=None):
    """
    Filter the log on the specified start activities, keeping (positive) or
    dropping (negative) the traces that start with one of them.

    Parameters
    -----------
    log
        Event log
    admitted_start_activities
        Admitted start activities
    parameters
        Algorithm parameters (Parameters.ACTIVITY_KEY, Parameters.POSITIVE)

    Returns
    -----------
    filtered_log
        Filtered log (metadata of the source log is preserved)
    """
    if parameters is None:
        parameters = {}
    attribute_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                               parameters, DEFAULT_NAME_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters,
                                          True)

    if positive:
        kept = [trace for trace in log
                if trace and trace[0][attribute_key] in admitted_start_activities]
    else:
        kept = [trace for trace in log
                if trace and trace[0][attribute_key] not in admitted_start_activities]

    return EventLog(kept,
                    attributes=log.attributes,
                    extensions=log.extensions,
                    classifiers=log.classifiers,
                    omni_present=log.omni_present)
Ejemplo n.º 19
0
def generate_log(pt0, no_traces=100):
    """
    Generate a log out of a process tree.

    Parameters
    ------------
    pt0
        Process tree (not modified; a deep copy is used)
    no_traces
        Number of traces to generate

    Returns
    ------------
    log
        Generated event log
    """
    # different taus must get different IDs during generation, so the default
    # process tree class cannot be used: wrap a deep copy in GenerationTree
    tree = GenerationTree(deepcopy(pt0))
    log = EventLog()

    # every event receives a strictly increasing timestamp starting from an
    # epoch offset in 1970
    ts = 10000000

    for idx in range(no_traces):
        exec_sequence = execute(tree)
        labels = pt_util.project_execution_sequence_to_labels(exec_sequence)
        trace = Trace()
        trace.attributes[xes.DEFAULT_NAME_KEY] = str(idx)
        for label in labels:
            ev = Event()
            ev[xes.DEFAULT_NAME_KEY] = label
            ev[xes.DEFAULT_TIMESTAMP_KEY] = datetime.datetime.fromtimestamp(ts)
            trace.append(ev)
            ts += 1
        log.append(trace)

    return log
Ejemplo n.º 20
0
def import_log(filename, parameters=None):
    """
    Imports an XES file into a log object

    Parameters
    ----------
    filename:
        Absolute filename
    parameters
        Parameters of the algorithm, including
            Parameters.TIMESTAMP_SORT -> Specify if we should sort log by timestamp
            Parameters.TIMESTAMP_KEY -> If sort is enabled, then sort the log by using this key
            Parameters.REVERSE_SORT -> Specify in which direction the log should be sorted
            Parameters.MAX_TRACES -> Specify the maximum number of traces to import from the log (read in order in the XML file)
            Parameters.SHOW_PROGRESS_BAR -> Enables/disables the progress bar (default: True)
            Parameters.ENCODING -> regulates the encoding (default: utf-8)

    Returns
    -------
    log : :class:`pm4py.log.log.EventLog`
        A log
    """
    from lxml import etree

    if parameters is None:
        parameters = {}

    encoding = exec_utils.get_param_value(Parameters.ENCODING, parameters,
                                          constants.DEFAULT_ENCODING)
    show_progress_bar = exec_utils.get_param_value(
        Parameters.SHOW_PROGRESS_BAR, parameters, True)
    is_compressed = filename.lower().endswith(".gz")

    def _open_source():
        # .gz logs are transparently decompressed
        return gzip.open(filename, "rb") if is_compressed else open(filename, "rb")

    if pkgutil.find_loader("tqdm") and show_progress_bar:
        # first pass only counts the traces so tqdm can display a total;
        # close this handle afterwards (the original leaked it)
        f = _open_source()
        try:
            context = etree.iterparse(f,
                                      events=[_EVENT_START, _EVENT_END],
                                      encoding=encoding)
            num_traces = count_traces(context)
        finally:
            f.close()
    else:
        # avoid the iteration to calculate the number of traces if "tqdm" is not used
        num_traces = 0

    # second handle stays open: import_from_context consumes the parse context
    f = _open_source()
    context = etree.iterparse(f,
                              events=[_EVENT_START, _EVENT_END],
                              encoding=encoding)

    log = EventLog()
    return import_from_context(context, num_traces, log, parameters=parameters)
Ejemplo n.º 21
0
def apply(log, parameters=None):
    """
    Discovers a footprint object from an event log
    (the footprints are returned case-by-case).

    Parameters
    --------------
    log
        Log
    parameters
        Parameters of the algorithm:
            - Parameters.ACTIVITY_KEY

    Returns
    --------------
    footprints_obj
        List of footprints for the cases of the log
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)

    log = converter.apply(log,
                          variant=converter.TO_EVENT_LOG,
                          parameters=parameters)

    results = []

    for case in log:
        # directly-follows relations of this single case
        dfg = dfg_discovery.apply(EventLog([case]), parameters=parameters)
        parallel = set()
        sequence = set()
        for pair in dfg:
            # a pair present in both directions is parallel, else sequential
            if (pair[1], pair[0]) in dfg:
                parallel.add(pair)
            else:
                sequence.add(pair)
        acts = tuple(ev[activity_key] for ev in case)
        if acts:
            start_activities = {acts[0]}
            end_activities = {acts[-1]}
        else:
            start_activities = set()
            end_activities = set()

        results.append({
            Outputs.DFG.value: dfg,
            Outputs.SEQUENCE.value: sequence,
            Outputs.PARALLEL.value: parallel,
            Outputs.ACTIVITIES.value: set(acts),
            Outputs.START_ACTIVITIES.value: start_activities,
            Outputs.END_ACTIVITIES.value: end_activities,
            Outputs.MIN_TRACE_LENGTH.value: len(acts),
            Outputs.TRACE.value: acts
        })

    return results
Ejemplo n.º 22
0
def create_event_log(log):
    """Parse a comma-separated string of activity words into an EventLog:
    each word becomes a trace and each of its characters an event."""
    parsed_traces = []
    for word in log.split(", "):
        trace = Trace()
        for ch in word:
            ev = Event()
            ev["concept:name"] = ch
            trace.append(ev)
        parsed_traces.append(trace)
    return EventLog(parsed_traces)
def event_sampling(input_log, k):
    """
    Keep only the events whose activity belongs to k randomly sampled unique
    activities; traces left empty are removed.

    NOTE: the input log is filtered IN PLACE and the same (mutated) object is
    returned, preserving the original behavior.

    Parameters
    ------------
    input_log
        Event log (mutated)
    k
        Number of unique activities to sample

    Returns
    ------------
    input_log
        The filtered (mutated) event log
    """
    unique_event = unique_activities(input_log)
    # random.sample requires a sequence (set support was removed in 3.11);
    # a set gives O(1) membership tests in the loops below
    sampled_events = set(random.sample(list(unique_event), k))
    for t in input_log:
        t[:] = [e for e in t if e['concept:name'] in sampled_events]
    input_log[:] = [t for t in input_log if len(t) != 0]
    print(f'length of sampled log is: {len(input_log)}')
    return input_log
Ejemplo n.º 24
0
def apply(log):
    """
    Keep only the traces containing no event with a NaN "label" attribute
    (adapted from the pm4py attribute-filtering method).

    Parameters
    -----------
    log
        Trace log

    Returns
    -----------
    filtered_log
        Filtered log
    """
    attribute_key = "label"
    positive = False

    filtered_log = EventLog()
    for trace in log:
        has_nan = False
        for event in trace:
            # assumes the "label" attribute is numeric when present,
            # otherwise np.isnan raises — TODO confirm upstream guarantee
            if attribute_key in event and np.isnan(event[attribute_key]):
                has_nan = True

        if (has_nan and positive) or (not has_nan and not positive):
            new_trace = trace
        else:
            # rejected traces keep only their case attributes (no events),
            # so the length check below discards them
            new_trace = Trace()
            for attr in trace.attributes:
                new_trace.attributes[attr] = trace.attributes[attr]

        if len(new_trace) > 0:
            filtered_log.append(new_trace)
    return filtered_log
Ejemplo n.º 25
0
def list_to_xes(log):
    """Convert a list of comma-separated activity strings into an EventLog:
    each string becomes a trace, each activity an event."""
    converted = []
    for case_str in log:
        trace = Trace()
        for activity in case_str.split(", "):
            ev = Event()
            ev["concept:name"] = activity
            trace.append(ev)
        converted.append(trace)
    return EventLog(converted)
Ejemplo n.º 26
0
def filter_variants_variants_percentage(log,
                                        variants,
                                        variants_percentage=0.0):
    """
    Filter the log by variants percentage

    Parameters
    ----------
    log
        Log
    variants
        Dictionary with variant as the key and the list of traces as the value
    variants_percentage
        Percentage of variants that should be kept (the most common variant is always kept)

    Returns
    ----------
    filtered_log
        Filtered log
    """
    # start from an empty log that carries over the source log's metadata
    filtered_log = EventLog(list(),
                            attributes=log.attributes,
                            extensions=log.extensions,
                            classifiers=log.classifiers,
                            omni_present=log.omni_present)
    no_of_traces = len(log)
    variant_count = get_variants_sorted_by_count(variants)
    # running total of traces admitted so far
    already_added_sum = 0
    # once the target percentage is crossed, this is set to the count of the
    # crossing variant: further variants are admitted only while they have at
    # least that count, so ties with the last admitted variant are kept
    shall_break_under = -1

    for i in range(len(variant_count)):
        variant = variant_count[i][0]
        varcount = variant_count[i][1]
        if varcount < shall_break_under:
            break
        # percentage BEFORE adding this variant, so the most common variant
        # is always admitted (0.0 >= variants_percentage holds for default)
        percentage_already_added = already_added_sum / no_of_traces
        for trace in variants[variant]:
            filtered_log.append(trace)
        already_added_sum = already_added_sum + varcount
        if percentage_already_added >= variants_percentage:
            shall_break_under = varcount

    return filtered_log
Ejemplo n.º 27
0
def create_sample_logs(clus_dict, cluster_labels, log):
    """
    Split the full log into one sample log per cluster, given a mapping from
    cluster label to the case ids belonging to that cluster.

    Parameters
    -----------
    clus_dict : dict
        Maps each cluster label to the list of case ids in that cluster.
    cluster_labels : list
        Labels of the clusters to extract.
    log
        EventLog object

    Returns
    -----------
    sample_logs : list
        One EventLog per cluster label, each preserving the source log's
        metadata and containing deep copies of its cases.
    """
    sample_logs = []

    for label in cluster_labels:
        case_ids = clus_dict[label]
        # new log carries over the source log's metadata
        cluster_log = EventLog(attributes=log.attributes,
                               extensions=log.extensions,
                               omni_present=log.omni_present,
                               classifiers=log.classifiers)
        for case in log:
            if case.attributes['concept:name'] in case_ids:
                # deep copy so samples are independent of the source log
                cluster_log.append(deepcopy(case))
        sample_logs.append(cluster_log)

    return sample_logs
Ejemplo n.º 28
0
def apply_numeric(log, int1, int2, parameters=None):
    """
    Apply a filter on cases (numerical filter): keep (or discard, when
    POSITIVE is False) the cases containing an event whose attribute value
    falls in the interval [int1, int2].

    Parameters
    --------------
    log
        Log
    int1
        Lower bound of the interval
    int2
        Upper bound of the interval
    parameters
        Possible parameters of the algorithm, including:
            Parameters.ATTRIBUTE_KEY -> attribute subject to the numeric filter
            Parameters.CASE_ID_KEY -> trace attribute holding the case id
            Parameters.POSITIVE -> keep (True) or remove (False) matching cases
            Parameters.STREAM_FILTER_KEY1/VALUE1, STREAM_FILTER_KEY2/VALUE2 ->
                optional pre-filters restricting which events are considered

    Returns
    --------------
    filtered_log
        Filtered event log
    """
    if parameters is None:
        parameters = {}

    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
    case_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, xes.DEFAULT_TRACEID_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)
    # stream_filter_key is helpful to filter on cases containing an event with an attribute
    # in the specified value set, but such events shall have an activity in particular.

    stream_filter_key1 = exec_utils.get_param_value(Parameters.STREAM_FILTER_KEY1, parameters, None)
    stream_filter_value1 = exec_utils.get_param_value(Parameters.STREAM_FILTER_VALUE1, parameters, None)
    stream_filter_key2 = exec_utils.get_param_value(Parameters.STREAM_FILTER_KEY2, parameters, None)
    stream_filter_value2 = exec_utils.get_param_value(Parameters.STREAM_FILTER_VALUE2, parameters, None)

    stream = log_converter.apply(log, variant=log_converter.TO_EVENT_STREAM)
    if stream_filter_key1 is not None:
        stream = EventStream(
            list(filter(lambda x: stream_filter_key1 in x and x[stream_filter_key1] == stream_filter_value1, stream)))
    if stream_filter_key2 is not None:
        stream = EventStream(
            list(filter(lambda x: stream_filter_key2 in x and x[stream_filter_key2] == stream_filter_value2, stream)))

    if positive:
        stream = EventStream(list(filter(lambda x: attribute_key in x and int1 <= x[attribute_key] <= int2, stream)))
    else:
        stream = EventStream(
            list(filter(lambda x: attribute_key in x and (x[attribute_key] < int1 or x[attribute_key] > int2), stream)))

    all_cases_ids = set(x["case:" + case_key] for x in stream)

    filtered_log = EventLog()

    for case in log:
        if case.attributes[case_key] in all_cases_ids:
            filtered_log.append(case)

    # BUG FIX: the original version built filtered_log but never returned it,
    # so callers always received None
    return filtered_log
Ejemplo n.º 29
0
def apply_events(log, values, parameters=None):
    """
    Filter log by keeping only events with an attribute value that belongs to the provided values list

    Parameters
    -----------
    log
        log
    values
        Allowed attributes
    parameters
        Parameters of the algorithm, including:
            activity_key -> Attribute identifying the activity in the log
            positive -> Indicate if events should be kept/removed

    Returns
    -----------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}

    # dict.get gives the same default-on-missing behavior as the membership test
    attribute_key = parameters.get(PARAMETER_CONSTANT_ATTRIBUTE_KEY, DEFAULT_NAME_KEY)
    positive = parameters.get("positive", True)

    filtered_log = EventLog()
    for original_trace in log:
        kept_events = Trace()

        for event in original_trace:
            if attribute_key not in event:
                continue
            value = event[attribute_key]
            # keep the event when its membership in `values` matches `positive`
            if (value in values) == positive:
                kept_events.append(event)

        # only traces that retain at least one event survive the filter;
        # trace-level attributes are copied over unchanged
        if len(kept_events) > 0:
            for attr_name in original_trace.attributes:
                kept_events.attributes[attr_name] = original_trace.attributes[attr_name]
            filtered_log.append(kept_events)
    return filtered_log
Ejemplo n.º 30
0
def apply(log, paths, parameters=None):
    """
    Apply a filter on traces containing / not containing a path

    Parameters
    -----------
    log
        Log
    paths
        Paths that we are looking for (expressed as tuple of 2 strings)
    parameters
        Parameters of the algorithm, including:
            Parameters.ATTRIBUTE_KEY -> Attribute identifying the activity in the log
            Parameters.POSITIVE -> Indicate if events should be kept/removed

    Returns
    -----------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY,
                                               parameters,
                                               xes.DEFAULT_NAME_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters,
                                          True)
    # new log inherits the top-level metadata of the input log
    filtered_log = EventLog(list(),
                            attributes=log.attributes,
                            extensions=log.extensions,
                            classifiers=log.classifiers,
                            omni_present=log.omni_present)
    for trace in log:
        # does any pair of consecutive activities form one of the target paths?
        # (any() short-circuits on the first match, like the original break)
        contains_path = any(
            (trace[idx][attribute_key], trace[idx + 1][attribute_key]) in paths
            for idx in range(len(trace) - 1))
        # keep matching traces when positive, non-matching ones otherwise
        if contains_path == positive:
            filtered_log.append(trace)
    return filtered_log