Ejemplos de EventLog.append en Python, ejemplos de pm4py.objects.log.log.EventLog.append en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: pyarrow_event_log.py Proyecto: oli-on-tour/pm4py-source

def apply(bytes, parameters=None):
    """
    Rebuild an event log from a buffer produced by the Pyarrow serialization

    The deserialized payload is read positionally: items 0 to 3 are handed to
    the EventLog constructor (attributes, extensions, omni_present, classifiers),
    item 4 holds one attributes container per trace and item 5 holds, at the
    same position, the payloads of the events of that trace.

    Parameters
    --------------
    bytes
        Serialized content
    parameters
        Parameters of the algorithm (not read by this function)

    Returns
    --------------
    log
        Deserialized event log
    """
    if parameters is None:
        parameters = {}

    payload = pyarrow.deserialize(pyarrow.py_buffer(bytes))
    log = EventLog(attributes=payload[0],
                   extensions=payload[1],
                   omni_present=payload[2],
                   classifiers=payload[3])
    for position, trace_attributes in enumerate(payload[4]):
        trace = Trace(attributes=trace_attributes)
        # events of the trace are stored at the same position in payload[5]
        for event_payload in payload[5][position]:
            trace.append(Event(event_payload))
        log.append(trace)
    return log

Ejemplo n.º 2

0

Mostrar archivo

def apply(log, admitted_variants, parameters=None):
    """
    Build a new log made of the traces whose variant is (or is not) admitted

    Parameters
    -----------
    log
        Log object
    admitted_variants
        Collection of variants used for the membership test
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> Attribute identifying the activity in the log
            Parameters.POSITIVE -> If true (default) keep the admitted variants,
            otherwise keep the variants that are not admitted

    Returns
    -----------
    filtered_log
        New log sharing attributes, extensions, classifiers and omni_present
        with the input log
    """
    if parameters is None:
        parameters = {}
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)
    traces_by_variant = get_variants(log, parameters=parameters)
    filtered_log = EventLog(list(), attributes=log.attributes, extensions=log.extensions,
                            classifiers=log.classifiers, omni_present=log.omni_present)
    for variant, traces in traces_by_variant.items():
        if positive:
            keep = variant in admitted_variants
        else:
            keep = variant not in admitted_variants
        if keep:
            for trace in traces:
                filtered_log.append(trace)
    return filtered_log

Ejemplo n.º 3

0

Mostrar archivo

def sample_log(log, no_traces=100):
    """
    Randomly sample a fixed number of traces from the original log

    Trace positions are drawn without replacement, so the sample contains
    exactly min(no_traces, number of traces in the log) traces. The sampled
    traces are shallow-copied and appended following their order in the
    original log.

    Parameters
    -----------
    log
        Log
    no_traces
        Number of traces that the sample should have (values below zero
        produce an empty sample)

    Returns
    -----------
    newLog
        Filtered log
    """
    new_log = EventLog(attributes=log.attributes,
                       extensions=log.extensions,
                       globals=log._omni,
                       classifiers=log.classifiers)
    population = len(log._list)
    sample_size = max(0, min(no_traces, population))
    # random.sample yields distinct positions; collecting randrange() results in
    # a set returned fewer traces than requested whenever a position was drawn twice
    chosen = sorted(random.sample(range(population), sample_size))
    for position in chosen:
        new_log.append(copy(log._list[position]))
    return new_log

Ejemplo n.º 4

0

Mostrar archivo

def apply(df, parameters=None):
    """
    Build a log with one trace per variant of the dataframe (control-flow only)

    Each variant string is split on "," and every resulting activity becomes
    an event carrying only the activity attribute.

    Parameters
    -------------
    df
        Dataframe
    parameters
        Parameters of the algorithm; the activity key is read from
        pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY when present,
        otherwise xes.DEFAULT_NAME_KEY is used

    Returns
    -------------
    log
        Event log
    """
    from pm4py.statistics.traces.pandas import case_statistics

    if parameters is None:
        parameters = {}
    variant_stats = case_statistics.get_variant_statistics(df, parameters=parameters)

    key_parameter = pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY
    if key_parameter in parameters:
        activity_key = parameters[key_parameter]
    else:
        activity_key = xes.DEFAULT_NAME_KEY

    log = EventLog()
    for stat in variant_stats:
        activities = stat['variant'].split(",")
        trace = Trace()
        for activity in activities:
            event = Event()
            event[activity_key] = activity
            trace.append(event)
        log.append(trace)
    return log

Ejemplo n.º 5

0

Mostrar archivo

def filter_log_by_variants_percentage(log, variants, variants_percentage=0.0):
    """
    Keep the most frequent variants until the requested share of traces is reached

    Variants are visited in the order returned by get_variants_sorted_by_count.
    A variant is added while nothing has been added yet, or while the share of
    traces already added is still below variants_percentage.

    Parameters
    ----------
    log
        Log (only its length is used, as the denominator of the share)
    variants
        Dictionary with variant as the key and the list of traces as the value
    variants_percentage
        Percentage of variants that should be kept (the first variant is always kept)

    Returns
    ----------
    filtered_log
        Filtered log
    """
    filtered_log = EventLog()
    total_traces = len(log)
    ranked_variants = get_variants_sorted_by_count(variants)
    traces_added = 0

    for entry in ranked_variants:
        variant = entry[0]
        count = entry[1]
        share_added = traces_added / total_traces
        # skip once something was added and the target share is no longer undershot
        if traces_added != 0 and not (share_added < variants_percentage):
            continue
        for trace in variants[variant]:
            filtered_log.append(trace)
        traces_added = traces_added + count

    return filtered_log

Ejemplo n.º 6

0

Mostrar archivo

def apply(log, admitted_variants, parameters=None):
    """
    Build a new, empty-metadata log with the traces whose variant is (or is not) admitted

    Parameters
    -----------
    log
        Log object
    admitted_variants
        Collection of variants used for the membership test
    parameters
        Parameters of the algorithm, including:
            activity_key -> Attribute identifying the activity in the log
            positive -> If true (default) keep the admitted variants,
            otherwise keep the variants that are not admitted

    Returns
    -----------
    filtered_log
        Log containing the selected traces
    """
    if parameters is None:
        parameters = {}
    positive = parameters.get("positive", True)
    traces_by_variant = get_variants(log, parameters=parameters)
    filtered_log = EventLog()
    for variant, traces in traces_by_variant.items():
        if positive:
            keep = variant in admitted_variants
        else:
            keep = variant not in admitted_variants
        if keep:
            for trace in traces:
                filtered_log.append(trace)
    return filtered_log

Ejemplo n.º 7

0

Mostrar archivo

Archivo: merge_log.py Proyecto: yoannlgd1/pm4py-core

def update_merge(loglist):
    """
    Concatenate the traces of several logs into a single new log

    Parameters
    -----------
    loglist
        Sequence of logs; traces are appended log by log, in order

    Returns
    -----------
    mergedlog
        New EventLog holding the same trace objects (no copy is made)
    """
    combined = EventLog()
    for sublog in loglist:
        for trace in sublog:
            combined.append(trace)
    return combined

Ejemplo n.º 8

0

Mostrar archivo

Archivo: sampling.py Proyecto: fit-daniel-schuster/online_process_monitoring_using_incremental_state-space_expansion_an_exact_algorithm

def sample_eventlog(event_log, no_events=100):
    """
    Randomly sample a fixed number of events from the original event log

    Positions are drawn without replacement, so the sample contains exactly
    min(no_events, number of items in the event log) items. The sampled items
    are shallow-copied and appended following their order in the original
    event log.

    Parameters
    -----------
    event_log
        Event log
    no_events
        Number of events that the sample should have (values below zero
        produce an empty sample)

    Returns
    -----------
    newLog
        Filtered log
    """
    new_log = EventLog(attributes=event_log.attributes, extensions=event_log.extensions, globals=event_log._omni,
                       classifiers=event_log.classifiers)
    population = len(event_log._list)
    sample_size = max(0, min(no_events, population))
    # random.sample yields distinct positions; collecting randrange() results in
    # a set returned fewer items than requested whenever a position was drawn twice
    chosen = sorted(random.sample(range(population), sample_size))
    for position in chosen:
        new_log.append(copy(event_log._list[position]))
    return new_log

Ejemplo n.º 9

0

Mostrar archivo

Archivo: original.py Proyecto: yoannlgd1/pm4py-core

def __align(obj: Union[Trace, EventLog],
            pt: ProcessTree,
            max_trace_length: int = 1,
            max_process_tree_height: int = 1,
            parameters=None):
    """
    Approximate alignments between an event log (or a single trace) and a process tree

    A single trace is first wrapped into a one-trace EventLog. The tree is
    converted with process_tree_to_binary_process_tree and wrapped into an
    EfficientTree before the work is delegated to
    __approximate_alignments_for_log.

    :param obj: event log or single trace
    :param pt: process tree
    :param max_trace_length: specifies when the recursive splitting stops based on the trace's length
    :param max_process_tree_height: specifies when the recursive splitting stops based on the tree's height
    :param parameters: forwarded unchanged to __approximate_alignments_for_log
    :return: whatever __approximate_alignments_for_log returns
    """
    assert isinstance(pt, ProcessTree)
    if isinstance(obj, Trace):
        single_trace_log = EventLog()
        single_trace_log.append(obj)
        obj = single_trace_log
    assert isinstance(obj, EventLog)

    efficient_tree = EfficientTree(process_tree_to_binary_process_tree(pt))
    return __approximate_alignments_for_log(
        obj,
        efficient_tree,
        max_trace_length,
        max_process_tree_height,
        parameters=parameters,
    )

Ejemplo n.º 10

0

Mostrar archivo

Archivo: sample.py Proyecto: madhubs08/TraceClustering

def write_sample_logs_to_fs(clus_dict, filepath):
    """
    Build separate logs with traces corresponding to each cluster and write them to the filesystem.

    For every cluster a new log sharing the metadata of the imported log is
    filled with deep copies of the traces whose 'concept:name' attribute is one
    of the cluster's case ids, and exported next to the input file as
    "<input path without extension>_<cluster label>.xes".

    Parameters
    -----------
    clus_dict : dict
        Dictionary using the cluster labels as keys and the corresponding list of case ids as values.
        Labels are converted with str() when building the file name, so
        non-string labels (e.g. integers) are accepted.
    filepath
        Path to the XES log file
    """
    import os

    log = xes_importer.import_log(filepath)
    # strip the actual extension instead of assuming it is four characters long
    base_path = os.path.splitext(filepath)[0]

    for key, value in clus_dict.items():
        samplelog = EventLog(attributes=log.attributes,
                             extensions=log.extensions,
                             omni_present=log.omni_present,
                             classifiers=log.classifiers)
        goalpath = base_path + "_" + str(key) + ".xes"
        # one set per cluster: constant-time membership instead of a list scan per trace
        case_ids = set(value)
        for trace in log:
            if trace.attributes['concept:name'] in case_ids:
                samplelog.append(deepcopy(trace))
        xes_exporter.export_log(samplelog, goalpath)

Ejemplo n.º 11

0

Mostrar archivo

Archivo: filter_subsets.py Proyecto: kfly89/pm4py-core

def apply_variants_filter(log, admitted_variants, parameters=None):
    """
    Build a new log with the traces whose variant is (or is not) admitted

    Parameters
    -----------
    log
        Log object
    admitted_variants
        Collection of variants used for the membership test
    parameters
        Parameters of the algorithm, including:
            activity_key -> Attribute identifying the activity in the log
            positive -> If true (default) keep the admitted variants,
            otherwise keep the variants that are not admitted

    Returns
    -----------
    filtered_log
        Log containing the selected traces
    """
    if parameters is None:
        parameters = {}
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)
    traces_by_variant = variants_statistics.get_variants(log, parameters=parameters)
    filtered_log = EventLog()
    for variant, traces in traces_by_variant.items():
        if positive:
            keep = variant in admitted_variants
        else:
            keep = variant not in admitted_variants
        if keep:
            for trace in traces:
                filtered_log.append(trace)
    return filtered_log

Ejemplo n.º 12

0

Mostrar archivo

Archivo: df_to_event_log_1v.py Proyecto: oli-on-tour/pm4py-source

def apply(df, parameters=None):
    """
    Build a log with one trace per variant of the dataframe (control-flow only)

    Each variant string is split on "," and every resulting activity becomes
    an event whose only attribute is stored under xes.DEFAULT_NAME_KEY.

    Parameters
    -------------
    df
        Dataframe
    parameters
        Parameters of the algorithm (forwarded to the variant statistics)

    Returns
    -------------
    log
        Event log
    """
    if parameters is None:
        parameters = {}
    variant_stats = case_statistics.get_variant_statistics(df, parameters=parameters)

    log = EventLog()
    for stat in variant_stats:
        activities = stat['variant'].split(",")
        trace = Trace()
        for activity in activities:
            event = Event()
            event[xes.DEFAULT_NAME_KEY] = activity
            trace.append(event)
        log.append(trace)
    return log

Ejemplo n.º 13

0

Mostrar archivo

def get_log_with_log_prefixes(log, parameters=None):
    """
    Build a log holding, case after case, every prefix of each trace

    For a trace with n events, n + 1 deep-copied prefixes are appended:
    the empty prefix first, then the prefixes of length 1 to n.

    Parameters
    --------------
    log
        Original log
    parameters
        Possible parameters of the algorithm (not read by this function)

    Returns
    -------------
    all_prefixes_log
        Log with all the prefixes
    """
    prefixes_log = EventLog()

    for trace in log:
        prefix = Trace()
        # snapshot of the empty prefix, then one snapshot after each event
        prefixes_log.append(deepcopy(prefix))
        for event in trace:
            prefix.append(event)
            prefixes_log.append(deepcopy(prefix))

    return prefixes_log

Ejemplo n.º 14

0

Mostrar archivo

def project(log, groups, activity_key):
    """
    Project the log on each activity group of a presumed sequence cut

    Parameters
    ----------
    log
        original log
    groups
        list of activity sets to be used in projection (activities can only appear in one group)
    activity_key
        key to use in the event to derive the activity name

    Returns
    -------
    logs
        one log per group, in group order; each holds, for every trace of the
        original log, the result of pm4py.filter_trace restricted to the group
    """
    # currently, not 'noise' proof
    projected_logs = []
    for group in groups:

        def belongs_to_group(event):
            return event[activity_key] in group

        projection = EventLog()
        for trace in log:
            projection.append(pm4py.filter_trace(belongs_to_group, trace))
        projected_logs.append(projection)
    return projected_logs

Ejemplo n.º 15

0

Mostrar archivo

Archivo: classic.py Proyecto: denzoned/pm4py-core

def apply_from_variants_list(var_list, parameters=None):
    """
    Discover the log skeleton from a list of variants

    Every variant string (first entry of each item) is split on the variant
    delimiter and turned into a trace of single-attribute events; the resulting
    log is passed to apply.

    Parameters
    ---------------
    var_list
        Variants list (the first entry of each item is the variant)
    parameters
        Parameters, including Parameters.ACTIVITY_KEY (default
        xes.DEFAULT_NAME_KEY) and Parameters.PARAMETER_VARIANT_DELIMITER
        (default constants.DEFAULT_VARIANT_SEP)

    Returns
    ---------------
    model
        Log skeleton model
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)
    variant_delimiter = exec_utils.get_param_value(
        Parameters.PARAMETER_VARIANT_DELIMITER, parameters, constants.DEFAULT_VARIANT_SEP)

    log = EventLog()
    for item in var_list:
        activities = item[0].split(variant_delimiter)
        trace = Trace()
        for act in activities:
            trace.append(Event({activity_key: act}))
        log.append(trace)

    return apply(log, parameters=parameters)

Ejemplo n.º 16

0

Mostrar archivo

def import_tel_from_yawl(input_file_path):
    """
    Import a translucent event log from a YAWL logging file

    Only events whose 'lifecycle:instance' equals the 'concept:name' of their
    trace are considered. 'schedule' events add their activity to the set of
    scheduled activities; 'complete' events receive a frozen snapshot of that
    set through set_enabled, are appended to the output trace and then remove
    their own activity from the set.

    Parameters
    ----------
    :param input_file_path: input file path of yawl logging

    Returns
    --------
    :return: translucent event log (only complete)
    """
    log = import_tel(input_file_path)
    new_log = EventLog()
    # NOTE(review): the set is created once and never reset between traces,
    # so scheduled activities carry over to the next trace - confirm intended
    scheduled = set()
    for trace in log:
        new_trace = Trace()
        case_id = trace.attributes['concept:name']
        for event in trace:
            if event['lifecycle:instance'] != case_id:
                continue
            transition = event['lifecycle:transition']
            if transition == 'schedule':
                scheduled.add(event['concept:name'])
            elif transition == 'complete':
                event.set_enabled(frozenset(scheduled))
                new_trace.append(event)
                # raises KeyError when the activity was never scheduled
                scheduled.remove(event['concept:name'])
        new_log.append(new_trace)
    return new_log

Ejemplo n.º 17

0

Mostrar archivo

def project(log: EventLog, cut: Cut, activity_key: str) -> List[EventLog]:
    """
    Project the log on the parts of a loop cut

    The first element of the cut is the "do" group, the remaining ones are the
    "redo" groups. Each trace is split into maximal runs of events: a run of
    "do" events becomes a trace of the do log, a run of other events is handed
    to _append_trace_to_redo_log. The do run that is open at the end of a
    trace (possibly empty) is always appended to the do log.

    Parameters
    ----------
    log
        original log
    cut
        loop cut; cut[0] is the do group, cut[1:] are the redo groups
    activity_key
        key to use in the event to derive the activity name

    Returns
    -------
    logs
        list with the do log first, followed by one log per redo group
    """
    do = cut[0]
    redo = cut[1:]
    do_log = EventLog()
    # one distinct log per redo group: "[EventLog()] * n" would repeat a single
    # shared object, so every redo trace would end up in all the redo logs
    redo_logs = [EventLog() for _ in redo]
    for t in log:
        do_trace = Trace()
        redo_trace = Trace()
        for e in t:
            if e[activity_key] in do:
                do_trace.append(e)
                # a do event closes the redo run collected so far
                if len(redo_trace) > 0:
                    redo_logs = _append_trace_to_redo_log(redo_trace, redo_logs, redo, activity_key)
                    redo_trace = Trace()
            else:
                redo_trace.append(e)
                # a redo event closes the do run collected so far
                if len(do_trace) > 0:
                    do_log.append(do_trace)
                    do_trace = Trace()
        if len(redo_trace) > 0:
            redo_logs = _append_trace_to_redo_log(redo_trace, redo_logs, redo, activity_key)
        do_log.append(do_trace)
    logs = [do_log]
    logs.extend(redo_logs)
    return logs

Ejemplo n.º 18

0

Mostrar archivo

Archivo: build_graph.py Proyecto: timorohrer/pm4py-mdl

def create_log(G, conn_comp, timestamps, max_comp_len=50, include_loops=False):
    """
    Build an event log with one trace per (small enough) connected component

    For each component whose size does not exceed max_comp_len, the induced
    subgraph is copied into a DiGraph, its self-loops are removed and the nodes
    are visited in topological order. Each node, expected to be a string of the
    form "<name>=<value>", produces one event; when include_loops is set, nodes
    that have a self-loop in the subgraph produce the same event twice. The log
    is finally sorted with sorting.sort_timestamp_log on "time:timestamp".

    Parameters
    -----------
    G
        Graph
    conn_comp
        Sequence of node collections (the connected components)
    timestamps
        Mapping from the value part of a node to its timestamp
    max_comp_len
        Components larger than this are skipped
    include_loops
        Whether nodes with a self-loop are emitted twice

    Returns
    -----------
    log
        Event log sorted by timestamp
    """
    log = EventLog()
    for comp_index, component in enumerate(conn_comp):
        if len(component) > max_comp_len:
            continue
        trace = Trace()
        trace.attributes["concept:name"] = str(comp_index)
        SG = G.subgraph(component)
        loop_free = networkx.DiGraph(SG)
        # iterate over a snapshot because edges are removed while looping
        for source, target in list(loop_free.edges):
            if source == target:
                loop_free.remove_edge(source, target)
        for node in list(networkx.topological_sort(loop_free)):
            selfloop = 1 if (node, node) in SG.edges else 0
            parts = node.split("=")
            repetitions = 2 if (include_loops and selfloop) else 1
            for _ in range(repetitions):
                trace.append(
                    Event({
                        'time:timestamp': timestamps[parts[1]],
                        'concept:name': parts[0],
                        'value': parts[1],
                        'typevalue': node,
                        'selfloop': selfloop
                    }))
        log.append(trace)
    return sorting.sort_timestamp_log(log, "time:timestamp")

Ejemplo n.º 19

0

Mostrar archivo

def apply_from_variants_list(var_list, tree, parameters=None):
    """
    Compute the alignments for a list of variants

    Each variant is turned into a trace with variants_util.variant_to_trace,
    the resulting log is aligned through apply, and the alignment found at
    position i is assigned to the variant found at position i.

    Parameters
    -------------
    var_list
        List of variants (for each item, the first entry is the variant itself, the second entry may be the number of cases)
    tree
        Process tree
    parameters
        Parameters of the algorithm

    Returns
    --------------
    dictio_alignments
        Dictionary that assigns to each variant its alignment
    """
    if parameters is None:
        parameters = {}

    log = EventLog()
    for varitem in var_list:
        log.append(variants_util.variant_to_trace(varitem[0], parameters=parameters))

    alignments = apply(log, tree, parameters=parameters)
    return {varitem[0]: alignments[position] for position, varitem in enumerate(var_list)}

Ejemplo n.º 20

0

Mostrar archivo

Archivo: end_activities_filter.py Proyecto: yoannlgd1/pm4py-core

def filter_log_by_end_activities(end_activities,
                                 variants,
                                 vc,
                                 threshold,
                                 activity_key="concept:name"):
    """
    Keep the variants whose end activity is frequent enough

    The end activity of a variant is read from the last event of its first
    trace. A variant is kept when its end activity is a key of end_activities
    and either equals the end activity of the variant named by vc[0][0] or has
    a count of at least threshold.

    Parameters
    ----------
    end_activities
        Dictionary of end activities associated with their count
    variants
        Dictionary with variant as the key and the list of traces as the value
    vc
        List of variant names along with their count
    threshold
        Cutting threshold (variants whose end activity occurs fewer times are removed)
    activity_key
        Activity key in the log (default concept:name)

    Returns
    ----------
    filtered_log
        Filtered log
    """
    filtered_log = EventLog()
    reference_end = variants[vc[0][0]][0][-1][activity_key]
    for variant, traces in variants.items():
        end_activity = traces[0][-1][activity_key]
        if end_activity not in end_activities:
            continue
        if end_activity == reference_end or end_activities[end_activity] >= threshold:
            for trace in traces:
                filtered_log.append(trace)
    return filtered_log

Ejemplo n.º 21

0

Mostrar archivo

def apply_from_variants_list(var_list, parameters=None):
    """
    Discover the log skeleton from a list of variants

    The first entry of every item is converted into a trace through
    variants_util.variant_to_trace; the resulting log is passed to apply.

    Parameters
    ---------------
    var_list
        Variants list (the first entry of each item is the variant)
    parameters
        Parameters

    Returns
    ---------------
    model
        Log skeleton model
    """
    if parameters is None:
        parameters = {}

    log = EventLog()
    for item in var_list:
        log.append(variants_util.variant_to_trace(item[0], parameters=parameters))

    return apply(log, parameters=parameters)

Ejemplo n.º 22

0

Mostrar archivo

def project(log: EventLog, activity: str, activity_key: str) -> List[EventLog]:
    """
    Split the log into the events of one activity and all the other events

    Parameters
    ----------
    log
        original log
    activity
        activity to separate from the rest
    activity_key
        key to use in the event to derive the activity name

    Returns
    -------
    logs
        two logs, each with one filtered trace per original trace: first the
        log restricted to the activity, then the log without the activity
    """

    def is_other(event):
        return event[activity_key] != activity

    def is_activity(event):
        return event[activity_key] == activity

    without_activity = EventLog()
    only_activity = EventLog()
    for trace in log:
        without_activity.append(pm4py.filter_trace(is_other, trace))
        only_activity.append(pm4py.filter_trace(is_activity, trace))
    return [only_activity, without_activity]

Ejemplo n.º 23

0

Mostrar archivo

def apply_tree_variants(variants, parameters=None):
    """
    Apply the IM_F algorithm to a dictionary of variants obtaining a process tree

    Every variant is expanded into as many traces as its number of
    occurrences (the length of the value when it is a list, the value itself
    otherwise) before apply_tree is called on the resulting log.

    Parameters
    ----------
    variants
        Dictionary from variant to its traces (list) or to its count
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    ----------
    process_tree
        Process tree
    """
    log = EventLog()
    # resolved here as in the original code; the value is not used below
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)

    for var in list(variants.keys()):
        occurrences = variants[var]
        if type(occurrences) is list:
            occurrences = len(occurrences)
        for _ in range(occurrences):
            log.append(variants_util.variant_to_trace(var, parameters=parameters))

    return apply_tree(log, parameters=parameters)

Ejemplo n.º 24

0

Mostrar archivo

def keep_one_trace_per_variant(log, parameters=None):
    """
    Reduce the log to the first trace of each variant
    (does not matter for basic inductive miner)

    Parameters
    --------------
    log
        Log
    parameters
        Parameters of the algorithm (forwarded to the variants computation)

    Returns
    --------------
    new_log
        Log (with one trace per variant)
    """
    if parameters is None:
        parameters = {}

    traces_by_variant = variants_module.get_variants(log, parameters=parameters)
    reduced_log = EventLog()
    for traces in traces_by_variant.values():
        reduced_log.append(traces[0])

    return reduced_log

Ejemplo n.º 25

0

Mostrar archivo

def detect(log: EventLog, alphabet: Dict[str, int], act_key: str, use_msd: bool) -> Optional[str]:
    """
    Look for an activity that occurs in every trace and whose removal leaves a
    log on which one of the cut detectors (sequence, xor, concurrent, loop)
    finds a cut

    Parameters
    -------------
    log
        Event log
    alphabet
        Dictionary whose keys are the activities taken as initial candidates
    act_key
        Key to use in the event to derive the activity name
    use_msd
        Whether the msd witnesses are computed and passed to the concurrent cut detection

    Returns
    -------------
    activity
        A candidate activity for which a cut is detected on the projected log,
        None when there is no such activity
    """
    # candidates: activities of the alphabet that appear in every trace
    candidates = set(alphabet.keys())
    for t in log:
        candidates = candidates.intersection(set(map(lambda e: e[act_key], t)))
        if len(candidates) == 0:
            return None
    for a in candidates:
        # projection of the log without the events of the candidate activity
        proj = EventLog()
        for t in log:
            proj.append(pm4py.filter_trace(lambda e: e[act_key] != a, t))
        # the candidate is only examined when no projected trace is empty
        if len(list(filter(lambda t: len(t) == 0, proj))) == 0:
            dfg_proj = discover_dfg.apply(proj, parameters={
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
            alphabet_proj = pm4py.get_attribute_values(proj, act_key)
            start_act_proj = get_starters.get_start_activities(proj, parameters={
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
            # NOTE(review): end activities are computed on `log`, while the start
            # activities above are computed on `proj` - confirm this is intended
            end_act_proj = get_ends.get_end_activities(log, parameters={
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
            pre_proj, post_proj = dfg_utils.get_transitive_relations(dfg_proj, alphabet_proj)
            # the detectors are tried in a fixed order; the first cut found wins
            cut = sequence_cut.detect(alphabet_proj, pre_proj, post_proj)
            if cut is not None:
                return a
            cut = xor_cut.detect(dfg_proj, alphabet_proj)
            if cut is not None:
                return a
            # NOTE(review): msd_algo.apply is also fed `log` rather than `proj`;
            # the msd argument is None when use_msd is false
            cut = concurrent_cut.detect(dfg_proj, alphabet_proj, start_act_proj, end_act_proj,
                                        msd= msdw_algo.derive_msd_witnesses(proj, msd_algo.apply(log, parameters={
                                        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key}), parameters={
                                        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key}) if use_msd else None)
            if cut is not None:
                return a
            cut = loop_cut.detect(dfg_proj, alphabet_proj, start_act_proj, end_act_proj)
            if cut is not None:
                return a
    return None

Ejemplo n.º 26

0

Mostrar archivo

def apply_tree_variants(variants, parameters=None):
    """
    Apply the IM algorithm to a dictionary of variants obtaining a process tree

    One trace is built per variant key by splitting the key on
    constants.DEFAULT_VARIANT_SEP; the values of the dictionary are not read.

    Parameters
    ----------
    variants
        Dictionary whose keys are the variants
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute to use as activity name
            (default concept:name)

    Returns
    ----------
    process_tree
        Process tree
    """
    log = EventLog()
    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)

    for var in list(variants.keys()):
        trace = Trace()
        for act in var.split(constants.DEFAULT_VARIANT_SEP):
            trace.append(Event({activity_key: act}))
        log.append(trace)

    return apply_tree(log, parameters=parameters)

Ejemplo n.º 27

0

Mostrar archivo

def generate_log(pt, no_traces=100):
    """
    Generate a log by executing a process tree repeatedly

    Each execution sequence returned by execute is projected to its labels;
    every label becomes an event and the trace is named after its index.

    Parameters
    ------------
    pt
        Process tree
    no_traces
        Number of traces to generate

    Returns
    ------------
    log
        Trace log object
    """
    log = EventLog()

    for case_index in range(no_traces):
        labels = pt_util.project_execution_sequence_to_labels(execute(pt))

        trace = Trace()
        trace.attributes[xes.DEFAULT_NAME_KEY] = str(case_index)
        for label in labels:
            event = Event()
            event[xes.DEFAULT_NAME_KEY] = label
            trace.append(event)
        log.append(trace)

    return log

Ejemplo n.º 28

0

Mostrar archivo

Archivo: get_prefixes.py Proyecto: yoannlgd1/pm4py-core

def get_log_with_log_prefixes(log, parameters=None):
    """
    Build an extended log holding, case after case, the prefixes of each trace

    For every event two deep copies are appended: the prefix before the event
    and the prefix including it.

    Parameters
    --------------
    log
        Original log
    parameters
        Possible parameters of the algorithm (not read by this function)

    Returns
    -------------
    all_prefixes_log
        Log with all the prefixes
    change_indexes
        One list per trace, repeating len(trace) times the index of the last
        entry of the extended log once that trace has been processed
    """
    prefixes_log = EventLog()
    change_indexes = []

    for trace in log:
        prefix = Trace()
        for event in trace:
            # NOTE(review): both snapshots are taken inside the loop, so the
            # intermediate prefixes are stored twice - confirm intended
            before_event = deepcopy(prefix)
            prefixes_log.append(before_event)
            prefix.append(event)
            after_event = deepcopy(prefix)
            prefixes_log.append(after_event)
        last_index = len(prefixes_log) - 1
        change_indexes.append([last_index] * len(trace))

    return prefixes_log, change_indexes

Ejemplo n.º 29

0

Mostrar archivo

Archivo: cluster.py Proyecto: madhubs08/TraceClustering

def split_log_on_cluster_attribute(log):
    """
    Split a log into two sublogs according to the 'cluster' trace attribute.

    Traces whose 'cluster' attribute differs from the string '0' are treated
    as clustered; the others as not yet clustered.

    Parameters
    -----------
    log
        EventLog object

    Returns
    -----------
    log1
        EventLog object of traces which are assigned to a cluster.
    log2
        EventLog object of traces not assigned to a cluster yet.

    """
    clustered = EventLog()
    unclustered = EventLog()
    for trace in log:
        destination = clustered if trace.attributes['cluster'] != '0' else unclustered
        destination.append(trace)
    return clustered, unclustered

Ejemplo n.º 30

0

Mostrar archivo

Archivo: paths_filter.py Proyecto: hieule23/pm4py-core

def filter_log_by_paths(log,
                        paths,
                        variants,
                        vc,
                        threshold,
                        attribute_key="concept:name"):
    """
    Keep only paths which number of occurrences is above the threshold (or they belong to the first variant)

    A path is the string "<a>,<b>" built from the attribute values of two
    consecutive events. The first event of every non-empty trace is always
    kept, and so is the last event of every trace with more than one event.

    Parameters
    ----------
    log
        Log
    paths
        Dictionary of paths associated with their count
    variants
        Dictionary with variant as the key and the list of traces as the value
    vc
        List of variant names along with their count
    threshold
        Cutting threshold (remove paths which number of occurrences is below the threshold)
    attribute_key
        (If specified) Specify the attribute key to use (default concept:name)

    Returns
    ----------
    filtered_log
        Filtered log (traces that end up empty are not included)
    """
    filtered_log = EventLog()
    # first trace of the variant named by vc[0][0]; its paths are always admitted
    fvft = variants[vc[0][0]][0]
    fvp = set()
    for i in range(0, len(fvft) - 1):
        path = fvft[i][attribute_key] + "," + fvft[i + 1][attribute_key]
        fvp.add(path)
    for trace in log:
        new_trace = Trace()
        jj = 0
        if len(trace) > 0:
            new_trace.append(trace[0])
            # NOTE(review): the scan starts at index 1, so the path between
            # trace[0] and trace[1] is never evaluated - confirm intended
            for j in range(1, len(trace) - 1):
                jj = j
                # cannot trigger: the range above keeps j below len(trace) - 1
                if j >= len(trace):
                    break
                if attribute_key in trace[j] and attribute_key in trace[j + 1]:
                    path = trace[j][attribute_key] + "," + trace[
                        j + 1][attribute_key]
                    if path in paths:
                        if path in fvp or paths[path] >= threshold:
                            # both ends of the path are appended, so an event can be
                            # appended more than once when consecutive paths are kept
                            new_trace.append(trace[j])
                            new_trace.append(trace[j + 1])
        # jj never reaches len(trace), so the second condition always holds and the
        # last event is appended for every trace with more than one event
        if len(trace) > 1 and not jj == len(trace):
            new_trace.append(trace[-1])
        if len(new_trace) > 0:
            # carry the trace-level attributes over to the filtered trace
            for attr in trace.attributes:
                new_trace.attributes[attr] = trace.attributes[attr]
            filtered_log.append(new_trace)
    return filtered_log