Example #1
def filter_on_case_size(log, min_case_size=2, max_case_size=None):
    """
    Keep only the traces in the log whose length is within the given bounds

    Parameters
    -----------
    log
        Log
    min_case_size
        Minimum desired size of traces
    max_case_size
        Maximum desired size of traces

    Returns
    -----------
    filtered_log
        Filtered log
    """
    if max_case_size is not None:
        filtered_log = EventLog([
            trace for trace in log
            if min_case_size <= len(trace) <= max_case_size
        ])
    else:
        filtered_log = EventLog(
            [trace for trace in log if len(trace) >= min_case_size])
    return filtered_log
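A minimal usage sketch (assuming pm4py's EventLog, Trace and Event classes, e.g. from pm4py.objects.log.obj in recent versions): only the trace whose length lies in [min_case_size, max_case_size] survives.

from pm4py.objects.log.obj import EventLog, Trace, Event

# Toy log with traces of lengths 1, 2 and 4
log = EventLog()
for n in (1, 2, 4):
    trace = Trace()
    for i in range(n):
        trace.append(Event({"concept:name": "act_%d" % i}))
    log.append(trace)

filtered = filter_on_case_size(log, min_case_size=2, max_case_size=3)
assert [len(t) for t in filtered] == [2]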
Example #2
def project(log: EventLog, activity: str, activity_key: str) -> List[EventLog]:
    # proj_act collects the occurrences of activity; proj collects all the other events
    proj = EventLog()
    proj_act = EventLog()
    for t in log:
        proj.append(pm4py.filter_trace(lambda e: e[activity_key] != activity, t))
        proj_act.append(pm4py.filter_trace(lambda e: e[activity_key] == activity, t))
    return [proj_act, proj]
Example #3
def split_log_on_cluster_attribute(log):
    """
    Splits a given log into two sublogs based on the 'cluster' trace attribute: separates already clustered traces from not-yet-clustered ones, indicated by the cluster attribute having value 0.

    Parameters
    -----------
    log
        EventLog object

    Returns
    -----------
    log1
        EventLog object of traces which are assigned to a cluster.
    log2
        EventLog object of traces not assigned to a cluster yet.
    
    """
    # Insert traces where cluster attribute is nonzero into log1, rest into log2
    log1 = EventLog()
    log2 = EventLog()
    for trace in log:
        if trace.attributes['cluster'] != '0':
            log1.append(trace)
        else:
            log2.append(trace)
    return log1, log2
Example #4
def project(log: EventLog, cut: Cut, activity_key: str) -> List[EventLog]:
    do = cut[0]
    redo = cut[1:]
    do_log = EventLog()
    redo_logs = [EventLog() for _ in redo]  # independent logs; [EventLog()] * len(redo) would alias one log
    for t in log:
        do_trace = Trace()
        redo_trace = Trace()
        for e in t:
            if e[activity_key] in do:
                do_trace.append(e)
                if len(redo_trace) > 0:
                    redo_logs = _append_trace_to_redo_log(redo_trace, redo_logs, redo, activity_key)
                    redo_trace = Trace()
            else:
                redo_trace.append(e)
                if len(do_trace) > 0:
                    do_log.append(do_trace)
                    do_trace = Trace()
        if len(redo_trace) > 0:
            redo_logs = _append_trace_to_redo_log(redo_trace, redo_logs, redo, activity_key)
        do_log.append(do_trace)
    logs = [do_log]
    logs.extend(redo_logs)
    return logs
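The list comprehension for redo_logs matters: [EventLog()] * len(redo) would repeat one and the same EventLog object, so a trace appended to one redo sub-log would show up in all of them. A quick sketch of the pitfall:

aliased = [EventLog()] * 3
aliased[0].append(Trace())
print([len(l) for l in aliased])       # [1, 1, 1] - one shared log

independent = [EventLog() for _ in range(3)]
independent[0].append(Trace())
print([len(l) for l in independent])   # [1, 0, 0] - three separate logs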
Example #5
def get_encoded_logs(job: Job,
                     use_cache: bool = True) -> (DataFrame, DataFrame):
    """Returns the encoded logs

    Returns the training and test DataFrames encoded using the given job
    configuration, loading from cache when possible.

    :param job: job configuration
    :param use_cache: whether to load saved datasets from the cache
    :return: training and testing DataFrames
    """
    print('\tGetting Dataset')
    if use_cache:
        if LabelledLog.objects.filter(split=job.split,
                                      encoding=job.encoding,
                                      labelling=job.labelling).exists():
            training_df, test_df = get_labelled_logs(job)

        else:
            if job.split.train_log is not None and \
                job.split.test_log is not None and \
                LoadedLog.objects.filter(train_log=job.split.train_log.path,
                                         test_log=job.split.test_log.path).exists():
                training_log, test_log, additional_columns = get_loaded_logs(
                    job.split)

            else:
                training_log, test_log, additional_columns = prepare_logs(
                    job.split)
                if job.split.type == SplitTypes.SPLIT_SINGLE.value:
                    job.split = duplicate_orm_row(job.split)
                    job.split.type = SplitTypes.SPLIT_DOUBLE.value
                    train_name = '0-' + str(
                        int(100 - (job.split.test_size * 100)))
                    job.split.train_log = create_log(EventLog(training_log),
                                                     train_name + '.xes')
                    test_name = str(int(100 -
                                        (job.split.test_size * 100))) + '-100'
                    job.split.test_log = create_log(EventLog(test_log),
                                                    test_name + '.xes')
                    job.split.additional_columns = str(
                        train_name +
                        test_name)  # TODO: find better naming policy
                    job.save()

                put_loaded_logs(job.split, training_log, test_log,
                                additional_columns)

            training_df, test_df = encode_label_logs(
                training_log,
                test_log,
                job,
                additional_columns=additional_columns)
            put_labelled_logs(job, training_df, test_df)
    else:
        training_log, test_log, additional_columns = prepare_logs(job.split)
        training_df, test_df = encode_label_logs(
            training_log, test_log, job, additional_columns=additional_columns)
    return training_df, test_df
Example #6
def apply(tree1, tree2, log, alignments, parameters=None):
    """
    Alignment repair on tree2 based on the alignment of log on tree1

    Parameters
    -----------
    tree1
        Process tree
    tree2
        Process tree
    log
        EventLog
    alignments
        Related alignments of the log on tree1
    parameters
        Parameters of the algorithm

    Returns
    ------------
    alignments
        repaired alignments
    """
    parameters = {} if parameters is None else parameters
    parameters['COMPARE_OPTION'] = 1 if parameters.get(
        'COMPARE_OPTION') is None else parameters['COMPARE_OPTION']
    ret_tuple_as_trans_desc = False if parameters.get(PARAM_ALIGNMENT_RESULT_IS_SYNC_PROD_AWARE) is None else \
        parameters[PARAM_ALIGNMENT_RESULT_IS_SYNC_PROD_AWARE]
    # TODO: wrap in a try-except in case the given alignments are not valid
    alignments = copy.deepcopy(alignments)
    com_res = pt_compare.apply(tree1, tree2, parameters['COMPARE_OPTION'])
    if com_res.value:
        return alignments
    else:
        tree1_total_number = pt_mani_utils.nodes_number(tree1)
        pt_number.apply(com_res.subtree2, 'D', tree1_total_number + 1)
        best_worst_cost = apply_pt_alignments(EventLog([Trace()]), tree2,
                                              parameters)[0]['cost']
        for i in range(len(alignments)):
            align = alignments[i]
            if align.get("repair") is None:
                scope = detect_change_scope(align['alignment'],
                                            com_res.subtree1, log[i],
                                            ret_tuple_as_trans_desc)
                if not len(scope.traces) == 0:
                    sub_aligns_before = apply_pt_alignments(
                        EventLog(scope.traces), com_res.subtree1, parameters)
                    sub_aligns_after = apply_pt_alignments(
                        EventLog(scope.traces), com_res.subtree2, parameters)
                    alignment_reassemble(align['alignment'], sub_aligns_after,
                                         scope.anchor_index, com_res.subtree1,
                                         ret_tuple_as_trans_desc)
                    recompute_cost(align, sub_aligns_before, sub_aligns_after)
                    recompute_fitness(align, log[i], best_worst_cost)
                align["repair"] = True
        for a in alignments:
            a.pop("repair", None)
    return alignments
Example #7
def get_train_test_log(split: Split):
    """Returns training_log and test_log"""
    if split.type == SplitTypes.SPLIT_SINGLE.value and Split.objects.filter(
        type=SplitTypes.SPLIT_DOUBLE.value,
        original_log=split.original_log,
        test_size=split.test_size,
        splitting_method=split.splitting_method
    ).exists() and split.splitting_method != SplitOrderingMethods.SPLIT_RANDOM.value:
        return get_train_test_log(Split.objects.filter(
            type=SplitTypes.SPLIT_DOUBLE.value,
            original_log=split.original_log,
            test_size=split.test_size,
            splitting_method=split.splitting_method
        )[0])
    elif split.original_log is not None and (not Split.objects.filter(
        type=SplitTypes.SPLIT_DOUBLE.value,
        original_log=split.original_log,
        test_size=split.test_size,
        splitting_method=split.splitting_method
    ).exists() or split.splitting_method == SplitOrderingMethods.SPLIT_RANDOM.value):
        training_log, test_log = _split_single_log(split)
        additional_columns = get_additional_columns(get_log(split.original_log))

        if split.splitting_method != SplitOrderingMethods.SPLIT_RANDOM.value:
            _ = Split.objects.get_or_create(
                type=SplitTypes.SPLIT_DOUBLE.value,
                original_log=split.original_log,
                test_size=split.test_size,
                splitting_method=split.splitting_method,
                train_log=create_log(EventLog(training_log), '0-' + str(100 - int(split.test_size * 100)) + '.xes'),
                test_log=create_log(EventLog(test_log), str(100 - int(split.test_size * 100)) + '-100.xes'),
                additional_columns=split.additional_columns
            )[0]

        logger.info("\t\tLoaded single log from {}".format(split.original_log.path))
    else:
        # Have to use sklearn to convert some internal data types
        training_log = get_log(split.train_log)
        additional_columns = get_additional_columns(training_log)
        if split.additional_columns is None:
            split.additional_columns = split.train_log.name + split.test_log.name + '_ac.xes'
            split.save()
        training_log, train_log_to_append = train_test_split(training_log, test_size=0, shuffle=False)
        test_log, test_log_to_append = train_test_split(get_log(split.test_log), test_size=0, shuffle=False)
        logger.info("\t\tLoaded double logs from {} and {}.".format(split.train_log.path, split.test_log.path))
    if len(training_log) == 0:
        raise TypeError("Training log is empty. Create a new Split with better parameters")
    return training_log, test_log, additional_columns
Example #8
def get_log_with_log_prefixes(log, parameters=None):
    """
    Gets an extended log that contains, in order, all the prefixes for a case of the original log

    Parameters
    --------------
    log
        Original log
    parameters
        Possible parameters of the algorithm

    Returns
    -------------
    all_prefixes_log
        Log with all the prefixes
    change_indexes
        Indexes of the extended log where there was a change between cases
    """
    all_prefixes_log = EventLog()
    change_indexes = []

    for trace in log:
        cumulative_trace = Trace()
        for event in trace:
            cumulative_trace.append(event)
            all_prefixes_log.append(deepcopy(cumulative_trace))
        change_indexes.append([len(all_prefixes_log) - 1] * len(trace))

    return all_prefixes_log, change_indexes
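For a single trace <a, b, c> the function yields one prefix per event, and change_indexes records (once per event) the index of the last prefix of that case. A small sketch, reusing the pm4py classes from the earlier examples:

log = EventLog()
trace = Trace()
for name in ("a", "b", "c"):
    trace.append(Event({"concept:name": name}))
log.append(trace)

prefixes, change_indexes = get_log_with_log_prefixes(log)
print([len(t) for t in prefixes])  # [1, 2, 3]
print(change_indexes)              # [[2, 2, 2]]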
Example #9
def filter_on_case_performance(log, inf_perf, sup_perf, parameters=None):
    """
    Gets a filtered log keeping only traces that satisfy the given performance requirements

    Parameters
    ------------
    log
        Log
    inf_perf
        Lower bound on the performance
    sup_perf
        Upper bound on the performance
    parameters
        Parameters

    Returns
    -----------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY,
                                               parameters,
                                               DEFAULT_TIMESTAMP_KEY)
    filtered_log = EventLog([
        trace for trace in log
        if satisfy_perf(trace, inf_perf, sup_perf, timestamp_key)
    ])
    return filtered_log
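Assuming satisfy_perf compares the case duration (last timestamp minus the first, in seconds) against the two bounds, a call could look like this hypothetical sketch:

from datetime import datetime

trace = Trace()
trace.append(Event({"time:timestamp": datetime(2021, 1, 1, 9, 0)}))
trace.append(Event({"time:timestamp": datetime(2021, 1, 1, 10, 0)}))
log = EventLog([trace])

# Keep cases lasting between 30 minutes and 2 hours
filtered = filter_on_case_performance(log, 1800, 7200)
assert len(filtered) == 1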
Example #10
def replay_prediction_calculate(job: Job, log) -> (dict, dict):
    """calculate the prediction for the log coming from replayers

    :param job: job dictionary
    :param log: log model
    :return: runtime results
    """
    additional_columns = get_additional_columns(log)
    data_df, _ = train_test_split(log, test_size=0, shuffle=False)
    data_df, _ = encode_label_logs(data_df, EventLog(), job, additional_columns)
    results = MODEL[job.predictive_model.predictive_model][ModelActions.PREDICT.value](job, data_df)
    logger.info("End {} job {}, {} . Results {}".format('runtime', job.predictive_model.predictive_model, get_run(job), results))
    results_dict = dict(zip(data_df['trace_id'], list(map(int, results))))
    events_for_trace = dict()
    data_encoder_decoder(job, data_df, EventLog())
    return results_dict, events_for_trace
Example #11
def filter_log_by_variants_percentage(log, variants, variants_percentage=0.0):
    """
    Filter the log by variants percentage

    Parameters
    ----------
    log
        Log
    variants
        Dictionary with variant as the key and the list of traces as the value
    variants_percentage
        Percentage of variants that should be kept (the most common variant is always kept)

    Returns
    ----------
    filtered_log
        Filtered log
    """
    filtered_log = EventLog()
    no_of_traces = len(log)
    variant_count = get_variants_sorted_by_count(variants)
    already_added_sum = 0

    for variant, varcount in variant_count:
        percentage_already_added = already_added_sum / no_of_traces
        if already_added_sum == 0 or percentage_already_added < variants_percentage:
            for trace in variants[variant]:
                filtered_log.append(trace)
            already_added_sum = already_added_sum + varcount

    return filtered_log
Example #12
def update_merge(loglist):
    # Concatenate the traces of all the logs in the list into a single log
    mergedlog = EventLog()

    for log in loglist:
        for trace in log:
            mergedlog.append(trace)
    return mergedlog
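Usage is straightforward; note that traces are appended by reference, not copied:

log_a, log_b = EventLog(), EventLog()
log_a.append(Trace())
log_b.append(Trace())

merged = update_merge([log_a, log_b])
assert len(merged) == 2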
Example #13
def keep_one_trace_per_variant(log, parameters=None):
    """
    Keeps only one trace per variant (does not matter for basic inductive miner)

    Parameters
    --------------
    log
        Log
    parameters
        Parameters of the algorithm

    Returns
    --------------
    new_log
        Log (with one trace per variant)
    """
    if parameters is None:
        parameters = {}

    variants = variants_module.get_variants(log, parameters=parameters)
    new_log = EventLog()
    for var in variants:
        new_log.append(variants[var][0])

    return new_log
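For instance, a log with two traces of variant <a, b> and one trace of variant <a> shrinks to two traces (assuming variants_module resolves to pm4py's variants statistics, as in the surrounding module):

log = EventLog()
for seq in (("a", "b"), ("a", "b"), ("a",)):
    trace = Trace()
    for name in seq:
        trace.append(Event({"concept:name": name}))
    log.append(trace)

assert len(keep_one_trace_per_variant(log)) == 2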
Example #14
def create_log(G, conn_comp, timestamps, max_comp_len=50, include_loops=False):
    # Build an event log with one trace per connected component of the graph
    log = EventLog()
    for i in range(len(conn_comp)):
        if len(conn_comp[i]) <= max_comp_len:
            trace = Trace()
            trace.attributes["concept:name"] = str(i)
            SG = G.subgraph(conn_comp[i])
            SGG = networkx.DiGraph(SG)
            # Remove self-loops so that the graph admits a topological sort
            edges = list(SGG.edges)
            for e in edges:
                if e[0] == e[1]:
                    SGG.remove_edge(e[0], e[1])
            sorted_nodes = list(networkx.topological_sort(SGG))
            for n in sorted_nodes:
                selfloop = 1 if (n, n) in SG.edges else 0
                trace.append(
                    Event({
                        'time:timestamp': timestamps[n.split("=")[1]],
                        'concept:name': n.split("=")[0],
                        'value': n.split("=")[1],
                        'typevalue': n,
                        'selfloop': selfloop
                    }))
                if include_loops and selfloop:
                    trace.append(
                        Event({
                            'time:timestamp': timestamps[n.split("=")[1]],
                            'concept:name': n.split("=")[0],
                            'value': n.split("=")[1],
                            'typevalue': n,
                            'selfloop': selfloop
                        }))
            log.append(trace)
    log = sorting.sort_timestamp_log(log, "time:timestamp")
    return log
Example #15
def apply_from_variants_list(var_list, parameters=None):
    """
    Discovers the log skeleton from the variants list

    Parameters
    ---------------
    var_list
        Variants list
    parameters
        Parameters

    Returns
    ---------------
    model
        Log skeleton model
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters, xes.DEFAULT_NAME_KEY)
    variant_delimiter = exec_utils.get_param_value(
        Parameters.PARAMETER_VARIANT_DELIMITER, parameters,
        constants.DEFAULT_VARIANT_SEP)

    log = EventLog()
    for cv in var_list:
        v = cv[0]
        tr = v.split(variant_delimiter)
        trace = Trace()
        for act in tr:
            trace.append(Event({activity_key: act}))
        log.append(trace)

    return apply(log, parameters=parameters)
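A hypothetical variants list: each entry is a tuple whose first element is the variant string (activities joined by the variant delimiter, "," by default) and whose second element may carry the case count, which this function ignores:

var_list = [("register,review,approve", 20), ("register,approve", 5)]
model = apply_from_variants_list(var_list)  # log skeleton discovered from 2 traces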
Example #16
def filter_traces_contained(log, dt1, dt2, parameters=None):
    """
    Get traces that are contained in the given interval

    Parameters
    -----------
    log
        Trace log_skeleton
    dt1
        Lower bound to the interval
    dt2
        Upper bound to the interval
    parameters
        Possible parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> Attribute to use as timestamp

    Returns
    ------------
    filtered_log
        Filtered log_skeleton
    """
    if parameters is None:
        parameters = {}
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY,
                                               parameters,
                                               DEFAULT_TIMESTAMP_KEY)
    dt1 = get_dt_from_string(dt1)
    dt2 = get_dt_from_string(dt2)
    filtered_log = EventLog([
        trace for trace in log if is_contained(trace, dt1, dt2, timestamp_key)
    ])
    return filtered_log
Example #17
def sample_log(log, no_traces=100):
    """
    Randomly sample a fixed number of traces from the original log

    Parameters
    -----------
    log
        Log
    no_traces
        Number of traces that the sample should have

    Returns
    -----------
    new_log
        Sampled log, containing at most no_traces traces
    """
    new_log = EventLog(attributes=log.attributes,
                       extensions=log.extensions,
                       globals=log._omni,
                       classifiers=log.classifiers)
    # Draw random indexes; duplicates collapse in the set, so the sample
    # may contain fewer than no_traces traces
    set_traces = set()
    for i in range(0, min(no_traces, len(log._list))):
        set_traces.add(random.randrange(0, len(log._list)))
    set_traces = list(set_traces)
    for trace in set_traces:
        new_log.append(copy(log._list[trace]))
    return new_log
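A self-contained sketch; the assertion only bounds the sample size, since duplicate indexes in the random draw can shrink it:

import random

random.seed(42)
log = EventLog()
for i in range(10):
    log.append(Trace())

sample = sample_log(log, no_traces=5)
assert len(sample) <= 5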
Example #18
def apply(log, admitted_variants, parameters=None):
    """
    Filter log keeping/removing only provided variants

    Parameters
    -----------
    log
        Log object
    admitted_variants
        Admitted variants
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> Attribute identifying the activity in the log
            Parameters.POSITIVE -> Indicate if events should be kept/removed
    """

    if parameters is None:
        parameters = {}
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)
    variants = get_variants(log, parameters=parameters)
    log = EventLog(list(), attributes=log.attributes, extensions=log.extensions, classifiers=log.classifiers,
                   omni_present=log.omni_present)
    for variant in variants:
        if (positive and variant in admitted_variants) or (not positive and variant not in admitted_variants):
            for trace in variants[variant]:
                log.append(trace)
    return log
Example #19
def generate_log(pt, no_traces=100):
    """
    Generate a log out of a process tree

    Parameters
    ------------
    pt
        Process tree
    no_traces
        Number of traces to generate

    Returns
    ------------
    log
        Trace log object
    """
    log = EventLog()

    for i in range(no_traces):
        ex_seq = execute(pt)
        ex_seq_labels = pt_util.project_execution_sequence_to_labels(ex_seq)

        trace = Trace()
        trace.attributes[xes.DEFAULT_NAME_KEY] = str(i)
        for label in ex_seq_labels:
            event = Event()
            event[xes.DEFAULT_NAME_KEY] = label
            trace.append(event)
        log.append(trace)

    return log
Example #20
def apply_tree_variants(variants, parameters=None):
    """
    Apply the IM_F algorithm to a dictionary of variants obtaining a process tree

    Parameters
    ----------
    variants
        Variants
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    ----------
    process_tree
        Process tree
    """
    log = EventLog()
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)

    var_keys = list(variants.keys())
    for var in var_keys:
        val = variants[var]
        if type(val) is list:
            val = len(val)
        for i in range(val):
            trace = variants_util.variant_to_trace(var, parameters=parameters)
            log.append(trace)

    return apply_tree(log, parameters=parameters)
Example #21
def apply_tree_variants(variants, parameters=None):
    """
    Apply the IM algorithm to a dictionary of variants obtaining a process tree

    Parameters
    ----------
    variants
        Variants
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log_skeleton to use as activity name
            (default concept:name)

    Returns
    ----------
    process_tree
        Process tree
    """
    log = EventLog()
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)

    var_keys = list(variants.keys())
    for var in var_keys:
        trace = Trace()
        activities = var.split(constants.DEFAULT_VARIANT_SEP)
        for act in activities:
            trace.append(Event({activity_key: act}))
        log.append(trace)

    return apply_tree(log, parameters=parameters)
Example #22
def filter_log_by_end_activities(end_activities,
                                 variants,
                                 vc,
                                 threshold,
                                 activity_key="concept:name"):
    """
    Keep only the variants of the log whose end activity has a number of occurrences above the threshold
    (the end activity of the most common variant is always kept)

    Parameters
    ----------
    end_activities
        Dictionary of end activities associated with their count
    variants
        (If specified) Dictionary with variant as the key and the list of traces as the value
    vc
        List of variant names along with their count
    threshold
        Cutting threshold (variants whose end activity occurs fewer times than the threshold are removed)
    activity_key
        (If specified) Activity key in the log (default concept:name)
    
    Returns
    ----------
    filtered_log
        Filtered log
    """
    filtered_log = EventLog()
    # End activity of the most common variant (always kept)
    fvea = variants[vc[0][0]][0][-1][activity_key]
    for variant in variants:
        vea = variants[variant][0][-1][activity_key]
        if vea in end_activities:
            if vea == fvea or end_activities[vea] >= threshold:
                for trace in variants[variant]:
                    filtered_log.append(trace)
    return filtered_log
Example #23
def detect(log: EventLog, alphabet: Dict[str, int], act_key: str, use_msd: bool) -> Optional[str]:
    candidates = set(alphabet.keys())
    for t in log:
        candidates = candidates.intersection(set(map(lambda e: e[act_key], t)))
        if len(candidates) == 0:
            return None
    for a in candidates:
        proj = EventLog()
        for t in log:
            proj.append(pm4py.filter_trace(lambda e: e[act_key] != a, t))
        if len(list(filter(lambda t: len(t) == 0, proj))) == 0:
            dfg_proj = discover_dfg.apply(proj, parameters={
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
            alphabet_proj = pm4py.get_attribute_values(proj, act_key)
            start_act_proj = get_starters.get_start_activities(proj, parameters={
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
            end_act_proj = get_ends.get_end_activities(proj, parameters={
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
            pre_proj, post_proj = dfg_utils.get_transitive_relations(dfg_proj, alphabet_proj)
            cut = sequence_cut.detect(alphabet_proj, pre_proj, post_proj)
            if cut is not None:
                return a
            cut = xor_cut.detect(dfg_proj, alphabet_proj)
            if cut is not None:
                return a
            cut = concurrent_cut.detect(dfg_proj, alphabet_proj, start_act_proj, end_act_proj,
                                        msd=msdw_algo.derive_msd_witnesses(proj, msd_algo.apply(proj, parameters={
                                            constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key}), parameters={
                                            constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key}) if use_msd else None)
            if cut is not None:
                return a
            cut = loop_cut.detect(dfg_proj, alphabet_proj, start_act_proj, end_act_proj)
            if cut is not None:
                return a
    return None
Example #24
def apply(df, parameters=None):
    """
    Convert a dataframe into a log containing 1 case per variant (only control-flow
    perspective is considered)

    Parameters
    -------------
    df
        Dataframe
    parameters
        Parameters of the algorithm

    Returns
    -------------
    log
        Event log
    """
    from pm4py.statistics.traces.pandas import case_statistics

    if parameters is None:
        parameters = {}
    variant_stats = case_statistics.get_variant_statistics(df, parameters=parameters)
    activity_key = parameters[
        pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY] if pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY
    log = EventLog()
    for vd in variant_stats:
        variant = vd['variant'].split(",")
        trace = Trace()
        for activity in variant:
            event = Event()
            event[activity_key] = activity
            trace.append(event)
        log.append(trace)
    return log
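A minimal sketch with pm4py's default column names (case id in case:concept:name, activity in concept:name); the exact columns consulted depend on the configured parameters:

import pandas as pd

df = pd.DataFrame({
    "case:concept:name": ["1", "1", "2"],
    "concept:name": ["a", "b", "a"],
    "time:timestamp": pd.to_datetime(["2021-01-01", "2021-01-02", "2021-01-03"]),
})
variant_log = apply(df)  # two traces: <a, b> and <a>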
Example #25
def filter_cases_exceeding_specified_acti_performance(log,
                                                      transition_performance,
                                                      activity, lower_bound):
    """
    Keep only the cases where the duration of the given activity exceeds the specified threshold

    Parameters
    ------------
    log
        Event log
    transition_performance
        Dictionary where each transition label is associated to performance measures
    activity
        Target activity (of the filter)
    lower_bound
        Lower bound (cases are kept if the duration of the activity exceeds it)

    Returns
    ------------
    filtered_log
        Filtered log
    """
    satisfying_indexes = get_idx_exceeding_specified_acti_performance(
        log, transition_performance, activity, lower_bound)
    new_log = EventLog(list(log[i] for i in satisfying_indexes))
    return new_log
Example #26
def apply(log, admitted_start_activities, parameters=None):
    """
    Filter the log on the specified start activities

    Parameters
    -----------
    log
        Event log
    admitted_start_activities
        Admitted start activities
    parameters
        Algorithm parameters

    Returns
    -----------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    attribute_key = parameters[
        PARAMETER_CONSTANT_ACTIVITY_KEY] if PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else DEFAULT_NAME_KEY

    filtered_log = EventLog([
        trace for trace in log
        if trace and trace[0][attribute_key] in admitted_start_activities
    ])

    return filtered_log
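For example, keeping only the cases that start with "register" (the activity key defaults to concept:name):

log = EventLog()
for first, second in (("register", "review"), ("register", "approve"), ("review", "approve")):
    trace = Trace()
    trace.append(Event({"concept:name": first}))
    trace.append(Event({"concept:name": second}))
    log.append(trace)

filtered = apply(log, {"register"})
assert len(filtered) == 2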
Example #27
def apply(log, admitted_end_activities, parameters=None):
    """
    Filter the log on the specified end activities

    Parameters
    -----------
    log
        Log
    admitted_end_activities
        Admitted end activities
    parameters
        Algorithm parameters

    Returns
    -----------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    attribute_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                               parameters, DEFAULT_NAME_KEY)

    filtered_log = [
        trace for trace in log
        if trace and trace[-1][attribute_key] in admitted_end_activities
    ]
    return EventLog(filtered_log)
Example #28
def filter_traces_intersecting(log, dt1, dt2, parameters=None):
    """
    Filter traces intersecting the given interval

    Parameters
    -----------
    log
        Trace log
    dt1
        Lower bound to the interval
    dt2
        Upper bound to the interval
    parameters
        Possible parameters of the algorithm, including:
            timestamp_key -> Attribute to use as timestamp

    Returns
    ------------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    timestamp_key = parameters[
        PARAMETER_CONSTANT_TIMESTAMP_KEY] if PARAMETER_CONSTANT_TIMESTAMP_KEY in parameters else DEFAULT_TIMESTAMP_KEY
    dt1 = get_dt_from_string(dt1)
    dt2 = get_dt_from_string(dt2)
    filtered_log = EventLog([trace for trace in log if is_intersecting(trace, dt1, dt2, timestamp_key)])
    return filtered_log
Example #29
def apply_from_variants_list(var_list, tree, parameters=None):
    """
    Apply the alignments from the specification of a list of variants in the log

    Parameters
    -------------
    var_list
        List of variants (for each item, the first entry is the variant itself, the second entry may be the number of cases)
    tree
        Process tree
    parameters
        Parameters of the algorithm

    Returns
    --------------
    dictio_alignments
        Dictionary that assigns to each variant its alignment
    """
    if parameters is None:
        parameters = {}

    dictio_alignments = {}
    log = EventLog()

    for index, varitem in enumerate(var_list):
        trace = variants_util.variant_to_trace(varitem[0], parameters=parameters)
        log.append(trace)

    alignments = apply(log, tree, parameters=parameters)
    for index, varitem in enumerate(var_list):
        dictio_alignments[varitem[0]] = alignments[index]
    return dictio_alignments
Example #30
def apply(log, admitted_variants, parameters=None):
    """
    Filter log keeping/removing only provided variants

    Parameters
    -----------
    log
        Log object
    admitted_variants
        Admitted variants
    parameters
        Parameters of the algorithm, including:
            activity_key -> Attribute identifying the activity in the log
            positive -> Indicate if events should be kept/removed
    """

    if parameters is None:
        parameters = {}
    positive = parameters["positive"] if "positive" in parameters else True
    variants = get_variants(log, parameters=parameters)
    log = EventLog()
    for variant in variants:
        if (positive and variant in admitted_variants) or (
                not positive and variant not in admitted_variants):
            for trace in variants[variant]:
                log.append(trace)
    return log