Ejemplo n.º 1
0
def detect(log: EventLog, alphabet: Dict[str, int], act_key: str, use_msd: bool) -> Optional[str]:
    '''
    Searches for an activity that occurs in every trace and whose removal
    makes one of the standard cuts detectable.

    Only activities present in every trace of the log are considered. For each
    such candidate the log is projected by dropping all of its events; if no
    projected trace becomes empty, the sequence, xor, concurrent and loop cut
    detectors are tried (in that order) on the projected log.

    Parameters
    ----------
    log
        Event log to analyse
    alphabet
        Dictionary whose keys are the activities of the log
    act_key
        Event attribute that holds the activity label
    use_msd
        If True, minimum self-distance witnesses are passed to the concurrent
        cut detector; otherwise None is passed

    Returns
    -------
    The first candidate activity for which a cut is detected on the projected
    log, or None if there is no such activity.
    '''
    # only activities that occur in every single trace are candidates
    candidates = set(alphabet)
    for t in log:
        candidates.intersection_update({e[act_key] for e in t})
        if not candidates:
            return None

    for a in candidates:
        proj = EventLog()
        for t in log:
            proj.append(pm4py.filter_trace(lambda e: e[act_key] != a, t))
        # a candidate that leaves some trace empty is not usable
        if any(len(t) == 0 for t in proj):
            continue

        dfg_proj = discover_dfg.apply(proj, parameters={
            constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
        alphabet_proj = pm4py.get_attribute_values(proj, act_key)
        start_act_proj = get_starters.get_start_activities(proj, parameters={
            constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
        # the end activities must be read from the projected log as well: the
        # original log may end with the removed activity, which is not part of
        # the projected alphabet
        end_act_proj = get_ends.get_end_activities(proj, parameters={
            constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
        pre_proj, post_proj = dfg_utils.get_transitive_relations(dfg_proj, alphabet_proj)

        if sequence_cut.detect(alphabet_proj, pre_proj, post_proj) is not None:
            return a
        if xor_cut.detect(dfg_proj, alphabet_proj) is not None:
            return a

        if use_msd:
            # removing the candidate changes the distance between repetitions
            # of the remaining activities, so the self-distances are measured
            # on the same projected log the witnesses are derived from
            msd_proj = msd_algo.apply(proj, parameters={
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
            msd_witnesses = msdw_algo.derive_msd_witnesses(proj, msd_proj, parameters={
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
        else:
            msd_witnesses = None
        if concurrent_cut.detect(dfg_proj, alphabet_proj, start_act_proj, end_act_proj,
                                 msd=msd_witnesses) is not None:
            return a

        if loop_cut.detect(dfg_proj, alphabet_proj, start_act_proj, end_act_proj) is not None:
            return a
    return None
Ejemplo n.º 2
0
def derive_msd_witnesses(
    log: EventLog,
    msd: Optional[Dict[Any, int]] = None,
    parameters: Optional[Dict[Union[str, Parameters], Any]] = None
) -> Dict[str, Set[str]]:
    '''
    Derives the witnesses of the minimum self distance of each activity.

    The self distance of a in <a> is infinity, of a in <a,a> is 0, in <a,b,a> is 1, etc.
    The minimum self distance is the smallest self distance observed in the event log.
    A 'witness' of an activity is an activity that appears between two consecutive
    occurrences of it that are exactly the minimum self distance apart.
    For example, if the minimum self distance of activity a in some log L is 2 and
    trace <a,b,c,a> is in L, then b and c are witnesses of a.

    Parameters
    ----------
    log
        Event log to use
    msd
        Optional minimum self distance dictionary; computed from the log when None
    parameters
        Optional parameters dictionary (the activity key is read from it)

    Returns
    -------
    Dictionary mapping each activity with a minimum self distance greater than 0
    to its set of witnesses.
    '''
    log = converter.apply(log,
                          variant=converter.Variants.TO_EVENT_LOG,
                          parameters=parameters)
    act_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters,
                                         xes_constants.DEFAULT_NAME_KEY)
    alphabet = pm4py.get_event_attribute_values(log, act_key)
    if msd is None:
        msd = msd_algo.apply(log, parameters)

    # work on plain lists of activity labels from here on
    label_traces = [[event[act_key] for event in trace] for trace in log]

    witnesses = {}
    for activity in alphabet:
        # activities without a positive minimum self distance get no entry
        if activity in msd and msd[activity] > 0:
            between = set()
            witnesses[activity] = between
            for labels in label_traces:
                positions = [pos for pos, label in enumerate(labels) if label == activity]
                # consecutive occurrences; fewer than two positions yields no pairs
                for left, right in zip(positions, positions[1:]):
                    if right - left - 1 == msd[activity]:
                        between.update(labels[left + 1:right])
    return witnesses
Ejemplo n.º 3
0
def get_minimum_self_distances(log: EventLog) -> Dict[str, int]:
    '''
    Computes the minimum self-distance of the activities observed in an event log.

    The self distance of a in <a> is infinity, of a in <a,a> is 0, in <a,b,a> is 1, etc.
    The minimum self distance of an activity is the smallest self distance
    observed for it in the event log.

    Parameters
    ----------
    log
        event log (either pandas.DataFrame, EventLog or EventStream)

    Returns
    -------
        dict mapping an activity to its self-distance, if it exists, otherwise it is not part of the dict.
    '''
    # imported lazily, inside the function
    from pm4py.algo.discovery.minimum_self_distance import algorithm as self_distance_discovery
    distances = self_distance_discovery.apply(log)
    return distances
Ejemplo n.º 4
0
def derive_minimum_self_distance(log: Union[DataFrame, EventLog, EventStream]) -> Dict[str, int]:
    '''
    Computes the minimum self-distance of the activities observed in an event log.

    The self distance of a in <a> is infinity, of a in <a,a> is 0, in <a,b,a> is 1, etc.
    The activity key 'concept:name' is used.

    Parameters
    ----------
    log
        event log (either pandas.DataFrame, EventLog or EventStream)

    Returns
    -------
        dict mapping an activity to its self-distance, if it exists, otherwise it is not part of the dict.
    '''
    # imported lazily, inside the function
    from pm4py.algo.discovery.minimum_self_distance import algorithm as self_distance_discovery
    distances = self_distance_discovery.apply(log)
    return distances
Ejemplo n.º 5
0
def get_minimum_self_distances(log: EventLog) -> Dict[str, int]:
    '''
    This algorithm computes the minimum self-distance for each activity observed in an event log.
    The self distance of a in <a> is infinity, of a in <a,a> is 0, in <a,b,a> is 1, etc.
    The minimum self distance is the minimal observed self distance value in the event log.

    Parameters
    ----------
    log
        event log (either pandas.DataFrame, EventLog or EventStream)

    Returns
    -------
        dict mapping an activity to its self-distance, if it exists, otherwise it is not part of the dict.

    Raises
    ------
    Exception
        if log is not a pandas.DataFrame, EventLog or EventStream (or a subclass of one of them)
    '''
    # isinstance instead of an exact type comparison, so that subclasses of the
    # supported log types are accepted too
    if not isinstance(log, (pd.DataFrame, EventLog, EventStream)):
        raise Exception("the method can be applied only to a traditional event log!")

    from pm4py.algo.discovery.minimum_self_distance import algorithm as msd_algo
    return msd_algo.apply(log, parameters=get_properties(log))
Ejemplo n.º 6
0
def get_minimum_self_distance_witnesses(log: EventLog) -> Dict[str, Set[str]]:
    '''
    Derives the witnesses of the minimum self distance of each activity.

    The self distance of a in <a> is infinity, of a in <a,a> is 0, in <a,b,a> is 1, etc.
    The minimum self distance is the smallest self distance observed in the event log.
    A 'witness' is an activity that witnesses the minimum self distance.
    For example, if the minimum self distance of activity a in some log L is 2 and
    trace <a,b,c,a> is in L, then b and c are witnesses of a.

    Parameters
    ----------
    log
        Event Log to use

    Returns
    -------
    Dictionary mapping each activity to a set of witnesses.
    '''
    # imported lazily, inside the function
    from pm4py.algo.discovery.minimum_self_distance import algorithm as self_distance_discovery
    from pm4py.algo.discovery.minimum_self_distance import utils as self_distance_utils
    # the distances are computed first and then handed to the witness derivation
    distances = self_distance_discovery.apply(log)
    return self_distance_utils.derive_msd_witnesses(log, distances)
Ejemplo n.º 7
0
def get_minimum_self_distance_witnesses(log: EventLog) -> Dict[str, Set[str]]:
    '''
    This function derives the minimum self distance witnesses.
    The self distance of a in <a> is infinity, of a in <a,a> is 0, in <a,b,a> is 1, etc.
    The minimum self distance is the minimal observed self distance value in the event log.
    A 'witness' is an activity that witnesses the minimum self distance.
    For example, if the minimum self distance of activity a in some log L is 2, then,
    if trace <a,b,c,a> is in log L, b and c are a witness of a.

    Parameters
    ----------
    log
        Event Log to use

    Returns
    -------
    Dictionary mapping each activity to a set of witnesses.

    Raises
    ------
    Exception
        if log is not a pandas.DataFrame, EventLog or EventStream (or a subclass of one of them)
    '''
    # isinstance instead of an exact type comparison, so that subclasses of the
    # supported log types are accepted too
    if not isinstance(log, (pd.DataFrame, EventLog, EventStream)):
        raise Exception("the method can be applied only to a traditional event log!")

    from pm4py.algo.discovery.minimum_self_distance import algorithm as msd_algo
    from pm4py.algo.discovery.minimum_self_distance import utils as msdw_algo
    # each call receives its own properties dictionary, as before
    msd = msd_algo.apply(log, parameters=get_properties(log))
    return msdw_algo.derive_msd_witnesses(log, msd, parameters=get_properties(log))
Ejemplo n.º 8
0
def __inductive_miner_internal(log,
                               dfg,
                               threshold,
                               root,
                               act_key,
                               use_msd,
                               remove_noise=False):
    '''
    Performs one step of the inductive miner on the given log.

    The steps are tried in a fixed order and the first one that applies
    determines the result: empty-trace handling, base cases, the sequence,
    xor, concurrent and loop cuts, then the fall-throughs (activity once per
    trace, activity concurrent, strict tau loop, tau loop). If nothing applies
    and threshold > 0, the mining is retried once with remove_noise=True;
    otherwise a flower model is returned.

    Parameters
    ----------
    log
        Event log to mine
    dfg
        Directly-follows graph used by the cut detectors; filtered on the
        threshold first when threshold > 0 and remove_noise is True
    threshold
        Noise threshold
    root
        Parent node under which the resulting process tree is attached
    act_key
        Event attribute that holds the activity label
    use_msd
        If True, minimum self-distance witnesses are passed to the concurrent
        cut detector
    remove_noise
        Whether the directly-follows graph is filtered on the threshold

    Returns
    -------
    The result of the first step that applies.
    '''

    def activity_parameters():
        # a fresh dictionary per call, so callees never share a parameters object
        return {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key}

    def recurse(operator_node, sublogs):
        return __add_operator_recursive_logs(operator_node, threshold, act_key,
                                             sublogs, use_msd)

    alphabet = pm4py.get_event_attribute_values(log, act_key)
    if threshold > 0 and remove_noise:
        unfiltered_end_activities = get_ends.get_end_activities(
            log, parameters=activity_parameters())
        dfg = __filter_dfg_on_threshold(dfg, unfiltered_end_activities, threshold)

    size_with_empty = len(log)
    log = pm4py.filter_log(lambda t: len(t) > 0, log)
    dropped = size_with_empty - len(log)

    # revised EMPTYSTRACES
    if dropped > size_with_empty * threshold:
        return recurse(pt.ProcessTree(pt.Operator.XOR, root), [EventLog(), log])

    start_activities = get_starters.get_start_activities(
        log, parameters=activity_parameters())
    end_activities = get_ends.get_end_activities(
        log, parameters=activity_parameters())

    if __is_base_case_act(log, act_key) or __is_base_case_silent(log):
        return __apply_base_case(log, root, act_key)

    # --- cuts, in order: sequence, xor, concurrent, loop ---
    pre, post = dfg_utils.get_transitive_relations(dfg, alphabet)
    seq = sequence_cut.detect(alphabet, pre, post)
    if seq is not None:
        return recurse(pt.ProcessTree(pt.Operator.SEQUENCE, root),
                       sequence_cut.project(log, seq, act_key))

    xor = xor_cut.detect(dfg, alphabet)
    if xor is not None:
        return recurse(pt.ProcessTree(pt.Operator.XOR, root),
                       xor_cut.project(log, xor, act_key))

    if use_msd:
        msd_witnesses = msdw_algo.derive_msd_witnesses(
            log,
            msd_algo.apply(log, parameters=activity_parameters()),
            parameters=activity_parameters())
    else:
        msd_witnesses = None
    conc = concurrent_cut.detect(dfg,
                                 alphabet,
                                 start_activities,
                                 end_activities,
                                 msd=msd_witnesses)
    if conc is not None:
        return recurse(pt.ProcessTree(pt.Operator.PARALLEL, root),
                       concurrent_cut.project(log, conc, act_key))

    loop = loop_cut.detect(dfg, alphabet, start_activities, end_activities)
    if loop is not None:
        return recurse(pt.ProcessTree(pt.Operator.LOOP, root),
                       loop_cut.project(log, loop, act_key))

    # --- fall-throughs ---
    aopt = activity_once_per_trace.detect(log, alphabet, act_key)
    if aopt is not None:
        parallel = pt.ProcessTree(operator=pt.Operator.PARALLEL, parent=root)
        leaf = pt.ProcessTree(operator=None, parent=parallel, label=aopt)
        parallel.children.append(leaf)
        return recurse(parallel,
                       activity_once_per_trace.project(log, aopt, act_key))

    act_conc = activity_concurrent.detect(log, alphabet, act_key, use_msd)
    if act_conc is not None:
        return recurse(pt.ProcessTree(pt.Operator.PARALLEL, root),
                       activity_concurrent.project(log, act_conc, act_key))

    stl = strict_tau_loop.detect(log, start_activities, end_activities, act_key)
    if stl is not None:
        return recurse(pt.ProcessTree(pt.Operator.LOOP, root), [stl, EventLog()])

    tl = tau_loop.detect(log, start_activities, act_key)
    if tl is not None:
        return recurse(pt.ProcessTree(pt.Operator.LOOP, root), [tl, EventLog()])

    # nothing applied: retry once with noise removal before giving up
    if threshold > 0 and not remove_noise:
        return __inductive_miner(log,
                                 dfg,
                                 threshold,
                                 root,
                                 act_key,
                                 use_msd,
                                 remove_noise=True)

    return __flower(alphabet, root)
Ejemplo n.º 9
0
 def test_minimum_self_distance(self):
     # Smoke test: reads the running-example log and runs the minimum
     # self-distance discovery on it; the result is not inspected.
     import pm4py
     from pm4py.algo.discovery.minimum_self_distance import algorithm as self_distance_discovery
     xes_path = os.path.join("input_data", "running-example.xes")
     event_log = pm4py.read_xes(xes_path)
     distances = self_distance_discovery.apply(event_log)
Ejemplo n.º 10
0
def inductive_miner(log, dfg, threshold, root, act_key, use_msd):
    '''
    Performs one step of the inductive miner on the given log.

    If the log contains empty traces, an XOR between an empty log and the
    non-empty traces is created. Otherwise the steps are tried in a fixed
    order and the first one that applies determines the result: base cases,
    the sequence, xor, concurrent and loop cuts, then the fall-throughs
    (activity once per trace, activity concurrent, strict tau loop, tau loop).
    If nothing applies, a flower model is returned.

    Parameters
    ----------
    log
        Event log to mine
    dfg
        Directly-follows graph used by the cut detectors
    threshold
        Threshold handed on to the recursive calls
    root
        Parent node under which the resulting process tree is attached
    act_key
        Event attribute that holds the activity label
    use_msd
        If True, minimum self-distance witnesses are passed to the concurrent
        cut detector

    Returns
    -------
    The result of the first step that applies.
    '''

    def activity_parameters():
        # a fresh dictionary per call, so callees never share a parameters object
        return {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key}

    def recurse(operator_node, sublogs):
        return _add_operator_recursive(operator_node, threshold, act_key,
                                       sublogs, use_msd)

    alphabet = pm4py.get_attribute_values(log, act_key)
    start_activities = get_starters.get_start_activities(
        log, parameters=activity_parameters())
    end_activities = get_ends.get_end_activities(
        log, parameters=activity_parameters())
    empty_traces = pm4py.filter_log(lambda trace: len(trace) == 0, log)

    # empty traces present: split them off, no cut detection is attempted
    if len(empty_traces) > 0:
        nempty = pm4py.filter_log(lambda t: len(t) > 0, log)
        return recurse(pt.ProcessTree(pt.Operator.XOR, root),
                       [EventLog(), nempty])

    if _is_base_case_act(log, act_key) or _is_base_case_silent(log):
        return _apply_base_case(log, root, act_key)

    # --- cuts, in order: sequence, xor, concurrent, loop ---
    pre, post = dfg_utils.get_transitive_relations(dfg, alphabet)
    seq = sequence_cut.detect(alphabet, pre, post)
    if seq is not None:
        return recurse(pt.ProcessTree(pt.Operator.SEQUENCE, root),
                       sequence_cut.project(log, seq, act_key))

    xor = xor_cut.detect(dfg, alphabet)
    if xor is not None:
        return recurse(pt.ProcessTree(pt.Operator.XOR, root),
                       xor_cut.project(log, xor, act_key))

    if use_msd:
        msd_witnesses = msdw_algo.derive_msd_witnesses(
            log,
            msd_algo.apply(log, parameters=activity_parameters()),
            parameters=activity_parameters())
    else:
        msd_witnesses = None
    conc = concurrent_cut.detect(dfg,
                                 alphabet,
                                 start_activities,
                                 end_activities,
                                 msd=msd_witnesses)
    if conc is not None:
        return recurse(pt.ProcessTree(pt.Operator.PARALLEL, root),
                       concurrent_cut.project(log, conc, act_key))

    loop = loop_cut.detect(dfg, alphabet, start_activities, end_activities)
    if loop is not None:
        return recurse(pt.ProcessTree(pt.Operator.LOOP, root),
                       loop_cut.project(log, loop, act_key))

    # --- fall-throughs ---
    aopt = activity_once_per_trace.detect(log, alphabet, act_key)
    if aopt is not None:
        parallel = pt.ProcessTree(operator=pt.Operator.PARALLEL, parent=root)
        leaf = pt.ProcessTree(operator=None, parent=parallel, label=aopt)
        parallel.children.append(leaf)
        return recurse(parallel,
                       activity_once_per_trace.project(log, aopt, act_key))

    act_conc = activity_concurrent.detect(log, alphabet, act_key, use_msd)
    if act_conc is not None:
        return recurse(pt.ProcessTree(pt.Operator.PARALLEL, root),
                       activity_concurrent.project(log, act_conc, act_key))

    stl = strict_tau_loop.detect(log, start_activities, end_activities, act_key)
    if stl is not None:
        return recurse(pt.ProcessTree(pt.Operator.LOOP, root), [stl, EventLog()])

    tl = tau_loop.detect(log, start_activities, act_key)
    if tl is not None:
        return recurse(pt.ProcessTree(pt.Operator.LOOP, root), [tl, EventLog()])

    return _flower(alphabet, root)