Example #1
0
def sublog2varlist(log, freq_thres, num):
    '''
    Build the list of variants of a sublog, keeping the frequent variants plus
    the infrequent ones that are still ranked high enough.

    Variants are ranked by descending count. A variant is kept when its count
    reaches ``freq_thres``; a variant below the threshold is kept anyway when
    its 0-based rank is lower than ``num``.

    :param log: sublog containing the selected case attribute value
    :param freq_thres: (int) minimum count for a variant to count as frequent
    :param num: (int) rank bound below which infrequent variants are kept anyway
    :return: list with one entry per kept variant (the value returned by
        variants_util.get_activities_from_variant), frequent variants first
    '''
    ranked = sorted(case_statistics.get_variant_statistics(log),
                    key=lambda entry: entry['count'],
                    reverse=True)

    frequent = []
    top_ranked = []
    for rank, entry in enumerate(ranked):
        if entry['count'] >= freq_thres:
            frequent.append(entry['variant'])
        elif rank < num:
            top_ranked.append(entry['variant'])

    # frequent variants first, then the infrequent ones admitted by their rank
    return [
        variants_util.get_activities_from_variant(variant)
        for variant in frequent + top_ranked
    ]
Example #2
0
def get_language(log, parameters=None):
    """
    Computes the stochastic language of the log from its variants

    Parameters
    --------------
    log
        Event log
    parameters
        Parameters (forwarded to get_variants)

    Returns
    --------------
    dictio
        Dictionary associating the activities of each variant to the share of
        traces following it (a number between 0 and 1; the shares sum to 1)
    """
    occurrences = {
        variants_util.get_activities_from_variant(variant): len(traces)
        for variant, traces in get_variants(log, parameters=parameters).items()
    }

    # turn the absolute counts into relative frequencies
    total = sum(occurrences.values())
    return {variant: count / total for variant, count in occurrences.items()}
Example #3
0
def sublog_percent2actlist(log, upper_percent, parameters=None):
    '''
    Select the variants of a sublog by their position in the cumulative
    frequency distribution.

    Variants are ranked by descending count and the cumulative share of the
    counts is computed. The selection starts after the variants whose
    cumulative share is <= the lower percent and stops after the last variant
    whose cumulative share is <= ``upper_percent``.

    :param log: same as sublog2varlist()
    :param upper_percent: upper bound (between 0 and 1) on the cumulative share
    :param parameters: optional dictionary; Parameters.LOWER_PERCENT gives the
        lower bound on the cumulative share (default 0)
    :return: dataframe of the selected variants with their counts, together
        with the corresponding list of variants
    '''
    if parameters is None:
        parameters = {}
    lower_percent = exec_utils.get_param_value(Parameters.LOWER_PERCENT,
                                               parameters, 0)

    ranked = sorted(case_statistics.get_variant_statistics(log),
                    key=lambda entry: entry['count'],
                    reverse=True)
    df = pd.DataFrame.from_dict(ranked)

    # cumulative share of the counts, normalised by the grand total
    cumulative_share = np.array(df['count']).cumsum()
    cumulative_share = cumulative_share / cumulative_share[-1]

    # number of leading variants within each bound
    start = np.count_nonzero(cumulative_share <= lower_percent)
    stop = np.count_nonzero(cumulative_share <= upper_percent)
    df_w_count = df.iloc[start:stop, :]

    str_var_list = [
        variants_util.get_activities_from_variant(variant)
        for variant in df_w_count['variant'].values.tolist()
    ]

    return df_w_count, str_var_list
Example #4
0
def apply_tree_variants(variants, parameters=None):
    """
    Discovers a process tree with the IM algorithm starting from a dictionary of variants

    An event log with one trace per variant is built (each event carries only
    the activity attribute) and handed to apply_tree.

    Parameters
    ----------
    variants
        Variants (only the keys of the dictionary are used)
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)

    Returns
    ----------
    process_tree
        Process tree
    """
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)

    log = EventLog()
    for variant in list(variants.keys()):
        # one synthetic trace per variant, regardless of how often it occurs
        trace = Trace()
        for activity in variants_util.get_activities_from_variant(variant):
            trace.append(Event({activity_key: activity}))
        log.append(trace)

    return apply_tree(log, parameters=parameters)
Example #5
0
def get_language(
    log: EventLog,
    parameters: Optional[Dict[Union[str, Parameters], Any]] = None
) -> Union[Dict[List[str], float], Dict[str, float]]:
    """
    Computes the stochastic language of the log from its variants

    The log is first converted to an event log object.

    Parameters
    --------------
    log
        Event log
    parameters
        Parameters (forwarded to the log conversion and to get_variants)

    Returns
    --------------
    dictio
        Dictionary associating the activities of each variant to the share of
        traces following it (a number between 0 and 1; the shares sum to 1)
    """
    event_log = log_converter.apply(
        log,
        variant=log_converter.Variants.TO_EVENT_LOG,
        parameters=parameters)

    occurrences = {
        variants_util.get_activities_from_variant(variant): len(traces)
        for variant, traces in get_variants(event_log,
                                            parameters=parameters).items()
    }

    # turn the absolute counts into relative frequencies
    total = sum(occurrences.values())
    return {variant: count / total for variant, count in occurrences.items()}
Example #6
0
def apply(log, parameters=None):
    """
    Computes the Working Together metric

    For every variant (computed on the resource attribute), each unordered pair
    of distinct resources appearing in it gets the variant's occurrence count
    divided by len(log) added to both symmetric cells of the matrix.

    Parameters
    ------------
    log
        Log
    parameters
        Possible parameters of the algorithm:
            Parameters.RESOURCE_KEY -> attribute holding the resource

    Returns
    -----------
    tuple
        List containing the metric matrix, the sorted resources list and the boolean False, which indicates
        that the metric is not directed.
    """
    if parameters is None:
        parameters = {}

    import numpy
    from pm4py.statistics.traces.generic.pandas import case_statistics

    resource_key = exec_utils.get_param_value(Parameters.RESOURCE_KEY, parameters, xes.DEFAULT_RESOURCE_KEY)

    # variants are computed on the resource attribute instead of the activity
    variant_stats = case_statistics.get_variant_statistics(
        log,
        parameters={case_statistics.Parameters.ACTIVITY_KEY: resource_key,
                    case_statistics.Parameters.ATTRIBUTE_KEY: resource_key})
    variants_occ = {stat["variant"]: stat["case:concept:name"] for stat in variant_stats}

    variant_keys = list(variants_occ)
    resources = [variants_util.get_activities_from_variant(key) for key in variant_keys]

    flat_list = sorted({res for sequence in resources for res in sequence})
    position = {res: pos for pos, res in enumerate(flat_list)}

    metric_matrix = numpy.zeros((len(flat_list), len(flat_list)))

    # NOTE(review): the weight is normalised by len(log); if log is a
    # DataFrame that is its number of rows - confirm the intended denominator
    log_size = float(len(log))

    for key, sequence in zip(variant_keys, resources):
        weight = float(variants_occ[key]) / log_size
        involved = sorted(set(sequence))

        for offset, first in enumerate(involved):
            row = position[first]
            for second in involved[offset + 1:]:
                col = position[second]
                metric_matrix[row, col] += weight
                metric_matrix[col, row] += weight

    return [metric_matrix, flat_list, False]
Example #7
0
def get_prefix_matrix_from_variants_list(variants_list,
                                         activities,
                                         parameters=None):
    """
    Builds a numeric matrix with one row per distinct prefix representation
    found in the variants of the log, scaled by how often that prefix occurs

    Parameters
    -------------
    variants_list
        List of variants contained in the log, along with their count
        (each item is indexed as [0] = variant, [1] = count)
    activities
        List of activities in the log
    parameters
        Parameters of the algorithm:
            SKIP_LAST -> when true, the complete variant is not counted as a
            prefix of itself (default: False)

    Returns
    -------------
    prefix_mat
        Prefix matrix of the log (duplicate rows removed)
    activities
        The list of activities received in input
    """
    if parameters is None:
        parameters = {}
    skip_last = parameters.get(SKIP_LAST, False)

    # prefix representation -> total count over all the variants sharing it
    prefix_counts = {}
    for entry in variants_list:
        variant = variants_util.get_activities_from_variant(entry[0])
        count = entry[1]

        last_index = len(variant) - 1
        prefix = []
        for index, act in enumerate(variant):
            if skip_last and index == last_index:
                break
            prefix.append(act)
            key = get_prefix_repr(prefix, activities)
            prefix_counts[key] = prefix_counts.get(key, 0) + count

    # every component of a representation is scaled by the prefix count
    rows = [[value * total for value in key]
            for key, total in prefix_counts.items()]

    prefix_mat = np.unique(np.asmatrix(rows), axis=0)
    return prefix_mat, activities
Example #8
0
def apply(log: EventLog, parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> List[Any]:
    """
    Computes the Working Together metric

    For every variant (computed on the resource attribute), each unordered pair
    of distinct resources appearing in it gets the share of traces following
    the variant (count / len(log)) added to both symmetric cells of the matrix.

    Parameters
    ------------
    log
        Log
    parameters
        Possible parameters of the algorithm:
            Parameters.RESOURCE_KEY -> attribute holding the resource

    Returns
    -----------
    tuple
        List containing the metric matrix, the sorted resources list and the boolean False, which indicates
        that the metric is not directed.
    """
    if parameters is None:
        parameters = {}

    resource_key = exec_utils.get_param_value(Parameters.RESOURCE_KEY, parameters, xes.DEFAULT_RESOURCE_KEY)

    # variants are computed on the resource attribute instead of the activity
    variants = variants_filter.get_variants(
        log,
        parameters={variants_filter.Parameters.ACTIVITY_KEY: resource_key,
                    variants_filter.Parameters.ATTRIBUTE_KEY: resource_key})
    variants_occ = {variant: len(traces) for variant, traces in variants.items()}

    variant_keys = list(variants_occ)
    resources = [variants_util.get_activities_from_variant(key) for key in variant_keys]

    flat_list = sorted({res for sequence in resources for res in sequence})
    position = {res: pos for pos, res in enumerate(flat_list)}

    metric_matrix = numpy.zeros((len(flat_list), len(flat_list)))

    log_size = float(len(log))

    for key, sequence in zip(variant_keys, resources):
        weight = float(variants_occ[key]) / log_size
        involved = sorted(set(sequence))

        for offset, first in enumerate(involved):
            row = position[first]
            for second in involved[offset + 1:]:
                col = position[second]
                metric_matrix[row, col] += weight
                metric_matrix[col, row] += weight

    return [metric_matrix, flat_list, False]
Example #9
0
def check_is_fitting(*args, activity_key=xes_constants.DEFAULT_NAME_KEY):
    """
    Checks if a trace object is fit against a process model

    Parameters
    -----------------
    trace
        Trace object (trace / variant); first positional argument
    model
        Model (process tree, Petri net, BPMN, ...); all the remaining
        positional arguments
    activity_key
        Activity key (optional)

    Returns
    -----------------
    is_fit
        Boolean value (True if the trace fits; False if the trace does not).
        None is returned when the converted model is neither a process tree
        nor a tuple starting with a Petri net.
    """
    from pm4py.util import variants_util
    from pm4py.convert import convert_to_process_tree, convert_to_petri_net

    trace = args[0]
    model = args[1:]

    try:
        model = convert_to_process_tree(*model)
    except Exception:
        # the model cannot be expressed as a process tree: fall back to a Petri net.
        # Only Exception is caught, so KeyboardInterrupt / SystemExit raised
        # during the conversion are propagated instead of being swallowed.
        model = convert_to_petri_net(*model)

    if not isinstance(trace, Trace):
        # a variant was provided: build a trace carrying only the activity attribute
        activities = variants_util.get_activities_from_variant(trace)
        trace = Trace()
        for act in activities:
            trace.append(Event({activity_key: act}))

    if isinstance(model, ProcessTree):
        return __check_is_fit_process_tree(trace, model, activity_key=activity_key)
    elif isinstance(model, tuple) and isinstance(model[0], PetriNet):
        return __check_is_fit_petri_net(trace, model[0], model[1], model[2], activity_key=activity_key)

    # unsupported model type after the conversion
    return None
Example #10
0
def get_variants_matrix_from_variants_list(variants_list,
                                           activities,
                                           parameters=None):
    """
    Builds a numeric matrix with one row per (complete) variant of the log,
    where each column is associated to an activity and holds the count of the
    variant multiplied by the number of times the activity occurs in it

    Parameters
    -------------
    variants_list
        List of variants contained in the log, along with their count
        (each item is indexed as [0] = variant, [1] = count)
    activities
        List of activities in the log
    parameters
        Parameters of the algorithm: keep_unique (default: True)

    Returns
    -------------
    variants_matrix
        Variants matrix of the log (duplicate rows removed when keep_unique is set)
    activities
        The list of activities received in input
    """
    if parameters is None:
        parameters = {}
    keep_unique = parameters.get(KEEP_UNIQUE, True)

    rows = []
    for entry in variants_list:
        variant = variants_util.get_activities_from_variant(entry[0])
        count = entry[1]

        row = [0] * len(activities)
        for act in variant:
            # the count is added once for every occurrence of the activity
            row[activities.index(act)] += count
        rows.append(row)

    variants_mat = np.asmatrix(rows)
    if keep_unique:
        variants_mat = np.unique(variants_mat, axis=0)
    return variants_mat, activities
Example #11
0
def get_dfg_sa_ea_act_from_variants(variants, parameters=None):
    """
    Computes the DFG, the activities, and the start and end activities
    from the dictionary/set/list of variants in the log

    Every distinct variant contributes exactly once to the counts, whatever
    the number of traces following it.

    Parameters
    ---------------
    variants
        Dictionary/set/list of variants
    parameters
        Parameters of the algorithm, including:
        - variants_sep: the delimiter splitting activities in a variant

    Returns
    --------------
    dfg
        DFG (pair of consecutive activities -> count)
    list_act
        List of different activities
    start_activities
        Start activities (activity -> count)
    end_activities
        End activities (activity -> count)
    """
    if parameters is None:
        parameters = {}

    unique_variants = {
        variants_util.get_activities_from_variant(variant)
        for variant in variants
    }

    # consecutive pairs of activities inside each distinct variant
    dfg = dict(
        Counter(pair for acts in unique_variants
                for pair in zip(acts, acts[1:])))
    list_act = list({act for acts in unique_variants for act in acts})

    # empty variants have neither a start nor an end activity
    start_activities = dict(
        Counter(acts[0] for acts in unique_variants if acts))
    end_activities = dict(
        Counter(acts[-1] for acts in unique_variants if acts))

    return dfg, list_act, start_activities, end_activities
Example #12
0
def apply(
    log: EventLog,
    parameters: Optional[Dict[Union[str, Parameters],
                              Any]] = None) -> List[Any]:
    """
    Computes the HW metric

    Variants are computed on the resource attribute. With beta equal to 0 only
    the handover to the directly following resource is counted; otherwise the
    handover to every later resource of the variant is counted, weighted by
    beta ** (distance - 1). Every contribution is multiplied by the number of
    traces of the variant, and the matrix is normalised by the sum of all the
    contributions.

    Parameters
    ------------
    log
        Log
    parameters
        Possible parameters of the algorithm:
            Parameters.RESOURCE_KEY -> attribute holding the resource
            Parameters.BETA -> beta value as described in the Wil SNA paper

    Returns
    -----------
    tuple
        List containing the metric matrix, the sorted resources list and the boolean True, which indicates
        that the metric is directed.
    """
    if parameters is None:
        parameters = {}

    resource_key = exec_utils.get_param_value(Parameters.RESOURCE_KEY,
                                              parameters,
                                              xes.DEFAULT_RESOURCE_KEY)
    beta = exec_utils.get_param_value(Parameters.BETA, parameters, 0)

    # variants are computed on the resource attribute instead of the activity
    variants = variants_filter.get_variants(
        log,
        parameters={
            variants_filter.Parameters.ACTIVITY_KEY: resource_key,
            variants_filter.Parameters.ATTRIBUTE_KEY: resource_key
        })
    variants_occ = {variant: len(traces) for variant, traces in variants.items()}

    variant_keys = list(variants_occ)
    resources = [
        variants_util.get_activities_from_variant(key) for key in variant_keys
    ]

    flat_list = sorted({res for sequence in resources for res in sequence})
    position = {res: pos for pos, res in enumerate(flat_list)}

    metric_matrix = numpy.zeros((len(flat_list), len(flat_list)))

    direct_only = beta == 0
    sum_i_to_j = {}
    dividend = 0

    for key, sequence in zip(variant_keys, resources):
        occurrences = variants_occ[key]
        for i in range(len(sequence) - 1):
            targets = sum_i_to_j.setdefault(position[sequence[i]], {})
            # with beta == 0 only the direct successor receives the work
            stop = i + 2 if direct_only else len(sequence)
            for j in range(i + 1, stop):
                if direct_only:
                    weight = occurrences
                else:
                    weight = occurrences * (beta**(j - i - 1))
                target = position[sequence[j]]
                targets[target] = targets.get(target, 0) + weight
                dividend += weight

    for source, targets in sum_i_to_j.items():
        for target, total in targets.items():
            metric_matrix[source][target] = total / dividend

    return [metric_matrix, flat_list, True]
Example #13
0
def apply(log, parameters=None):
    """
    Computes the HW metric

    Variants are computed on the resource attribute. With beta equal to 0 only
    the handover to the directly following resource is counted; otherwise the
    handover to every later resource of the variant is counted, weighted by
    beta ** (distance - 1). The matrix is normalised by the sum, over the
    variants, of occurrences * (length - 1), whatever the value of beta.

    Parameters
    ------------
    log
        Log
    parameters
        Possible parameters of the algorithm:
            Parameters.RESOURCE_KEY -> attribute holding the resource
            Parameters.BETA -> beta value as described in the Wil SNA paper

    Returns
    -----------
    tuple
        List containing the metric matrix, the sorted resources list and the boolean True, which indicates
        that the metric is directed.
    """
    if parameters is None:
        parameters = {}

    import numpy
    from pm4py.statistics.traces.pandas import case_statistics

    resource_key = exec_utils.get_param_value(Parameters.RESOURCE_KEY, parameters, xes.DEFAULT_RESOURCE_KEY)
    beta = exec_utils.get_param_value(Parameters.BETA, parameters, 0)

    # variants are computed on the resource attribute instead of the activity
    variant_stats = case_statistics.get_variant_statistics(
        log,
        parameters={case_statistics.Parameters.ACTIVITY_KEY: resource_key,
                    case_statistics.Parameters.ATTRIBUTE_KEY: resource_key})
    variants_occ = {stat["variant"]: stat["case:concept:name"] for stat in variant_stats}

    variant_keys = list(variants_occ)
    resources = [variants_util.get_activities_from_variant(key) for key in variant_keys]

    flat_list = sorted({res for sequence in resources for res in sequence})
    position = {res: pos for pos, res in enumerate(flat_list)}

    metric_matrix = numpy.zeros((len(flat_list), len(flat_list)))

    direct_only = beta == 0
    sum_i_to_j = {}

    for key, sequence in zip(variant_keys, resources):
        occurrences = variants_occ[key]
        for i in range(len(sequence) - 1):
            targets = sum_i_to_j.setdefault(position[sequence[i]], {})
            # with beta == 0 only the direct successor receives the work
            stop = i + 2 if direct_only else len(sequence)
            for j in range(i + 1, stop):
                if direct_only:
                    weight = occurrences
                else:
                    weight = occurrences * (beta ** (j - i - 1))
                target = position[sequence[j]]
                targets[target] = targets.get(target, 0) + weight

    # the same normalisation factor is used for every value of beta
    dividend = sum(variants_occ[key] * (len(sequence) - 1)
                   for key, sequence in zip(variant_keys, resources))

    for source, targets in sum_i_to_j.items():
        for target, total in targets.items():
            metric_matrix[source][target] = total / dividend

    return [metric_matrix, flat_list, True]
Example #14
0
def apply(log, net, initial_marking, final_marking, parameters=None):
    """
    Applies token-based replay, replaying each variant of the log only once

    As a side effect, every transition of the net gets the attributes
    in_marking and out_marking, built from the weights of its input and
    output arcs.

    Parameters
    -----------
    log
        Log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters of the algorithm

    Returns
    -----------
    ret
        List with one replay result per trace, in the order of the log;
        traces following the same variant share the same result object
    """
    if parameters is None:
        parameters = {}

    # cache on every transition the markings it produces and consumes
    for trans in net.transitions:
        produced = Marking()
        for arc in trans.out_arcs:
            produced[arc.target] = arc.weight
        trans.out_marking = produced

        consumed = Marking()
        for arc in trans.in_arcs:
            consumed[arc.source] = arc.weight
        trans.in_marking = consumed

    variants_idxs = variants_filter.get_variants_from_log_trace_idx(
        log, parameters=parameters)

    # label -> visible transitions carrying that label
    tmap = {}
    for trans in net.transitions:
        if trans.label is not None:
            tmap.setdefault(trans.label, []).append(trans)
    # starts empty and is handed to every tr_vlist call
    bmap = {}

    # trace index -> result of the replay of its variant
    al_idx = {}
    for variant in variants_idxs:
        result = tr_vlist(variants_util.get_activities_from_variant(variant),
                          net,
                          initial_marking,
                          final_marking,
                          tmap,
                          bmap,
                          parameters=parameters)
        for trace_idx in variants_idxs[variant]:
            al_idx[trace_idx] = result

    return [al_idx[i] for i in range(len(log))]
Example #15
0
def apply(log, parameters=None):
    """
    Calculates the Subcontracting metric

    Variants are computed on the resource attribute. Whenever the same
    resource appears at positions i and i + n of a variant, every resource
    found strictly in between is counted as a subcontractor of that resource,
    weighted by the number of occurrences of the variant. The matrix is
    normalised by the sum, over the variants, of occurrences * (length - 1).

    Parameters
    ------------
    log
        Log
    parameters
        Possible parameters of the algorithm:
            Parameters.RESOURCE_KEY -> attribute holding the resource
            Parameters.N -> n of the algorithm proposed in the Wil SNA paper

    Returns
    -----------
    tuple
        List containing the metric matrix, the sorted resources list and the
        boolean True (the metric is directed)
    """
    if parameters is None:
        parameters = {}

    import numpy
    from pm4py.statistics.traces.generic.pandas import case_statistics

    resource_key = exec_utils.get_param_value(Parameters.RESOURCE_KEY,
                                              parameters,
                                              xes.DEFAULT_RESOURCE_KEY)
    n = exec_utils.get_param_value(Parameters.N, parameters, 2)

    # variants are computed on the resource attribute instead of the activity
    parameters_variants = {
        case_statistics.Parameters.ACTIVITY_KEY: resource_key,
        case_statistics.Parameters.ATTRIBUTE_KEY: resource_key
    }
    variants_occ = {
        x["variant"]: x["case:concept:name"]
        for x in case_statistics.get_variant_statistics(
            log, parameters=parameters_variants)
    }
    variants_resources = list(variants_occ.keys())
    resources = [
        variants_util.get_activities_from_variant(y)
        for y in variants_resources
    ]

    flat_list = sorted(
        list(set([item for sublist in resources for item in sublist])))
    # resource -> row/column index, avoids a linear search per lookup
    position = {res: pos for pos, res in enumerate(flat_list)}

    metric_matrix = numpy.zeros((len(flat_list), len(flat_list)))

    sum_i_to_j = {}

    for idx, rv in enumerate(resources):
        occurrences = variants_occ[variants_resources[idx]]
        for i in range(len(rv) - n):
            res_i = position[rv[i]]
            if res_i != position[rv[i + n]]:
                continue
            # The in-between resources are counted on EVERY occurrence of the
            # pattern. The counting loop used to run only when res_i was met
            # for the first time, dropping all the later occurrences.
            targets = sum_i_to_j.setdefault(res_i, {})
            for j in range(i + 1, i + n):
                res_j = position[rv[j]]
                targets[res_j] = targets.get(res_j, 0) + occurrences

    dividend = 0
    for idx, rv in enumerate(resources):
        dividend = dividend + variants_occ[variants_resources[idx]] * (len(rv) - 1)

    for key1 in sum_i_to_j:
        for key2 in sum_i_to_j[key1]:
            metric_matrix[key1][key2] = sum_i_to_j[key1][key2] / dividend

    return [metric_matrix, flat_list, True]
Example #16
0
def apply(
    log: EventLog,
    parameters: Optional[Dict[Union[str, Parameters],
                              Any]] = None) -> List[Any]:
    """
    Calculates the Subcontracting metric

    Variants are computed on the resource attribute. Whenever the same
    resource appears at positions i and i + n of a variant, every resource
    found strictly in between is counted as a subcontractor of that resource,
    weighted by the number of traces of the variant. The matrix is normalised
    by the total number of traces over all the variants.

    Parameters
    ------------
    log
        Log
    parameters
        Possible parameters of the algorithm:
            Parameters.RESOURCE_KEY -> attribute holding the resource
            Parameters.N -> n of the algorithm proposed in the Wil SNA paper

    Returns
    -----------
    tuple
        List containing the metric matrix, the sorted resources list and the
        boolean True (the metric is directed)
    """
    if parameters is None:
        parameters = {}

    resource_key = exec_utils.get_param_value(Parameters.RESOURCE_KEY,
                                              parameters,
                                              xes.DEFAULT_RESOURCE_KEY)
    n = exec_utils.get_param_value(Parameters.N, parameters, 2)

    # variants are computed on the resource attribute instead of the activity
    parameters_variants = {
        variants_filter.Parameters.ACTIVITY_KEY: resource_key,
        variants_filter.Parameters.ATTRIBUTE_KEY: resource_key
    }
    variants_occ = {
        x: len(y)
        for x, y in variants_filter.get_variants(
            log, parameters=parameters_variants).items()
    }
    variants_resources = list(variants_occ.keys())
    resources = [
        variants_util.get_activities_from_variant(y)
        for y in variants_resources
    ]

    flat_list = sorted(
        list(set([item for sublist in resources for item in sublist])))
    # resource -> row/column index, avoids a linear search per lookup
    position = {res: pos for pos, res in enumerate(flat_list)}

    metric_matrix = numpy.zeros((len(flat_list), len(flat_list)))

    sum_i_to_j = {}
    dividend = 0

    for idx, rv in enumerate(resources):
        occurrences = variants_occ[variants_resources[idx]]
        dividend += occurrences

        for i in range(len(rv) - n):
            res_i = position[rv[i]]
            if res_i != position[rv[i + n]]:
                continue
            # The in-between resources are counted on EVERY occurrence of the
            # pattern. The counting loop used to run only when res_i was met
            # for the first time, dropping all the later occurrences.
            targets = sum_i_to_j.setdefault(res_i, {})
            for j in range(i + 1, i + n):
                res_j = position[rv[j]]
                targets[res_j] = targets.get(res_j, 0) + occurrences

    for key1 in sum_i_to_j:
        for key2 in sum_i_to_j[key1]:
            metric_matrix[key1][key2] = sum_i_to_j[key1][key2] / dividend

    return [metric_matrix, flat_list, True]