def test_case_statistics(self):
     """Smoke-test the log-based case_statistics API.

     Each statistic is computed on the test log and its result discarded;
     the test passes when no call raises.
     """
     from pm4py.statistics.traces.generic.log import case_statistics
     log = self.get_log()
     case_statistics.get_kde_caseduration(log)
     # "N77802" presumably is a case id present in the test log -- verify against fixture
     case_statistics.get_events(log, "N77802")
     case_statistics.get_variant_statistics(log)
     case_statistics.get_cases_description(log)
     case_statistics.get_all_case_durations(log)
     case_statistics.get_first_quartile_case_duration(log)
     case_statistics.get_median_case_duration(log)
Beispiel #2
0
def get_variants_list(log, parameters=None):
    """
    Gets the list of variants (along with their count) from the particular log type

    Parameters
    ------------
    log
        Log (pandas DataFrame or event-log object)
    parameters
        Parameters of the algorithm

    Returns
    -------------
    variants_list
        List of (variant, count) tuples of the log
    """
    from pm4py.statistics.traces.generic.pandas import case_statistics as pd_case_statistics
    from pm4py.statistics.traces.generic.log import case_statistics as log_case_statistics

    # Dispatch on the log representation; both back-ends return the same
    # record shape, so the extraction loop below is shared (the original
    # duplicated it verbatim in both branches).
    if type(log) is pd.DataFrame:
        variants = pd_case_statistics.get_variant_statistics(
            log, parameters=parameters)
    else:
        variants = log_case_statistics.get_variant_statistics(
            log, parameters=parameters)

    variants_list = []
    for var in variants:
        # Each record holds a "variant" key plus a count key whose name
        # depends on the back-end; take the first non-"variant" key.
        count_key = next(k for k in var if k != "variant")
        variants_list.append((var["variant"], var[count_key]))
    return variants_list
Beispiel #3
0
def sublog2varlist(log, freq_thres, num):
    '''
    Extract variant activity lists from a sublog: keep every variant whose
    frequency reaches freq_thres and, among the remaining ones, those ranked
    above num in the frequency ordering.
    :param log: sublog containing the selected case attribute value
    :param freq_thres: (int) frequency threshold to filter out infrequent variants
    :param num: (int) rank limit under which infrequent variants are still kept
    :return: list of variant activity lists
    '''
    counts = sorted(case_statistics.get_variant_statistics(log),
                    key=lambda entry: entry['count'],
                    reverse=True)

    frequent = []
    top_infrequent = []
    for rank, entry in enumerate(counts):
        if entry['count'] >= freq_thres:
            frequent.append(entry['variant'])  # variant string
        elif rank < num:
            top_infrequent.append(entry['variant'])

    # Concatenating keeps the frequent variants first, so the ordered
    # union of both selections is preserved.
    ordered_variants = frequent + top_infrequent
    return [
        variants_util.get_activities_from_variant(v) for v in ordered_variants
    ]
Beispiel #4
0
def sublog_percent2varlist(log, upper_percent, parameters=None):
    '''
    Get the variants of a sublog whose cumulative frequency share lies between
    a lower and an upper percentage bound.
    :param log: sublog containing the selected case attribute value
    :param upper_percent: upper cumulative-frequency bound (fraction in (0, 1])
    :param parameters: optional dict; Parameters.LOWER_PERCENT gives the lower
        cumulative-frequency bound (default 0)
    :return: (dataframe of variants with their counts restricted to the
        percentage window, corresponding list of variant strings)
    '''

    if parameters is None:
        parameters = {}
    lower_percent = exec_utils.get_param_value(Parameters.LOWER_PERCENT,
                                               parameters, 0)

    variants_count = case_statistics.get_variant_statistics(log)
    variants_count = sorted(variants_count,
                            key=lambda x: x['count'],
                            reverse=True)
    df = pd.DataFrame.from_dict(variants_count)
    # cumulative share of traces covered by the most frequent variants
    csum = np.array(df['count']).cumsum()
    csum = csum / csum[-1]
    num_list = csum[csum <= upper_percent]
    num_list_lower = csum[csum <= lower_percent]
    # keep only the variants whose cumulative share falls inside the window
    df_w_count = df.iloc[len(num_list_lower):len(num_list), :]
    # corresponding variant strings for the kept rows
    filtered_var_list = df_w_count['variant'].values.tolist()
    return df_w_count, filtered_var_list
Beispiel #5
0
def sublog_percent(log, upper_percent, parameters=None):
    '''
    Get the variants of a sublog whose cumulative frequency share lies between
    a lower and an upper percentage bound, returned both as a dataframe and as
    activity lists.
    :param log: sublog containing the selected case attribute value
    :param upper_percent: upper cumulative-frequency bound (fraction in (0, 1])
    :param parameters: optional dict; Parameters.LOWER_PERCENT gives the lower
        cumulative-frequency bound (default 0)
    :return: (dataframe of variants with their counts restricted to the
        percentage window, corresponding list of variant activity lists)
    '''

    if parameters is None:
        parameters = {}
    lower_percent = exec_utils.get_param_value(Parameters.LOWER_PERCENT,
                                               parameters, 0)

    variants_count = case_statistics.get_variant_statistics(log)
    variants_count = sorted(variants_count,
                            key=lambda x: x['count'],
                            reverse=True)
    df = pd.DataFrame.from_dict(variants_count)
    # cumulative share of traces covered by the most frequent variants
    csum = np.array(df['count']).cumsum()
    csum = csum / csum[-1]
    num_list = csum[csum <= upper_percent]
    num_list_lower = csum[csum <= lower_percent]
    # keep only the variants whose cumulative share falls inside the window
    df_w_count = df.iloc[len(num_list_lower):len(num_list), :]
    # convert the kept variant strings into activity lists
    filtered_var_list = df_w_count['variant'].values.tolist()
    str_var_list = [
        variants_util.get_activities_from_variant(v) for v in filtered_var_list
    ]

    return df_w_count, str_var_list
Beispiel #6
0
 def test_obtaining_variants(self):
     """Smoke-test variant extraction on the running-example XES log."""
     # to avoid static method warnings in tests,
     # that by construction of the unittest package have to be expressed in such way
     self.dummy_variable = "dummy_value"
     input_log = os.path.join(INPUT_DATA_DIR, "running-example.xes")
     log = xes_importer.apply(input_log)
     # Only the call itself is exercised; the statistics are discarded.
     stats = case_statistics.get_variant_statistics(log)
     del stats
Beispiel #7
0
def sublog2df_num(log, num):
    '''
    Turn the variant statistics of a sublog into a dataframe holding the num
    most frequent variants together with their counts.
    :param log: sublog containing the selected case attribute value
    :param num: (int) number of top variants to keep
    :return: dataframe of variants with their counts
    '''
    variants_count = case_statistics.get_variant_statistics(log)
    variants_count = sorted(variants_count,
                            key=lambda x: x['count'],
                            reverse=True)
    df = pd.DataFrame.from_dict(variants_count)
    # keep only the num most frequent variants
    df_w_count = df.iloc[0:num, :]
    return df_w_count
Beispiel #8
0
def sublog2df(log, freq_thres, num):
    '''
    Turn the variant statistics of a sublog into a dataframe, keeping variants
    that reach freq_thres as well as the num most frequent ones.
    :param log: sublog containing the selected case attribute value
    :param freq_thres: (int) frequency threshold to filter out infrequent variants
    :param num: (int) number of top variants kept regardless of the threshold
    :return: dataframe of variants with their counts
    '''
    variants_count = case_statistics.get_variant_statistics(log)
    variants_count = sorted(variants_count,
                            key=lambda x: x['count'],
                            reverse=True)
    df = pd.DataFrame.from_dict(variants_count)
    df_w_count_1 = df[df['count'] >= freq_thres]
    df_w_count_2 = df.iloc[0:num, :]
    # outer merge = union of both selections without duplicating rows that
    # satisfy both criteria
    df_w_count = pd.merge(df_w_count_1,
                          df_w_count_2,
                          how='outer',
                          on=['variant', 'count'])
    return df_w_count
def findAsociationRules():
    """
    Mine long-term dependency rules between XOR branches of the process tree.

    Reads the process tree and event log from the global ``settings`` module.

    Returns:
        Rules (dict) : Discovered rules between XOR branch pairs, sorted by
            decreasing maximum lift; each value maps (source, target) string
            pairs to [support, confidence, lift] plus a 'Max' lift entry
        XOR blocks (dict) : Candidate XOR blocks present in the process tree
    """
    tree = settings.PROCESS_TREE
    log = settings.EVENT_LOG

    # Explore the log: variant frequencies, most frequent first.
    variants_count = case_statistics.get_variant_statistics(log)
    variants_count = sorted(variants_count,
                            key=lambda x: x['count'],
                            reverse=True)
    print("Variants", variants_count)

    total_traces = sum(ele['count'] for ele in variants_count)

    rule_dicti = {}
    # Firstly, get all XOR tree list if it has no tau at the leaves.
    xor_tree = get_xor_trees(tree)
    print(xor_tree)

    # Find all valid XOR combinations: the lowest common ancestor of the two
    # XOR blocks must be a SEQUENCE, with no XOR or LOOP operator on the path
    # from either block up to that ancestor.
    for i in range(1, len(xor_tree)):
        for j in range(i + 1, len(xor_tree) + 1):
            max_v = 0
            rules_values = {}
            LCA = g.common_ancestor(xor_tree[f'X{i}'], xor_tree[f'X{j}'])
            if LCA.operator == pt_op.SEQUENCE and (
                    pt_op.XOR not in get_ancestors_operator(
                        xor_tree[f'X{i}'], LCA)
            ) and (pt_op.XOR not in get_ancestors_operator(
                    xor_tree[f'X{j}'],
                    LCA)) and (pt_op.LOOP not in get_ancestors_operator(
                        xor_tree[f'X{i}'],
                        LCA)) and (pt_op.LOOP not in get_ancestors_operator(
                            xor_tree[f'X{j}'], LCA)):
                source, target = get_candidates(xor_tree[f'X{i}'],
                                                xor_tree[f'X{j}'])
                for s in source:
                    for t in target:
                        # Support / confidence / lift of the rule s -> t.
                        support = get_support_updated([s, t], variants_count,
                                                      total_traces, source,
                                                      target)
                        conf_value = get_confidence([s, t], support[tuple(s),
                                                                    tuple(t)],
                                                    variants_count,
                                                    total_traces)
                        lift_value = get_lift([s, t], conf_value,
                                              variants_count, total_traces)

                        values = [
                            support[tuple(s), tuple(t)], conf_value, lift_value
                        ]
                        rules_values[(f"{s}", f"{t}")] = values
                        # Track the best lift seen for this XOR pair.
                        if values[2] > max_v:
                            max_v = values[2]
                rules_values['Max'] = max_v
                rule_dicti[(f"X{i}", f"X{j}")] = rules_values

    # Rank XOR pairs by their best (maximum) lift, highest first.
    sorted_rule_dict = dict(
        sorted(rule_dicti.items(),
               key=lambda item: item[1]['Max'],
               reverse=True))
    print("sorted_rule_dict", sorted_rule_dict)
    return sorted_rule_dict, xor_tree