Example #1
0
def find_all_process_most_frequent(varas_group):
    """Count how often each macro trace occurs across a group of varas.

    For every vara in ``varas_group`` the rows of the module-level
    ``df_log`` whose ``'case: orgao'`` column equals that vara are selected,
    run through the ``PreProcess`` filters (``lower=0.05`` / ``upper=0.95``
    for the movement and trace-time outlier filters), sorted by
    ``'time:timestamp'`` and wrapped in a ``Log``.  Every item yielded by
    ``find_all_macro_trace(log, macrosteps)`` is then tallied.

    Relies on names defined elsewhere in the module: ``df_log``,
    ``macrosteps``, ``PreProcess``, ``Log`` and ``find_all_macro_trace``.

    Args:
        varas_group: Iterable of values matched against ``'case: orgao'``.

    Returns:
        dict: macro trace -> number of occurrences, ordered by ascending
        count (the most frequent entries come last).  Empty when
        ``varas_group`` is empty.
    """
    macro_trace_processes = {}

    for vara in varas_group:
        df_vara = df_log[df_log['case: orgao'] == vara]

        p = PreProcess(df=df_vara)
        p.select_desired_columns()
        p.filter_outlier_timestamp()
        p.filter_outlier_movements(lower=0.05, upper=0.95)
        p.filter_outlier_trace_time(lower=0.05, upper=0.95)

        vara_log = Log(df_log=p.df_log.sort_values('time:timestamp'))
        all_macro_trace = find_all_macro_trace(vara_log.log, macrosteps)

        for tran in all_macro_trace:
            macro_trace_processes[tran] = \
                macro_trace_processes.get(tran, 0) + 1

    # The original ended on a bare expression (a notebook leftover), so the
    # computed tally was discarded and callers always received None.
    # sorted() is stable, so ties keep their first-seen order.
    return dict(sorted(macro_trace_processes.items(),
                       key=lambda item: item[1]))
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from pm4py.algo.discovery.dfg import parameters
from pm4py.statistics.traces.log import case_statistics
from pm4py.visualization.dfg import visualizer as dfg_visualization

from log.Log import Log
from log.Log import INTENSE_FILTERING
from discovery.DFG import DFG
import visualization.Visualizer as Visualizer
from log.PreProcess import PreProcess

# Path of the CSV event log analysed by this script.
file_path = '/home/vercosa/Documentos/bases_desafio_cnj/log_vara_2.csv'


# Build the pre-processor from the CSV path and apply its filters; the
# movement and trace-time outlier filters use lower=0.01 / upper=0.99.
p = PreProcess(file_location=file_path)
p.select_desired_columns()
p.filter_outlier_timestamp()
p.filter_outlier_movements(lower=0.01, upper=0.99)
p.filter_outlier_trace_time(lower=0.01, upper=0.99)

# Wrap the filtered frame, ordered by timestamp, in the project's Log type.
l = Log(df_log=p.df_log.sort_values('time:timestamp'))
# Variant filtering is intentionally left disabled:
# l.filter_variants(1.1)

# Frequency DFG over the log.  `dfg_discovery` is the pm4py DFG discovery
# module (pm4py.algo.discovery.dfg.algorithm); it must be imported at the
# top of the file, otherwise this line raises NameError.
dfg = DFG(l.log,
          parameters={parameters.Parameters.AGGREGATION_MEASURE: 'mean'},
          variant=dfg_discovery.Variants.FREQUENCY)

# Reduce the graph with the project's DFG filters before printing it.
dfg.filter_activities(number_act=10)
dfg.filter_edges(percentage=0.3)

print(dfg.dfg)