def find_all_process_most_frequent(varas_group):
    """Count how often each macro trace occurs across a group of varas.

    For every vara in ``varas_group``, the rows of the module-level
    ``df_log`` whose ``'case: orgao'`` column equals that vara are run
    through ``PreProcess`` (column selection followed by the timestamp,
    movement and trace-time outlier filters, the latter two with
    ``lower=0.05`` / ``upper=0.95``), sorted by ``'time:timestamp'`` and
    wrapped in a ``Log``.  Every item produced by
    ``find_all_macro_trace(log, macrosteps)`` then adds one to its counter.

    This function relies on names defined outside of it: ``df_log``,
    ``PreProcess``, ``Log``, ``find_all_macro_trace`` and ``macrosteps``.

    Args:
        varas_group: Iterable of values compared against
            ``df_log['case: orgao']``.

    Returns:
        A dict mapping each macro trace to the number of times it was
        seen, ordered by ascending count (the most frequent entries come
        last).  An empty ``varas_group`` yields an empty dict.
    """
    macro_trace_processes = {}

    for vara in varas_group:
        df_vara = df_log[df_log['case: orgao'] == vara]

        pre = PreProcess(df=df_vara)
        pre.select_desired_columns()
        pre.filter_outlier_timestamp()
        pre.filter_outlier_movements(lower=0.05, upper=0.95)
        pre.filter_outlier_trace_time(lower=0.05, upper=0.95)

        vara_log = Log(df_log=pre.df_log.sort_values('time:timestamp'))

        # Traces are used as dict keys, so they must be hashable.
        for tran in find_all_macro_trace(vara_log.log, macrosteps):
            macro_trace_processes[tran] = macro_trace_processes.get(tran, 0) + 1

    # sorted() is stable, so traces with equal counts keep first-seen order.
    macro_trace_processes = dict(
        sorted(macro_trace_processes.items(), key=lambda item: item[1])
    )

    # The original ended with a bare expression, which discarded the result.
    return macro_trace_processes
from pm4py.statistics.traces.log import case_statistics
from pm4py.algo.discovery.dfg import parameters
# `dfg_discovery` is referenced below (Variants.FREQUENCY) but was never
# imported in this cell; import it explicitly so the cell runs on its own.
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from pm4py.visualization.dfg import visualizer as dfg_visualization

from log.Log import Log
from log.Log import INTENSE_FILTERING
from discovery.DFG import DFG
import visualization.Visualizer as Visualizer
from log.PreProcess import PreProcess

# Location of the CSV event log handed to the pre-processor.
file_path = ('/home/vercosa/Documentos/bases_desafio_cnj/'
             'log_vara_2.csv')

# Pre-process the log: select columns, then apply the timestamp, movement
# and trace-time outlier filters (the latter two at the 0.01 / 0.99 bounds).
p = PreProcess(file_location=file_path)
p.select_desired_columns()
p.filter_outlier_timestamp()
p.filter_outlier_movements(lower=0.01, upper=0.99)
p.filter_outlier_trace_time(lower=0.01, upper=0.99)

# Build the Log from the filtered frame, ordered by event timestamp.
l = Log(df_log=p.df_log.sort_values('time:timestamp'))
# Variant filtering is currently disabled:
# l.filter_variants(1.1)

# Construct the DFG wrapper with the FREQUENCY variant and a 'mean'
# aggregation measure, then restrict it via filter_activities /
# filter_edges before printing the result.
dfg = DFG(l.log,
          parameters={parameters.Parameters.AGGREGATION_MEASURE: 'mean'},
          variant=dfg_discovery.Variants.FREQUENCY)
dfg.filter_activities(number_act=10)
dfg.filter_edges(percentage=0.3)

print(dfg.dfg)