def execute_script():
    """Demo: mine statistics and DFGs from an interval event log and show them."""
    # Interval event log: every event carries both a start and a completion timestamp.
    log_path = os.path.join("..", "tests", "input_data", "interval_event_log.xes")
    log = xes_importer.apply(log_path)

    parameters = {
        constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY: "start_timestamp",
        constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: "time:timestamp",
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name",
        "strict": False,
        "format": "svg",
    }

    # Endpoint activities are fed back into the parameters for later consumers.
    start_activities = sa_get.get_start_activities(log, parameters=parameters)
    end_activities = ea_get.get_end_activities(log, parameters=parameters)
    parameters["start_activities"] = start_activities
    parameters["end_activities"] = end_activities

    # Sojourn time per activity.
    soj_time = soj_time_get.apply(log, parameters=parameters)
    print("soj_time")
    print(soj_time)

    # Pairs of activities that overlap in time.
    conc_act = conc_act_get.apply(log, parameters=parameters)
    print("conc_act")
    print(conc_act)

    # Eventually-follows graph.
    efg = efg_get.apply(log, parameters=parameters)
    print("efg")
    print(efg)

    # Frequency and performance DFGs, visualized one after the other.
    dfg_freq = dfg_algorithm.apply(log, parameters=parameters,
                                   variant=dfg_algorithm.Variants.FREQUENCY)
    dfg_perf = dfg_algorithm.apply(log, parameters=parameters,
                                   variant=dfg_algorithm.Variants.PERFORMANCE)
    dfg_gv_freq = dfg_vis_fact.apply(dfg_freq, log=log,
                                     variant=dfg_vis_fact.Variants.FREQUENCY,
                                     parameters=parameters)
    dfg_vis_fact.view(dfg_gv_freq)
    dfg_gv_perf = dfg_vis_fact.apply(dfg_perf, log=log,
                                     variant=dfg_vis_fact.Variants.PERFORMANCE,
                                     parameters=parameters)
    dfg_vis_fact.view(dfg_gv_perf)

    # Convert the frequency DFG into a Petri net and render it.
    net, im, fm = dfg_conv.apply(dfg_freq)
    gviz = pn_vis.apply(net, im, fm, parameters=parameters)
    pn_vis.view(gviz)
def dfg_dist_calc_minkowski(log1, log2, alpha):
    """Distance between two logs as a convex combination of profile distances.

    The activity-frequency profiles and the DFG edge-frequency profiles of the
    two logs are each normalised to distributions and compared with the
    Minkowski distance (p=2); ``alpha`` weights the activity component against
    the DFG component.
    """
    act1 = attributes_filter.get_attribute_values(log1, "concept:name")
    act2 = attributes_filter.get_attribute_values(log2, "concept:name")
    dfg1 = dfg_algorithm.apply(log1)
    dfg2 = dfg_algorithm.apply(log2)

    # Align both profiles on the union of keys; absent entries count as 0.
    df_act = pd.merge(act_dist_calc.occu_var_act(act1),
                      act_dist_calc.occu_var_act(act2),
                      how='outer', on='var').fillna(0)
    df_dfg = pd.merge(act_dist_calc.occu_var_act(dfg1),
                      act_dist_calc.occu_var_act(dfg2),
                      how='outer', on='var').fillna(0)

    def _normalized_minkowski(df):
        # Normalise each frequency vector to a distribution before comparing.
        left = df['freq_x'].values
        right = df['freq_y'].values
        pair = np.array([left / np.sum(left), right / np.sum(right)])
        return pdist(pair, 'minkowski', p=2.)[0]

    dist_act = _normalized_minkowski(df_act)
    dist_dfg = _normalized_minkowski(df_dfg)
    return dist_act * alpha + dist_dfg * (1 - alpha)
def dfg_dist_calc_suc(log1, log2):
    """Cosine distance between the DFG edge-frequency profiles of two logs."""
    profile1 = act_dist_calc.occu_var_act(dfg_algorithm.apply(log1))
    profile2 = act_dist_calc.occu_var_act(dfg_algorithm.apply(log2))
    # Outer-merge on the edge key so both vectors cover the union of edges.
    merged = pd.merge(profile1, profile2, how='outer', on='var').fillna(0)
    vectors = np.array([merged['freq_x'].values, merged['freq_y'].values])
    return pdist(vectors, 'cosine')[0]
def extract_performance_of_direct_follows_relationships(logs):
    """Return, per log, the performance annotation of its directly-follows edges.

    Parameters
    ----------
    logs
        Iterable of event logs.

    Returns
    -------
    list
        One list per input log; each entry is a ``(str(edge), performance)``
        tuple for every directly-follows edge discovered in that log.
    """
    results = []
    for log in logs:
        # Bug fix: the frequency DFG used to be computed first and then
        # immediately overwritten — only the performance variant is needed.
        graph = dfg_discovery.apply(log, variant="performance")
        results.append([(str(edge), graph[edge]) for edge in set(graph)])
    return results
def apply_heu(log, parameters=None):
    """
    Discovers an Heuristics Net using Heuristics Miner

    Parameters
    ------------
    log
        Event log
    parameters
        Possible parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY
        - Parameters.TIMESTAMP_KEY
        - Parameters.CASE_ID_KEY
        - Parameters.DEPENDENCY_THRESH
        - Parameters.AND_MEASURE_THRESH
        - Parameters.MIN_ACT_COUNT
        - Parameters.MIN_DFG_OCCURRENCES
        - Parameters.DFG_PRE_CLEANING_NOISE_THRESH
        - Parameters.LOOP_LENGTH_TWO_THRESH

    Returns
    ------------
    heu
        Heuristics Net
    """
    parameters = {} if parameters is None else parameters
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters, xes.DEFAULT_NAME_KEY)

    # Gather the log statistics the heuristics computation needs.
    start_activities = log_sa_filter.get_start_activities(log, parameters=parameters)
    end_activities = log_ea_filter.get_end_activities(log, parameters=parameters)
    activities_occurrences = log_attributes.get_attribute_values(
        log, activity_key, parameters=parameters)
    activities = list(activities_occurrences.keys())

    dfg = dfg_alg.apply(log, parameters=parameters)
    # A window-2 DFG is required to detect length-two loops.
    window_2_parameters = deepcopy(parameters)
    window_2_parameters["window"] = 2
    dfg_window_2 = dfg_alg.apply(log, parameters=window_2_parameters)
    freq_triples = dfg_alg.apply(log, parameters=parameters,
                                 variant=dfg_alg.Variants.FREQ_TRIPLES)

    return apply_heu_dfg(dfg,
                         activities=activities,
                         activities_occurrences=activities_occurrences,
                         start_activities=start_activities,
                         end_activities=end_activities,
                         dfg_window_2=dfg_window_2,
                         freq_triples=freq_triples,
                         parameters=parameters)
def execute_script():
    """Round-trip every XES log in ``xesinput``: import, export, render its DFG,
    then re-import the exported file to verify it still loads."""
    log_input_directory = "xesinput"
    all_logs_names = os.listdir(log_input_directory)
    all_logs_names = [log for log in all_logs_names if ".xe" in log]

    for logName in all_logs_names:
        # Portability fix: use os.path.join instead of hard-coded "\\" separators.
        log_path = os.path.join(log_input_directory, logName)
        log = xes_importer.apply(log_path)
        print("\n\n")
        print("log loaded")
        print("Number of traces - ", len(log))
        event_log = log_conversion.apply(
            log, variant=log_conversion.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)

        exp_log_name = os.path.join("xescert_exportlogs", "exp_" + logName)
        print("exporting log", exp_log_name)
        xes_exporter.apply(log, exp_log_name)
        print("exported log", exp_log_name)

        log, classifier_attr_key = insert_classifier.search_act_class_attr(log)
        classifiers = list(log.classifiers.keys())
        if classifier_attr_key is None and classifiers:
            try:
                print(classifiers)
                log, classifier_attr_key = insert_classifier.insert_activity_classifier_attribute(
                    log, classifiers[0])
                print(classifier_attr_key)
            except Exception:
                # Best-effort classifier insertion: report but keep processing
                # (narrowed from a bare `except:` that also caught SystemExit).
                print("exception in handling classifier")
        if classifier_attr_key is None:
            classifier_attr_key = "concept:name"

        if len(event_log) > 0 and classifier_attr_key in event_log[0]:
            parameters = {
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: classifier_attr_key
            }
            dfg = dfg_algorithm.apply(log, parameters=parameters)
            gviz = dfg_vis.apply(dfg, log=log, variant="frequency",
                                 parameters=parameters)
            dfg_vis.save(gviz, os.path.join("xescert_images",
                                            logName.replace("xes", "png")))

        print("Reimporting log file just exported - ", exp_log_name)
        log = xes_importer.apply(exp_log_name)
        print("log loaded", exp_log_name)
        print("Number of traces - ", len(log))
        event_log = log_conversion.apply(
            log, variant=log_conversion.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
def apply_tree(
        event_log: Union[pd.DataFrame, EventLog, EventStream],
        parameters: Optional[Dict[Union[Parameters, str], Any]] = None) -> ProcessTree:
    """
    Discovers a process tree from the event log using the inductive miner.

    Parameters
    --------------
    event_log
        Event log (or dataframe / event stream, converted internally)
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY
        - Parameters.NOISE_THRESHOLD
        - Parameters.USE_MSD_PARALLEL_CUT

    Returns
    --------------
    tree
        Process tree
    """
    if parameters is None:
        parameters = {}
    event_log = log_converter.apply(
        event_log,
        variant=log_converter.Variants.TO_EVENT_LOG,
        parameters=parameters)
    # Consistency fix: pass the Parameters member itself (not `.value`) so the
    # lookup resolves both enum- and string-keyed parameter dicts, matching
    # every other discovery function in this codebase.
    act_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters,
                                         xes_constants.DEFAULT_NAME_KEY)
    threshold = exec_utils.get_param_value(Parameters.NOISE_THRESHOLD,
                                           parameters, 0.0)
    if threshold == 0.0:
        # keep one trace per variant; more performant
        event_log = filtering_utils.keep_one_trace_per_variant(
            event_log, parameters=parameters)
    tree = __inductive_miner(
        event_log,
        discover_dfg.apply(event_log, parameters=parameters),
        threshold, None, act_key,
        exec_utils.get_param_value(Parameters.USE_MSD_PARALLEL_CUT, parameters,
                                   True))
    tree_consistency.fix_parent_pointers(tree)
    tree = generic.fold(tree)
    generic.tree_sort(tree)
    return tree
def execute_script():
    """Run two Montecarlo simulations on a DFG-derived Petri net and print stats."""
    log = xes_importer.apply(
        os.path.join("..", "tests", "input_data", "running-example.xes"))
    frequency_dfg = dfg_miner.apply(log, variant=dfg_miner.Variants.FREQUENCY)
    net, im, fm = dfg_conv.apply(frequency_dfg)

    # Shortcut to the parameter enum of the chosen simulation variant.
    sim_params = montecarlo_simulation.Variants.PETRI_SEMAPH_FIFO.value.Parameters
    parameters = {
        sim_params.TOKEN_REPLAY_VARIANT: Variants.BACKWARDS,
        sim_params.PARAM_ENABLE_DIAGNOSTICS: False,
        # Each simulation is capped at 5 seconds of wall-clock time.
        sim_params.PARAM_MAX_THREAD_EXECUTION_TIME: 5,
    }

    # First run: the case arrival ratio is inferred from the log.
    log, res = montecarlo_simulation.apply(log, net, im, fm,
                                           parameters=parameters)
    print(
        "\n(Montecarlo - Petri net) case arrival ratio inferred from the log")
    print(res["median_cases_ex_time"])
    print(res["total_cases_time"])

    # Second run: the case arrival ratio is fixed by the user (60 seconds).
    parameters[sim_params.PARAM_CASE_ARRIVAL_RATIO] = 60
    log, res = montecarlo_simulation.apply(log, net, im, fm,
                                           parameters=parameters)
    print(
        "\n(Montecarlo - Petri net) case arrival ratio specified by the user")
    print(res["median_cases_ex_time"])
    print(res["total_cases_time"])
def discover_dfg(log: Union[EventLog, pd.DataFrame]) -> Tuple[dict, dict, dict]:
    """
    Discovers a DFG from a log

    Parameters
    --------------
    log
        Event log (EventLog object or pandas dataframe)

    Returns
    --------------
    dfg
        DFG
    start_activities
        Start activities
    end_activities
        End activities
    """
    # Select the implementation matching the input type, then compute the
    # endpoint statistics with the modules of the same backend.
    if check_is_dataframe(log):
        check_dataframe_columns(log)
        from pm4py.objects.dfg.retrieval.pandas import get_dfg_graph
        from pm4py.statistics.start_activities.pandas import get as start_activities_module
        from pm4py.statistics.end_activities.pandas import get as end_activities_module
        dfg = get_dfg_graph(log)
    else:
        from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
        from pm4py.statistics.start_activities.log import get as start_activities_module
        from pm4py.statistics.end_activities.log import get as end_activities_module
        dfg = dfg_discovery.apply(log)
    start_activities = start_activities_module.get_start_activities(log)
    end_activities = end_activities_module.get_end_activities(log)
    return dfg, start_activities, end_activities
def detect(log: EventLog, alphabet: Dict[str, int], act_key: str, use_msd: bool) -> Optional[str]:
    """
    Looks for an activity occurring in every trace whose removal leaves a log
    on which some cut (sequence, xor, concurrent or loop) can be detected.

    Parameters
    --------------
    log
        Event log
    alphabet
        Activities of the log with their counts
    act_key
        Attribute key holding the activity name
    use_msd
        Whether minimum-self-distance witnesses are fed to the concurrent cut

    Returns
    --------------
    activity
        The first such activity, or None if no candidate works
    """
    # Only activities present in every trace are candidates.
    candidates = set(alphabet.keys())
    for t in log:
        candidates = candidates.intersection(set(map(lambda e: e[act_key], t)))
    if len(candidates) == 0:
        return None
    for a in candidates:
        # Project the log by dropping every occurrence of the candidate.
        proj = EventLog()
        for t in log:
            proj.append(pm4py.filter_trace(lambda e: e[act_key] != a, t))
        # Skip candidates whose removal would empty some trace.
        if len(list(filter(lambda t: len(t) == 0, proj))) == 0:
            act_params = {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key}
            dfg_proj = discover_dfg.apply(proj, parameters=act_params)
            alphabet_proj = pm4py.get_attribute_values(proj, act_key)
            start_act_proj = get_starters.get_start_activities(proj, parameters=act_params)
            # Bug fix: end activities must come from the projected log (the
            # original queried the full log), mirroring start_act_proj.
            end_act_proj = get_ends.get_end_activities(proj, parameters=act_params)
            pre_proj, post_proj = dfg_utils.get_transitive_relations(dfg_proj, alphabet_proj)
            cut = sequence_cut.detect(alphabet_proj, pre_proj, post_proj)
            if cut is not None:
                return a
            cut = xor_cut.detect(dfg_proj, alphabet_proj)
            if cut is not None:
                return a
            # NOTE(review): MSDs are computed on the full log while witnesses
            # are derived on the projection — kept as-is; confirm intended.
            cut = concurrent_cut.detect(
                dfg_proj, alphabet_proj, start_act_proj, end_act_proj,
                msd=msdw_algo.derive_msd_witnesses(
                    proj,
                    msd_algo.apply(log, parameters=act_params),
                    parameters=act_params) if use_msd else None)
            if cut is not None:
                return a
            cut = loop_cut.detect(dfg_proj, alphabet_proj, start_act_proj, end_act_proj)
            if cut is not None:
                return a
    return None
def discover_dfg(log):
    """
    Discovers a DFG from a log_skeleton

    Parameters
    --------------
    log
        Event log_skeleton

    Returns
    --------------
    dfg
        DFG
    start_activities
        Start activities
    end_activities
        End activities
    """
    # Imports are kept local to avoid paying for them unless this is called.
    from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
    from pm4py.statistics.start_activities.log import get as sa_module
    from pm4py.statistics.end_activities.log import get as ea_module
    return (dfg_discovery.apply(log),
            sa_module.get_start_activities(log),
            ea_module.get_end_activities(log))
def case_filter_dfg(request):
    """
    Django view: filters the session-selected event log down to one case and
    returns that case's DFG, serialised for G6 rendering, as a JSON response.
    """
    event_logs_path = os.path.join(settings.MEDIA_ROOT, "event_logs")
    log_information = None
    # TODO Load the Log Information, else throw/redirect to Log Selection
    if "current_log" in request.session and request.session["current_log"] is not None:
        log_information = request.session["current_log"]
        print(log_information)
    if log_information is not None:
        event_log = os.path.join(event_logs_path, log_information["log_name"])
        log_format = log_import.get_log_format(log_information["log_name"])
        # Import the Log considering the given Format
        log, activities = log_import.log_import(event_log, log_format,
                                                log_information)
        if request.method == "POST":
            selected_case = request.POST["selected_case"]
            if log_format == "xes":
                filtered_log = pm4py.filter_trace_attribute_values(
                    log, log_information["case_id"], [selected_case],
                    retain=True)
            else:
                filtered_log = log[log["case:concept:name"].isin([selected_case])]
            dfg = dfg_discovery.apply(filtered_log)
            this_data, temp_file = plotting.dfg_to_g6(dfg)
            # Bug fix: removed a no-op `re.escape(temp_file)` whose return
            # value was discarded — it had no effect on the response.
            message = {
                "success": True,
                "data": json.dumps(this_data),
                "responseText": "Inactivated successfully!",
            }
            return JsonResponse(message)
    # NOTE(review): non-POST requests and missing session logs implicitly
    # return None — confirm whether an error response is intended here.
def test_alpha_miner_log(self):
    """Smoke-test both alpha-miner variants plus the DFG-based entry point."""
    from pm4py.algo.discovery.alpha import algorithm as alpha_miner
    from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
    log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
    net1, im1, fm1 = alpha_miner.apply(
        log, variant=alpha_miner.Variants.ALPHA_VERSION_CLASSIC)
    net2, im2, fm2 = alpha_miner.apply(
        log, variant=alpha_miner.Variants.ALPHA_VERSION_PLUS)
    dfg = dfg_discovery.apply(log)
    net3, im3, fm3 = alpha_miner.apply_dfg(
        dfg, variant=alpha_miner.Variants.ALPHA_VERSION_CLASSIC)
def __init__(self, log, parameters=None, variant=dfg_discovery.Variants.FREQUENCY):
    """
    Discovers the DFG of the given log at construction time and stores it.

    Parameters
    ------------
    log
        Event log to mine
    parameters
        Optional parameters forwarded to the DFG discovery algorithm
    variant
        DFG discovery variant (frequency by default)
    """
    # Discover once here; consumers read the result from ``self.dfg``.
    self.dfg = dfg_discovery.apply(log, parameters=parameters, variant=variant)
def extract_direct_follows_relationships(logs):
    """For each log, list the (edge, frequency) pairs of its directly-follows graph."""
    results = []
    for log in logs:
        graph = dfg_discovery.apply(log)
        # ``elements()`` expands the counter; the set() collapses it back to
        # the distinct edges, each paired with its frequency.
        results.append([(str(edge), graph[edge])
                        for edge in set(graph.elements())])
    return results
def test_46(self):
    """DFG discovery followed by conversion into a Petri net."""
    from pm4py.objects.log.importer.xes import importer as xes_importer
    from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
    from pm4py.objects.conversion.dfg import converter as dfg_mining
    log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
    dfg = dfg_discovery.apply(log)
    net, im, fm = dfg_mining.apply(dfg)
def test_exporting_dfg(self):
    """Round-trip a DFG through the exporter and the importer, then clean up."""
    dfg_file = os.path.join("test_output_data", "running-example.dfg")
    log = xes_importer.apply(
        os.path.join("input_data", "running-example.xes"))
    dfg = dfg_discovery.apply(log)
    dfg_exporter.apply(dfg, dfg_file)
    dfg, sa, ea = dfg_importer.apply(dfg_file)
    os.remove(dfg_file)
def apply(log, parameters=None):
    """
    Discovers a footprint object from an event log
    (the footprints are returned case-by-case)

    Parameters
    --------------
    log
        Log
    parameters
        Parameters of the algorithm:
            - Parameters.ACTIVITY_KEY

    Returns
    --------------
    footprints_obj
        List of footprints for the cases of the log
    """
    if parameters is None:
        parameters = {}
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    log = converter.apply(log, variant=converter.TO_EVENT_LOG,
                          parameters=parameters)
    footprints = []
    for trace in log:
        # Single-trace DFG: the footprint relations are derived per case.
        dfg = dfg_discovery.apply(EventLog([trace]), parameters=parameters)
        # An edge is parallel when its reverse also occurs, sequential otherwise.
        parallel = {(x, y) for (x, y) in dfg if (y, x) in dfg}
        sequence = {(x, y) for (x, y) in dfg if (y, x) not in dfg}
        activity_seq = tuple(event[activity_key] for event in trace)
        if activity_seq:
            start_acts = {activity_seq[0]}
            end_acts = {activity_seq[-1]}
        else:
            start_acts = set()
            end_acts = set()
        footprints.append({
            Outputs.DFG.value: dfg,
            Outputs.SEQUENCE.value: sequence,
            Outputs.PARALLEL.value: parallel,
            Outputs.ACTIVITIES.value: set(activity_seq),
            Outputs.START_ACTIVITIES.value: start_acts,
            Outputs.END_ACTIVITIES.value: end_acts,
            Outputs.MIN_TRACE_LENGTH.value: len(activity_seq),
            Outputs.TRACE.value: activity_seq
        })
    return footprints
def apply(log, parameters=None):
    """
    Discovers a footprint object from an event log
    (the footprints of the whole event log are returned)

    Parameters
    --------------
    log
        Log
    parameters
        Parameters of the algorithm:
            - Parameters.ACTIVITY_KEY

    Returns
    --------------
    footprints_obj
        Footprints object
    """
    if parameters is None:
        parameters = {}
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    log = converter.apply(log, variant=converter.TO_EVENT_LOG,
                          parameters=parameters)
    dfg = dfg_discovery.apply(log, parameters=parameters)
    # Parallelism: both directions of the edge occur in the DFG.
    parallel = {(x, y) for (x, y) in dfg if (y, x) in dfg}
    # Sequence: the alpha causal relation computed on the DFG.
    sequence = set(
        causal_discovery.apply(dfg, causal_discovery.Variants.CAUSAL_ALPHA))
    return {
        Outputs.DFG.value: dfg,
        Outputs.SEQUENCE.value: sequence,
        Outputs.PARALLEL.value: parallel,
        Outputs.START_ACTIVITIES.value: set(
            get_start_activities.get_start_activities(log, parameters=parameters)),
        Outputs.END_ACTIVITIES.value: set(
            get_end_activities.get_end_activities(log, parameters=parameters)),
        Outputs.ACTIVITIES.value: {event[activity_key]
                                   for trace in log for event in trace},
        Outputs.MIN_TRACE_LENGTH.value: min(len(trace) for trace in log)
        if len(log) > 0 else 0
    }
def test_exporting_dfg_with_sa_ea(self):
    """Export a DFG together with its start/end activities, re-import, clean up."""
    dfg_file = os.path.join("test_output_data", "running-example.dfg")
    log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
    dfg = dfg_discovery.apply(log)
    sa = start_activities.get_start_activities(log)
    ea = end_activities.get_end_activities(log)
    export_params = dfg_exporter.Variants.CLASSIC.value.Parameters
    dfg_exporter.apply(dfg, dfg_file,
                       parameters={export_params.START_ACTIVITIES: sa,
                                   export_params.END_ACTIVITIES: ea})
    dfg, sa, ea = dfg_importer.apply(dfg_file)
    os.remove(dfg_file)
def discover_dfg_miner(log):
    """Discover a DFG and convert it into a Petri net with initial/final markings."""
    dfg = dfg_discovery.apply(log)
    # The converter needs the log's endpoint activities to place source/sink.
    conversion_parameters = {
        "start_activities": sa_get.get_start_activities(log),
        "end_activities": ea_get.get_end_activities(log),
    }
    net, im, fm = dfg_converter.apply(dfg, parameters=conversion_parameters)
    return net, im, fm
def test_44(self):
    """Performance-DFG visualization smoke test."""
    import os
    from pm4py.objects.log.importer.xes import importer as xes_importer
    from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
    from pm4py.visualization.dfg import visualizer as dfg_visualization
    log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
    dfg = dfg_discovery.apply(log, variant=dfg_discovery.Variants.PERFORMANCE)
    gviz = dfg_visualization.apply(
        dfg, log=log, variant=dfg_visualization.Variants.PERFORMANCE)
def discover_abstraction_log(
        log: EventLog, parameters: Optional[Dict[Any, Any]] = None
) -> Tuple[Any, Any, Any, Any, Any, Any, Any]:
    """
    Discovers an abstraction from a log that is useful for the Heuristics
    Miner ++ algorithm

    Parameters
    --------------
    log
        Event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY
        - Parameters.START_TIMESTAMP_KEY
        - Parameters.TIMESTAMP_KEY
        - Parameters.CASE_ID_KEY

    Returns
    --------------
    start_activities
        Start activities
    end_activities
        End activities
    activities_occurrences
        Activities along with their number of occurrences
    dfg
        Directly-follows graph
    performance_dfg
        (Performance) Directly-follows graph
    sojourn_time
        Sojourn time for each activity
    concurrent_activities
        Concurrent activities
    """
    parameters = {} if parameters is None else parameters
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters, xes.DEFAULT_NAME_KEY)

    # Endpoint and occurrence statistics.
    start_activities = log_sa.get_start_activities(log, parameters=parameters)
    end_activities = log_ea.get_end_activities(log, parameters=parameters)
    activities_occurrences = log_attributes.get_attribute_values(
        log, activity_key, parameters=parameters)
    sojourn_time = soj_get.apply(log, parameters=parameters)
    concurrent_activities = conc_act_get.apply(log, parameters=parameters)

    # The "dfg" slot holds an eventually-follows graph restricted to the first
    # following occurrence of each activity.
    efg_parameters = copy(parameters)
    efg_parameters[efg_get.Parameters.KEEP_FIRST_FOLLOWING] = True
    dfg = efg_get.apply(log, parameters=efg_parameters)
    performance_dfg = dfg_alg.apply(log, variant=dfg_alg.Variants.PERFORMANCE,
                                    parameters=parameters)

    return (start_activities, end_activities, activities_occurrences, dfg,
            performance_dfg, sojourn_time, concurrent_activities)
def dfg_dist_calc(log1, log2):
    """Cosine distances between two logs' activity and DFG frequency profiles.

    Parameters
    ----------
    log1, log2
        Event logs to compare.

    Returns
    -------
    tuple
        ``(dist_act, dist_dfg)``: cosine distance of the activity-frequency
        vectors and of the DFG edge-frequency vectors. ``dist_dfg`` falls back
        to 1 (maximally distant) when the cosine distance is undefined
        (e.g. one all-zero vector).
    """
    act1 = attributes_filter.get_attribute_values(log1, "concept:name")
    act2 = attributes_filter.get_attribute_values(log2, "concept:name")
    dfg1 = dfg_algorithm.apply(log1)
    dfg2 = dfg_algorithm.apply(log2)
    df1_act = act_dist_calc.occu_var_act(act1)
    df2_act = act_dist_calc.occu_var_act(act2)
    df1_dfg = act_dist_calc.occu_var_act(dfg1)
    df2_dfg = act_dist_calc.occu_var_act(dfg2)
    # Align the profiles on the union of keys; absent entries count as 0.
    df_act = pd.merge(df1_act, df2_act, how='outer', on='var').fillna(0)
    df_dfg = pd.merge(df1_dfg, df2_dfg, how='outer', on='var').fillna(0)
    dist_act = pdist(
        np.array([df_act['freq_x'].values, df_act['freq_y'].values]),
        'cosine')[0]
    dist_dfg = pdist(
        np.array([df_dfg['freq_x'].values, df_dfg['freq_y'].values]),
        'cosine')[0]
    # Idiom fix: truth-test the boolean directly instead of `== True`.
    if np.isnan(dist_dfg):
        dist_dfg = 1
    return dist_act, dist_dfg
def discover_performance_dfg(log: Union[EventLog, pd.DataFrame], business_hours: bool = False,
                             worktiming: Optional[List[int]] = None, weekends: Optional[List[int]] = None,
                             workcalendar=constants.DEFAULT_BUSINESS_HOURS_WORKCALENDAR) -> Tuple[dict, dict, dict]:
    """
    Discovers a performance directly-follows graph from an event log

    Parameters
    ---------------
    log
        Event log
    business_hours
        Enables/disables the computation based on the business hours (default: False)
    worktiming
        (If the business hours are enabled) The hour range in which the resources of the log are working (default: 7 to 17)
    weekends
        (If the business hours are enabled) The weekends days (default: Saturday (6), Sunday (7))
    workcalendar
        Work calendar used for the business-hours computation

    Returns
    ---------------
    performance_dfg
        Performance DFG
    start_activities
        Start activities
    end_activities
        End activities
    """
    # Bug fix: the defaults used to be mutable list literals shared across
    # calls; use None sentinels and materialise the documented defaults here.
    if worktiming is None:
        worktiming = [7, 17]
    if weekends is None:
        weekends = [6, 7]
    if type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception("the method can be applied only to a traditional event log!")

    if check_is_pandas_dataframe(log):
        check_pandas_dataframe_columns(log)
        from pm4py.util import constants
        properties = get_properties(log)
        from pm4py.algo.discovery.dfg.adapters.pandas.df_statistics import get_dfg_graph
        # Resolve the column names from the attached properties, falling back
        # to the XES defaults.
        activity_key = properties[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in properties else xes_constants.DEFAULT_NAME_KEY
        timestamp_key = properties[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in properties else xes_constants.DEFAULT_TIMESTAMP_KEY
        case_id_key = properties[constants.PARAMETER_CONSTANT_CASEID_KEY] if constants.PARAMETER_CONSTANT_CASEID_KEY in properties else constants.CASE_CONCEPT_NAME
        dfg = get_dfg_graph(log, activity_key=activity_key, timestamp_key=timestamp_key,
                            case_id_glue=case_id_key, measure="performance",
                            perf_aggregation_key="all", business_hours=business_hours,
                            worktiming=worktiming, weekends=weekends, workcalendar=workcalendar)
        from pm4py.statistics.start_activities.pandas import get as start_activities_module
        from pm4py.statistics.end_activities.pandas import get as end_activities_module
    else:
        from pm4py.algo.discovery.dfg.variants import performance as dfg_discovery
        properties = get_properties(log)
        properties[dfg_discovery.Parameters.AGGREGATION_MEASURE] = "all"
        properties[dfg_discovery.Parameters.BUSINESS_HOURS] = business_hours
        properties[dfg_discovery.Parameters.WORKTIMING] = worktiming
        properties[dfg_discovery.Parameters.WEEKENDS] = weekends
        dfg = dfg_discovery.apply(log, parameters=properties)
        from pm4py.statistics.start_activities.log import get as start_activities_module
        from pm4py.statistics.end_activities.log import get as end_activities_module

    start_activities = start_activities_module.get_start_activities(log, parameters=properties)
    end_activities = end_activities_module.get_end_activities(log, parameters=properties)
    return dfg, start_activities, end_activities
def execute_script():
    """CTMC transient analysis of a performance DFG after one day."""
    log = xes_importer.apply(
        os.path.join("..", "tests", "input_data", "running-example.xes"))
    performance_dfg = dfg_miner.apply(log,
                                      variant=dfg_miner.Variants.PERFORMANCE)
    reach_graph, tang_reach_graph, stochastic_map, q_matrix = \
        ctmc.get_tangible_reachability_and_q_matrix_from_dfg_performance(
            performance_dfg)
    # pick the source state
    source_states = [x for x in tang_reach_graph.states if x.name == "source1"]
    state = source_states[0]
    # analyse the distribution over the states of the system starting from the
    # source after 86400.0 seconds (1 day)
    transient_result = ctmc.transient_analysis_from_tangible_q_matrix_and_single_state(
        tang_reach_graph, q_matrix, state, 86400.0)
    print(transient_result)
def gerar_previsoes_modelo_from_log_eventos(eventLog):
    """
    For a set of growing time horizons, computes the probability that a case
    has finished (reached the "sink1" state) via CTMC transient analysis over
    the performance DFG of the event log.

    Parameters
    ----------
    eventLog
        Event log to mine.

    Returns
    -------
    list
        Dicts with keys "intervaloEmDias" (horizon in days) and
        "probabilidadeDeTermino" (probability the case has completed).
    """
    dfg_perf = dfg_discovery.apply(eventLog,
                                   variant=dfg_discovery.Variants.PERFORMANCE)
    sa = start_activities.get_start_activities(eventLog)
    ea = end_activities.get_end_activities(eventLog)
    reach_graph, tang_reach_graph, stochastic_map, q_matrix = \
        ctmc.get_tangible_reachability_and_q_matrix_from_dfg_performance(
            dfg_perf, parameters={
                "start_activities": sa,
                "end_activities": ea
            })

    intervalo_um_dia_em_segundos = 60 * 60 * 24
    # Horizons: 30/60/90/180 days, then 1..10 years.
    dias = [30, 60, 90, 180, 365, 365 * 2, 365 * 3, 365 * 4, 365 * 5,
            365 * 6, 365 * 7, 365 * 8, 365 * 9, 365 * 10]
    intervalos = [intervalo_um_dia_em_segundos * d for d in dias]

    previsoes_por_intervalo = []
    # pick the source state
    # (bug fix: removed an unused lookup of the sink state; the sink is
    # matched by name when filtering the transient result below)
    initial_state = [
        x for x in tang_reach_graph.states if x.name == "source1"
    ][0]
    for intervalo in intervalos:
        # Distribution over the states after `intervalo` seconds, starting
        # from the source (the old comment hard-coded "2 days" incorrectly).
        transient_result = ctmc.transient_analysis_from_tangible_q_matrix_and_single_state(
            tang_reach_graph, q_matrix, initial_state, intervalo)
        for key, value in filter(lambda elem: elem[0].name == "sink1",
                                 transient_result.items()):
            previsoes_por_intervalo.append({
                "intervaloEmDias": intervalo / intervalo_um_dia_em_segundos,
                "probabilidadeDeTermino": float(value)
            })
    return previsoes_por_intervalo
def __add_operator_recursive_logs(operator, threshold, act_key, logs, use_msd):
    """
    Recursively mines every sub-log and attaches the resulting subtree as a
    child of `operator`. For a LOOP operator only the first sub-log becomes
    the "do" child; the remaining sub-logs form the "redo" child, wrapped in
    an XOR when more than one remains.
    """
    def _mine_child(sub_log):
        # Mine the sub-log's DFG and run the inductive miner on it.
        dfg = discover_dfg.apply(
            sub_log,
            parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
        return __inductive_miner(sub_log, dfg, threshold, operator, act_key,
                                 use_msd)

    if operator.operator != pt.Operator.LOOP:
        for sub_log in logs:
            operator.children.append(_mine_child(sub_log))
    else:
        operator.children.append(_mine_child(logs[0]))
        remaining = logs[1:]
        if len(remaining) == 1:
            operator.children.append(_mine_child(remaining[0]))
        else:
            operator.children.append(
                __add_operator_recursive_logs(
                    pt.ProcessTree(operator=pt.Operator.XOR, parent=operator),
                    threshold, act_key, remaining, use_msd))
    return operator
def test_45(self):
    """Save an SVG rendering of the performance DFG, then clean up."""
    import os
    from pm4py.objects.log.importer.xes import importer as xes_importer
    from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
    from pm4py.visualization.dfg import visualizer as dfg_visualization
    log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
    dfg = dfg_discovery.apply(log, variant=dfg_discovery.Variants.PERFORMANCE)
    fmt_param = dfg_visualization.Variants.PERFORMANCE.value.Parameters.FORMAT
    gviz = dfg_visualization.apply(
        dfg, log=log, variant=dfg_visualization.Variants.PERFORMANCE,
        parameters={fmt_param: "svg"})
    out_path = os.path.join("test_output_data", "dfg.svg")
    dfg_visualization.save(gviz, out_path)
    os.remove(out_path)
def save_full_dfg(log):
    """
    Shows the frequency DFG of the log, saves it to "dfg_full.svg" and returns
    the saved graphviz object.
    """
    dfg = dfg_discovery.apply(log)
    # On-screen preview with default settings.
    gviz = dfg_visualization.apply(
        dfg, log=log, variant=dfg_visualization.Variants.FREQUENCY)
    dfg_visualization.view(gviz)
    # Consistency fix: read the FORMAT parameter from the FREQUENCY variant
    # (the variant actually used) instead of PERFORMANCE.
    parameters = {
        dfg_visualization.Variants.FREQUENCY.value.Parameters.FORMAT: "svg"
    }
    gviz = dfg_visualization.apply(
        dfg, log=log, variant=dfg_visualization.Variants.FREQUENCY,
        parameters=parameters)
    dfg_visualization.save(gviz, "dfg_full.svg")
    # Typo fix in the user-facing message: "saves" -> "saved".
    print('Full DFG saved as "dfg_full.svg"')
    return gviz