def slice_dist_act(log_1, log_2, unit, parameters=None):
    """
    Compute a weighted activity-level distance between two logs that have been
    sliced into variant sublogs.

    Both logs are sliced via ``filter_subsets.logslice_percent`` (``unit`` is the
    slicing parameter — presumably a percentage; TODO confirm against
    ``logslice_percent``).  For every variant of the larger slice list the closest
    variant of the smaller list (cosine distance on activity-frequency profiles)
    is found; each such pair contributes a frequency-weighted distance.  Smaller-
    side variants never matched exactly (distance > ~0) are matched back to their
    closest larger-side variant.  The result is the weighted average distance.

    Parameters
    ------------
    log_1, log_2
        The two event logs to compare
    unit
        Slicing parameter forwarded to ``logslice_percent``
    parameters
        Unused here; kept for interface uniformity with sibling functions

    Returns
    ------------
    dist
        Weighted average cosine distance (0 when both variant lists are equal)
    """
    (log1_list, freq1_list) = filter_subsets.logslice_percent(log_1, unit)
    (log2_list, freq2_list) = filter_subsets.logslice_percent(log_2, unit)
    # Orient the computation so that we always iterate the longer variant list
    # ("max" side) in the outer loop and the shorter one ("min" side) inside.
    if len(freq1_list) >= len(freq2_list):
        max_len = len(freq1_list)
        min_len = len(freq2_list)
        max_log = log1_list
        min_log = log2_list
        var_count_max = freq1_list
        var_count_min = freq2_list
    else:
        max_len = len(freq2_list)
        min_len = len(freq1_list)
        max_log = log2_list
        min_log = log1_list
        var_count_max = freq2_list
        var_count_min = freq1_list
    # dist_matrix[i, j]: cosine distance between max-side variant i and
    # min-side variant j.  The *_per_var / *_freq arrays accumulate the
    # weighted distances and weights for the final weighted average.
    dist_matrix = np.zeros((max_len, min_len))
    max_per_var = np.zeros(max_len)
    max_freq = np.zeros(max_len)
    min_freq = np.zeros(min_len)
    min_per_var = np.zeros(min_len)
    # min-side column indices that have not (yet) been matched exactly
    index_rec = set(list(range(min_len)))
    if log1_list == log2_list:
        # degenerate call: comparing a variant list to itself
        print("Please give different variant lists!")
        dist = 0
    else:
        for i in range(max_len):
            dist_vec = np.zeros(min_len)
            act1 = attributes_filter.get_attribute_values(max_log[i], "concept:name")
            df1_act = act_dist_calc.occu_var_act(act1)
            for j in range(min_len):
                act2 = attributes_filter.get_attribute_values(min_log[j], "concept:name")
                df2_act = act_dist_calc.occu_var_act(act2)
                # align both activity-frequency profiles on the same vocabulary;
                # activities absent from one side count as frequency 0
                df_act = pd.merge(df1_act, df2_act, how='outer', on='var').fillna(0)
                dist_vec[j] = pdist(np.array([df_act['freq_x'].values, df_act['freq_y'].values]), 'cosine')[0]
                dist_matrix[i][j] = dist_vec[j]
                if j == (min_len - 1):
                    # whole row computed: pick the closest min-side variant
                    max_loc_col = np.argmin(dist_vec)
                    if abs(dist_vec[max_loc_col]) <= 1e-8:
                        # exact (near-zero-distance) match: the min-side column is
                        # considered covered and the pair's weight is doubled
                        index_rec.discard(max_loc_col)
                        max_freq[i] = var_count_max[i] * var_count_min[max_loc_col] * 2
                        # NOTE(review): max_freq[i] is already doubled above, so the
                        # weighted distance here carries an effective factor of 4 —
                        # harmless while dist_vec[max_loc_col] ~ 0, but confirm intent
                        max_per_var[i] = dist_vec[max_loc_col] * max_freq[i] * 2
                    else:
                        max_freq[i] = var_count_max[i] * var_count_min[max_loc_col]
                        max_per_var[i] = dist_vec[max_loc_col] * max_freq[i]
        if (len(index_rec) != 0):
            # min-side variants never matched exactly: pair each with its closest
            # max-side variant (column-wise minimum of the distance matrix)
            for i in list(index_rec):
                min_loc_row = np.argmin(dist_matrix[:, i])
                min_freq[i] = var_count_max[min_loc_row] * var_count_min[i]
                min_per_var[i] = dist_matrix[min_loc_row, i] * min_freq[i]
        # weighted average over all contributing pairs
        dist = (np.sum(max_per_var) + np.sum(min_per_var)) / (np.sum(max_freq) +
                                                              np.sum(min_freq))
    return dist
def dfg_dist_calc_minkowski(log1, log2, alpha):
    """
    Blend of the normalized Minkowski (p=2) distances between two logs,
    measured both on activity frequencies and on DFG edge frequencies.

    Parameters
    ------------
    log1, log2
        The two event logs to compare
    alpha
        Weight of the activity-level distance; the DFG-level distance
        receives weight (1 - alpha)

    Returns
    ------------
    dist
        alpha * activity distance + (1 - alpha) * DFG distance
    """

    def _normalized_distance(left_table, right_table):
        # Outer-merge the two frequency tables on 'var' so both sides share the
        # same vocabulary; entries missing on one side count as frequency 0.
        merged = pd.merge(left_table, right_table, how='outer', on='var').fillna(0)
        left_vec = merged['freq_x'].values
        right_vec = merged['freq_y'].values
        # Normalize each profile to relative frequencies before measuring.
        return pdist(np.array([left_vec / np.sum(left_vec),
                               right_vec / np.sum(right_vec)]),
                     'minkowski', p=2.)[0]

    act_table_1 = act_dist_calc.occu_var_act(
        attributes_filter.get_attribute_values(log1, "concept:name"))
    act_table_2 = act_dist_calc.occu_var_act(
        attributes_filter.get_attribute_values(log2, "concept:name"))
    dfg_table_1 = act_dist_calc.occu_var_act(dfg_algorithm.apply(log1))
    dfg_table_2 = act_dist_calc.occu_var_act(dfg_algorithm.apply(log2))

    dist_act = _normalized_distance(act_table_1, act_table_2)
    dist_dfg = _normalized_distance(dfg_table_1, dfg_table_2)
    return dist_act * alpha + dist_dfg * (1 - alpha)
def dfg_dist_calc_act(log1, log2):
    """
    Cosine distance between the activity-frequency profiles of two logs.

    Parameters
    ------------
    log1, log2
        The two event logs to compare

    Returns
    ------------
    dist_act
        Cosine distance between the aligned activity-frequency vectors
    """
    profile_1 = act_dist_calc.occu_var_act(
        attributes_filter.get_attribute_values(log1, "concept:name"))
    profile_2 = act_dist_calc.occu_var_act(
        attributes_filter.get_attribute_values(log2, "concept:name"))
    # Align both profiles on the shared 'var' column; activities missing on
    # one side are counted with frequency 0.
    merged = pd.merge(profile_1, profile_2, how='outer', on='var').fillna(0)
    return pdist(np.array([merged['freq_x'].values, merged['freq_y'].values]),
                 'cosine')[0]
def apply(dfg, log=None, parameters=None, activities_count=None, soj_time=None):
    """
    Visualize a performance directly-follows graph as a Graphviz object.

    Missing statistics are filled in: activity counts come from ``log`` when
    available (otherwise 1 per activity), sojourn times likewise (otherwise 0).

    Parameters
    ------------
    dfg
        Directly-follows graph
    log
        (optional) Event log used to derive missing statistics
    parameters
        Visualization parameters (activity key, format, edge cap, start/end
        activities, font size)
    activities_count
        (optional) Occurrences per activity
    soj_time
        (optional) Sojourn time per activity

    Returns
    ------------
    gviz
        Graphviz object
    """
    if parameters is None:
        parameters = {}

    # read the visualization options, falling back to the usual defaults
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)
    image_format = exec_utils.get_param_value(Parameters.FORMAT, parameters, "png")
    edge_cap = exec_utils.get_param_value(Parameters.MAX_NO_EDGES_IN_DIAGRAM, parameters, 100000)
    start_activities = exec_utils.get_param_value(Parameters.START_ACTIVITIES, parameters, [])
    end_activities = exec_utils.get_param_value(Parameters.END_ACTIVITIES, parameters, [])
    font_size = str(exec_utils.get_param_value(Parameters.FONT_SIZE, parameters, 12))

    activities = dfg_utils.get_activities_from_dfg(dfg)

    # activity counts: derive from the log when possible, else neutral 1s
    if activities_count is None:
        if log is None:
            activities_count = {act: 1 for act in activities}
        else:
            activities_count = attr_get.get_attribute_values(log, activity_key, parameters=parameters)

    # sojourn times: derive from the log when possible, else neutral 0s
    if soj_time is None:
        if log is None:
            soj_time = {act: 0 for act in activities}
        else:
            soj_time = soj_time_get.apply(log, parameters=parameters)

    return graphviz_visualization(activities_count, dfg,
                                  image_format=image_format,
                                  measure="performance",
                                  max_no_of_edges_in_diagram=edge_cap,
                                  start_activities=start_activities,
                                  end_activities=end_activities,
                                  soj_time=soj_time,
                                  font_size=font_size)
def form_encoding_dictio_from_log(log, parameters=None):
    """
    Forms the encoding dictionary from the current log

    Parameters
    -------------
    log
        Event log
    parameters
        Parameters of the algorithm

    Returns
    -------------
    encoding_dictio
        Encoding dictionary (activity -> character assigned by ``get_new_char``)
    """
    parameters = {} if parameters is None else parameters
    activity_key = parameters.get(PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)

    shared_obj = SharedObj()
    mapping = {}
    # assign each activity a fresh character and record it in the mapping
    for activity in attributes_get.get_attribute_values(log, activity_key, parameters=parameters):
        get_new_char(activity, shared_obj)
        mapping[activity] = shared_obj.mapping_dictio[activity]
    return mapping
def apply(dfg, log=None, parameters=None, activities_count=None, measure="frequency"):
    """
    Build the Graphviz visualization of a directly-follows graph.

    Parameters
    ------------
    dfg
        Directly-follows graph
    log
        (optional) Event log used to compute activity counts when not supplied
    parameters
        Visualization parameters (activity key, format, edge cap, start/end activities)
    activities_count
        (optional) Occurrences per activity
    measure
        Measure to represent ("frequency" by default)

    Returns
    ------------
    gviz
        Graphviz object
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)
    image_format = exec_utils.get_param_value(Parameters.FORMAT, parameters, "png")
    edge_limit = exec_utils.get_param_value(Parameters.MAX_NO_EDGES_IN_DIAGRAM, parameters, 75)
    start_activities = exec_utils.get_param_value(Parameters.START_ACTIVITIES, parameters, [])
    end_activities = exec_utils.get_param_value(Parameters.END_ACTIVITIES, parameters, [])

    if activities_count is None:
        if log is None:
            # no log available: give each DFG activity a neutral count of 1
            activities_count = {act: 1 for act in dfg_utils.get_activities_from_dfg(dfg)}
        else:
            activities_count = attr_get.get_attribute_values(log, activity_key, parameters=parameters)

    return graphviz_visualization(activities_count, dfg,
                                  image_format=image_format,
                                  measure=measure,
                                  max_no_of_edges_in_diagram=edge_limit,
                                  start_activities=start_activities,
                                  end_activities=end_activities)
def filter_log_on_max_no_activities(log, max_no_activities=25, parameters=None):
    """
    Filter a log on a maximum number of activities

    Parameters
    -------------
    log
        Log
    max_no_activities
        Maximum number of activities
    parameters
        Parameters of the algorithm

    Returns
    -------------
    filtered_log
        Filtered version of the event log
    """
    if parameters is None:
        parameters = {}
    activity_key = parameters[
        PARAMETER_CONSTANT_ACTIVITY_KEY] if PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else DEFAULT_NAME_KEY

    # BUG FIX: the original wrote PARAMETER_CONSTANT_ATTRIBUTE_KEY straight into
    # the caller-supplied dict, leaking the attribute key back to the caller;
    # work on a shallow copy instead.
    parameters = dict(parameters)
    parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key

    # activities ranked by decreasing number of occurrences
    all_activities = sorted(get_attribute_values(log, activity_key).items(),
                            key=lambda item: item[1], reverse=True)
    # names of the top-N activities (slicing clamps at the list length,
    # so the explicit min() of the original is unnecessary)
    activities = [name for name, _ in all_activities[:max_no_activities]]
    if len(activities) < len(all_activities):
        log = apply_events(log, activities, parameters=parameters)
    return log
def get_activities_list(log, parameters=None):
    """
    Gets the activities list from a log_skeleton object, sorted by activity name

    Parameters
    --------------
    log
        Log
    parameters
        Possible parameters of the algorithm

    Returns
    -------------
    activities_list
        List of activities sorted by activity name
    """
    from pm4py.statistics.attributes.pandas import get as pd_attributes_filter
    from pm4py.statistics.attributes.log import get as log_attributes_filter

    parameters = parameters if parameters is not None else {}
    if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters:
        activity_key = parameters[constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    else:
        activity_key = xes.DEFAULT_NAME_KEY

    # dataframes and event logs are served by different statistics modules
    if type(log) is pd.DataFrame:
        activities = pd_attributes_filter.get_attribute_values(log, activity_key)
    else:
        activities = log_attributes_filter.get_attribute_values(log, activity_key)
    return sorted(activities.keys())
def form_encoding_dictio_from_two_logs(log1: EventLog, log2: EventLog, parameters: Optional[Dict[str, Any]] = None) -> \
        Dict[str, str]:
    """
    Forms the encoding dictionary from a couple of logs

    Parameters
    ----------------
    log1
        First log
    log2
        Second log
    parameters
        Parameters of the algorithm

    Returns
    ----------------
    encoding_dictio
        Encoding dictionary
    """
    from pm4py.statistics.attributes.log import get as attributes_get

    if parameters is None:
        parameters = {}
    activity_key = parameters.get(PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)

    shared_obj = SharedObj()
    mapping = {}
    # walk the activities of both logs (first log first, preserving the
    # character-assignment order) and encode every activity not yet seen
    for current_log in (log1, log2):
        for act in attributes_get.get_attribute_values(current_log, activity_key, parameters=parameters):
            if act not in mapping:
                get_new_char(act, shared_obj)
                mapping[act] = shared_obj.mapping_dictio[act]
    return mapping
def filter_log_relative_occurrence_event_attribute(
        log: EventLog, min_relative_stake: float,
        parameters: Optional[Dict[Any, Any]] = None) -> EventLog:
    """
    Filters the event log keeping only the events having an attribute value which occurs:
    - in at least the specified (min_relative_stake) percentage of events, when
      Parameters.KEEP_ONCE_PER_CASE = False
    - in at least the specified (min_relative_stake) percentage of cases, when
      Parameters.KEEP_ONCE_PER_CASE = True

    Parameters
    -------------------
    log
        Event log
    min_relative_stake
        Minimum percentage of cases (expressed as a number between 0 and 1) in which
        the attribute should occur.
    parameters
        Parameters of the algorithm, including:
        - Parameters.ATTRIBUTE_KEY => the attribute to use (default: concept:name)
        - Parameters.KEEP_ONCE_PER_CASE => decides the level of the filter to apply
          (if the filter should be applied on the cases, set it to True).

    Returns
    ------------------
    filtered_log
        Filtered event log
    """
    if parameters is None:
        parameters = {}

    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, xes.DEFAULT_NAME_KEY)
    keep_once_per_case = exec_utils.get_param_value(Parameters.KEEP_ONCE_PER_CASE, parameters, True)

    parameters_cp = copy(parameters)
    activities_occurrences = get_attribute_values(log, attribute_key, parameters=parameters_cp)

    if keep_once_per_case:
        # case-level filter: compare each value's count against the number of cases
        reference_total = len(log)
    else:
        # event-level filter: compare against the total number of events.
        # PERF FIX: hoisted out of the comprehension — the original recomputed
        # this sum once per attribute value (O(values * events)).
        reference_total = sum(len(trace) for trace in log)

    threshold = min_relative_stake * reference_total
    filtered_attributes = {value for value, count in activities_occurrences.items() if count >= threshold}

    return apply_events(log, filtered_attributes, parameters=parameters)
def apply_heu(log, parameters=None):
    """
    Discovers an Heuristics Net using Heuristics Miner

    Parameters
    ------------
    log
        Event log
    parameters
        Possible parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY
        - Parameters.TIMESTAMP_KEY
        - Parameters.CASE_ID_KEY
        - Parameters.DEPENDENCY_THRESH
        - Parameters.AND_MEASURE_THRESH
        - Parameters.MIN_ACT_COUNT
        - Parameters.MIN_DFG_OCCURRENCES
        - Parameters.DFG_PRE_CLEANING_NOISE_THRESH
        - Parameters.LOOP_LENGTH_TWO_THRESH

    Returns
    ------------
    heu
        Heuristics Net
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)

    # log-level statistics required by the miner
    start_activities = log_sa_filter.get_start_activities(log, parameters=parameters)
    end_activities = log_ea_filter.get_end_activities(log, parameters=parameters)
    activities_occurrences = log_attributes.get_attribute_values(log, activity_key, parameters=parameters)
    activities = list(activities_occurrences)

    # directly-follows relations: plain DFG, window-2 DFG and frequency triples
    dfg = dfg_alg.apply(log, parameters=parameters)
    parameters_w2 = deepcopy(parameters)
    parameters_w2["window"] = 2
    dfg_window_2 = dfg_alg.apply(log, parameters=parameters_w2)
    freq_triples = dfg_alg.apply(log, parameters=parameters, variant=dfg_alg.Variants.FREQ_TRIPLES)

    return apply_heu_dfg(dfg,
                         activities=activities,
                         activities_occurrences=activities_occurrences,
                         start_activities=start_activities,
                         end_activities=end_activities,
                         dfg_window_2=dfg_window_2,
                         freq_triples=freq_triples,
                         parameters=parameters)
def apply_tree(log, parameters=None):
    """
    Apply the IMDF algorithm to a log obtaining a process tree

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY -> attribute of the log to
            use as activity name (default concept:name)

    Returns
    ----------
    tree
        Process tree
    """
    if parameters is None:
        parameters = {}
    # BUG FIX: work on a shallow copy — the original inserted the activity key
    # into the caller-supplied dict, leaking the default back to the caller.
    parameters = dict(parameters)
    if pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_util.DEFAULT_NAME_KEY
    activity_key = parameters[pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]

    # get the DFG (only pairs with strictly positive counts)
    dfg = [(k, v) for k, v in dfg_inst.apply(
        log, parameters={
            pmutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY: activity_key
        }).items() if v > 0]

    # gets the start activities from the log
    start_activities = log_start_act_stats.get_start_activities(log, parameters=parameters)

    # gets the end activities from the log
    end_activities = log_end_act_stats.get_end_activities(log, parameters=parameters)

    # get the activities in the log
    activities = log_attributes_stats.get_attribute_values(log, activity_key)

    # check if the log contains empty traces
    # (reuse the computed lengths instead of re-scanning the log, as the original did)
    traces_length = [len(trace) for trace in log]
    contains_empty_traces = bool(traces_length) and min(traces_length) == 0

    return apply_tree_dfg(dfg, parameters=parameters, activities=activities,
                          contains_empty_traces=contains_empty_traces,
                          start_activities=start_activities,
                          end_activities=end_activities)
def dfg_dist_calc(log1, log2):
    """
    Compute cosine distances between two logs, both on the activity level and
    on the DFG (directly-follows) level.

    Parameters
    ------------
    log1, log2
        The two event logs to compare

    Returns
    ------------
    dist_act
        Cosine distance between the activity-frequency profiles
    dist_dfg
        Cosine distance between the DFG-frequency profiles
    """
    act1 = attributes_filter.get_attribute_values(log1, "concept:name")
    act2 = attributes_filter.get_attribute_values(log2, "concept:name")
    dfg1 = dfg_algorithm.apply(log1)
    dfg2 = dfg_algorithm.apply(log2)

    df1_act = act_dist_calc.occu_var_act(act1)
    df2_act = act_dist_calc.occu_var_act(act2)
    df1_dfg = act_dist_calc.occu_var_act(dfg1)
    df2_dfg = act_dist_calc.occu_var_act(dfg2)

    # align both profiles on the same vocabulary; missing entries become 0
    df_act = pd.merge(df1_act, df2_act, how='outer', on='var').fillna(0)
    df_dfg = pd.merge(df1_dfg, df2_dfg, how='outer', on='var').fillna(0)

    dist_act = pdist(np.array([df_act['freq_x'].values, df_act['freq_y'].values]), 'cosine')[0]
    dist_dfg = pdist(np.array([df_dfg['freq_x'].values, df_dfg['freq_y'].values]), 'cosine')[0]

    # cosine distance is NaN when a profile is all-zero/empty (e.g. a log whose
    # traces all have length 1 yields an empty DFG); treat that as maximal
    # distance.  The original compared `np.isnan(...) == True` and only guarded
    # dist_dfg; the same degenerate case is now handled for both distances.
    if np.isnan(dist_act):
        dist_act = 1
    if np.isnan(dist_dfg):
        dist_dfg = 1

    return dist_act, dist_dfg
def discover_abstraction_log(
        log: EventLog, parameters: Optional[Dict[Any, Any]] = None
) -> Tuple[Any, Any, Any, Any, Any, Any, Any]:
    """
    Discovers an abstraction from a log that is useful for the Heuristics Miner ++ algorithm

    Parameters
    --------------
    log
        Event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY
        - Parameters.START_TIMESTAMP_KEY
        - Parameters.TIMESTAMP_KEY
        - Parameters.CASE_ID_KEY

    Returns
    --------------
    start_activities
        Start activities
    end_activities
        End activities
    activities_occurrences
        Activities along with their number of occurrences
    dfg
        Directly-follows graph
    performance_dfg
        (Performance) Directly-follows graph
    sojourn_time
        Sojourn time for each activity
    concurrent_activities
        Concurrent activities
    """
    if parameters is None:
        parameters = {}

    act_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)

    sa = log_sa.get_start_activities(log, parameters=parameters)
    ea = log_ea.get_end_activities(log, parameters=parameters)
    act_occurrences = log_attributes.get_attribute_values(log, act_key, parameters=parameters)

    # the follows relation is computed with KEEP_FIRST_FOLLOWING enabled
    # (per-trace, only the first follower of each activity is counted)
    efg_parameters = copy(parameters)
    efg_parameters[efg_get.Parameters.KEEP_FIRST_FOLLOWING] = True
    follows_graph = efg_get.apply(log, parameters=efg_parameters)

    perf_dfg = dfg_alg.apply(log, variant=dfg_alg.Variants.PERFORMANCE, parameters=parameters)
    soj_times = soj_get.apply(log, parameters=parameters)
    conc_activities = conc_act_get.apply(log, parameters=parameters)

    return (sa, ea, act_occurrences, follows_graph,
            perf_dfg, soj_times, conc_activities)
def apply_tree(log, parameters=None):
    """
    Apply the IMDF algorithm to a log_skeleton obtaining a process tree

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log_skeleton to use as
            activity name (default concept:name)

    Returns
    ----------
    tree
        Process tree
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters,
                                              pmutil.xes_constants.DEFAULT_NAME_KEY)

    # directly-follows pairs with a strictly positive count
    dfg = [pair for pair in dfg_inst.apply(log, parameters=parameters).items() if pair[1] > 0]

    # start/end activities and activity occurrences of the log_skeleton
    start_activities = log_start_act_stats.get_start_activities(log, parameters=parameters)
    end_activities = log_end_act_stats.get_end_activities(log, parameters=parameters)
    activities = log_attributes_stats.get_attribute_values(log, activity_key)

    # an empty trace is present iff the shortest trace has length zero
    trace_lengths = [len(trace) for trace in log]
    contains_empty_traces = bool(trace_lengths) and min(trace_lengths) == 0

    return apply_tree_dfg(dfg, parameters=parameters, activities=activities,
                          contains_empty_traces=contains_empty_traces,
                          start_activities=start_activities,
                          end_activities=end_activities)
def get_decorated_net(net, initial_marking, final_marking, log, parameters=None, variant="frequency"):
    """
    Get a decorated net according to the specified variant (decorate Petri net based on DFG)

    Parameters
    ------------
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    log
        Log to use to decorate the Petri net
    parameters
        Algorithm parameters
    variant
        Specify if the decoration should take into account the frequency or the performance

    Returns
    ------------
    gviz
        GraphViz object
    """
    if parameters is None:
        parameters = {}

    # frequency variants aggregate with "sum", performance with "mean"
    default_aggregation = "sum" if "frequency" in variant else "mean"
    aggregation_measure = exec_utils.get_param_value(Parameters.AGGREGATION_MEASURE, parameters,
                                                     default_aggregation)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)
    stat_locale = exec_utils.get_param_value(Parameters.STAT_LOCALE, parameters, {})

    # the DFG flavor depends on the requested decoration
    if variant == "performance":
        dfg = performance.performance(log, parameters=parameters)
    else:
        dfg = native.native(log, parameters=parameters)

    # shortest paths on the net plus activity counts drive the decoration
    spaths = get_shortest_paths(net)
    activities_count = attr_get.get_attribute_values(log, activity_key, parameters=parameters)
    aggregated_statistics = get_decorations_from_dfg_spaths_acticount(
        net, dfg, spaths, activities_count,
        variant=variant, aggregation_measure=aggregation_measure, stat_locale=stat_locale)

    return visualize.apply(net, initial_marking, final_marking,
                           parameters=parameters, decorations=aggregated_statistics)
def apply_auto_filter(log, variants=None, parameters=None):
    """
    Apply an attributes filter detecting automatically a percentage

    Parameters
    ----------
    log
        Log
    variants
        (If specified) Dictionary with variant as the key and the list of traces as the value
    parameters
        Parameters of the algorithm, including:
            Parameters.DECREASING_FACTOR -> Decreasing factor (stops the algorithm when
            the next activity by occurrence is below this factor in comparison to previous)
            Parameters.ATTRIBUTE_KEY -> Attribute key (must be specified if different
            from concept:name)

    Returns
    ---------
    filtered_log
        Filtered log_skeleton
    """
    if parameters is None:
        parameters = {}

    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
    decreasing_factor = exec_utils.get_param_value(Parameters.DECREASING_FACTOR, parameters,
                                                   filtering_constants.DECREASING_FACTOR)

    # guard clause: an empty log is returned unchanged
    if len(log) == 0:
        return log

    parameters_variants = {
        PARAMETER_CONSTANT_ATTRIBUTE_KEY: attribute_key,
        PARAMETER_CONSTANT_ACTIVITY_KEY: attribute_key
    }
    if variants is None:
        variants = variants_filter.get_variants(log, parameters=parameters_variants)
    vc = variants_filter.get_variants_sorted_by_count(variants)

    # derive the frequency threshold from the sorted attribute values
    attributes_values = get_attribute_values(log, attribute_key, parameters=parameters_variants)
    alist = attributes_common.get_sorted_attributes_list(attributes_values)
    thresh = attributes_common.get_attributes_threshold(alist, decreasing_factor)

    return filter_log_by_attributes_threshold(log, attributes_values, variants, vc, thresh, attribute_key)
def test_dfdoc1(self):
    # to avoid static method warnings in tests,
    # that by construction of the unittest package have to be expressed in such way
    self.dummy_variable = "dummy_value"
    from pm4py.objects.log.importer.xes import factory as xes_importer
    from pm4py.algo.discovery.dfg import factory as dfg_factory
    from pm4py.statistics.attributes.log import get as attributes_filter
    from pm4py.visualization.dfg.versions import simple_visualize as dfg_visualize

    # import the example log, discover its DFG and count activity occurrences
    log = xes_importer.import_log(os.path.join("input_data", "running-example.xes"))
    dfg = dfg_factory.apply(log)
    activities_count = attributes_filter.get_attribute_values(log, "concept:name")

    # render the DFG; only checks that the visualization builds without error
    gviz = dfg_visualize.graphviz_visualization(activities_count, dfg)
    del gviz
def execute_script():
    """
    Demo script: filters a DFG discovered from the receipt log, aligns the log
    against the filtered DFG, visualizes it, and compares timing/statistics
    with a Petri-net-based alignment of the same model.
    """
    log = pm4py.read_xes(
        os.path.join("..", "tests", "input_data", "receipt.xes"))
    print("number of cases", len(log))
    print("number of events", sum(len(x) for x in log))
    print("number of variants", len(pm4py.get_variants(log)))
    ac = get.get_attribute_values(log, "concept:name")
    dfg, sa, ea = pm4py.discover_dfg(log)
    # keep only 50% of activities and paths in the DFG
    perc = 0.5
    dfg, sa, ea, ac = dfg_filtering.filter_dfg_on_activities_percentage(
        dfg, sa, ea, ac, perc)
    dfg, sa, ea, ac = dfg_filtering.filter_dfg_on_paths_percentage(
        dfg, sa, ea, ac, perc)
    # time the DFG-based alignment
    aa = time.time()
    aligned_traces = dfg_alignment.apply(log, dfg, sa, ea)
    bb = time.time()
    net, im, fm = pm4py.convert_to_petri_net(dfg, sa, ea)
    # report traces whose alignment cost differs from the internal cost
    for trace in aligned_traces:
        if trace["cost"] != trace["internal_cost"]:
            print(trace)
            pass
    print(bb - aa)
    print(sum(x["visited_states"] for x in aligned_traces))
    print(
        sum(x["cost"] // align_utils.STD_MODEL_LOG_MOVE_COST
            for x in aligned_traces))
    gviz = visualizer.apply(dfg,
                            activities_count=ac,
                            parameters={
                                "start_activities": sa,
                                "end_activities": ea,
                                "format": "svg"
                            })
    visualizer.view(gviz)
    # time the Petri-net-based alignment (Dijkstra, less memory) for comparison
    cc = time.time()
    aligned_traces2 = petri_alignments.apply(
        log, net, im, fm,
        variant=petri_alignments.Variants.VERSION_DIJKSTRA_LESS_MEMORY)
    dd = time.time()
    print(dd - cc)
    print(sum(x["visited_states"] for x in aligned_traces2))
    print(
        sum(x["cost"] // align_utils.STD_MODEL_LOG_MOVE_COST
            for x in aligned_traces2))
def filter_log_on_max_no_activities(
        log: EventLog, max_no_activities: int = 25,
        parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> EventLog:
    """
    Filter a log on a maximum number of activities

    Parameters
    -------------
    log
        Log
    max_no_activities
        Maximum number of activities
    parameters
        Parameters of the algorithm

    Returns
    -------------
    filtered_log
        Filtered version of the event log
    """
    if parameters is None:
        parameters = {}
    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)
    activity_key = parameters[
        PARAMETER_CONSTANT_ACTIVITY_KEY] if PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else DEFAULT_NAME_KEY

    # BUG FIX: the original wrote PARAMETER_CONSTANT_ATTRIBUTE_KEY straight into
    # the caller-supplied dict, leaking the attribute key back to the caller;
    # work on a shallow copy instead.
    parameters = dict(parameters)
    parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key

    # activities ranked by decreasing number of occurrences
    all_activities = sorted(get_attribute_values(log, activity_key).items(),
                            key=lambda item: item[1], reverse=True)
    # names of the top-N activities (slicing clamps at the list length)
    activities = [name for name, _ in all_activities[:max_no_activities]]
    if len(activities) < len(all_activities):
        log = apply_events(log, activities, parameters=parameters)
    return log
def apply(dfg, log=None, parameters=None, activities_count=None, measure="frequency"):
    """
    Build the Graphviz visualization of a directly-follows graph
    (legacy string-keyed parameter interface).

    Parameters
    ------------
    dfg
        Directly-follows graph
    log
        (optional) Event log used to compute activity counts when not supplied
    parameters
        Parameters ("format", "maxNoOfEdgesInDiagram", "start_activities",
        "end_activities", activity key constant)
    activities_count
        (optional) Occurrences per activity
    measure
        Measure to represent ("frequency" by default)

    Returns
    ------------
    gviz
        Graphviz object
    """
    if parameters is None:
        parameters = {}

    activity_key = parameters.get(PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    image_format = parameters.get("format", "png")
    max_no_of_edges_in_diagram = parameters.get("maxNoOfEdgesInDiagram", 75)
    start_activities = parameters.get("start_activities", [])
    end_activities = parameters.get("end_activities", [])

    if activities_count is None:
        if log is not None:
            activities_count = attr_get.get_attribute_values(log, activity_key, parameters=parameters)
        else:
            # no log available: give each DFG activity a neutral count of 1
            activities_count = {act: 1 for act in dfg_utils.get_activities_from_dfg(dfg)}

    return graphviz_visualization(activities_count, dfg,
                                  image_format=image_format,
                                  measure=measure,
                                  max_no_of_edges_in_diagram=max_no_of_edges_in_diagram,
                                  start_activities=start_activities,
                                  end_activities=end_activities)
def apply_heu(log, parameters=None):
    """
    Discovers an Heuristics Net using Heuristics Miner

    Parameters
    ------------
    log
        Event log
    parameters
        Possible parameters of the algorithm, including: activity_key, case_id_glue,
        timestamp_key, dependency_thresh, and_measure_thresh, min_act_count,
        min_dfg_occurrences, dfg_pre_cleaning_noise_thresh, loops_length_two_thresh

    Returns
    ------------
    heu
        Heuristics Net
    """
    if parameters is None:
        parameters = {}

    if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters:
        activity_key = parameters[constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    else:
        activity_key = xes.DEFAULT_NAME_KEY

    # log-level statistics required by the miner
    start_activities = log_sa_filter.get_start_activities(log, parameters=parameters)
    end_activities = log_ea_filter.get_end_activities(log, parameters=parameters)
    activities_occurrences = log_attributes.get_attribute_values(log, activity_key, parameters=parameters)
    activities = list(activities_occurrences)

    # directly-follows relations: standard DFG, window-2 DFG and frequency triples
    dfg = dfg_factory.apply(log, parameters=parameters)
    parameters_w2 = deepcopy(parameters)
    parameters_w2["window"] = 2
    dfg_window_2 = dfg_factory.apply(log, parameters=parameters_w2)
    freq_triples = dfg_factory.apply(log, parameters=parameters, variant="freq_triples")

    return apply_heu_dfg(dfg,
                         activities=activities,
                         activities_occurrences=activities_occurrences,
                         start_activities=start_activities,
                         end_activities=end_activities,
                         dfg_window_2=dfg_window_2,
                         freq_triples=freq_triples,
                         parameters=parameters)
def apply_tree(log, parameters=None):
    """
    Apply the IM_FF algorithm to a log obtaining a process tree

    Parameters
    ----------
    log
        Log
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)
            Parameters.NOISE_THRESHOLD -> noise threshold for IMf

    Returns
    ----------
    process_tree
        Process tree
    """
    # CONSISTENCY FIX: parameters now defaults to None like every sibling
    # apply_tree in this codebase (the body already handled None, but the
    # argument used to be mandatory).
    if parameters is None:
        parameters = {}
    if pkgutil.find_loader("pandas"):
        import pandas as pd
        from pm4py.statistics.variants.pandas import get as variants_get
        # dataframes are routed through the variants-based implementation
        if type(log) is pd.DataFrame:
            variants_count = variants_get.get_variants_count(log, parameters=parameters)
            return apply_tree_variants(variants_count, parameters=parameters)

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters,
                                              pmutil.xes_constants.DEFAULT_NAME_KEY)
    log = converter.apply(log, parameters=parameters)
    # keep only the activity attribute (since the others are not used)
    log = filtering_utils.keep_only_one_attribute_per_event(log, activity_key)
    noise_threshold = exec_utils.get_param_value(Parameters.NOISE_THRESHOLD, parameters,
                                                 shared_constants.NOISE_THRESHOLD_IMF)

    # directly-follows pairs with a strictly positive count
    dfg = [(k, v) for k, v in dfg_inst.apply(log, parameters=parameters).items() if v > 0]
    c = Counts()
    activities = attributes_get.get_attribute_values(log, activity_key)
    start_activities = list(start_activities_get.get_start_activities(log, parameters=parameters).keys())
    end_activities = list(end_activities_get.get_end_activities(log, parameters=parameters).keys())

    # check if the log contains empty traces
    # (reuse the computed lengths instead of re-scanning the log, as the original did)
    traces_length = [len(trace) for trace in log]
    contains_empty_traces = bool(traces_length) and min(traces_length) == 0

    # set the threshold parameter based on the noise threshold and the max value in the dfg
    max_value = 0
    for key, value in dfg:
        if value > max_value:
            max_value = value
    threshold = noise_threshold * max_value

    recursion_depth = 0
    sub = subtree.make_tree(log, dfg, dfg, dfg, activities, c, recursion_depth,
                            noise_threshold, threshold,
                            start_activities, end_activities,
                            start_activities, end_activities,
                            parameters=parameters)

    process_tree = get_tree_repr_implain.get_repr(sub, 0, contains_empty_traces=contains_empty_traces)
    # Ensures consistency to the parent pointers in the process tree
    tree_consistency.fix_parent_pointers(process_tree)
    # Fixes a 1 child XOR that is added when single-activities flowers are found
    tree_consistency.fix_one_child_xor_flower(process_tree)
    # folds the process tree (to simplify it in case fallthroughs/filtering is applied)
    process_tree = util.fold(process_tree)
    return process_tree
print("") print(log_path) if "xes" in log_name: from pm4py.statistics.attributes.log import get as attributes_get_log log = pm4py.read_xes(log_path) for trace in log: for event in trace: if True and "lifecycle:transition" in event: event["@@classifier"] = event[ "concept:name"] + "+" + event[ "lifecycle:transition"] # event["concept:name"] = event["concept:name"] + "+" + event["lifecycle:transition"] else: event["@@classifier"] = event["concept:name"] activities = set( attributes_get_log.get_attribute_values(log, CLASSIFIER).keys()) variants = variants_get.get_variants( log, parameters={"pm4py:param:activity_key": CLASSIFIER}) fp_log = pm4py.algo.discovery.footprints.log.variants.entire_event_log.apply( log, parameters={"pm4py:param:activity_key": CLASSIFIER}) elif "parquet" in log_name: from pm4py.statistics.attributes.pandas import get as attributes_get_pandas dataframe = pd.read_parquet(log_path) activities = set( attributes_get_pandas.get_attribute_values( dataframe, CLASSIFIER).keys()) variants = pm4py.get_variants(dataframe) fp_log = pm4py.algo.discovery.footprints.log.variants.entire_dataframe.apply( dataframe) print("start tree_im_clean") tree_im_clean = im_clean.apply_tree(
def apply(dfg: Dict[Tuple[str, str], int], log: EventLog = None, parameters: Optional[Dict[Any, Any]] = None,
          activities_count: Dict[str, int] = None, soj_time: Dict[str, float] = None) -> Digraph:
    """
    Visualize a frequency directly-follows graph

    Parameters
    -----------------
    dfg
        Frequency Directly-follows graph
    log
        (if provided) Event log for the calculation of statistics
    activities_count
        (if provided) Dictionary associating to each activity the number of occurrences in the log.
    soj_time
        (if provided) Dictionary associating to each activity the average sojourn time
    parameters
        Variant-specific parameters

    Returns
    -----------------
    gviz
        Graphviz digraph
    """
    if parameters is None:
        parameters = {}

    # read all rendering options from the parameters dictionary
    act_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)
    img_format = exec_utils.get_param_value(Parameters.FORMAT, parameters, "png")
    edge_limit = exec_utils.get_param_value(Parameters.MAX_NO_EDGES_IN_DIAGRAM, parameters, 100000)
    start_acts = exec_utils.get_param_value(Parameters.START_ACTIVITIES, parameters, {})
    end_acts = exec_utils.get_param_value(Parameters.END_ACTIVITIES, parameters, {})
    fsize = str(exec_utils.get_param_value(Parameters.FONT_SIZE, parameters, 12))
    bg = exec_utils.get_param_value(Parameters.BGCOLOR, parameters, "transparent")
    locale = exec_utils.get_param_value(Parameters.STAT_LOCALE, parameters, None)
    if locale is None:
        locale = {}

    activities = dfg_utils.get_activities_from_dfg(dfg)

    if activities_count is None:
        if log is not None:
            activities_count = attr_get.get_attribute_values(log, act_key, parameters=parameters)
        else:
            # the frequency of an activity in the log is at least the number of occurrences of
            # incoming arcs in the DFG.
            # if the frequency of the start activities nodes is also provided, use also that.
            counts = Counter({key: 0 for key in activities})
            for edge in dfg:
                counts[edge[1]] += dfg[edge]
            if isinstance(start_acts, dict):
                for act, freq in start_acts.items():
                    counts[act] += freq
            activities_count = counts

    if soj_time is None:
        # fall back to zero sojourn times when no log is available
        if log is not None:
            soj_time = soj_time_get.apply(log, parameters=parameters)
        else:
            soj_time = {key: 0 for key in activities}

    return graphviz_visualization(
        activities_count, dfg, image_format=img_format, measure="frequency",
        max_no_of_edges_in_diagram=edge_limit, start_activities=start_acts,
        end_activities=end_acts, soj_time=soj_time, font_size=fsize,
        bgcolor=bg, stat_locale=locale)
def select_attributes_from_log_for_tree(
        log: EventLog,
        max_cases_for_attr_selection=DEFAULT_MAX_CASES_FOR_ATTR_SELECTION,
        max_diff_occ=DEFAULT_MAX_CASES_FOR_ATTR_SELECTION / 4):
    """
    Select attributes from log for tree

    Trace/event attributes are partitioned into numeric ones (int/float values)
    and string ones (str values with fewer than max_diff_occ distinct values).

    Parameters
    ------------
    log
        Log
    max_cases_for_attr_selection
        Maximum number of cases to consider for attribute selection
    max_diff_occ
        Maximum number of different occurrences for a string attribute to be kept

    Returns
    ------------
    string_trace_attributes_to_consider, string_event_attributes_to_consider,
    numeric_trace_attributes_to_consider, numeric_event_attributes_to_consider
    """
    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG)
    # sample the log when it is large: attribute discovery only needs a subset
    if len(log) > max_cases_for_attr_selection:
        filtered_log = sampling.sample(log, max_cases_for_attr_selection)
    else:
        filtered_log = log

    event_attributes = get_all_event_attributes_from_log(filtered_log)
    trace_attributes = get_all_trace_attributes_from_log(filtered_log)

    # distinct values are collected on the FULL log, not the sample
    event_attributes_values = {
        attr: set(get_attribute_values(log, attr).keys())
        for attr in event_attributes
    }
    trace_attributes_values = {
        attr: set(get_trace_attribute_values(log, attr).keys())
        for attr in trace_attributes
    }

    numeric_event_attributes_to_consider = list()
    string_event_attributes_to_consider = list()
    numeric_trace_attributes_to_consider = list()
    string_trace_attributes_to_consider = list()

    for attr, values in event_attributes_values.items():
        if not values:
            # an attribute with no observed values cannot be classified
            # (the original code raised IndexError here)
            continue
        sample_value = next(iter(values))
        # type() is used deliberately (not isinstance) so bool is not treated as int
        if type(sample_value) is int or type(sample_value) is float:
            numeric_event_attributes_to_consider.append(attr)
        elif type(sample_value) is str and len(values) < max_diff_occ:
            string_event_attributes_to_consider.append(attr)

    for attr, values in trace_attributes_values.items():
        if not values:
            continue
        sample_value = next(iter(values))
        if type(sample_value) is int or type(sample_value) is float:
            numeric_trace_attributes_to_consider.append(attr)
        elif type(sample_value) is str and len(values) < max_diff_occ:
            string_trace_attributes_to_consider.append(attr)

    # keep only attributes that are present in (almost) every case/event
    numeric_event_attributes_to_consider = check_event_attributes_presence(
        log, numeric_event_attributes_to_consider)
    string_event_attributes_to_consider = check_event_attributes_presence(
        log, string_event_attributes_to_consider)
    numeric_trace_attributes_to_consider = check_trace_attributes_presence(
        log, numeric_trace_attributes_to_consider)
    string_trace_attributes_to_consider = check_trace_attributes_presence(
        log, string_trace_attributes_to_consider)

    return string_trace_attributes_to_consider, string_event_attributes_to_consider, numeric_trace_attributes_to_consider, numeric_event_attributes_to_consider
import numpy.random as random random.seed(0) import pm4py.statistics.variants.log.get as getvariants ''' Author : Boltenhagen Mathilde Date : June 2020 randomSequences.py : this file has been created to get 1000 mock traces ''' log = xes_importer.apply("<original log>") variants = getvariants.get_variants(log) # get activities and maximum length in log activities = list(get_attribute_values(log,"concept:name").keys()) max_len = (len(max(project_traces(log),key=len))) log._list=[] for t in range(0,1000): new_sequence = Trace() # random length of the fake sequence size_of_sequence = random.randint(1,max_len-1) # random activities for e in range(0,size_of_sequence): event = Event() event["concept:name"]=activities[random.randint(1,len(activities))] new_sequence.append(event) log._list.append(new_sequence) xes_exporter.apply(log,"<1000 mock traces>")
def detect_cut(self, second_iteration=False, parameters=None):
    """
    Detect a cut in the current (sub)log and recurse on the resulting sub-logs.

    Order of attempts: base cases (empty log, single activity), then
    xor cut, sequence cut, parallel cut, loop cut; when nothing applies,
    the fall-through handling (apply_fall_through) takes over.
    Each found cut splits self.log, and a SubtreePlain child is built per
    resulting sub-log (recursion happens in the SubtreePlain constructor).

    NOTE(review): the second_iteration parameter is not used in this body.

    :param second_iteration: flag, unused here
    :param parameters: algorithm parameters (Parameters.ACTIVITY_KEY, ...)
    :raises Exception: when networkx is not installed
    """
    if pkgutil.find_loader("networkx"):
        import networkx as nx

        if parameters is None:
            parameters = {}
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)

        # check base cases:
        empty_log = base_case.empty_log(self.log)
        single_activity = base_case.single_activity(self.log, activity_key)
        if empty_log:
            self.detected_cut = 'empty_log'
        elif single_activity:
            self.detected_cut = 'single_activity'
        # if no base cases are found, search for a cut:
        else:
            # connected components of the undirected DFG (for xor detection)
            conn_components = detection_utils.get_connected_components(
                self.ingoing, self.outgoing, self.activities)
            # strongly connected components of the directed DFG (for sequence detection)
            this_nx_graph = transform_dfg_to_directed_nx_graph(
                self.dfg, activities=self.activities)
            strongly_connected_components = [
                list(x) for x in nx.strongly_connected_components(this_nx_graph)
            ]
            xor_cut = self.detect_xor(conn_components)
            # the following part searches for a cut in the current log
            # if a cut is found, the log is split according to the cut, the resulting logs are saved in new_logs
            # recursion is used on all the logs in new_logs
            if xor_cut[0]:
                logging.debug("xor_cut")
                # NOTE(review): the xor cut is recorded as 'concurrent' —
                # this labelling is intentional in this implementation; confirm
                # against the tree-representation code before changing it.
                self.detected_cut = 'concurrent'
                new_logs = split.split_xor(xor_cut[1], self.log, activity_key)
                for i in range(len(new_logs)):
                    new_logs[
                        i] = filtering_utils.keep_one_trace_per_variant(
                        new_logs[i], parameters=parameters)
                for l in new_logs:
                    # recompute DFG / activities / start / end for the sub-log
                    new_dfg = [(k, v) for k, v in dfg_inst.apply(
                        l, parameters=parameters).items() if v > 0]
                    activities = attributes_get.get_attribute_values(
                        l, activity_key)
                    start_activities = list(
                        start_activities_get.get_start_activities(
                            l, parameters=parameters).keys())
                    end_activities = list(
                        end_activities_get.get_end_activities(
                            l, parameters=parameters).keys())
                    self.children.append(
                        SubtreePlain(l, new_dfg, self.master_dfg,
                                     self.initial_dfg, activities,
                                     self.counts, self.rec_depth + 1,
                                     noise_threshold=self.noise_threshold,
                                     start_activities=start_activities,
                                     end_activities=end_activities,
                                     initial_start_activities=self.initial_start_activities,
                                     initial_end_activities=self.initial_end_activities,
                                     parameters=parameters))
            else:
                sequence_cut = cut_detection.detect_sequential_cut(
                    self, self.dfg, strongly_connected_components)
                if sequence_cut[0]:
                    logging.debug("sequence_cut")
                    new_logs = split.split_sequence(
                        sequence_cut[1], self.log, activity_key)
                    for i in range(len(new_logs)):
                        new_logs[
                            i] = filtering_utils.keep_one_trace_per_variant(
                            new_logs[i], parameters=parameters)
                    self.detected_cut = "sequential"
                    for l in new_logs:
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            l, parameters=parameters).items() if v > 0]
                        activities = attributes_get.get_attribute_values(
                            l, activity_key)
                        start_activities = list(
                            start_activities_get.get_start_activities(
                                l, parameters=parameters).keys())
                        end_activities = list(
                            end_activities_get.get_end_activities(
                                l, parameters=parameters).keys())
                        self.children.append(
                            SubtreePlain(
                                l, new_dfg, self.master_dfg,
                                self.initial_dfg, activities, self.counts,
                                self.rec_depth + 1,
                                noise_threshold=self.noise_threshold,
                                start_activities=start_activities,
                                end_activities=end_activities,
                                initial_start_activities=self.initial_start_activities,
                                initial_end_activities=self.initial_end_activities,
                                parameters=parameters))
                else:
                    parallel_cut = self.detect_concurrent()
                    if parallel_cut[0]:
                        logging.debug("parallel_cut")
                        new_logs = split.split_parallel(
                            parallel_cut[1], self.log, activity_key)
                        for i in range(len(new_logs)):
                            new_logs[
                                i] = filtering_utils.keep_one_trace_per_variant(
                                new_logs[i], parameters=parameters)
                        self.detected_cut = "parallel"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_get.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_get.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_get.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreePlain(
                                    l, new_dfg, self.master_dfg,
                                    self.initial_dfg, activities,
                                    self.counts, self.rec_depth + 1,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.initial_start_activities,
                                    initial_end_activities=self.initial_end_activities,
                                    parameters=parameters))
                    else:
                        loop_cut = self.detect_loop()
                        if loop_cut[0]:
                            logging.debug("loop_cut")
                            new_logs = split.split_loop(
                                loop_cut[1], self.log, activity_key)
                            for i in range(len(new_logs)):
                                new_logs[
                                    i] = filtering_utils.keep_one_trace_per_variant(
                                    new_logs[i], parameters=parameters)
                            self.detected_cut = "loopCut"
                            for l in new_logs:
                                new_dfg = [
                                    (k, v) for k, v in dfg_inst.apply(
                                        l, parameters=parameters).items()
                                    if v > 0
                                ]
                                activities = attributes_get.get_attribute_values(
                                    l, activity_key)
                                start_activities = list(
                                    start_activities_get.
                                    get_start_activities(
                                        l, parameters=parameters).keys())
                                end_activities = list(
                                    end_activities_get.get_end_activities(
                                        l, parameters=parameters).keys())
                                self.children.append(
                                    SubtreePlain(
                                        l, new_dfg, self.master_dfg,
                                        self.initial_dfg, activities,
                                        self.counts, self.rec_depth + 1,
                                        noise_threshold=self.noise_threshold,
                                        start_activities=start_activities,
                                        end_activities=end_activities,
                                        initial_start_activities=self.initial_start_activities,
                                        initial_end_activities=self.initial_end_activities,
                                        parameters=parameters))
                        # if the code gets to this point, there is no base_case and no cut found in the log
                        # therefore, we now apply fall through:
                        else:
                            self.apply_fall_through(parameters)
    else:
        msg = "networkx is not available. inductive miner cannot be used!"
        logging.error(msg)
        raise Exception(msg)
def apply_fall_through(self, parameters=None):
    """
    Apply the inductive-miner fall-through rules when no cut was found.

    The fall-throughs are tried in order: empty trace, activity once per
    trace, activity concurrent, strict tau loop, tau loop; if none applies,
    a flower model is produced (self.detected_cut = 'flower').
    Each fall-through can be disabled via the corresponding parameter key;
    by default all are enabled.

    :param parameters: algorithm parameters (activity key and the
        EMPTY_TRACE_KEY / ONCE_PER_TRACE_KEY / CONCURRENT_KEY /
        STRICT_TAU_LOOP_KEY / TAU_LOOP_KEY switches)
    """
    if parameters is None:
        parameters = {}
    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters,
        pmutil.xes_constants.DEFAULT_NAME_KEY)

    # set flags for fall_throughs, base case is True (enabled)
    use_empty_trace = (Parameters.EMPTY_TRACE_KEY not in parameters
                       ) or parameters[Parameters.EMPTY_TRACE_KEY]
    use_act_once_per_trace = (
        Parameters.ONCE_PER_TRACE_KEY not in parameters) or parameters[Parameters.ONCE_PER_TRACE_KEY]
    use_act_concurrent = (Parameters.CONCURRENT_KEY not in parameters
                          ) or parameters[Parameters.CONCURRENT_KEY]
    use_strict_tau_loop = (Parameters.STRICT_TAU_LOOP_KEY not in parameters
                           ) or parameters[Parameters.STRICT_TAU_LOOP_KEY]
    use_tau_loop = (Parameters.TAU_LOOP_KEY not in parameters
                    ) or parameters[Parameters.TAU_LOOP_KEY]

    if use_empty_trace:
        empty_trace, new_log = fall_through.empty_trace(self.log)
        # if an empty trace is found, the empty trace fallthrough applies
        #
    else:
        empty_trace = False
    if empty_trace:
        logging.debug("empty_trace")
        # NOTE(review): activites_left is collected here but never read
        # afterwards in this branch — appears to be dead code; confirm.
        activites_left = []
        for trace in new_log:
            for act in trace:
                if act[activity_key] not in activites_left:
                    activites_left.append(act[activity_key])
        self.detected_cut = 'empty_trace'
        new_dfg = [(k, v) for k, v in dfg_inst.apply(
            new_log, parameters=parameters).items() if v > 0]
        activities = attributes_get.get_attribute_values(
            new_log, activity_key)
        # NOTE(review): start/end activities are derived with self.parameters
        # while the DFG uses the local parameters — inconsistent but kept as-is.
        start_activities = list(
            start_activities_get.get_start_activities(
                new_log, parameters=self.parameters).keys())
        end_activities = list(
            end_activities_get.get_end_activities(
                new_log, parameters=self.parameters).keys())
        self.children.append(
            SubtreePlain(
                new_log, new_dfg, self.master_dfg, self.initial_dfg,
                activities, self.counts, self.rec_depth + 1,
                noise_threshold=self.noise_threshold,
                start_activities=start_activities,
                end_activities=end_activities,
                initial_start_activities=self.initial_start_activities,
                initial_end_activities=self.initial_end_activities,
                parameters=parameters))
    else:
        if use_act_once_per_trace:
            activity_once, new_log, small_log = fall_through.act_once_per_trace(
                self.log, self.activities, activity_key)
            small_log = filtering_utils.keep_one_trace_per_variant(
                small_log, parameters=parameters)
        else:
            activity_once = False
        if use_act_once_per_trace and activity_once:
            self.detected_cut = 'parallel'
            # create two new dfgs as we need them to append to self.children later
            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                new_log, parameters=parameters).items() if v > 0]
            activities = attributes_get.get_attribute_values(
                new_log, activity_key)
            small_dfg = [(k, v) for k, v in dfg_inst.apply(
                small_log, parameters=parameters).items() if v > 0]
            small_activities = attributes_get.get_attribute_values(
                small_log, activity_key)
            # append the once-per-trace activity as a leaf child
            self.children.append(
                SubtreePlain(
                    small_log, small_dfg, self.master_dfg, self.initial_dfg,
                    small_activities, self.counts, self.rec_depth + 1,
                    noise_threshold=self.noise_threshold,
                    initial_start_activities=self.initial_start_activities,
                    initial_end_activities=self.initial_end_activities,
                    parameters=parameters))
            # continue with the recursion on the new log
            start_activities = list(
                start_activities_get.get_start_activities(
                    new_log, parameters=self.parameters).keys())
            end_activities = list(
                end_activities_get.get_end_activities(
                    new_log, parameters=self.parameters).keys())
            self.children.append(
                SubtreePlain(
                    new_log, new_dfg, self.master_dfg, self.initial_dfg,
                    activities, self.counts, self.rec_depth + 1,
                    noise_threshold=self.noise_threshold,
                    start_activities=start_activities,
                    end_activities=end_activities,
                    initial_start_activities=self.initial_start_activities,
                    initial_end_activities=self.initial_end_activities,
                    parameters=parameters))
        else:
            if use_act_concurrent:
                activity_concurrent, new_log, small_log, activity_left_out = fall_through.activity_concurrent(
                    self, self.log, self.activities, activity_key,
                    parameters=parameters)
                small_log = filtering_utils.keep_one_trace_per_variant(
                    small_log, parameters=parameters)
            else:
                activity_concurrent = False
            if use_act_concurrent and activity_concurrent:
                self.detected_cut = 'parallel'
                # create two new dfgs on to append later
                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                    new_log, parameters=parameters).items() if v > 0]
                activities = attributes_get.get_attribute_values(
                    new_log, activity_key)
                small_dfg = [(k, v) for k, v in dfg_inst.apply(
                    small_log, parameters=parameters).items() if v > 0]
                small_activities = attributes_get.get_attribute_values(
                    small_log, activity_key)
                # append the concurrent activity as leaf:
                self.children.append(
                    SubtreePlain(
                        small_log, small_dfg, self.master_dfg,
                        self.initial_dfg, small_activities, self.counts,
                        self.rec_depth + 1,
                        noise_threshold=self.noise_threshold,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))
                # continue with the recursion on the new log:
                start_activities = list(
                    start_activities_get.get_start_activities(
                        new_log, parameters=self.parameters).keys())
                end_activities = list(
                    end_activities_get.get_end_activities(
                        new_log, parameters=self.parameters).keys())
                self.children.append(
                    SubtreePlain(
                        new_log, new_dfg, self.master_dfg, self.initial_dfg,
                        activities, self.counts, self.rec_depth + 1,
                        noise_threshold=self.noise_threshold,
                        start_activities=start_activities,
                        end_activities=end_activities,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=parameters))
            else:
                if use_strict_tau_loop:
                    strict_tau_loop, new_log = fall_through.strict_tau_loop(
                        self.log, self.start_activities,
                        self.end_activities, activity_key)
                    new_log = filtering_utils.keep_one_trace_per_variant(
                        new_log, parameters=parameters)
                else:
                    strict_tau_loop = False
                if use_strict_tau_loop and strict_tau_loop:
                    # NOTE(review): activites_left unused here as well
                    activites_left = []
                    for trace in new_log:
                        for act in trace:
                            if act[activity_key] not in activites_left:
                                activites_left.append(act[activity_key])
                    self.detected_cut = 'strict_tau_loop'
                    new_dfg = [(k, v) for k, v in dfg_inst.apply(
                        new_log, parameters=parameters).items() if v > 0]
                    activities = attributes_get.get_attribute_values(
                        new_log, activity_key)
                    start_activities = list(
                        start_activities_get.get_start_activities(
                            new_log, parameters=self.parameters).keys())
                    end_activities = list(
                        end_activities_get.get_end_activities(
                            new_log, parameters=self.parameters).keys())
                    self.children.append(
                        SubtreePlain(
                            new_log, new_dfg, self.master_dfg,
                            self.initial_dfg, activities, self.counts,
                            self.rec_depth + 1,
                            noise_threshold=self.noise_threshold,
                            start_activities=start_activities,
                            end_activities=end_activities,
                            initial_start_activities=self.initial_start_activities,
                            initial_end_activities=self.initial_end_activities,
                            parameters=parameters))
                else:
                    if use_tau_loop:
                        tau_loop, new_log = fall_through.tau_loop(
                            self.log, self.start_activities, activity_key)
                        new_log = filtering_utils.keep_one_trace_per_variant(
                            new_log, parameters=parameters)
                    else:
                        tau_loop = False
                    if use_tau_loop and tau_loop:
                        # NOTE(review): activites_left unused here as well
                        activites_left = []
                        for trace in new_log:
                            for act in trace:
                                if act[activity_key] not in activites_left:
                                    activites_left.append(
                                        act[activity_key])
                        self.detected_cut = 'tau_loop'
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            new_log, parameters=parameters).items() if v > 0]
                        activities = attributes_get.get_attribute_values(
                            new_log, activity_key)
                        start_activities = list(
                            start_activities_get.get_start_activities(
                                new_log, parameters=self.parameters).keys())
                        end_activities = list(
                            end_activities_get.get_end_activities(
                                new_log, parameters=self.parameters).keys())
                        self.children.append(
                            SubtreePlain(
                                new_log, new_dfg, self.master_dfg,
                                self.initial_dfg, activities, self.counts,
                                self.rec_depth + 1,
                                noise_threshold=self.noise_threshold,
                                start_activities=start_activities,
                                end_activities=end_activities,
                                initial_start_activities=self.initial_start_activities,
                                initial_end_activities=self.initial_end_activities,
                                parameters=parameters))
                    else:
                        # no fall-through applies: produce a flower model
                        logging.debug("flower model")
                        activites_left = []
                        for trace in self.log:
                            for act in trace:
                                if act[activity_key] not in activites_left:
                                    activites_left.append(
                                        act[activity_key])
                        self.detected_cut = 'flower'
def test_get_attributes(self): from pm4py.statistics.attributes.log import get log = self.get_log() get.get_attribute_values(log, "concept:name") get.get_kde_date_attribute(log, "time:timestamp") get.get_kde_numeric_attribute(log, "amount")