    def detect_cut(self, second_iteration=False, parameters=None):
        if pkgutil.find_loader("networkx"):
            import networkx as nx

            if parameters is None:
                parameters = {}
            activity_key = exec_utils.get_param_value(
                Parameters.ACTIVITY_KEY, parameters,
                pmutil.xes_constants.DEFAULT_NAME_KEY)

            # check base cases:
            empty_log = base_case.empty_log(self.log)
            single_activity = base_case.single_activity(self.log, activity_key)
            if empty_log:
                self.detected_cut = 'empty_log'
            elif single_activity:
                self.detected_cut = 'single_activity'
            # if no base case applies, search for a cut:
            else:
                conn_components = detection_utils.get_connected_components(
                    self.ingoing, self.outgoing, self.activities)
                this_nx_graph = transform_dfg_to_directed_nx_graph(
                    self.dfg, activities=self.activities)
                strongly_connected_components = [
                    list(x)
                    for x in nx.strongly_connected_components(this_nx_graph)
                ]
                xor_cut = self.detect_xor(conn_components)
                # the following part searches for a cut in the current log;
                # if a cut is found, the log is split according to the cut,
                # the resulting sub-logs are saved in new_logs, and recursion
                # is applied to every log in new_logs
                if xor_cut[0]:
                    logging.debug("xor_cut")
                    # note: the XOR cut is stored under the label 'concurrent',
                    # which the downstream tree construction turns into an
                    # exclusive choice
                    self.detected_cut = 'concurrent'
                    new_logs = split.split_xor(xor_cut[1], self.log,
                                               activity_key)
                    for i in range(len(new_logs)):
                        new_logs[i] = filtering_utils.keep_one_trace_per_variant(
                            new_logs[i], parameters=parameters)
                    for l in new_logs:
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            l, parameters=parameters).items() if v > 0]
                        activities = attributes_filter.get_attribute_values(
                            l, activity_key)
                        start_activities = list(
                            start_activities_filter.get_start_activities(
                                l, parameters=parameters).keys())
                        end_activities = list(
                            end_activities_filter.get_end_activities(
                                l, parameters=parameters).keys())
                        self.children.append(
                            SubtreePlain(l, new_dfg, self.master_dfg,
                                         self.initial_dfg, activities,
                                         self.counts, self.rec_depth + 1,
                                         noise_threshold=self.noise_threshold,
                                         start_activities=start_activities,
                                         end_activities=end_activities,
                                         initial_start_activities=self.initial_start_activities,
                                         initial_end_activities=self.initial_end_activities,
                                         parameters=parameters))
                else:
                    sequence_cut = cut_detection.detect_sequential_cut(
                        self, self.dfg, strongly_connected_components)
                    if sequence_cut[0]:
                        logging.debug("sequence_cut")
                        new_logs = split.split_sequence(
                            sequence_cut[1], self.log, activity_key)
                        for i in range(len(new_logs)):
                            new_logs[i] = filtering_utils.keep_one_trace_per_variant(
                                new_logs[i], parameters=parameters)
                        self.detected_cut = "sequential"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreePlain(l, new_dfg, self.master_dfg,
                                             self.initial_dfg, activities,
                                             self.counts, self.rec_depth + 1,
                                             noise_threshold=self.noise_threshold,
                                             start_activities=start_activities,
                                             end_activities=end_activities,
                                             initial_start_activities=self.initial_start_activities,
                                             initial_end_activities=self.initial_end_activities,
                                             parameters=parameters))
                    else:
                        parallel_cut = self.detect_concurrent()
                        if parallel_cut[0]:
                            logging.debug("parallel_cut")
                            new_logs = split.split_parallel(
                                parallel_cut[1], self.log, activity_key)
                            for i in range(len(new_logs)):
                                new_logs[i] = filtering_utils.keep_one_trace_per_variant(
                                    new_logs[i], parameters=parameters)
                            self.detected_cut = "parallel"
                            for l in new_logs:
                                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                    l, parameters=parameters).items() if v > 0]
                                activities = attributes_filter.get_attribute_values(
                                    l, activity_key)
                                start_activities = list(
                                    start_activities_filter.get_start_activities(
                                        l, parameters=parameters).keys())
                                end_activities = list(
                                    end_activities_filter.get_end_activities(
                                        l, parameters=parameters).keys())
                                self.children.append(
                                    SubtreePlain(l, new_dfg, self.master_dfg,
                                                 self.initial_dfg, activities,
                                                 self.counts,
                                                 self.rec_depth + 1,
                                                 noise_threshold=self.noise_threshold,
                                                 start_activities=start_activities,
                                                 end_activities=end_activities,
                                                 initial_start_activities=self.initial_start_activities,
                                                 initial_end_activities=self.initial_end_activities,
                                                 parameters=parameters))
                        else:
                            loop_cut = self.detect_loop()
                            if loop_cut[0]:
                                logging.debug("loop_cut")
                                new_logs = split.split_loop(
                                    loop_cut[1], self.log, activity_key)
                                for i in range(len(new_logs)):
                                    new_logs[i] = filtering_utils.keep_one_trace_per_variant(
                                        new_logs[i], parameters=parameters)
                                self.detected_cut = "loopCut"
                                for l in new_logs:
                                    new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                        l, parameters=parameters).items() if v > 0]
                                    activities = attributes_filter.get_attribute_values(
                                        l, activity_key)
                                    start_activities = list(
                                        start_activities_filter.get_start_activities(
                                            l, parameters=parameters).keys())
                                    end_activities = list(
                                        end_activities_filter.get_end_activities(
                                            l, parameters=parameters).keys())
                                    self.children.append(
                                        SubtreePlain(l, new_dfg,
                                                     self.master_dfg,
                                                     self.initial_dfg,
                                                     activities, self.counts,
                                                     self.rec_depth + 1,
                                                     noise_threshold=self.noise_threshold,
                                                     start_activities=start_activities,
                                                     end_activities=end_activities,
                                                     initial_start_activities=self.initial_start_activities,
                                                     initial_end_activities=self.initial_end_activities,
                                                     parameters=parameters))
                            # if the code gets to this point, no base case
                            # applies and no cut was found in the log, so
                            # apply a fall-through:
                            else:
                                self.apply_fall_through(parameters)
        else:
            msg = "networkx is not available. inductive miner cannot be used!"
            logging.error(msg)
            raise Exception(msg)
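    # The four cut branches above end in the same recursion step: for each
    # sub-log produced by the split, recompute the DFG (keeping only edges
    # that actually occur), the activity set, and the start/end activities,
    # then attach a child SubtreePlain one recursion level deeper. A minimal
    # sketch of that shared step as a helper method; illustrative only, not
    # part of pm4py's API (the name _recurse_on_sublogs is hypothetical):
    def _recurse_on_sublogs(self, new_logs, activity_key, parameters):
        for sublog in new_logs:
            # directly-follows graph restricted to observed edges
            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                sublog, parameters=parameters).items() if v > 0]
            activities = attributes_filter.get_attribute_values(
                sublog, activity_key)
            start_activities = list(
                start_activities_filter.get_start_activities(
                    sublog, parameters=parameters).keys())
            end_activities = list(
                end_activities_filter.get_end_activities(
                    sublog, parameters=parameters).keys())
            # each sub-log becomes a child subtree of the current node
            self.children.append(
                SubtreePlain(sublog, new_dfg, self.master_dfg,
                             self.initial_dfg, activities, self.counts,
                             self.rec_depth + 1,
                             noise_threshold=self.noise_threshold,
                             start_activities=start_activities,
                             end_activities=end_activities,
                             initial_start_activities=self.initial_start_activities,
                             initial_end_activities=self.initial_end_activities,
                             parameters=parameters))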
    def apply_cut_im_plain(self, type_of_cut, cut, activity_key):
        if type_of_cut == 'concurrent':
            self.detected_cut = 'concurrent'
            new_logs = split.split_xor(cut[1], self.log, activity_key)
            for l in new_logs:
                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                    l, parameters=self.parameters).items() if v > 0]
                activities = attributes_filter.get_attribute_values(
                    l, activity_key)
                start_activities = list(
                    start_activities_filter.get_start_activities(
                        l, parameters=self.parameters).keys())
                end_activities = list(
                    end_activities_filter.get_end_activities(
                        l, parameters=self.parameters).keys())
                self.children.append(
                    SubtreeInfrequent(
                        l, new_dfg, self.master_dfg, self.initial_dfg,
                        activities, self.counts, self.rec_depth + 1, self.f,
                        noise_threshold=self.noise_threshold,
                        start_activities=start_activities,
                        end_activities=end_activities,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=self.parameters))
        elif type_of_cut == 'sequential':
            new_logs = split.split_sequence(cut[1], self.log, activity_key)
            self.detected_cut = "sequential"
            for l in new_logs:
                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                    l, parameters=self.parameters).items() if v > 0]
                activities = attributes_filter.get_attribute_values(
                    l, activity_key)
                start_activities = list(
                    start_activities_filter.get_start_activities(
                        l, parameters=self.parameters).keys())
                end_activities = list(
                    end_activities_filter.get_end_activities(
                        l, parameters=self.parameters).keys())
                self.children.append(
                    SubtreeInfrequent(
                        l, new_dfg, self.master_dfg, self.initial_dfg,
                        activities, self.counts, self.rec_depth + 1, self.f,
                        noise_threshold=self.noise_threshold,
                        start_activities=start_activities,
                        end_activities=end_activities,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=self.parameters))
        elif type_of_cut == 'parallel':
            new_logs = split.split_parallel(cut[1], self.log, activity_key)
            self.detected_cut = "parallel"
            for l in new_logs:
                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                    l, parameters=self.parameters).items() if v > 0]
                activities = attributes_filter.get_attribute_values(
                    l, activity_key)
                start_activities = list(
                    start_activities_filter.get_start_activities(
                        l, parameters=self.parameters).keys())
                end_activities = list(
                    end_activities_filter.get_end_activities(
                        l, parameters=self.parameters).keys())
                self.children.append(
                    SubtreeInfrequent(
                        l, new_dfg, self.master_dfg, self.initial_dfg,
                        activities, self.counts, self.rec_depth + 1, self.f,
                        noise_threshold=self.noise_threshold,
                        start_activities=start_activities,
                        end_activities=end_activities,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=self.parameters))
        elif type_of_cut == 'loopCut':
            new_logs = split.split_loop(cut[1], self.log, activity_key)
            self.detected_cut = "loopCut"
            for l in new_logs:
                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                    l, parameters=self.parameters).items() if v > 0]
                activities = attributes_filter.get_attribute_values(
                    l, activity_key)
                start_activities = list(
                    start_activities_filter.get_start_activities(
                        l, parameters=self.parameters).keys())
                end_activities = list(
                    end_activities_filter.get_end_activities(
                        l, parameters=self.parameters).keys())
                self.children.append(
                    SubtreeInfrequent(
                        l, new_dfg, self.master_dfg, self.initial_dfg,
                        activities, self.counts, self.rec_depth + 1, self.f,
                        noise_threshold=self.noise_threshold,
                        start_activities=start_activities,
                        end_activities=end_activities,
                        initial_start_activities=self.initial_start_activities,
                        initial_end_activities=self.initial_end_activities,
                        parameters=self.parameters))
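    # For orientation, a hedged usage sketch: these subtree objects are not
    # meant to be constructed by hand. The inductive-miner entry point builds
    # the root subtree, which recurses via detect_cut, and the resulting tree
    # of detected_cut/children values is then converted into a process tree.
    # Module paths and return types differ across pm4py versions, so the
    # lines below are illustrative only ("log.xes" is a placeholder path):
    #
    #     from pm4py.objects.log.importer.xes import importer as xes_importer
    #     from pm4py.algo.discovery.inductive import algorithm as inductive_miner
    #
    #     log = xes_importer.apply("log.xes")
    #     tree = inductive_miner.apply_tree(log)  # some versions: apply() returns a Petri net triple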