Exemple #1
0
    def detect_cut_if(self, second_iteration=False, parameters=None):
        # dfg_viz = dfg_factory.apply(self.log)
        # gviz = dfg_vis_factory.apply(dfg_viz, log=self.log, variant="frequency", parameters={"format": "PDF"})
        # dfg_vis_factory.view(gviz)
        if parameters is None:
            parameters = {}
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, self.parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)

        # check base cases:
        empty_log = base_case.empty_log(self.log)
        single_activity = base_case.single_activity(self.log, activity_key)
        if empty_log:
            self.detected_cut = 'empty_log'
        elif single_activity:
            self.detected_cut = 'single_activity'
        # if no base cases are found, search for a cut:
        # use the cutting and splitting functions of im_plain:
        else:
            found_plain_cut, type_of_cut, cut = self.check_cut_im_plain()

            if found_plain_cut:
                self.apply_cut_im_plain(type_of_cut, cut, activity_key)
            # if im_plain does not find a cut, we filter on our threshold and then again apply the im_cut detection
            # but this time, we have to use different splitting functions:
            else:
                self.filter_dfg_on_threshold()
                """
                dfg_viz = dfg_factory.apply(self.log)
                gviz = dfg_vis_factory.apply(dfg_viz, log=self.log, variant="frequency", parameters={"format": "PDF"})
                dfg_vis_factory.view(gviz)
                """
                found_plain_cut, type_of_cut, cut = self.check_cut_im_plain()
                if found_plain_cut:
                    if type_of_cut == 'concurrent':
                        logging.debug("concurrent_cut_if")
                        self.detected_cut = 'concurrent'
                        new_logs = splitting_infrequent.split_xor_infrequent(
                            cut[1], self.log, activity_key)
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                    elif type_of_cut == 'sequential':
                        logging.debug("sequential_if")
                        new_logs = splitting_infrequent.split_sequence_infrequent(
                            cut[1], self.log, activity_key)
                        self.detected_cut = "sequential"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                    elif type_of_cut == 'parallel':
                        logging.debug("parallel_if")
                        new_logs = split.split_parallel(
                            cut[1], self.log, activity_key)
                        self.detected_cut = "parallel"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                    elif type_of_cut == 'loopCut':
                        logging.debug("loopCut_if")
                        new_logs = splitting_infrequent.split_loop_infrequent(
                            cut[1], self.log, activity_key)
                        self.detected_cut = "loopCut"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreeInfrequent(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    self.f,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))

                else:
                    self.apply_fall_through_infrequent(parameters)
Exemple #2
0
    def detect_cut(self, second_iteration=False, parameters=None):
        if pkgutil.find_loader("networkx"):
            import networkx as nx

            if parameters is None:
                parameters = {}
            activity_key = exec_utils.get_param_value(
                Parameters.ACTIVITY_KEY, parameters,
                pmutil.xes_constants.DEFAULT_NAME_KEY)

            # check base cases:
            empty_log = base_case.empty_log(self.log)
            single_activity = base_case.single_activity(self.log, activity_key)
            if empty_log:
                self.detected_cut = 'empty_log'
            elif single_activity:
                self.detected_cut = 'single_activity'
            # if no base cases are found, search for a cut:
            else:
                conn_components = detection_utils.get_connected_components(
                    self.ingoing, self.outgoing, self.activities)
                this_nx_graph = transform_dfg_to_directed_nx_graph(
                    self.dfg, activities=self.activities)
                strongly_connected_components = [
                    list(x)
                    for x in nx.strongly_connected_components(this_nx_graph)
                ]
                xor_cut = self.detect_xor(conn_components)
                # the following part searches for a cut in the current log
                # if a cut is found, the log is split according to the cut, the resulting logs are saved in new_logs
                # recursion is used on all the logs in new_logs
                if xor_cut[0]:
                    logging.debug("xor_cut")
                    self.detected_cut = 'concurrent'
                    new_logs = split.split_xor(xor_cut[1], self.log,
                                               activity_key)
                    for i in range(len(new_logs)):
                        new_logs[
                            i] = filtering_utils.keep_one_trace_per_variant(
                                new_logs[i], parameters=parameters)
                    for l in new_logs:
                        new_dfg = [(k, v) for k, v in dfg_inst.apply(
                            l, parameters=parameters).items() if v > 0]
                        activities = attributes_filter.get_attribute_values(
                            l, activity_key)
                        start_activities = list(
                            start_activities_filter.get_start_activities(
                                l, parameters=parameters).keys())
                        end_activities = list(
                            end_activities_filter.get_end_activities(
                                l, parameters=parameters).keys())
                        self.children.append(
                            SubtreePlain(l,
                                         new_dfg,
                                         self.master_dfg,
                                         self.initial_dfg,
                                         activities,
                                         self.counts,
                                         self.rec_depth + 1,
                                         noise_threshold=self.noise_threshold,
                                         start_activities=start_activities,
                                         end_activities=end_activities,
                                         initial_start_activities=self.
                                         initial_start_activities,
                                         initial_end_activities=self.
                                         initial_end_activities,
                                         parameters=parameters))
                else:
                    sequence_cut = cut_detection.detect_sequential_cut(
                        self, self.dfg, strongly_connected_components)
                    if sequence_cut[0]:
                        logging.debug("sequence_cut")
                        new_logs = split.split_sequence(
                            sequence_cut[1], self.log, activity_key)
                        for i in range(len(new_logs)):
                            new_logs[
                                i] = filtering_utils.keep_one_trace_per_variant(
                                    new_logs[i], parameters=parameters)
                        self.detected_cut = "sequential"
                        for l in new_logs:
                            new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                l, parameters=parameters).items() if v > 0]
                            activities = attributes_filter.get_attribute_values(
                                l, activity_key)
                            start_activities = list(
                                start_activities_filter.get_start_activities(
                                    l, parameters=parameters).keys())
                            end_activities = list(
                                end_activities_filter.get_end_activities(
                                    l, parameters=parameters).keys())
                            self.children.append(
                                SubtreePlain(
                                    l,
                                    new_dfg,
                                    self.master_dfg,
                                    self.initial_dfg,
                                    activities,
                                    self.counts,
                                    self.rec_depth + 1,
                                    noise_threshold=self.noise_threshold,
                                    start_activities=start_activities,
                                    end_activities=end_activities,
                                    initial_start_activities=self.
                                    initial_start_activities,
                                    initial_end_activities=self.
                                    initial_end_activities,
                                    parameters=parameters))
                    else:
                        parallel_cut = self.detect_concurrent()
                        if parallel_cut[0]:
                            logging.debug("parallel_cut")
                            new_logs = split.split_parallel(
                                parallel_cut[1], self.log, activity_key)
                            for i in range(len(new_logs)):
                                new_logs[
                                    i] = filtering_utils.keep_one_trace_per_variant(
                                        new_logs[i], parameters=parameters)
                            self.detected_cut = "parallel"
                            for l in new_logs:
                                new_dfg = [(k, v) for k, v in dfg_inst.apply(
                                    l, parameters=parameters).items() if v > 0]
                                activities = attributes_filter.get_attribute_values(
                                    l, activity_key)
                                start_activities = list(
                                    start_activities_filter.
                                    get_start_activities(
                                        l, parameters=parameters).keys())
                                end_activities = list(
                                    end_activities_filter.get_end_activities(
                                        l, parameters=parameters).keys())
                                self.children.append(
                                    SubtreePlain(
                                        l,
                                        new_dfg,
                                        self.master_dfg,
                                        self.initial_dfg,
                                        activities,
                                        self.counts,
                                        self.rec_depth + 1,
                                        noise_threshold=self.noise_threshold,
                                        start_activities=start_activities,
                                        end_activities=end_activities,
                                        initial_start_activities=self.
                                        initial_start_activities,
                                        initial_end_activities=self.
                                        initial_end_activities,
                                        parameters=parameters))
                        else:
                            loop_cut = self.detect_loop()
                            if loop_cut[0]:
                                logging.debug("loop_cut")
                                new_logs = split.split_loop(
                                    loop_cut[1], self.log, activity_key)
                                for i in range(len(new_logs)):
                                    new_logs[
                                        i] = filtering_utils.keep_one_trace_per_variant(
                                            new_logs[i], parameters=parameters)
                                self.detected_cut = "loopCut"
                                for l in new_logs:
                                    new_dfg = [
                                        (k, v) for k, v in dfg_inst.apply(
                                            l, parameters=parameters).items()
                                        if v > 0
                                    ]
                                    activities = attributes_filter.get_attribute_values(
                                        l, activity_key)
                                    start_activities = list(
                                        start_activities_filter.
                                        get_start_activities(
                                            l, parameters=parameters).keys())
                                    end_activities = list(
                                        end_activities_filter.
                                        get_end_activities(
                                            l, parameters=parameters).keys())
                                    self.children.append(
                                        SubtreePlain(
                                            l,
                                            new_dfg,
                                            self.master_dfg,
                                            self.initial_dfg,
                                            activities,
                                            self.counts,
                                            self.rec_depth + 1,
                                            noise_threshold=self.
                                            noise_threshold,
                                            start_activities=start_activities,
                                            end_activities=end_activities,
                                            initial_start_activities=self.
                                            initial_start_activities,
                                            initial_end_activities=self.
                                            initial_end_activities,
                                            parameters=parameters))

                            # if the code gets to this point, there is no base_case and no cut found in the log
                            # therefore, we now apply fall through:
                            else:
                                self.apply_fall_through(parameters)
        else:
            msg = "networkx is not available. inductive miner cannot be used!"
            logging.error(msg)
            raise Exception(msg)
Exemple #3
0
 def apply_cut_im_plain(self, type_of_cut, cut, activity_key):
     # dfg_viz = dfg_factory.apply(self.log)
     # gviz = dfg_vis_factory.apply(dfg_viz, log=self.log, variant="frequency", parameters={"format": "PDF"})
     # dfg_vis_factory.view(gviz)
     if type_of_cut == 'concurrent':
         self.detected_cut = 'concurrent'
         new_logs = split.split_xor(cut[1], self.log, activity_key)
         for l in new_logs:
             new_dfg = [(k, v) for k, v in dfg_inst.apply(
                 l, parameters=self.parameters).items() if v > 0]
             activities = attributes_filter.get_attribute_values(
                 l, activity_key)
             start_activities = list(
                 start_activities_filter.get_start_activities(
                     l, parameters=self.parameters).keys())
             end_activities = list(
                 end_activities_filter.get_end_activities(
                     l, parameters=self.parameters).keys())
             self.children.append(
                 SubtreeInfrequent(
                     l,
                     new_dfg,
                     self.master_dfg,
                     self.initial_dfg,
                     activities,
                     self.counts,
                     self.rec_depth + 1,
                     self.f,
                     noise_threshold=self.noise_threshold,
                     start_activities=start_activities,
                     end_activities=end_activities,
                     initial_start_activities=self.initial_start_activities,
                     initial_end_activities=self.initial_end_activities,
                     parameters=self.parameters))
     elif type_of_cut == 'sequential':
         new_logs = split.split_sequence(cut[1], self.log, activity_key)
         self.detected_cut = "sequential"
         for l in new_logs:
             new_dfg = [(k, v) for k, v in dfg_inst.apply(
                 l, parameters=self.parameters).items() if v > 0]
             activities = attributes_filter.get_attribute_values(
                 l, activity_key)
             start_activities = list(
                 start_activities_filter.get_start_activities(
                     l, parameters=self.parameters).keys())
             end_activities = list(
                 end_activities_filter.get_end_activities(
                     l, parameters=self.parameters).keys())
             self.children.append(
                 SubtreeInfrequent(
                     l,
                     new_dfg,
                     self.master_dfg,
                     self.initial_dfg,
                     activities,
                     self.counts,
                     self.rec_depth + 1,
                     self.f,
                     noise_threshold=self.noise_threshold,
                     start_activities=start_activities,
                     end_activities=end_activities,
                     initial_start_activities=self.initial_start_activities,
                     initial_end_activities=self.initial_end_activities,
                     parameters=self.parameters))
     elif type_of_cut == 'parallel':
         new_logs = split.split_parallel(cut[1], self.log, activity_key)
         self.detected_cut = "parallel"
         for l in new_logs:
             new_dfg = [(k, v) for k, v in dfg_inst.apply(
                 l, parameters=self.parameters).items() if v > 0]
             activities = attributes_filter.get_attribute_values(
                 l, activity_key)
             start_activities = list(
                 start_activities_filter.get_start_activities(
                     l, parameters=self.parameters).keys())
             end_activities = list(
                 end_activities_filter.get_end_activities(
                     l, parameters=self.parameters).keys())
             self.children.append(
                 SubtreeInfrequent(
                     l,
                     new_dfg,
                     self.master_dfg,
                     self.initial_dfg,
                     activities,
                     self.counts,
                     self.rec_depth + 1,
                     self.f,
                     noise_threshold=self.noise_threshold,
                     start_activities=start_activities,
                     end_activities=end_activities,
                     initial_start_activities=self.initial_start_activities,
                     initial_end_activities=self.initial_end_activities,
                     parameters=self.parameters))
     elif type_of_cut == 'loopCut':
         new_logs = split.split_loop(cut[1], self.log, activity_key)
         self.detected_cut = "loopCut"
         for l in new_logs:
             new_dfg = [(k, v) for k, v in dfg_inst.apply(
                 l, parameters=self.parameters).items() if v > 0]
             activities = attributes_filter.get_attribute_values(
                 l, activity_key)
             start_activities = list(
                 start_activities_filter.get_start_activities(
                     l, parameters=self.parameters).keys())
             end_activities = list(
                 end_activities_filter.get_end_activities(
                     l, parameters=self.parameters).keys())
             self.children.append(
                 SubtreeInfrequent(
                     l,
                     new_dfg,
                     self.master_dfg,
                     self.initial_dfg,
                     activities,
                     self.counts,
                     self.rec_depth + 1,
                     self.f,
                     noise_threshold=self.noise_threshold,
                     start_activities=start_activities,
                     end_activities=end_activities,
                     initial_start_activities=self.initial_start_activities,
                     initial_end_activities=self.initial_end_activities,
                     parameters=self.parameters))