Ejemplo n.º 1
0
    def apply_fall_through(self, parameters=None):
        """
        Try the fall-throughs in a fixed order and apply the first that fires.

        The order is: empty trace, activity once per trace, activity
        concurrent, strict tau loop, tau loop.  The fall-through that fires
        is recorded in ``self.detected_cut`` (``'empty_trace'``,
        ``'parallel'`` for both activity-based fall-throughs,
        ``'strict_tau_loop'`` or ``'tau_loop'``) and the log(s) it returned
        are appended to ``self.children`` as ``SubtreePlain`` nodes.  When
        none fires, or all are disabled, ``self.detected_cut`` is set to
        ``'flower'`` and no child is added.

        Parameters
        ----------
        parameters
            Optional dictionary.  ``Parameters.ACTIVITY_KEY`` selects the
            event attribute holding the activity name.  One boolean flag per
            fall-through (``EMPTY_TRACE_KEY``, ``ONCE_PER_TRACE_KEY``,
            ``CONCURRENT_KEY``, ``STRICT_TAU_LOOP_KEY``, ``TAU_LOOP_KEY``)
            may switch it off; a missing flag means "enabled".

        Returns
        -------
        None
            The result is stored on ``self`` (``detected_cut``,
            ``children``).
        """
        if parameters is None:
            parameters = {}
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)

        # every fall-through is enabled unless its flag is present and falsy
        use_empty_trace = parameters.get(Parameters.EMPTY_TRACE_KEY, True)
        use_act_once_per_trace = parameters.get(
            Parameters.ONCE_PER_TRACE_KEY, True)
        use_act_concurrent = parameters.get(Parameters.CONCURRENT_KEY, True)
        use_strict_tau_loop = parameters.get(
            Parameters.STRICT_TAU_LOOP_KEY, True)
        use_tau_loop = parameters.get(Parameters.TAU_LOOP_KEY, True)

        def filtered_dfg(log):
            # directly-follows pairs of ``log`` with a strictly positive count
            return [(k, v) for k, v in dfg_inst.apply(
                log, parameters=parameters).items() if v > 0]

        def add_child(log, dfg, activities, with_boundaries=True):
            # append one SubtreePlain child built on ``log``
            # NOTE(review): start/end activities are computed with
            # self.parameters while everything else uses the local
            # ``parameters`` - confirm this asymmetry is intended.
            boundaries = {}
            if with_boundaries:
                boundaries['start_activities'] = list(
                    start_activities_filter.get_start_activities(
                        log, parameters=self.parameters).keys())
                boundaries['end_activities'] = list(
                    end_activities_filter.get_end_activities(
                        log, parameters=self.parameters).keys())
            self.children.append(
                SubtreePlain(
                    log,
                    dfg,
                    self.master_dfg,
                    self.initial_dfg,
                    activities,
                    self.counts,
                    self.rec_depth + 1,
                    noise_threshold=self.noise_threshold,
                    initial_start_activities=self.initial_start_activities,
                    initial_end_activities=self.initial_end_activities,
                    parameters=parameters,
                    **boundaries))

        def recurse_on(log):
            # single-child case: continue the recursion on ``log``
            dfg = filtered_dfg(log)
            activities = attributes_filter.get_attribute_values(
                log, activity_key)
            add_child(log, dfg, activities)

        def recurse_parallel(remaining_log, leaf_log):
            # two-child case: the split-off log goes first (without explicit
            # start/end activities), then the remaining log
            remaining_dfg = filtered_dfg(remaining_log)
            remaining_activities = attributes_filter.get_attribute_values(
                remaining_log, activity_key)
            leaf_dfg = filtered_dfg(leaf_log)
            leaf_activities = attributes_filter.get_attribute_values(
                leaf_log, activity_key)
            add_child(leaf_log, leaf_dfg, leaf_activities,
                      with_boundaries=False)
            add_child(remaining_log, remaining_dfg, remaining_activities)

        # 1) empty trace
        if use_empty_trace:
            empty_trace, new_log = fall_through.empty_trace(self.log)
        else:
            empty_trace = False
        if empty_trace:
            logging.debug("empty_trace")
            self.detected_cut = 'empty_trace'
            recurse_on(new_log)
            return

        # 2) activity once per trace
        if use_act_once_per_trace:
            activity_once, new_log, small_log = fall_through.act_once_per_trace(
                self.log, self.activities, activity_key)
            # the variant filter runs before the result flag is inspected
            small_log = filtering_utils.keep_one_trace_per_variant(
                small_log, parameters=parameters)
            if activity_once:
                self.detected_cut = 'parallel'
                recurse_parallel(new_log, small_log)
                return

        # 3) activity concurrent
        if use_act_concurrent:
            activity_concurrent, new_log, small_log, _ = fall_through.activity_concurrent(
                self,
                self.log,
                self.activities,
                activity_key,
                parameters=parameters)
            small_log = filtering_utils.keep_one_trace_per_variant(
                small_log, parameters=parameters)
            if activity_concurrent:
                self.detected_cut = 'parallel'
                recurse_parallel(new_log, small_log)
                return

        # 4) strict tau loop
        if use_strict_tau_loop:
            strict_tau_loop, new_log = fall_through.strict_tau_loop(
                self.log, self.start_activities,
                self.end_activities, activity_key)
            new_log = filtering_utils.keep_one_trace_per_variant(
                new_log, parameters=parameters)
            if strict_tau_loop:
                self.detected_cut = 'strict_tau_loop'
                recurse_on(new_log)
                return

        # 5) tau loop
        if use_tau_loop:
            tau_loop, new_log = fall_through.tau_loop(
                self.log, self.start_activities, activity_key)
            new_log = filtering_utils.keep_one_trace_per_variant(
                new_log, parameters=parameters)
            if tau_loop:
                self.detected_cut = 'tau_loop'
                recurse_on(new_log)
                return

        # nothing applied: mark the node as a flower model, no children
        logging.debug("flower model")
        self.detected_cut = 'flower'
Ejemplo n.º 2
0
    def apply_fall_through_infrequent(self, parameters=None):
        """
        Apply the first matching fall-through for the infrequent variant.

        The fall-throughs are tried in this order: empty trace, activity
        once per trace, activity concurrent, strict tau loop, tau loop.
        The one that fires is recorded in ``self.detected_cut`` and the
        log(s) it returned are appended to ``self.children`` as
        ``SubtreeInfrequent`` nodes.  If none fires, ``self.detected_cut``
        becomes ``'flower'`` and no child is added.

        When the empty-trace fall-through is enabled, ``self.log`` is
        replaced by the log returned from ``empty_trace_filtering``.  If
        empty traces are present but ``enough_traces`` is false, no node is
        added and ``self.detect_cut_if`` is called instead.

        Parameters
        ----------
        parameters
            Optional dictionary with one boolean flag per fall-through
            (``EMPTY_TRACE_KEY``, ``ONCE_PER_TRACE_KEY``, ``CONCURRENT_KEY``,
            ``STRICT_TAU_LOOP_KEY``, ``TAU_LOOP_KEY``); a missing flag means
            "enabled".  The activity key is read from ``self.parameters``,
            not from this argument.

        Returns
        -------
        None
        """
        if parameters is None:
            parameters = {}
        # the activity key is looked up in self.parameters
        activity_key = exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, self.parameters,
            pmutil.xes_constants.DEFAULT_NAME_KEY)

        # a fall-through stays on unless its flag is present and falsy
        empty_trace_on = parameters.get(Parameters.EMPTY_TRACE_KEY, True)
        once_per_trace_on = parameters.get(Parameters.ONCE_PER_TRACE_KEY, True)
        concurrent_on = parameters.get(Parameters.CONCURRENT_KEY, True)
        strict_tau_loop_on = parameters.get(
            Parameters.STRICT_TAU_LOOP_KEY, True)
        tau_loop_on = parameters.get(Parameters.TAU_LOOP_KEY, True)

        def positive_edges(log, dfg_parameters):
            # directly-follows pairs whose count is strictly positive
            counted = dfg_inst.apply(log, parameters=dfg_parameters)
            return [(pair, freq) for pair, freq in counted.items() if freq > 0]

        def activity_values(log):
            return attributes_filter.get_attribute_values(log, activity_key)

        def boundary_kwargs(log):
            # start and end activities of ``log`` as constructor keywords
            starts = list(
                start_activities_filter.get_start_activities(
                    log, parameters=parameters).keys())
            ends = list(
                end_activities_filter.get_end_activities(
                    log, parameters=parameters).keys())
            return {'start_activities': starts, 'end_activities': ends}

        def grow(log, dfg, acts, **boundaries):
            # append one SubtreeInfrequent child built on ``log``
            self.children.append(
                SubtreeInfrequent(
                    log,
                    dfg,
                    self.master_dfg,
                    self.initial_dfg,
                    acts,
                    self.counts,
                    self.rec_depth + 1,
                    self.f,
                    noise_threshold=self.noise_threshold,
                    initial_start_activities=self.initial_start_activities,
                    initial_end_activities=self.initial_end_activities,
                    parameters=parameters,
                    **boundaries))

        def descend(log, dfg_parameters):
            # single-child case
            dfg = positive_edges(log, dfg_parameters)
            acts = activity_values(log)
            grow(log, dfg, acts, **boundary_kwargs(log))

        def descend_parallel(rest_log, leaf_log):
            # two-child case: everything is computed up front, then the
            # split-off log is appended first and the remaining log second
            rest_dfg = positive_edges(rest_log, parameters)
            rest_acts = activity_values(rest_log)
            leaf_dfg = positive_edges(leaf_log, parameters)
            leaf_acts = activity_values(leaf_log)
            rest_bounds = boundary_kwargs(rest_log)
            grow(leaf_log, leaf_dfg, leaf_acts)
            grow(rest_log, rest_dfg, rest_acts, **rest_bounds)

        # --- empty trace -------------------------------------------------
        if empty_trace_on:
            empty_traces_present, enough_traces, new_log = fall_through_infrequent.empty_trace_filtering(
                self.log, self.f)
            self.log = new_log
        else:
            empty_traces_present = False
            enough_traces = False

        if empty_traces_present and enough_traces:
            logging.debug("empty_trace_if")
            self.detected_cut = 'empty_trace'
            # this branch builds the dfg with self.parameters
            descend(new_log, self.parameters)
            return
        if empty_traces_present:
            # not enough empty traces: add no node, just search for a cut
            # again on the log that replaced self.log above
            self.detect_cut_if(parameters=parameters)
            return

        # --- activity once per trace ---------------------------------------
        if once_per_trace_on:
            activity_once, new_log, small_log = fall_through.act_once_per_trace(
                self.log, self.activities, activity_key)
            if activity_once:
                self.detected_cut = 'parallel'
                descend_parallel(new_log, small_log)
                return

        # --- activity concurrent -------------------------------------------
        if concurrent_on:
            activity_concurrent, new_log, small_log, _ = fall_through.activity_concurrent(
                self,
                self.log,
                self.activities,
                activity_key,
                parameters=parameters)
            if activity_concurrent:
                self.detected_cut = 'parallel'
                descend_parallel(new_log, small_log)
                return

        # --- strict tau loop -----------------------------------------------
        if strict_tau_loop_on:
            strict_tau_loop, new_log = fall_through.strict_tau_loop(
                self.log, self.start_activities,
                self.end_activities, activity_key)
            if strict_tau_loop:
                self.detected_cut = 'strict_tau_loop'
                descend(new_log, parameters)
                return

        # --- tau loop ------------------------------------------------------
        if tau_loop_on:
            tau_loop, new_log = fall_through.tau_loop(
                self.log, self.start_activities, activity_key)
            if tau_loop:
                self.detected_cut = 'tau_loop'
                descend(new_log, parameters)
                return

        # --- nothing applied: flower model, no children --------------------
        logging.debug("flower_if")
        self.detected_cut = 'flower'