Esempio n. 1
0
def apply_tree(
    event_log: Union[pd.DataFrame, EventLog, EventStream],
    parameters: Optional[Dict[Union[Parameters, str],
                              Any]] = None) -> ProcessTree:
    """
    Discovers a process tree from a log object

    Parameters
    ----------
    event_log
        Log object; it is converted to an event log before the discovery
    parameters
        Possible parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute to use as activity
            (default xes_constants.DEFAULT_NAME_KEY)
            Parameters.NOISE_THRESHOLD -> noise threshold (default 0.0)
            Parameters.USE_MSD_PARALLEL_CUT -> flag forwarded to the miner
            (default True)

    Returns
    ----------
    tree
        Process tree, after fixing the parent pointers, folding and sorting
    """
    params = {} if parameters is None else parameters

    log = log_converter.apply(event_log,
                              variant=log_converter.Variants.TO_EVENT_LOG,
                              parameters=params)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY.value,
                                              params,
                                              xes_constants.DEFAULT_NAME_KEY)
    noise_threshold = exec_utils.get_param_value(Parameters.NOISE_THRESHOLD,
                                                 params, 0.0)

    # without noise filtering a single trace per variant is kept
    # (more performant)
    if noise_threshold == 0.0:
        log = filtering_utils.keep_one_trace_per_variant(log,
                                                         parameters=params)

    dfg = discover_dfg.apply(log, parameters=params)
    use_msd_parallel_cut = exec_utils.get_param_value(
        Parameters.USE_MSD_PARALLEL_CUT, params, True)

    discovered = __inductive_miner(log, dfg, noise_threshold, None,
                                   activity_key, use_msd_parallel_cut)

    # post-processing: consistent parent pointers, folded structure,
    # sorted children
    tree_consistency.fix_parent_pointers(discovered)
    folded = generic.fold(discovered)
    generic.tree_sort(folded)

    return folded
Esempio n. 2
0
def apply(
    tree: ProcessTree,
    parameters: Optional[Dict[Union[str, Parameters], Any]] = None
) -> graphviz.Graph:
    """
    Obtain a Process Tree representation through GraphViz

    Parameters
    -----------
    tree
        Process tree
    parameters
        Possible parameters of the algorithm, including:
            Parameters.BGCOLOR -> background color of the graph
            (default "transparent")
            Parameters.FORMAT -> format of the output image (default "png")
            Parameters.ENABLE_DEEPCOPY -> if True, the tree is deep-copied
            and the copy is sorted before drawing (default False)

    Returns
    -----------
    gviz
        GraphViz object
    """
    if parameters is None:
        parameters = {}

    # shallow copy: the root node is stored in the dictionary handed to the
    # drawing routine, and the caller's dictionary must stay untouched
    parameters = copy(parameters)
    parameters[ROOT_NODE_PARAMETER] = tree

    # only a unique path is needed here: close the handle right away instead
    # of leaving an open descriptor (an open handle would also keep the path
    # locked on Windows)
    with tempfile.NamedTemporaryFile(suffix='.gv') as gv_file:
        gv_path = gv_file.name

    bgcolor = exec_utils.get_param_value(Parameters.BGCOLOR, parameters,
                                         "transparent")

    viz = Graph("pt",
                filename=gv_path,
                engine='dot',
                graph_attr={'bgcolor': bgcolor})
    viz.attr('node', shape='ellipse', fixedsize='false')

    image_format = exec_utils.get_param_value(Parameters.FORMAT, parameters,
                                              "png")

    enable_deepcopy = exec_utils.get_param_value(Parameters.ENABLE_DEEPCOPY,
                                                 parameters, False)

    if enable_deepcopy:
        # since the process tree object needs to be sorted in the visualization, make a deepcopy of it before
        # proceeding
        tree = deepcopy(tree)
        generic.tree_sort(tree)

    repr_tree_2(tree, viz, parameters)

    viz.attr(overlap='false')
    viz.attr(splines='false')
    viz.format = image_format

    return viz
Esempio n. 3
0
def apply(tree, parameters=None):
    """
    Obtain a Process Tree representation through GraphViz

    Parameters
    -----------
    tree
        Process tree
    parameters
        Possible parameters of the algorithm, including:
            Parameters.FORMAT -> format of the output image (default "png")
            Parameters.COLOR_MAP -> color map forwarded to the drawing
            routine (default empty dictionary)
            Parameters.ENABLE_DEEPCOPY -> if True, the tree is deep-copied
            and the copy is sorted before drawing (default True)

    Returns
    -----------
    gviz
        GraphViz object
    """
    if parameters is None:
        parameters = {}

    # only a unique path is needed here: close the handle right away instead
    # of leaving an open descriptor (an open handle would also keep the path
    # locked on Windows)
    with tempfile.NamedTemporaryFile(suffix='.gv') as gv_file:
        gv_path = gv_file.name

    viz = Graph("pt",
                filename=gv_path,
                engine='dot',
                graph_attr={'bgcolor': 'transparent'})
    viz.attr('node', shape='ellipse', fixedsize='false')

    image_format = exec_utils.get_param_value(Parameters.FORMAT, parameters,
                                              "png")
    color_map = exec_utils.get_param_value(Parameters.COLOR_MAP, parameters,
                                           {})

    enable_deepcopy = exec_utils.get_param_value(Parameters.ENABLE_DEEPCOPY,
                                                 parameters, True)

    if enable_deepcopy:
        # since the process tree object needs to be sorted in the visualization, make a deepcopy of it before
        # proceeding
        tree = deepcopy(tree)
        util.tree_sort(tree)

    repr_tree(tree, viz, color_map, parameters)

    viz.attr(overlap='false')
    viz.attr(splines='false')
    viz.format = image_format

    return viz
Esempio n. 4
0
def apply_tree_dfg(dfg: Dict[Tuple[str, str], int],
                   start_activities: Dict[str, int],
                   end_activities: Dict[str, int],
                   activities: Dict[str, int],
                   parameters=None):
    """
    Discovers a process tree from a directly-follows graph

    Parameters
    ----------
    dfg
        Directly-follows graph
    start_activities
        Start activities
    end_activities
        End activities
    activities
        Activities
    parameters
        Possible parameters of the algorithm, including:
            Parameters.NOISE_THRESHOLD -> noise threshold (default 0.0)

    Returns
    ----------
    tree
        Process tree, after fixing the parent pointers, folding and sorting
    """
    params = {} if parameters is None else parameters

    # bundle the four inputs in the object consumed by the DFG-based miner
    dfg_input = DfgSaEaActCount(dfg, start_activities, end_activities,
                                activities)
    noise_threshold = exec_utils.get_param_value(Parameters.NOISE_THRESHOLD,
                                                 params, 0.0)

    discovered = dfg_im.__imd(dfg_input, noise_threshold, None)

    # post-processing: consistent parent pointers, folded structure,
    # sorted children
    tree_consistency.fix_parent_pointers(discovered)
    folded = generic.fold(discovered)
    generic.tree_sort(folded)

    return folded
Esempio n. 5
0
def apply_tree(event_log: Union[pd.DataFrame, EventLog, EventStream],
               parameters: Optional[Dict[str, Any]] = None) -> ProcessTree:
    """
    Discovers a process tree from a log object

    Parameters
    ----------
    event_log
        Log object; it is converted through the log converter before the
        discovery
    parameters
        Possible parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute to use as activity
            (default xes_constants.DEFAULT_NAME_KEY)
            Parameters.DFG_ONLY -> if True, no log is handed to the miner,
            only the directly-follows graph (default False)
            Parameters.NOISE_THRESHOLD -> noise threshold (default 0.0)
            Parameters.USE_MSD_PARALLEL_CUT -> flag forwarded to the miner
            (default True)

    Returns
    ----------
    tree
        Process tree, after fixing the parent pointers, folding and sorting

    Raises
    ----------
    ValueError
        If the converted object is not an EventLog
    """
    if parameters is None:
        parameters = {}
    event_log = log_converter.apply(event_log, parameters=parameters)
    if type(event_log) is not EventLog:
        raise ValueError(
            'input argument log should be of type pandas.DataFrame, Event Log or Event Stream'
        )
    act_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY.value,
                                         parameters,
                                         xes_constants.DEFAULT_NAME_KEY)

    dfg_only = exec_utils.get_param_value(Parameters.DFG_ONLY, parameters,
                                          False)

    threshold = exec_utils.get_param_value(Parameters.NOISE_THRESHOLD,
                                           parameters, 0.0)

    if threshold == 0.0:
        # keep one trace per variant; more performant
        event_log = filtering_utils.keep_one_trace_per_variant(
            event_log, parameters=parameters)

    # the DFG is always computed from the real log: discarding the log
    # before this point (as done previously when DFG_ONLY was set) would
    # pass None to the variant filter and to the DFG discovery
    dfg = discover_dfg.apply(
        event_log,
        parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: act_key})
    use_msd_parallel_cut = exec_utils.get_param_value(
        Parameters.USE_MSD_PARALLEL_CUT, parameters, True)

    # with DFG_ONLY the log is withheld from the miner itself
    # NOTE(review): assumes inductive_miner accepts None as log - confirm
    miner_log = None if dfg_only else event_log

    tree = inductive_miner(miner_log, dfg, threshold, None, act_key,
                           use_msd_parallel_cut)

    tree_consistency.fix_parent_pointers(tree)
    tree = generic.fold(tree)
    generic.tree_sort(tree)

    return tree
Esempio n. 6
0
def apply(net, im, fm, parameters=None):
    """
    Converts a WF-net into a process tree

    Parameters
    -------------
    net
        Petri net
    im
        Initial marking (not read by this function)
    fm
        Final marking (not read by this function)
    parameters
        Possible parameters of the algorithm, including:
            Parameters.DEBUG -> if True, the grouped net is shown when the
            conversion fails (default False)
            Parameters.FOLD -> if True, the resulting tree is folded
            (default True)

    Returns
    -------------
    tree
        Process tree

    Raises
    -------------
    ValueError
        If the grouped net does not consist of exactly one transition
    """
    if parameters is None:
        parameters = {}

    debug = exec_utils.get_param_value(Parameters.DEBUG, parameters, False)
    fold = exec_utils.get_param_value(Parameters.FOLD, parameters, True)

    grouped_net = group_blocks_in_net(net, parameters=parameters)

    # failure path first: the conversion only succeeds when a single
    # transition is left in the grouped net
    if len(grouped_net.transitions) != 1:
        if debug:
            from pm4py.visualization.petri_net import visualizer as pn_viz
            gviz = pn_viz.apply(grouped_net, parameters={"format": "svg"})
            pn_viz.view(gviz)
        raise ValueError('Parsing of WF-net Failed')

    # the label of the only transition is parsed as a process tree
    only_transition = next(iter(grouped_net.transitions))
    parsed_tree = pt_util.parse(only_transition.label)
    if fold:
        result = pt_util.fold(parsed_tree)
    else:
        result = parsed_tree
    tree_sort(result)
    return result
Esempio n. 7
0
# Folder whose entries are scanned for event logs
LOGS_FOLDER = "../compressed_input_data"

# For each log: discover a process tree, evaluate it against the log through
# footprints, then convert it to a Petri net and to a laid-out BPMN graph
for log_name in os.listdir(LOGS_FOLDER):
    # substring test: every entry whose name contains "xes" is processed
    if "xes" in log_name:
        # reserve a unique ".bpmn" path: the temporary file is closed right
        # away and only its name is kept
        # NOTE(review): the path is not used in the visible part of the script
        bpmn_output_path = tempfile.NamedTemporaryFile(suffix=".bpmn")
        bpmn_output_path.close()
        bpmn_output_path = bpmn_output_path.name
        log_path = os.path.join(LOGS_FOLDER, log_name)
        print("")
        print(log_path)
        log = pm4py.read_xes(log_path)
        # footprints of the log
        fp_log = pm4py.algo.discovery.footprints.log.variants.entire_event_log.apply(
            log)
        # process tree discovered from the log, sorted before further use
        tree = pm4py.discover_process_tree_inductive(log)
        generic.tree_sort(tree)
        # footprints of the tree
        fp_tree = pm4py.algo.discovery.footprints.tree.variants.bottomup.apply(
            tree)
        # footprints-based conformance, then fitness and precision
        fp_conf = pm4py.algo.conformance.footprints.variants.log_extensive.apply(
            fp_log, fp_tree)
        fitness0 = pm4py.algo.conformance.footprints.util.evaluation.fp_fitness(
            fp_log, fp_tree, fp_conf)
        precision0 = pm4py.algo.conformance.footprints.util.evaluation.fp_precision(
            fp_log, fp_tree)
        print("fitness 0 = ", fitness0)
        print("precision 0 = ", precision0)
        # process tree -> Petri net (with markings) -> BPMN graph -> layout
        net, im, fm = pm4py.objects.conversion.process_tree.variants.to_petri_net.apply(
            tree)
        bpmn_graph = pm4py.objects.conversion.wf_net.variants.to_bpmn.apply(
            net, im, fm)
        bpmn_graph = layouter.apply(bpmn_graph)
Esempio n. 8
0
def apply_tree_dfg(dfg,
                   parameters=None,
                   activities=None,
                   contains_empty_traces=False,
                   start_activities=None,
                   end_activities=None):
    """
    Discovers a process tree from a directly-follows graph (IMDF algorithm)

    Parameters
    ----------
    dfg
        Directly-follows graph; a dict or Counter is turned into a list of
        (key, value) pairs, any other object is used as it is
    parameters
        Parameters of the algorithm, including:
            Parameters.NOISE_THRESHOLD -> noise threshold (default 0.0)
    activities
        Activities of the process (default None)
    contains_empty_traces
        Boolean value that is True if the event log from which the DFG has
        been extracted contains empty traces
    start_activities
        If provided, the start activities of the log
    end_activities
        If provided, the end activities of the log

    Returns
    ----------
    tree
        Process tree
    """
    params = {} if parameters is None else parameters

    noise_threshold = exec_utils.get_param_value(Parameters.NOISE_THRESHOLD,
                                                 params, 0.0)

    # plain dictionaries/counters are flattened to a list of pairs
    if type(dfg) in (Counter, dict):
        dfg = list(dfg.items())

    counts = Counts()
    dfg_subtree = SubtreeDFGBased(dfg,
                                  dfg,
                                  dfg,
                                  activities,
                                  counts,
                                  0,
                                  noise_threshold=noise_threshold,
                                  initial_start_activities=start_activities,
                                  initial_end_activities=end_activities)

    result = get_tree_repr_dfg_based.get_repr(
        dfg_subtree, 0, contains_empty_traces=contains_empty_traces)
    # parent pointers must be coherent with the children lists
    tree_consistency.fix_parent_pointers(result)
    # removes the XOR with a single child introduced for single-activity
    # flowers
    tree_consistency.fix_one_child_xor_flower(result)
    # simplifies the tree (useful when fallthroughs/filtering are applied)
    result = generic.fold(result)
    # sorting makes different executions of the algorithm consistent
    tree_sort(result)

    return result
Esempio n. 9
0
def apply_tree(log, parameters=None):
    """
    Apply the IM_FF algorithm to a log obtaining a process tree

    Parameters
    ----------
    log
        Log; a pandas dataframe is handled through its variants, any other
        object is converted through the log converter
    parameters
        Parameters of the algorithm, including:
            Parameters.ACTIVITY_KEY -> attribute of the log to use as activity name
            (default concept:name)
            Parameters.NOISE_THRESHOLD -> noise threshold
            (default shared_constants.NOISE_THRESHOLD_IMF)

    Returns
    ----------
    process_tree
        Process tree
    """
    # local import: pkgutil.find_loader is deprecated, importlib.util.find_spec
    # is its replacement
    import importlib.util

    if parameters is None:
        parameters = {}

    if importlib.util.find_spec("pandas"):
        import pandas as pd
        from pm4py.statistics.variants.pandas import get as variants_get

        if type(log) is pd.DataFrame:
            # dataframes are mined starting from their variants
            variants = variants_get.get_variants_count(log,
                                                       parameters=parameters)
            return apply_tree_variants(variants, parameters=parameters)

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters,
        pmutil.xes_constants.DEFAULT_NAME_KEY)

    log = converter.apply(log, parameters=parameters)
    # keep only the activity attribute (since the others are not used)
    log = filtering_utils.keep_only_one_attribute_per_event(log, activity_key)

    noise_threshold = exec_utils.get_param_value(
        Parameters.NOISE_THRESHOLD, parameters,
        shared_constants.NOISE_THRESHOLD_IMF)

    # only the arcs with a positive count are kept
    dfg = [(k, v)
           for k, v in dfg_inst.apply(log, parameters=parameters).items()
           if v > 0]
    c = Counts()
    activities = attributes_get.get_attribute_values(log, activity_key)
    start_activities = list(
        start_activities_get.get_start_activities(
            log, parameters=parameters).keys())
    end_activities = list(
        end_activities_get.get_end_activities(log,
                                              parameters=parameters).keys())
    # single pass over the log; False when the log has no traces
    contains_empty_traces = any(len(trace) == 0 for trace in log)

    # set the threshold parameter based on f and the max value in the dfg
    # (0 when the dfg is empty)
    max_value = max((value for _, value in dfg), default=0)
    threshold = noise_threshold * max_value

    recursion_depth = 0
    sub = subtree.make_tree(log,
                            dfg,
                            dfg,
                            dfg,
                            activities,
                            c,
                            recursion_depth,
                            noise_threshold,
                            threshold,
                            start_activities,
                            end_activities,
                            start_activities,
                            end_activities,
                            parameters=parameters)

    process_tree = get_tree_repr_implain.get_repr(
        sub, 0, contains_empty_traces=contains_empty_traces)
    # Ensures consistency to the parent pointers in the process tree
    tree_consistency.fix_parent_pointers(process_tree)
    # Fixes a 1 child XOR that is added when single-activities flowers are found
    tree_consistency.fix_one_child_xor_flower(process_tree)
    # folds the process tree (to simplify it in case fallthroughs/filtering is applied)
    process_tree = generic.fold(process_tree)
    # sorts the process tree to ensure consistency in different executions of the algorithm
    tree_sort(process_tree)

    return process_tree
Esempio n. 10
0
def import_tree_from_xml_object(root, parameters=None):
    """
    Imports a process tree from the XML object

    Parameters
    ---------------
    root
        Root of the XML object
    parameters
        Possible parameters (not read by this function)

    Returns
    ---------------
    tree
        Process tree (sorted); when the declared root is a loop with three
        children whose third child is not a silent leaf, the returned tree
        is the sequence wrapping that loop and its third child

    Raises
    ---------------
    Exception
        If a node element has an unknown tag
    KeyError
        If an edge or the declared root refers to an unknown node identifier
    """
    if parameters is None:
        parameters = {}

    # XML tag -> operator of the process tree node (None for the leaves)
    tag_to_operator = {
        "and": Operator.PARALLEL,
        "sequence": Operator.SEQUENCE,
        "xor": Operator.XOR,
        "xorLoop": Operator.LOOP,
        "or": Operator.OR,
        "manualTask": None,
        "automaticTask": None,
    }

    nodes = {}
    # identifier of the root node, as declared by the (last) tree element
    root_id = None

    for tree_elem in root:
        root_id = tree_elem.get("root")
        for child in tree_elem:
            name = child.get("name")
            if name is not None:
                # node element
                tag = child.tag
                if tag not in tag_to_operator:
                    raise Exception("unknown tag: " + tag)
                # only manual tasks carry a label; automatic tasks are
                # imported as leaves without label
                label = name if tag == "manualTask" else None
                nodes[child.get("id")] = ProcessTree(
                    operator=tag_to_operator[tag], label=label)
            else:
                # edge element: connects two nodes already met
                source_node = nodes[child.get("sourceId")]
                target_node = nodes[child.get("targetId")]
                source_node.children.append(target_node)
                target_node.parent = source_node

    # make sure that .PTML files having loops with 3 children are imported
    # into the PM4Py process tree structure
    # we want loops to have two children
    replaced_root = None
    for node_id, node in nodes.items():
        if node.operator == Operator.LOOP and len(node.children) == 3:
            exit_child = node.children[2]
            if not (exit_child.operator is None and exit_child.label is None):
                # loop(a, b, c) becomes sequence(loop(a, b), c)
                old_parent = node.parent
                wrapper = ProcessTree(operator=Operator.SEQUENCE, label=None)
                node.parent = wrapper
                wrapper.children.append(node)
                exit_child.parent = wrapper
                wrapper.children.append(exit_child)
                if old_parent is not None:
                    wrapper.parent = old_parent
                    # the wrapper takes the exact position of the loop, so
                    # that the order of the children of the parent is kept;
                    # the loop is searched by identity, so that an equal
                    # sibling can never be replaced by mistake
                    position = next(
                        idx
                        for idx, sibling in enumerate(old_parent.children)
                        if sibling is node)
                    old_parent.children[position] = wrapper
                elif node_id == root_id:
                    # the loop was the root: the wrapper is the new root,
                    # otherwise the third child would be lost
                    replaced_root = wrapper
            del node.children[2]

    tree_root = replaced_root if replaced_root is not None else nodes[root_id]
    tree_sort(tree_root)
    return tree_root