Exemple #1
0
def apply(df, activity, parameters=None):
    """
    Summarises the directly-follows edges that end in the given activity

    Parameters
    -------------
    df
        Dataframe
    activity
        Activity whose incoming edges are collected
    parameters
        Possible parameters of the algorithm. The keys looked up are
        Parameters.CASE_ID_KEY (default CASE_CONCEPT_NAME),
        Parameters.ACTIVITY_KEY (default DEFAULT_NAME_KEY),
        Parameters.TIMESTAMP_KEY (default DEFAULT_TIMESTAMP_KEY) and
        Parameters.START_TIMESTAMP_KEY (default None)

    Returns
    -------------
    dictio
        Dictionary with two keys:
        'pre' -> list of [preceding activity, performance, frequency]
        entries, one per edge ending in the given activity;
        'pre_avg_perf' -> frequency-weighted mean of those performances
        (0.0 when the summed frequency is not positive)
    """
    parameters = {} if parameters is None else parameters

    # Column names forwarded to the DFG retrieval; each one falls back to its
    # default when the corresponding parameter is not supplied.
    graph_kwargs = {
        "case_id_glue": exec_utils.get_param_value(
            Parameters.CASE_ID_KEY, parameters, CASE_CONCEPT_NAME),
        "activity_key": exec_utils.get_param_value(
            Parameters.ACTIVITY_KEY, parameters, DEFAULT_NAME_KEY),
        "timestamp_key": exec_utils.get_param_value(
            Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY),
        "start_timestamp_key": exec_utils.get_param_value(
            Parameters.START_TIMESTAMP_KEY, parameters, None),
    }

    # measure="both" yields the frequency and the performance graph together.
    dfg_frequency, dfg_performance = pandas.get_dfg_graph(
        df, measure="both", **graph_kwargs)

    pre = []
    weighted_perf_total = 0.0
    frequency_total = 0.0

    for edge in dfg_performance:
        # Only edges whose target is the activity of interest are relevant.
        if edge[1] != activity:
            continue
        edge_perf = float(dfg_performance[edge])
        edge_freq = dfg_frequency[edge]
        pre.append([edge[0], edge_perf, int(edge_freq)])
        weighted_perf_total += edge_perf * float(edge_freq)
        frequency_total += float(edge_freq)

    # Avoid dividing by zero when no edge leads into the activity.
    pre_avg_perf = (weighted_perf_total / frequency_total
                    if frequency_total > 0 else 0.0)

    return {"pre": pre, "pre_avg_perf": pre_avg_perf}
Exemple #2
0
def discover_dfg(log: Union[EventLog, pd.DataFrame]) -> Tuple[dict, dict, dict]:
    """
    Computes the directly-follows graph of a log together with its
    start and end activities

    Dataframes are first validated with check_dataframe_columns and then
    handled by the pandas-specific implementations; any other log object
    goes through the DFG discovery algorithm and the log-based statistics.

    Parameters
    --------------
    log
        Event log (log object or dataframe)

    Returns
    --------------
    dfg
        DFG
    start_activities
        Start activities
    end_activities
        End activities
    """
    if not check_is_dataframe(log):
        from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
        from pm4py.statistics.start_activities.log import get as sa_module
        from pm4py.statistics.end_activities.log import get as ea_module
        dfg = dfg_discovery.apply(log)
    else:
        check_dataframe_columns(log)
        from pm4py.objects.dfg.retrieval.pandas import get_dfg_graph
        from pm4py.statistics.start_activities.pandas import get as sa_module
        from pm4py.statistics.end_activities.pandas import get as ea_module
        dfg = get_dfg_graph(log)

    # Both module flavours expose the same getter names, so the remaining
    # two results are obtained the same way regardless of the log type.
    return (dfg,
            sa_module.get_start_activities(log),
            ea_module.get_end_activities(log))
def apply(df, activity, parameters=None):
    """
    Summarises the directly-follows edges that start from the given activity

    Parameters
    -------------
    df
        Dataframe
    activity
        Activity whose outgoing edges are collected
    parameters
        Possible parameters of the algorithm. The keys looked up are
        PARAMETER_CONSTANT_CASEID_KEY (default CASE_CONCEPT_NAME),
        PARAMETER_CONSTANT_ACTIVITY_KEY (default DEFAULT_NAME_KEY) and
        PARAMETER_CONSTANT_TIMESTAMP_KEY (default DEFAULT_TIMESTAMP_KEY)

    Returns
    -------------
    dictio
        Dictionary with two keys:
        'post' -> list of [succeeding activity, performance, frequency]
        entries, one per edge starting from the given activity;
        'post_avg_perf' -> frequency-weighted mean of those performances
        (0.0 when the summed frequency is not positive)
    """
    parameters = {} if parameters is None else parameters

    # Column names forwarded to the DFG retrieval, with their defaults.
    graph_kwargs = {
        "case_id_glue": parameters.get(PARAMETER_CONSTANT_CASEID_KEY,
                                       CASE_CONCEPT_NAME),
        "activity_key": parameters.get(PARAMETER_CONSTANT_ACTIVITY_KEY,
                                       DEFAULT_NAME_KEY),
        "timestamp_key": parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                        DEFAULT_TIMESTAMP_KEY),
    }

    # measure="both" yields the frequency and the performance graph together.
    dfg_frequency, dfg_performance = pandas.get_dfg_graph(
        df, measure="both", **graph_kwargs)

    post = []
    weighted_perf_total = 0.0
    frequency_total = 0.0

    for edge in dfg_performance:
        # Only edges whose source is the activity of interest are relevant.
        if edge[0] != activity:
            continue
        edge_perf = float(dfg_performance[edge])
        edge_freq = dfg_frequency[edge]
        post.append([edge[1], edge_perf, int(edge_freq)])
        weighted_perf_total += edge_perf * float(edge_freq)
        frequency_total += float(edge_freq)

    # Avoid dividing by zero when no edge leaves the activity.
    post_avg_perf = (weighted_perf_total / frequency_total
                     if frequency_total > 0 else 0.0)

    return {"post": post, "post_avg_perf": post_avg_perf}
Exemple #4
0
def get_process_svg():
    """
    Builds a process model for the requested event log and returns it as
    SVG text

    The "parameters" query argument of the current request is processed by
    __process_parameters and used to prepare the event log. The "ext_type"
    parameter (default "document_flow_log") is mapped to a log type that
    selects the discovery technique:

    - log type 0: a model is discovered and rendered through the pm4pymdl
      gen_framework discovery and visualizer modules
    - log type 1 or 2: a directly-follows graph is discovered, filtered with
      filter_dfg_on_paths_percentage (threshold 0.2, keeping all activities)
      and rendered with the pm4py DFG visualizer

    Returns
    -------------
    ser
        Serialized visualization, decoded as UTF-8

    Raises
    -------------
    ValueError
        If the log type derived from "ext_type" is none of 0, 1 or 2
    """
    parameters = request.args.get("parameters")
    parameters = __process_parameters(parameters)

    log = __prepare_event_log(parameters)
    ext_type = parameters[
        "ext_type"] if "ext_type" in parameters else "document_flow_log"
    log_type = __get_log_type_from_ext_type(ext_type)

    if log_type == 0:
        log.type = "succint"
        from pm4pymdl.algo.mvp.gen_framework import algorithm as discovery
        from pm4pymdl.visualization.mvp.gen_framework import visualizer as vis_factory
        model = discovery.apply(log,
                                model_type_variant="model3",
                                node_freq_variant="type31",
                                edge_freq_variant="type11")
        gviz = vis_factory.apply(model, parameters={"format": "svg"})
    elif log_type in (1, 2):
        import pandas as pd
        # isinstance (rather than an exact type comparison) so that
        # DataFrame subclasses also take the dataframe-specific path.
        if isinstance(log, pd.DataFrame):
            from pm4py.objects.dfg.retrieval.pandas import get_dfg_graph
            dfg = get_dfg_graph(log)
            from pm4py.statistics.start_activities.pandas import get as pd_sa_get
            from pm4py.statistics.end_activities.pandas import get as pd_ea_get
            sa = pd_sa_get.get_start_activities(log)
            ea = pd_ea_get.get_end_activities(log)
        else:
            dfg, sa, ea = pm4py.discover_dfg(log)
        act_count = pm4py.get_attribute_values(log, "concept:name")
        dfg, sa, ea, act_count = dfg_filtering.filter_dfg_on_paths_percentage(
            dfg, sa, ea, act_count, 0.2, keep_all_activities=True)
        gviz = pm4py.visualization.dfg.visualizer.apply(
            dfg,
            activities_count=act_count,
            parameters={
                "format": "svg",
                "start_activities": sa,
                "end_activities": ea
            })
    else:
        # Without this branch gviz would be unbound below and the failure
        # would surface as an uninformative UnboundLocalError.
        raise ValueError(
            "unsupported log type %r for ext_type %r" % (log_type, ext_type))

    ser = pm4py.visualization.dfg.visualizer.serialize(gviz).decode("utf-8")

    return ser