Esempio n. 1
0
 def test_prefiltering_dataframe(self):
     """
     Smoke test: import the running-example CSV, pre-filter it as a
     dataframe and convert the result to an event stream and then to a log.

     The test makes no assertions; it passes as long as no step raises.
     """
     # Touching instance state keeps linters from flagging this test as a
     # potential static method (unittest needs it to stay an instance method).
     self.dummy_variable = "dummy_value"
     csv_path = os.path.join(INPUT_DATA_DIR, "running-example.csv")
     # Import without timestamp conversion; timestamps are converted only
     # after the filters below have been applied.
     df = csv_import_adapter.import_dataframe_from_path_wo_timeconversion(
         csv_path, sep=',')
     df = attributes_filter.filter_df_keeping_spno_activities(
         df, activity_key="concept:name")
     df = case_filter.filter_on_ncases(
         df, case_id_glue="case:concept:name")
     df = csv_import_adapter.convert_timestamp_columns_in_df(df)
     df = df.sort_values('time:timestamp')
     stream = log_conv_fact.apply(df, variant=log_conv_fact.TO_EVENT_STREAM)
     converted_log = log_conv_fact.apply(stream)
     del converted_log
Esempio n. 2
0
def execute_script():
    """
    Run a sequence of dataframe filters on the configured CSV log, producing
    a frequency and a performance SVG after each one and printing timings.

    Steps, in order:
      1. import the CSV, normalise case id / timestamp columns and sort;
      2. print the case descriptions;
      3. process schema on the unfiltered log;
      4. process schema after the case-performance filter;
      5. (optional) process schema after the attribute-values filter;
      6. print start / end activities;
      7. (optional) process schema after the start-activities filter;
      8. (optional) process schema after the end-activities filter;
      9. (optional) delete every image registered in GENERATED_IMAGES.

    All configuration comes from module-level names (inputLog, CASEID_GLUE,
    the ENABLE_* flags, ...). When DELETE_VARIABLES is set, intermediate
    dataframes are dropped as soon as they are no longer needed.
    """
    t_start = time.time()
    dataframe = csv_import_adapter.import_dataframe_from_path_wo_timeconversion(
        inputLog, sep=',')
    dataframe = csv_import_adapter.convert_caseid_column_to_str(
        dataframe, case_id_glue=CASEID_GLUE)
    dataframe = csv_import_adapter.convert_timestamp_columns_in_df(
        dataframe, timest_format=TIMEST_FORMAT, timest_columns=TIMEST_COLUMNS)
    dataframe = dataframe.sort_values([CASEID_GLUE, TIMEST_KEY])
    unfiltered_top_df = attributes_filter.filter_df_keeping_spno_activities(
        dataframe,
        activity_key=ACTIVITY_KEY,
        max_no_activities=MAX_NO_ACTIVITIES)
    t_imported = time.time()
    print("importing log time=", (t_imported - t_start))

    # --- case descriptions -------------------------------------------------
    cases_params = {
        constants.PARAMETER_CONSTANT_CASEID_KEY: CASEID_GLUE,
        constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: TIMEST_KEY,
        "sort_by_column": "caseDuration",
        "sort_ascending": False,
        "max_ret_cases": 1000
    }
    cases_desc = case_statistics.get_cases_description(
        dataframe, parameters=cases_params)

    print(cases_desc)
    t_cases_done = time.time()
    print("calculating and printing cases_desc = ",
          (t_cases_done - t_imported))

    # --- unfiltered process schema ----------------------------------------
    calculate_process_schema_from_df(unfiltered_top_df,
                                     "NOFILTERS_FREQUENCY.svg",
                                     "NOFILTERS_PERFORMANCE.svg")
    for svg_name in ("NOFILTERS_FREQUENCY.svg", "NOFILTERS_PERFORMANCE.svg"):
        GENERATED_IMAGES.append(svg_name)
    if DELETE_VARIABLES:
        del unfiltered_top_df
    t_nofilter_done = time.time()
    print(
        "saving initial Inductive Miner process schema along with frequency metrics=",
        (t_nofilter_done - t_cases_done))

    # --- case performance filter ------------------------------------------
    perf_df = case_filter.filter_on_case_performance(
        dataframe,
        case_id_glue=CASEID_GLUE,
        timestamp_key=TIMEST_KEY,
        min_case_performance=100000,
        max_case_performance=10000000)
    perf_top_df = attributes_filter.filter_df_keeping_spno_activities(
        perf_df,
        activity_key=ACTIVITY_KEY,
        max_no_activities=MAX_NO_ACTIVITIES)
    # Release the reference to the intermediate dataframe right away.
    perf_df = None
    if DELETE_VARIABLES:
        del perf_df
    calculate_process_schema_from_df(perf_top_df,
                                     "FILTER_CP_FREQUENCY.svg",
                                     "FILTER_CP_PERFORMANCE.svg")
    for svg_name in ("FILTER_CP_FREQUENCY.svg", "FILTER_CP_PERFORMANCE.svg"):
        GENERATED_IMAGES.append(svg_name)
    if DELETE_VARIABLES:
        del perf_top_df
    t_perf_done = time.time()
    print("filtering on case performance and generating process schema=",
          (t_perf_done - t_nofilter_done))

    # --- attribute values filter (optional) -------------------------------
    if ENABLE_ATTRIBUTE_FILTER:
        attribute_params = {
            constants.PARAMETER_CONSTANT_CASEID_KEY: CASEID_GLUE,
            constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: ATTRIBUTE_TO_FILTER,
            constants.PARAMETER_CONSTANT_ACTIVITY_KEY: ATTRIBUTE_TO_FILTER,
            "positive": True
        }
        attr_df = attributes_filter.apply(dataframe,
                                          ATTRIBUTE_VALUES_TO_FILTER,
                                          parameters=attribute_params)
        remaining_activities = attributes_filter.get_attribute_values(
            attr_df, ACTIVITY_KEY)
        print("all the activities in the log", remaining_activities)
        attr_top_df = attributes_filter.filter_df_keeping_spno_activities(
            attr_df,
            activity_key=ACTIVITY_KEY,
            max_no_activities=MAX_NO_ACTIVITIES)
        if DELETE_VARIABLES:
            del attr_df
        calculate_process_schema_from_df(attr_top_df,
                                         "FILTER_ATT_FREQUENCY.svg",
                                         "FILTER_ATT_PERFORMANCE.svg")
        for svg_name in ("FILTER_ATT_FREQUENCY.svg",
                         "FILTER_ATT_PERFORMANCE.svg"):
            GENERATED_IMAGES.append(svg_name)
        if DELETE_VARIABLES:
            del attr_top_df
        t_attr_done = time.time()
        print("filtering on attribute values and generating process schema=",
              (t_attr_done - t_perf_done))

    # --- start / end activities statistics --------------------------------
    t_stats_start = time.time()
    # Two separate dictionaries are kept on purpose, one per filter family.
    parameters_sa = {
        constants.PARAMETER_CONSTANT_CASEID_KEY: CASEID_GLUE,
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: ACTIVITY_KEY
    }
    parameters_ea = {
        constants.PARAMETER_CONSTANT_CASEID_KEY: CASEID_GLUE,
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: ACTIVITY_KEY
    }
    start_act = start_activities_filter.get_start_activities(
        dataframe, parameters=parameters_sa)
    print("start activities in the log = ", start_act)
    end_act = end_activities_filter.get_end_activities(
        dataframe, parameters=parameters_ea)
    print("end activities in the log = ", end_act)
    t_stats_done = time.time()
    print("finding start and end activities along with their count",
          (t_stats_done - t_stats_start))

    # --- start activities filter (optional) -------------------------------
    if ENABLE_STARTACT_FILTER:
        sa_df = start_activities_filter.apply(dataframe,
                                              STARTACT_TO_FILTER,
                                              parameters=parameters_sa)
        start_act = start_activities_filter.get_start_activities(
            sa_df, parameters=parameters_sa)
        print("start activities in the filtered log = ", start_act)
        sa_top_df = attributes_filter.filter_df_keeping_spno_activities(
            sa_df,
            activity_key=ACTIVITY_KEY,
            max_no_activities=MAX_NO_ACTIVITIES)
        if DELETE_VARIABLES:
            del sa_df
        calculate_process_schema_from_df(sa_top_df,
                                         "FILTER_SA_FREQUENCY.svg",
                                         "FILTER_SA_PERFORMANCE.svg")
        for svg_name in ("FILTER_SA_FREQUENCY.svg",
                         "FILTER_SA_PERFORMANCE.svg"):
            GENERATED_IMAGES.append(svg_name)
        if DELETE_VARIABLES:
            del sa_top_df
    t_sa_done = time.time()
    if ENABLE_STARTACT_FILTER:
        print("filtering start activities time=", (t_sa_done - t_stats_done))

    # --- end activities filter (optional) ---------------------------------
    if ENABLE_ENDACT_FILTER:
        ea_df = end_activities_filter.apply(dataframe,
                                            ENDACT_TO_FILTER,
                                            parameters=parameters_ea)
        end_act = end_activities_filter.get_end_activities(
            ea_df, parameters=parameters_ea)
        print("end activities in the filtered log = ", end_act)
        ea_top_df = attributes_filter.filter_df_keeping_spno_activities(
            ea_df,
            activity_key=ACTIVITY_KEY,
            max_no_activities=MAX_NO_ACTIVITIES)
        if DELETE_VARIABLES:
            del ea_df
        calculate_process_schema_from_df(ea_top_df,
                                         "FILTER_EA_FREQUENCY.svg",
                                         "FILTER_EA_PERFORMANCE.svg")
        for svg_name in ("FILTER_EA_FREQUENCY.svg",
                         "FILTER_EA_PERFORMANCE.svg"):
            GENERATED_IMAGES.append(svg_name)
        if DELETE_VARIABLES:
            del ea_top_df
    t_ea_done = time.time()
    if ENABLE_ENDACT_FILTER:
        print("filtering end activities time=", (t_ea_done - t_sa_done))

    # --- cleanup (optional) -----------------------------------------------
    if REMOVE_GENERATED_IMAGES:
        for image_path in GENERATED_IMAGES:
            os.remove(image_path)
Esempio n. 3
0
def apply(dataframe, parameters=None):
    """
    Gets the performance DFG

    Parameters
    ------------
    dataframe
        Dataframe
    parameters
        Parameters of the algorithm. Recognised keys read here:
        "decreasingFactor", and the pm4_constants activity / timestamp /
        case id keys. The dictionary passed by the caller is not modified.

    Returns
    ------------
    tuple
        A 14-element tuple, in this order:
        0.  base64 of the rendered visualization (get_base64_from_gviz)
        1.  Petri net obtained from the DFG, exported as a string
        2.  ".pnml" (extension of element 1)
        3.  "parquet"
        4.  list of activities of the filtered dataframe
        5.  list of start activities
        6.  list of end activities
        7.  base64 of str(gviz)
        8.  graph returned by get_graph.get_graph_from_dfg
        9.  "dfg"
        10. "perf"
        11. None (second model string, unused for this variant)
        12. "" (extension of element 11)
        13. the activity key that was used
    """
    # Work on a private copy: the flag written below is only meant for the
    # auto filter call in this function (presumably it is what makes that
    # call also hand back the end-activities dictionary) and must not leak
    # into the dictionary owned by the caller.
    parameters = {} if parameters is None else dict(parameters)

    decreasing_factor = parameters.get("decreasingFactor",
                                       constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    timestamp_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
        xes.DEFAULT_TIMESTAMP_KEY)
    case_id_glue = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_CASEID_KEY, CASE_CONCEPT_NAME)

    parameters[pm4_constants.RETURN_EA_COUNT_DICT_AUTOFILTER] = True
    dataframe = attributes_filter.filter_df_keeping_spno_activities(
        dataframe,
        activity_key=activity_key,
        max_no_activities=constants.MAX_NO_ACTIVITIES)
    dataframe, end_activities = auto_filter.apply_auto_filter(
        dataframe, parameters=parameters)
    end_activities = list(end_activities.keys())

    dfg, dfg_perf = df_statistics.get_dfg_graph(
        dataframe,
        activity_key=activity_key,
        timestamp_key=timestamp_key,
        case_id_glue=case_id_glue,
        sort_caseid_required=False,
        sort_timestamp_along_case_id=False,
        measure="both")
    activities_count = attributes_filter.get_attribute_values(
        dataframe, activity_key, parameters=parameters)
    activities = list(activities_count.keys())
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasing_factor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)
    # Keep performance values only for the edges that survived the cleaning.
    dfg_perf = {edge: value for edge, value in dfg_perf.items() if edge in dfg}
    start_activities = list(
        start_activities_filter.get_start_activities(
            dataframe, parameters=parameters).keys())
    gviz = dfg_vis_factory.apply(dfg_perf,
                                 activities_count=activities_count,
                                 variant="performance",
                                 parameters={
                                     "format": "svg",
                                     "start_activities": start_activities,
                                     "end_activities": end_activities
                                 })

    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    ret_graph = get_graph.get_graph_from_dfg(dfg, start_activities,
                                             end_activities)

    net, im, fm = dfg_conv_factory.apply(dfg,
                                         parameters={
                                             "start_activities":
                                             start_activities,
                                             "end_activities": end_activities
                                         })

    return (get_base64_from_gviz(gviz), export_petri_as_string(net, im, fm),
            ".pnml", "parquet", activities, start_activities, end_activities,
            gviz_base64, ret_graph, "dfg", "perf", None, "", activity_key)
Esempio n. 4
0
def apply(dataframe, parameters=None):
    """
    Gets the Petri net through Inductive Miner, decorated by performance metric

    Parameters
    ------------
    dataframe
        Dataframe
    parameters
        Parameters of the algorithm. Recognised keys read here:
        "decreasingFactor", and the pm4_constants activity / timestamp /
        case id keys. The dictionary passed by the caller is not modified.

    Returns
    ------------
    tuple
        A 14-element tuple, in this order:
        0.  base64 of the file rendered with format "svg"
        1.  Petri net exported as a string
        2.  ".pnml" (extension of element 1)
        3.  "parquet"
        4.  list of activities of the filtered dataframe
        5.  list of start activities
        6.  list of end activities
        7.  base64 of the file rendered with format "dot"
        8.  graph returned by get_graph.get_graph_from_petri
        9.  "indbpmn"
        10. "perf"
        11. BPMN diagram exported as a string
        12. ".bpmn" (extension of element 11)
        13. the activity key that was used
    """
    # Work on a private copy: the flag written below is only meant for the
    # auto filter call in this function (presumably it is what makes that
    # call also hand back the end-activities dictionary) and must not leak
    # into the dictionary owned by the caller.
    parameters = {} if parameters is None else dict(parameters)

    decreasing_factor = parameters.get("decreasingFactor",
                                       constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    timestamp_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
        xes.DEFAULT_TIMESTAMP_KEY)
    case_id_glue = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_CASEID_KEY, CASE_CONCEPT_NAME)

    parameters[pm4_constants.RETURN_EA_COUNT_DICT_AUTOFILTER] = True
    dataframe = attributes_filter.filter_df_keeping_spno_activities(
        dataframe,
        activity_key=activity_key,
        max_no_activities=constants.MAX_NO_ACTIVITIES)
    dataframe, end_activities = auto_filter.apply_auto_filter(
        dataframe, parameters=parameters)
    end_activities = list(end_activities.keys())

    activities_count = attributes_filter.get_attribute_values(
        dataframe, activity_key, parameters=parameters)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            dataframe, parameters=parameters).keys())

    dfg, dfg_perf = df_statistics.get_dfg_graph(
        dataframe,
        activity_key=activity_key,
        timestamp_key=timestamp_key,
        case_id_glue=case_id_glue,
        sort_caseid_required=False,
        sort_timestamp_along_case_id=False,
        measure="both")
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasing_factor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)
    # Keep performance values only for the edges that survived the cleaning.
    dfg_perf = {edge: value for edge, value in dfg_perf.items() if edge in dfg}

    net, im, fm = inductive_miner.apply_dfg(dfg,
                                            parameters,
                                            activities=activities,
                                            start_activities=start_activities,
                                            end_activities=end_activities)
    spaths = get_shortest_paths(net)

    # Only the BPMN graph and the inverse element correspondence are used.
    bpmn_graph, _, inv_el_corr, _ = petri_to_bpmn.apply(net, im, fm)

    aggregated_statistics = get_decorations_from_dfg_spaths_acticount(
        net, dfg_perf, spaths, activities_count, variant="performance")

    # NOTE: the converted statistics are currently not embedded into the BPMN
    # graph (the embedding step is disabled); the conversion call is kept so
    # that the executed steps stay the same.
    bpmn_aggreg_statistics = convert_performance_map.convert_performance_map_to_bpmn(
        aggregated_statistics, inv_el_corr)
    bpmn_graph = bpmn_diagram_layouter.apply(bpmn_graph)
    bpmn_string = bpmn_exporter.get_string_from_bpmn(bpmn_graph)

    # The same decorated net is rendered twice: once as SVG and once as DOT.
    gviz = bpmn_vis_factory.apply_petri(
        net,
        im,
        fm,
        aggregated_statistics=aggregated_statistics,
        variant="performance",
        parameters={"format": "svg"})
    gviz2 = bpmn_vis_factory.apply_petri(
        net,
        im,
        fm,
        aggregated_statistics=aggregated_statistics,
        variant="performance",
        parameters={"format": "dot"})

    gviz_base64 = get_base64_from_file(gviz2.name)

    ret_graph = get_graph.get_graph_from_petri(net, im, fm)

    return (get_base64_from_file(gviz.name),
            export_petri_as_string(net, im, fm), ".pnml", "parquet",
            activities, start_activities, end_activities, gviz_base64,
            ret_graph, "indbpmn", "perf", bpmn_string, ".bpmn", activity_key)
Esempio n. 5
0
def apply(dataframe, parameters=None):
    """
    Gets the Petri net through Inductive Miner, decorated by frequency metric

    Parameters
    ------------
    dataframe
        Dataframe
    parameters
        Parameters of the algorithm. Recognised keys read here:
        "decreasingFactor", and the pm4_constants activity / timestamp /
        case id keys. The dictionary passed by the caller is not modified.

    Returns
    ------------
    tuple
        A 14-element tuple, in this order:
        0.  base64 of the rendered visualization (get_base64_from_gviz)
        1.  Petri net exported as a string
        2.  ".pnml" (extension of element 1)
        3.  "parquet"
        4.  list of activities of the filtered dataframe
        5.  list of start activities
        6.  list of end activities
        7.  base64 of str(gviz)
        8.  graph returned by get_graph.get_graph_from_petri
        9.  "inductive"
        10. "freq"
        11. None (second model string, unused for this variant)
        12. "" (extension of element 11)
        13. the activity key that was used
    """
    # Work on a private copy: the flag written below is only meant for the
    # auto filter call in this function (presumably it is what makes that
    # call also hand back the end-activities dictionary) and must not leak
    # into the dictionary owned by the caller.
    parameters = {} if parameters is None else dict(parameters)

    decreasing_factor = parameters.get("decreasingFactor",
                                       constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    timestamp_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
        xes.DEFAULT_TIMESTAMP_KEY)
    case_id_glue = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_CASEID_KEY, CASE_CONCEPT_NAME)

    parameters[pm4_constants.RETURN_EA_COUNT_DICT_AUTOFILTER] = True
    dataframe = attributes_filter.filter_df_keeping_spno_activities(
        dataframe,
        activity_key=activity_key,
        max_no_activities=constants.MAX_NO_ACTIVITIES)
    dataframe, end_activities = auto_filter.apply_auto_filter(
        dataframe, parameters=parameters)
    end_activities = list(end_activities.keys())
    activities_count = attributes_filter.get_attribute_values(
        dataframe, activity_key, parameters=parameters)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            dataframe, parameters=parameters).keys())

    dfg = df_statistics.get_dfg_graph(dataframe,
                                      activity_key=activity_key,
                                      timestamp_key=timestamp_key,
                                      case_id_glue=case_id_glue,
                                      sort_caseid_required=False,
                                      sort_timestamp_along_case_id=False)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasing_factor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)

    net, im, fm = inductive_miner.apply_dfg(dfg,
                                            parameters,
                                            activities=activities,
                                            start_activities=start_activities,
                                            end_activities=end_activities)
    spaths = get_shortest_paths(net)
    aggregated_statistics = get_decorations_from_dfg_spaths_acticount(
        net, dfg, spaths, activities_count, variant="frequency")
    gviz = pn_vis_factory.apply(net,
                                im,
                                fm,
                                parameters={"format": "svg"},
                                variant="frequency",
                                aggregated_statistics=aggregated_statistics)

    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    ret_graph = get_graph.get_graph_from_petri(net, im, fm)

    return (get_base64_from_gviz(gviz), export_petri_as_string(net, im, fm),
            ".pnml", "parquet", activities, start_activities, end_activities,
            gviz_base64, ret_graph, "inductive", "freq", None, "",
            activity_key)
Esempio n. 6
0
def apply(df, parameters=None, classic_output=False):
    """
    Gets a simple model out of a Pandas dataframe

    Parameters
    -------------
    df
        Pandas dataframe
    parameters
        Parameters of the algorithm, including:
            maximum_number_activities -> Maximum number of activities to keep (default: 20)
            discovery_algorithm -> Discovery algorithm to use (alpha, inductive; default: alphaclassic)
            desidered_output -> Desired output of the algorithm (default: petri)
            include_filtered_df -> Include the filtered dataframe in the output
            include_dfg_frequency -> Include the DFG of frequencies in the output
            include_dfg_performance -> Include the DFG of performance in the output
            include_filtered_dfg_frequency -> Include the filtered DFG of frequencies in the output
            include_filtered_dfg_performance -> Include the filtered DFG of performance in the output
        All the include_* options default to True. Missing case id, activity,
        timestamp and attribute keys are filled in on the passed dictionary.
    classic_output
        Determine if the output shall contain directly the objects (e.g. net, initial_marking, final_marking)
        or can return a more detailed dictionary

    Returns
    -------------
    result
        The tuple (net, initial_marking, final_marking) when classic_output is
        set, a net was discovered and the desired output is "petri";
        otherwise a dictionary holding the requested objects that could be
        computed. Only the "alpha" branch discovers a net here; when the
        discovery algorithm matches neither "alpha" nor "inductive" no
        filtered dataframe exists, so the filtered entries are left out.
    """
    if parameters is None:
        parameters = {}

    # Fill in the default column names (in place, as before) so that every
    # callee receiving `parameters` sees the same keys.
    parameters.setdefault(PARAMETER_CONSTANT_CASEID_KEY, CASE_CONCEPT_NAME)
    parameters.setdefault(PARAMETER_CONSTANT_ACTIVITY_KEY, DEFAULT_NAME_KEY)
    parameters.setdefault(PARAMETER_CONSTANT_TIMESTAMP_KEY,
                          DEFAULT_TIMESTAMP_KEY)
    if PARAMETER_CONSTANT_ATTRIBUTE_KEY not in parameters:
        # The attribute key falls back to whatever activity key is in effect.
        parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = parameters[
            PARAMETER_CONSTANT_ACTIVITY_KEY]

    returned_dictionary = {}

    caseid_glue = parameters[PARAMETER_CONSTANT_CASEID_KEY]
    activity_key = parameters[PARAMETER_CONSTANT_ACTIVITY_KEY]
    timest_key = parameters[PARAMETER_CONSTANT_TIMESTAMP_KEY]

    net = None
    initial_marking = None
    final_marking = None
    # Never assigned below: no BPMN conversion is performed in this function.
    bpmn_graph = None

    maximum_number_activities = parameters.get("maximum_number_activities", 20)
    discovery_algorithm = parameters.get("discovery_algorithm", "alphaclassic")
    desidered_output = parameters.get("desidered_output", "petri")
    include_filtered_df = parameters.get("include_filtered_df", True)
    include_dfg_frequency = parameters.get("include_dfg_frequency", True)
    include_dfg_performance = parameters.get("include_dfg_performance", True)
    include_filtered_dfg_frequency = parameters.get(
        "include_filtered_dfg_frequency", True)
    include_filtered_dfg_performance = parameters.get(
        "include_filtered_dfg_performance", True)

    df = attributes_filter.filter_df_keeping_spno_activities(
        df,
        activity_key=activity_key,
        max_no_activities=maximum_number_activities)

    filtered_df = None

    if "alpha" in discovery_algorithm:
        filtered_df = start_activities_filter.apply_auto_filter(
            df, parameters=parameters)
        filtered_df = end_activities_filter.apply_auto_filter(
            filtered_df, parameters=parameters)
        filtered_df = filter_topvariants_soundmodel.apply(
            filtered_df, parameters=parameters)
    elif "inductive" in discovery_algorithm:
        filtered_df = auto_filter.apply_auto_filter(df, parameters=parameters)

    dfg_frequency, dfg_performance = dfg_util.get_dfg_graph(
        df,
        measure="both",
        perf_aggregation_key="mean",
        case_id_glue=caseid_glue,
        activity_key=activity_key,
        timestamp_key=timest_key)

    # The filtered DFGs can only be computed when one of the branches above
    # produced a filtered dataframe; passing None to get_dfg_graph would fail.
    filtered_dfg_frequency = None
    filtered_dfg_performance = None
    if filtered_df is not None:
        filtered_dfg_frequency, filtered_dfg_performance = dfg_util.get_dfg_graph(
            filtered_df,
            measure="both",
            perf_aggregation_key="mean",
            case_id_glue=caseid_glue,
            activity_key=activity_key,
            timestamp_key=timest_key)

    if "alpha" in discovery_algorithm:
        net, initial_marking, final_marking = alpha_miner.apply_dfg(
            filtered_dfg_frequency, parameters=parameters)

    wants_petri = desidered_output == "petri"

    if filtered_df is not None and include_filtered_df:
        returned_dictionary["filtered_df"] = filtered_df
    if net is not None and wants_petri:
        returned_dictionary["net"] = net
    if initial_marking is not None and wants_petri:
        returned_dictionary["initial_marking"] = initial_marking
    if final_marking is not None and wants_petri:
        returned_dictionary["final_marking"] = final_marking
    if bpmn_graph is not None and desidered_output == "bpmn":
        returned_dictionary["bpmn_graph"] = bpmn_graph
    if dfg_frequency is not None and include_dfg_frequency:
        returned_dictionary["dfg_frequency"] = dfg_frequency
    if dfg_performance is not None and include_dfg_performance:
        returned_dictionary["dfg_performance"] = dfg_performance
    if filtered_dfg_frequency is not None and include_filtered_dfg_frequency:
        returned_dictionary["filtered_dfg_frequency"] = filtered_dfg_frequency
    if filtered_dfg_performance is not None and include_filtered_dfg_performance:
        returned_dictionary[
            "filtered_dfg_performance"] = filtered_dfg_performance

    if classic_output and net is not None and wants_petri:
        return net, initial_marking, final_marking

    return returned_dictionary
Esempio n. 7
0
def apply(dataframe, parameters=None):
    """
    Gets the performance HNet

    Parameters
    ------------
    dataframe
        Dataframe
    parameters
        Parameters of the algorithm. Recognised keys read here:
        "decreasingFactor", and the pm4_constants activity / timestamp /
        case id keys. The dictionary passed by the caller is not modified.

    Returns
    ------------
    tuple
        A 14-element tuple, in this order:
        0.  base64 of the file rendered with format "svg"
        1.  None (no textual model for this variant)
        2.  "" (extension of element 1)
        3.  "parquet"
        4.  list of activities of the filtered dataframe
        5.  list of start activities
        6.  list of end activities
        7.  base64 of the file rendered with format "dot"
        8.  [] (no graph for this variant)
        9.  "heuristics"
        10. "perf"
        11. None (second model string, unused for this variant)
        12. "" (extension of element 11)
        13. the activity key that was used
    """
    # Work on a private copy: the flag written below is only meant for the
    # auto filter call in this function (presumably it is what makes that
    # call also hand back the end-activities dictionary) and must not leak
    # into the dictionary owned by the caller.
    parameters = {} if parameters is None else dict(parameters)

    decreasing_factor = parameters.get("decreasingFactor",
                                       ws_constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    timestamp_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
        xes.DEFAULT_TIMESTAMP_KEY)
    case_id_glue = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_CASEID_KEY, CASE_CONCEPT_NAME)

    parameters[pm4_constants.RETURN_EA_COUNT_DICT_AUTOFILTER] = True
    dataframe = attributes_filter.filter_df_keeping_spno_activities(
        dataframe,
        activity_key=activity_key,
        max_no_activities=ws_constants.MAX_NO_ACTIVITIES)
    dataframe, end_activities_count = auto_filter.apply_auto_filter(
        dataframe, parameters=parameters)

    activities_count = attributes_filter.get_attribute_values(
        dataframe, activity_key, parameters=parameters)
    start_activities_count = start_activities_filter.get_start_activities(
        dataframe, parameters=parameters)
    activities = list(activities_count.keys())
    start_activities = list(start_activities_count.keys())
    end_activities = list(end_activities_count.keys())

    dfg_frequency, dfg_performance = df_statistics.get_dfg_graph(
        dataframe,
        case_id_glue=case_id_glue,
        activity_key=activity_key,
        timestamp_key=timestamp_key,
        measure="both",
        sort_caseid_required=False,
        sort_timestamp_along_case_id=False)
    heu_net = HeuristicsNet(dfg_frequency,
                            performance_dfg=dfg_performance,
                            activities=activities,
                            start_activities=start_activities,
                            end_activities=end_activities,
                            activities_occurrences=activities_count)
    heu_net.calculate(
        dfg_pre_cleaning_noise_thresh=ws_constants.DEFAULT_DFG_CLEAN_MULTIPLIER
        * decreasing_factor)

    # The same heuristics net is rendered twice: once as SVG and once as DOT.
    vis = heu_vis_factory.apply(heu_net, parameters={"format": "svg"})
    vis2 = heu_vis_factory.apply(heu_net, parameters={"format": "dot"})

    gviz_base64 = get_base64_from_file(vis2.name)

    return (get_base64_from_file(vis.name), None, "", "parquet", activities,
            start_activities, end_activities, gviz_base64, [], "heuristics",
            "perf", None, "", activity_key)
Esempio n. 8
0
def apply(dataframe, parameters=None):
    """
    Gets the process tree using Inductive Miner Directly-Follows

    Parameters
    ------------
    dataframe
        Dataframe
    parameters
        Parameters of the algorithm. Recognised keys read here:
        "decreasingFactor", and the pm4_constants activity / timestamp /
        case id keys. The dictionary passed by the caller is not modified.

    Returns
    ------------
    tuple
        A 14-element tuple, in this order:
        0.  base64 of the rendered visualization (get_base64_from_gviz)
        1.  None (no textual model for this variant)
        2.  "" (extension of element 1)
        3.  "parquet"
        4.  list of activities of the filtered dataframe
        5.  list of start activities
        6.  list of end activities
        7.  base64 of str(gviz)
        8.  [] (no graph for this variant)
        9.  "tree"
        10. "freq"
        11. None (second model string, unused for this variant)
        12. "" (extension of element 11)
        13. the activity key that was used
    """
    # Work on a private copy: the flag written below is only meant for the
    # auto filter call in this function (presumably it is what makes that
    # call also hand back the end-activities dictionary) and must not leak
    # into the dictionary owned by the caller.
    parameters = {} if parameters is None else dict(parameters)

    decreasing_factor = parameters.get("decreasingFactor",
                                       constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    timestamp_key = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_TIMESTAMP_KEY,
        xes.DEFAULT_TIMESTAMP_KEY)
    case_id_glue = parameters.get(
        pm4_constants.PARAMETER_CONSTANT_CASEID_KEY, CASE_CONCEPT_NAME)

    parameters[pm4_constants.RETURN_EA_COUNT_DICT_AUTOFILTER] = True
    dataframe = attributes_filter.filter_df_keeping_spno_activities(
        dataframe,
        activity_key=activity_key,
        max_no_activities=constants.MAX_NO_ACTIVITIES)
    dataframe, end_activities = auto_filter.apply_auto_filter(
        dataframe, parameters=parameters)
    end_activities = list(end_activities.keys())

    activities_count = attributes_filter.get_attribute_values(
        dataframe, activity_key, parameters=parameters)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            dataframe, parameters=parameters).keys())

    dfg = df_statistics.get_dfg_graph(dataframe,
                                      activity_key=activity_key,
                                      timestamp_key=timestamp_key,
                                      case_id_glue=case_id_glue,
                                      sort_caseid_required=False,
                                      sort_timestamp_along_case_id=False)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasing_factor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)
    tree = inductive_miner.apply_tree_dfg(dfg,
                                          parameters,
                                          activities=activities,
                                          start_activities=start_activities,
                                          end_activities=end_activities)
    gviz = pt_vis_factory.apply(tree, parameters={"format": "svg"})

    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    return (get_base64_from_gviz(gviz), None, "", "parquet", activities,
            start_activities, end_activities, gviz_base64, [], "tree", "freq",
            None, "", activity_key)