コード例 #1
0
ファイル: dfg_min_ex_log.py プロジェクト: pm4py/pm4py-core
def execute_script():
    """Discover frequency and performance DFGs from an interval event log,
    view both, then convert the frequency DFG to a Petri net and view that too."""
    log_path = os.path.join("..", "tests", "input_data", "interval_event_log.xes")
    log = xes_importer.apply(log_path)
    # column keys for interval logs plus shared rendering options
    parameters = {
        constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY: "start_timestamp",
        constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: "time:timestamp",
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name",
        "strict": False,
        "format": "svg",
    }
    # compute both endpoint sets before injecting them into the parameters
    sa = sa_get.get_start_activities(log, parameters=parameters)
    ea = ea_get.get_end_activities(log, parameters=parameters)
    parameters["start_activities"] = sa
    parameters["end_activities"] = ea
    # print the interval statistics: sojourn times, concurrent activities,
    # eventually-follows graph
    for label, stat_module in (("soj_time", soj_time_get),
                               ("conc_act", conc_act_get),
                               ("efg", efg_get)):
        print(label)
        print(stat_module.apply(log, parameters=parameters))
    dfg_freq = dfg_algorithm.apply(log, parameters=parameters, variant=dfg_algorithm.Variants.FREQUENCY)
    dfg_perf = dfg_algorithm.apply(log, parameters=parameters, variant=dfg_algorithm.Variants.PERFORMANCE)
    dfg_vis_fact.view(dfg_vis_fact.apply(dfg_freq, log=log,
                                         variant=dfg_vis_fact.Variants.FREQUENCY,
                                         parameters=parameters))
    dfg_vis_fact.view(dfg_vis_fact.apply(dfg_perf, log=log,
                                         variant=dfg_vis_fact.Variants.PERFORMANCE,
                                         parameters=parameters))
    # frequency DFG -> Petri net, then render it
    net, im, fm = dfg_conv.apply(dfg_freq)
    pn_vis.view(pn_vis.apply(net, im, fm, parameters=parameters))
コード例 #2
0
def gerar_view_dfg_model(eventLog,
                         dfg,
                         metric_type='FREQUENCY',
                         image_format='png'):
    """
    Build a Graphviz visualization of ``dfg`` decorated with the given metric.

    Parameters
    ----------
    eventLog
        Event log used to decorate the visualization.
    dfg
        Directly-follows graph to visualise.
    metric_type
        Either ``'FREQUENCY'`` or ``'PERFORMANCE'`` (default ``'FREQUENCY'``).
    image_format
        Output image format (default ``'png'``).

    Returns
    -------
    The Graphviz object, or ``None`` (with a message printed) when
    ``metric_type`` is not recognised.
    """
    # The two branches of the original differed only in the chosen variant;
    # resolve the variant once and share the rest of the logic.
    variants = {
        'PERFORMANCE': dfg_visualization.Variants.PERFORMANCE,
        'FREQUENCY': dfg_visualization.Variants.FREQUENCY,
    }
    variant = variants.get(metric_type)
    if variant is None:
        # preserve the original behavior: report and return None
        print("Invalid metric_type: " + metric_type)
        return None

    parameters = {variant.value.Parameters.FORMAT: image_format}
    return dfg_visualization.apply(dfg,
                                   log=eventLog,
                                   variant=variant,
                                   parameters=parameters)
コード例 #3
0
ファイル: corr_mining.py プロジェクト: yoannlgd1/pm4py-core
def execute_script():
    """Run the correlation miner on an interval CSV and view the resulting
    frequency and performance DFGs."""
    df = pd.read_csv("../tests/input_data/interval_event_log.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(df)
    act_count = dict(df["concept:name"].value_counts())
    # interval-log column keys plus the rendering format
    parameters = {
        constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY: "start_timestamp",
        constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: "time:timestamp",
        "format": "svg",
    }
    sa = sa_get.get_start_activities(df, parameters=parameters)
    ea = ea_get.get_end_activities(df, parameters=parameters)
    parameters["start_activities"] = sa
    parameters["end_activities"] = ea
    soj_time = soj_time_get.apply(df, parameters=parameters)
    dfg, performance_dfg = correlation_miner.apply(
        df, variant=correlation_miner.Variants.CLASSIC, parameters=parameters)
    # render the frequency graph first, then the performance graph
    for graph, variant in ((dfg, dfg_vis.Variants.FREQUENCY),
                           (performance_dfg, dfg_vis.Variants.PERFORMANCE)):
        gviz = dfg_vis.apply(graph,
                             activities_count=act_count,
                             soj_time=soj_time,
                             variant=variant,
                             parameters=parameters)
        dfg_vis.view(gviz)
コード例 #4
0
ファイル: corr_mining.py プロジェクト: escort94/KSIA
def execute_script():
    """Correlation-mine the receipt CSV and view both DFG metrics as SVG."""
    df = dataframe_utils.convert_timestamp_columns_in_df(
        pd.read_csv("../tests/input_data/receipt.csv"))
    act_count = dict(df["concept:name"].value_counts())
    freq_dfg, perf_dfg = correlation_miner.apply(
        df, variant=correlation_miner.Variants.CLASSIC)
    vis_params = {"format": "svg"}
    dfg_vis.view(dfg_vis.apply(freq_dfg,
                               activities_count=act_count,
                               variant=dfg_vis.Variants.FREQUENCY,
                               parameters=vis_params))
    dfg_vis.view(dfg_vis.apply(perf_dfg,
                               activities_count=act_count,
                               variant=dfg_vis.Variants.PERFORMANCE,
                               parameters=vis_params))
コード例 #5
0
def execute_script():
    """Compute DFG statistics on a dataframe derived from a XES log and
    visualise the frequency/performance DFGs plus the converted Petri net."""
    log_path = os.path.join("..", "tests", "input_data",
                            "interval_event_log.csv")
    dataframe = pm4py.read_csv(log_path)
    # NOTE(review): the dataframe read above is immediately discarded — it is
    # overwritten by the conversion of the XES log below. Confirm whether the
    # CSV read is still needed.
    log_path = os.path.join("..", "tests", "input_data", "reviewing.xes")
    log = pm4py.read_xes(log_path)
    dataframe = pm4py.convert_to_dataframe(log)
    # column keys and options shared by all statistics/visualisation calls
    parameters = {}
    #parameters[constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY] = "start_timestamp"
    parameters[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] = "time:timestamp"
    parameters[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = "concept:name"
    parameters[constants.PARAMETER_CONSTANT_CASEID_KEY] = "case:concept:name"
    parameters["strict"] = True
    parameters["format"] = "svg"
    start_activities = sa_get.get_start_activities(dataframe,
                                                   parameters=parameters)
    end_activities = ea_get.get_end_activities(dataframe,
                                               parameters=parameters)
    att_count = att_get.get_attribute_values(dataframe,
                                             "concept:name",
                                             parameters=parameters)
    parameters["start_activities"] = start_activities
    parameters["end_activities"] = end_activities
    # sojourn time per activity
    soj_time = soj_time_get.apply(dataframe, parameters=parameters)
    print("soj_time")
    print(soj_time)
    # concurrent activities
    conc_act = conc_act_get.apply(dataframe, parameters=parameters)
    print("conc_act")
    print(conc_act)
    # eventually-follows graph
    efg = efg_get.apply(dataframe, parameters=parameters)
    print("efg")
    print(efg)
    # frequency and performance DFGs in one pass over the dataframe
    dfg_freq, dfg_perf = df_statistics.get_dfg_graph(
        dataframe, measure="both", start_timestamp_key="start_timestamp")
    dfg_gv_freq = dfg_vis_fact.apply(dfg_freq,
                                     activities_count=att_count,
                                     variant=dfg_vis_fact.Variants.FREQUENCY,
                                     soj_time=soj_time,
                                     parameters=parameters)
    dfg_vis_fact.view(dfg_gv_freq)
    dfg_gv_perf = dfg_vis_fact.apply(dfg_perf,
                                     activities_count=att_count,
                                     variant=dfg_vis_fact.Variants.PERFORMANCE,
                                     soj_time=soj_time,
                                     parameters=parameters)
    dfg_vis_fact.view(dfg_gv_perf)
    # convert the frequency DFG to a Petri net and render it
    net, im, fm = dfg_conv.apply(dfg_freq)
    gviz = pn_vis.apply(net, im, fm, parameters=parameters)
    pn_vis.view(gviz)
コード例 #6
0
ファイル: vis.py プロジェクト: snambia/pm4py-core
def save_vis_dfg(dfg, start_activities, end_activities, file_path, log=None):
    """
    Saves a DFG visualization to a file

    Parameters
    --------------
    dfg
        DFG object
    start_activities
        Start activities
    end_activities
        End activities
    file_path
        Destination path
    log
        (optional) event log used to decorate the visualization
    """
    # Derive the image format from the LAST dot in the path. The previous
    # file_path.index(".") used the FIRST dot, so paths such as
    # "./out/model.svg" produced a bogus format ("/out/model.svg").
    format = file_path.rsplit(".", 1)[-1].lower()
    from pm4py.visualization.dfg import visualizer as dfg_visualizer
    parameters = dfg_visualizer.Variants.FREQUENCY.value.Parameters
    gviz = dfg_visualizer.apply(dfg,
                                log=log,
                                variant=dfg_visualizer.Variants.FREQUENCY,
                                parameters={
                                    parameters.FORMAT: format,
                                    parameters.START_ACTIVITIES:
                                    start_activities,
                                    parameters.END_ACTIVITIES: end_activities
                                })
    dfg_visualizer.save(gviz, file_path)
コード例 #7
0
ファイル: vis.py プロジェクト: snambia/pm4py-core
def view_dfg(dfg, start_activities, end_activities, format="png", log=None):
    """
    Displays a (composite) DFG on screen.

    Parameters
    -------------
    dfg
        DFG object
    start_activities
        Start activities
    end_activities
        End activities
    format
        Format of the output picture (default: png)
    log
        (optional) event log used to decorate the visualization
    """
    from pm4py.visualization.dfg import visualizer as dfg_visualizer
    freq_variant = dfg_visualizer.Variants.FREQUENCY
    keys = freq_variant.value.Parameters
    vis_parameters = {
        keys.FORMAT: format,
        keys.START_ACTIVITIES: start_activities,
        keys.END_ACTIVITIES: end_activities,
    }
    gviz = dfg_visualizer.apply(dfg, log=log, variant=freq_variant,
                                parameters=vis_parameters)
    dfg_visualizer.view(gviz)
コード例 #8
0
def execute_script():
    """
    XES certification round-trip: import every XES log found in ``xesinput``,
    export it, render its DFG to ``xescert_images``, then re-import the
    exported file to verify it still loads.
    """
    log_input_directory = "xesinput"
    all_logs_names = os.listdir(log_input_directory)
    all_logs_names = [log for log in all_logs_names if ".xe" in log]

    for logName in all_logs_names:
        # os.path.join instead of hard-coded "\\" so the script also runs on
        # non-Windows platforms
        log_path = os.path.join(log_input_directory, logName)
        log = xes_importer.apply(log_path)
        print("\n\n")
        print("log loaded")
        print("Number of traces - ", len(log))
        event_log = log_conversion.apply(
            log, variant=log_conversion.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
        exp_log_name = os.path.join("xescert_exportlogs", "exp_" + logName)
        print("exporting log", exp_log_name)
        xes_exporter.apply(log, exp_log_name)
        print("exported log", exp_log_name)

        log, classifier_attr_key = insert_classifier.search_act_class_attr(log)

        classifiers = list(log.classifiers.keys())
        if classifier_attr_key is None and classifiers:
            try:
                print(classifiers)
                log, classifier_attr_key = insert_classifier.insert_activity_classifier_attribute(
                    log, classifiers[0])
                print(classifier_attr_key)
            except Exception:
                # narrowed from a bare ``except:`` so KeyboardInterrupt and
                # SystemExit are no longer swallowed
                print("exception in handling classifier")

        if classifier_attr_key is None:
            classifier_attr_key = "concept:name"

        if len(event_log) > 0 and classifier_attr_key in event_log[0]:
            parameters = {
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: classifier_attr_key
            }

            dfg = dfg_algorithm.apply(log, parameters=parameters)
            gviz = dfg_vis.apply(dfg,
                                 log=log,
                                 variant="frequency",
                                 parameters=parameters)
            # dfg_vis.view(gviz)

            dfg_vis.save(gviz,
                         os.path.join("xescert_images",
                                      logName.replace("xes", "png")))

        print("Reimporting log file just exported - ", exp_log_name)

        log = xes_importer.apply(exp_log_name)
        print("log loaded", exp_log_name)
        print("Number of traces - ", len(log))
        event_log = log_conversion.apply(
            log, variant=log_conversion.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
コード例 #9
0
def execute_script():
    """Replay a XES log event-by-event on a live event stream and discover a
    DFG incrementally, then visualise the result."""
    # imports a XES event log
    log = pm4py.read_xes(
        os.path.join("..", "tests", "input_data", "receipt.xes"))
    # converts the log into a list of events (not anymore grouped in cases)
    event_stream = pm4py.convert_to_event_stream(log)
    # creates a live event stream (an object that distributes the messages to the algorithm)
    live_stream = LiveEventStream()
    # creates the streaming DFG discovery object
    stream_dfg_disc = dfg_discovery.apply()
    # register the discovery algorithm to the stream (must precede start())
    live_stream.register(stream_dfg_disc)
    # start the recording of events from the live event stream
    live_stream.start()
    # append each event of the original log to the live event stream
    # (so it is sent to the conformance checking algorithm)
    for event in event_stream:
        live_stream.append(event)
    # stops the live event stream; do this before reading the result below
    live_stream.stop()
    # gets the DFG along with the start and end activities from the stream
    dfg, activities, start_activities, end_activities = stream_dfg_disc.get()
    # visualize the DFG
    gviz = dfg_visualizer.apply(dfg,
                                variant=dfg_visualizer.Variants.FREQUENCY,
                                activities_count=activities,
                                parameters={
                                    "format": "svg",
                                    "start_activities": start_activities,
                                    "end_activities": end_activities
                                })
    dfg_visualizer.view(gviz)
コード例 #10
0
ファイル: vis.py プロジェクト: pm4py/pm4py-core
def view_dfg(dfg: dict,
             start_activities: dict,
             end_activities: dict,
             format: str = "png",
             log: Optional[EventLog] = None):
    """
    Displays a (composite) DFG on screen.

    Parameters
    -------------
    dfg
        DFG object
    start_activities
        Start activities
    end_activities
        End activities
    format
        Format of the output picture (default: png)
    log
        (optional) event log whose properties seed the visualization parameters
    """
    from pm4py.visualization.dfg import visualizer as dfg_visualizer
    freq_variant = dfg_visualizer.Variants.FREQUENCY
    keys = freq_variant.value.Parameters
    # start from the log-derived properties, then layer the view options on top
    parameters = get_properties(log)
    parameters.update({
        keys.FORMAT: format,
        keys.START_ACTIVITIES: start_activities,
        keys.END_ACTIVITIES: end_activities,
    })
    gviz = dfg_visualizer.apply(dfg, log=log, variant=freq_variant,
                                parameters=parameters)
    dfg_visualizer.view(gviz)
コード例 #11
0
def execute_script():
    """Discover several model types from the running example and print the
    length of each visualization's serialized representation."""
    log = pm4py.read_xes("../tests/input_data/running-example.xes")
    # discover one model per supported representation
    dfg, sa, ea = pm4py.discover_dfg(log)
    tree = pm4py.discover_process_tree_inductive(log)
    heu_net = pm4py.discover_heuristics_net(log)
    net, im, fm = pm4py.discover_petri_net_alpha(log)
    bpmn = pm4py.convert_to_bpmn(tree)
    ts = ts_discovery.apply(log)
    x_cases, y_cases = case_statistics.get_kde_caseduration(log)

    # build one visualization object per model
    gviz1 = dfg_visualizer.apply(dfg)
    gviz2 = tree_visualizer.apply(tree)
    gviz3 = hn_visualizer.apply(heu_net)
    gviz4 = pn_visualizer.apply(net, im, fm)
    gviz5 = bpmn_visualizer.apply(bpmn)
    gviz6 = ts_visualizer.apply(ts)
    gviz7 = graphs_visualizer.apply(x_cases, y_cases, variant=graphs_visualizer.Variants.CASES,
                                          parameters={graphs_visualizer.Variants.CASES.value.Parameters.FORMAT: "svg"})

    # print the size of the DOT serialization and of the image serialization
    # for each visualization (heuristics net and graphs have no DOT form here)
    print("1", len(dfg_visualizer.serialize_dot(gviz1)))
    print("1", len(dfg_visualizer.serialize(gviz1)))
    print("2", len(tree_visualizer.serialize_dot(gviz2)))
    print("2", len(tree_visualizer.serialize(gviz2)))
    print("3", len(hn_visualizer.serialize(gviz3)))
    print("4", len(pn_visualizer.serialize_dot(gviz4)))
    print("4", len(pn_visualizer.serialize(gviz4)))
    print("5", len(bpmn_visualizer.serialize_dot(gviz5)))
    print("5", len(bpmn_visualizer.serialize(gviz5)))
    print("6", len(ts_visualizer.serialize_dot(gviz6)))
    print("6", len(ts_visualizer.serialize(gviz6)))
    print("7", len(graphs_visualizer.serialize(gviz7)))
コード例 #12
0
def execute_script():
    """Correlation-mine the receipt CSV and view the frequency and the
    performance DFG as SVG."""
    df = csv_import_adapter.import_dataframe_from_path(
        "../tests/input_data/receipt.csv")
    act_count = dict(df["concept:name"].value_counts())
    freq_dfg, perf_dfg = correlation_miner.apply(
        df, variant=correlation_miner.Variants.CLASSIC)
    for graph, variant in ((freq_dfg, dfg_vis.Variants.FREQUENCY),
                           (perf_dfg, dfg_vis.Variants.PERFORMANCE)):
        gviz = dfg_vis.apply(graph,
                             activities_count=act_count,
                             variant=variant,
                             parameters={"format": "svg"})
        dfg_vis.view(gviz)
コード例 #13
0
def save_full_dfg(log):
    """
    Discover the full (unfiltered) frequency DFG of ``log``, show it on
    screen, then save it as ``dfg_full.svg``.

    Returns
    -------
    The Graphviz object of the saved visualization, paired with ``None`` by
    callers' convention elsewhere; here just the gviz object is returned.
    """
    dfg = dfg_discovery.apply(log)

    # on-screen preview with default settings
    gviz = dfg_visualization.apply(
        dfg, log=log, variant=dfg_visualization.Variants.FREQUENCY)
    dfg_visualization.view(gviz)
    # Key the format with the FREQUENCY Parameters enum to match the
    # FREQUENCY variant applied below; the original used the PERFORMANCE
    # enum, which only worked because both enums share the "format" value.
    parameters = {
        dfg_visualization.Variants.FREQUENCY.value.Parameters.FORMAT: "svg"
    }
    gviz = dfg_visualization.apply(
        dfg,
        log=log,
        variant=dfg_visualization.Variants.FREQUENCY,
        parameters=parameters)
    dfg_visualization.save(gviz, "dfg_full.svg")
    print('Full DFG saves as "dfg_full.svg"')
    return gviz
コード例 #14
0
    def test_44(self):
        # smoke test: a performance DFG can be discovered and rendered
        # without raising
        import os
        from pm4py.objects.log.importer.xes import importer as xes_importer
        from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
        from pm4py.visualization.dfg import visualizer as dfg_visualization

        event_log = xes_importer.apply(
            os.path.join("input_data", "running-example.xes"))
        perf_dfg = dfg_discovery.apply(
            event_log, variant=dfg_discovery.Variants.PERFORMANCE)
        dfg_visualization.apply(
            perf_dfg, log=event_log,
            variant=dfg_visualization.Variants.PERFORMANCE)
コード例 #15
0
    def test_45(self):
        # smoke test: a performance DFG can be rendered to SVG, saved,
        # and the file removed afterwards
        import os
        from pm4py.objects.log.importer.xes import importer as xes_importer
        from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
        from pm4py.visualization.dfg import visualizer as dfg_visualization

        event_log = xes_importer.apply(
            os.path.join("input_data", "running-example.xes"))
        perf_dfg = dfg_discovery.apply(
            event_log, variant=dfg_discovery.Variants.PERFORMANCE)
        fmt_key = dfg_visualization.Variants.PERFORMANCE.value.Parameters.FORMAT
        gviz = dfg_visualization.apply(
            perf_dfg, log=event_log,
            variant=dfg_visualization.Variants.PERFORMANCE,
            parameters={fmt_key: "svg"})

        target = os.path.join("test_output_data", "dfg.svg")
        dfg_visualization.save(gviz, target)
        os.remove(target)
コード例 #16
0
def execute_script():
    """Align the receipt log against a percentage-filtered DFG, then compare
    visited states, costs and runtime with classical Petri-net alignments."""
    log = pm4py.read_xes(
        os.path.join("..", "tests", "input_data", "receipt.xes"))
    print("number of cases", len(log))
    print("number of events", sum(len(x) for x in log))
    print("number of variants", len(pm4py.get_variants(log)))
    ac = get.get_attribute_values(log, "concept:name")
    dfg, sa, ea = pm4py.discover_dfg(log)
    # keep only 50% of the activities and of the paths before aligning
    perc = 0.5
    dfg, sa, ea, ac = dfg_filtering.filter_dfg_on_activities_percentage(
        dfg, sa, ea, ac, perc)
    dfg, sa, ea, ac = dfg_filtering.filter_dfg_on_paths_percentage(
        dfg, sa, ea, ac, perc)
    # time the DFG-based alignment
    aa = time.time()
    aligned_traces = dfg_alignment.apply(log, dfg, sa, ea)
    bb = time.time()
    net, im, fm = pm4py.convert_to_petri_net(dfg, sa, ea)
    # report traces whose alignment cost differs from the internal cost
    for trace in aligned_traces:
        if trace["cost"] != trace["internal_cost"]:
            print(trace)
            pass  # NOTE(review): no-op leftover; the print above already reports
    print(bb - aa)
    print(sum(x["visited_states"] for x in aligned_traces))
    print(
        sum(x["cost"] // align_utils.STD_MODEL_LOG_MOVE_COST
            for x in aligned_traces))
    gviz = visualizer.apply(dfg,
                            activities_count=ac,
                            parameters={
                                "start_activities": sa,
                                "end_activities": ea,
                                "format": "svg"
                            })
    visualizer.view(gviz)
    # time the Petri-net alignment (memory-light Dijkstra) for comparison
    cc = time.time()
    aligned_traces2 = petri_alignments.apply(
        log,
        net,
        im,
        fm,
        variant=petri_alignments.Variants.VERSION_DIJKSTRA_LESS_MEMORY)
    dd = time.time()
    print(dd - cc)
    print(sum(x["visited_states"] for x in aligned_traces2))
    print(
        sum(x["cost"] // align_utils.STD_MODEL_LOG_MOVE_COST
            for x in aligned_traces2))
コード例 #17
0
ファイル: dfg_min_ex.py プロジェクト: luisfsts/pm4py-source
def execute_script():
    """Discover a DFG from the running example, view it as SVG, then convert
    it to a Petri net and view that too."""
    log = xes_importer.apply(
        os.path.join("..", "tests", "input_data", "running-example.xes"))
    dfg = dfg_algorithm.apply(log)
    dfg_svg = {dfg_vis_fact.Variants.FREQUENCY.value.Parameters.FORMAT: "svg"}
    dfg_vis_fact.view(dfg_vis_fact.apply(dfg, log, parameters=dfg_svg))
    net, im, fm = dfg_conv.apply(dfg)
    pn_svg = {pn_vis.Variants.WO_DECORATION.value.Parameters.FORMAT: "svg"}
    pn_vis.view(pn_vis.apply(net, im, fm, parameters=pn_svg))
コード例 #18
0
def create_directly_follows_graph(frame: DataFrame, output_format='svg'):
    """
    Creates a Directly Follows Graph from the supplied DataFrame and renders
    it into a named temporary file.
    :param frame: the DataFrame
    :param output_format: desired output format
    :return: the (closed) named temporary file holding the rendered graph
    """
    event_log = _convert_data_frame_to_event_log(frame)
    dfg = dfg_alg.apply(log=event_log, variant=DfgAlgVariants.FREQUENCY)
    gviz = dfg_vis.apply(
        dfg,
        log=event_log,
        variant=DfgVisVariants.FREQUENCY,
        parameters={VisualisationParams.FORMAT: output_format})
    rendered = tempfile.NamedTemporaryFile(prefix='pm_',
                                           suffix=f'.{output_format}',
                                           delete=False)
    dfg_vis.save(gviz, rendered.name)
    # close here and delete after final use to work around access issues on
    # in case anybody tries to run this on windows
    rendered.close()
    return rendered
コード例 #19
0
def execute_script():
    """Filter the receipt-log DFG down to 30% of activities and paths, then
    view the filtered graph as SVG."""
    log = pm4py.read_xes("../tests/input_data/receipt.xes")
    dfg, sa, ea = pm4py.discover_dfg(log)
    act_count = pm4py.get_attribute_values(log, "concept:name")
    filtering = pm4py.objects.dfg.filtering.dfg_filtering
    # keep the specified amount of activities
    dfg, sa, ea, act_count = filtering.filter_dfg_on_activities_percentage(
        dfg, sa, ea, act_count, 0.3)
    # keep the specified amount of paths
    dfg, sa, ea, act_count = filtering.filter_dfg_on_paths_percentage(
        dfg, sa, ea, act_count, 0.3)
    # view the DFG
    keys = dfg_visualizer.Variants.FREQUENCY.value.Parameters
    gviz = dfg_visualizer.apply(
        dfg,
        activities_count=act_count,
        parameters={
            keys.START_ACTIVITIES: sa,
            keys.END_ACTIVITIES: ea,
            keys.FORMAT: "svg",
        })
    dfg_visualizer.view(gviz)
コード例 #20
0
    def generate_process_model(self,
                               sub_log,
                               models_path,
                               event_data_original_name,
                               w_count,
                               activity=''):
        """
        Mine a DFG from ``sub_log`` via pm4py and save its visualization in
        the model folder derived from ``models_path`` and
        ``event_data_original_name``.

        Parameters
        ----------
        sub_log
            Event log (window) to mine.
        models_path
            Base folder under which models are stored.
        event_data_original_name
            Name of the original event data; used to build paths/filenames.
        w_count
            Window counter; for the adaptive approach this is presumably a
            mapping activity -> counter (it is indexed below) — TODO confirm.
        activity
            Activity name; non-empty for the adaptive (per-activity) approach.

        Returns
        -------
        The Graphviz object of the saved visualization.
        """
        # create the folder for saving the process map if does not exist
        models_path = self.model_type_definitions.get_models_path(
            models_path, event_data_original_name, activity)
        if not os.path.exists(models_path):
            os.makedirs(models_path)

        # mine the DFG (using Pm4Py)
        dfg, start_activities, end_activities = pm4py.discover_directly_follows_graph(
            sub_log)
        parameters = {
            dfg_visualization.Variants.FREQUENCY.value.Parameters.START_ACTIVITIES:
            start_activities,
            dfg_visualization.Variants.FREQUENCY.value.Parameters.END_ACTIVITIES:
            end_activities
        }
        gviz = dfg_visualization.apply(dfg, log=sub_log, parameters=parameters)
        # dfg = dfg_discovery.apply(sub_log, variant=dfg_discovery.Variants.PERFORMANCE)
        # gviz = dfg_visualization.apply(dfg, log=sub_log, variant=dfg_visualization.Variants.PERFORMANCE)

        # save the process model
        if activity and activity != '':  # adaptive approach generates models per activity
            output_filename = self.model_type_definitions.get_model_filename(
                event_data_original_name, w_count[activity])
        else:  # fixed approach generate the models based on the window size
            output_filename = self.model_type_definitions.get_model_filename(
                event_data_original_name, w_count)
        print(f'Saving {models_path} - {output_filename}')
        # Source.save(gviz, filename=output_filename, directory=models_path)
        # NOTE(review): Digraph.save writes the DOT source, not a rendered
        # image — confirm that render() is not intended here.
        gviz.save(filename=output_filename, directory=models_path)
        return gviz
コード例 #21
0
ファイル: vis.py プロジェクト: pm4py/pm4py-core
def save_vis_dfg(dfg: dict,
                 start_activities: dict,
                 end_activities: dict,
                 file_path: str,
                 log: Optional[EventLog] = None):
    """
    Renders a DFG visualization and writes it to ``file_path``.

    Parameters
    --------------
    dfg
        DFG object
    start_activities
        Start activities
    end_activities
        End activities
    file_path
        Destination path
    log
        (optional) traditional event log used to seed the parameters
    """
    # only traditional event-log representations are accepted here
    if log is not None and type(log) not in [pd.DataFrame, EventLog, EventStream]:
        raise Exception(
            "the method can be applied only to a traditional event log!")

    # the image format is taken from the destination file's extension
    format = os.path.splitext(file_path)[1][1:]
    from pm4py.visualization.dfg import visualizer as dfg_visualizer
    freq_variant = dfg_visualizer.Variants.FREQUENCY
    keys = freq_variant.value.Parameters
    parameters = get_properties(log)
    parameters.update({
        keys.FORMAT: format,
        keys.START_ACTIVITIES: start_activities,
        keys.END_ACTIVITIES: end_activities,
    })
    gviz = dfg_visualizer.apply(dfg, log=log, variant=freq_variant,
                                parameters=parameters)
    dfg_visualizer.save(gviz, file_path)
コード例 #22
0
  st.write("")
  st.write("Os movimentos estão no seguinte formato:")
  st.write("* CódigoNacionalCNJ:Descrição do Movimento")
  st.write("")
  st.write('Quando a métrica selecionada é o "Tempo" são utilizadas as seguintes unidades:' )
  st.write('* D = Days/Dias' )
  st.write('* MO = Months/Meses' )
  st.write('* Y = Years/Anos' )
  st.write('' )
  st.write('Ex: 5D = 5 Dias, 2Y = 2 anos.' )
    
st.write("")   
# placeholder that will receive the rendered DFG of the first court body
sl_grafo_oj1 = st.empty()
with st.spinner('Só mais 1 segundo...'):
  
  # render the DFG of órgão julgador 1 and embed it as inline SVG
  gviz_oj1 = dfg_visualization.apply(dfg_oj1, log=tracefilter_log_pos_oj1, variant=dfg_metrica, parameters=parameters)
  html = render_svg(gviz_oj1.pipe().decode('utf-8'), 1200, 500)
  sl_grafo_oj1.write(html, unsafe_allow_html=True)

    
st.header(f"Órgão Julgador 2 - {sb_2_OJ}")
with st.beta_expander("Ajuda"):        
  st.write("Trib: "+sb_1_trib) 
  st.write("Classes: "+sb_1_classes)
  st.write("Órgão: "+sb_2_OJ)
  st.write("")
  st.write("Os movimentos estão no seguinte formato:")
  st.write("* CódigoNacionalCNJ:Descrição do Movimento")
  st.write("")
  st.write('Quando a métrica selecionada é o "Tempo" são utilizadas as seguintes unidades:' )
  st.write('* D = Days/Dias' )
コード例 #23
0
def filter_for_periods(detect_result, event_counts):
    """
    Split the (module-global) ``log`` into the two sub-logs delimited by the
    change periods CHOSEN_PERIOD1 / CHOSEN_PERIOD2 found in ``detect_result``,
    discover a frequency DFG per period, view each one and save them as
    ``dfg1.svg`` / ``dfg2.svg``.

    Parameters
    ----------
    detect_result
        Sequence of change-point indices delimiting the periods (1-based).
    event_counts
        Mapping whose keys are day labels in chronological order.

    Returns
    -------
    days_count1, days_count2, period_1_log, period_2_log, gviz1, gviz2

    NOTE(review): reads the module-level ``log`` plus the CHOSEN_PERIOD1 /
    CHOSEN_PERIOD2 constants — confirm they are initialised before calling.
    """
    start_element1 = 0 if CHOSEN_PERIOD1 == 1 else detect_result[CHOSEN_PERIOD1
                                                                 - 2]
    end_element1 = detect_result[CHOSEN_PERIOD1 - 1]

    start_element2 = 0 if CHOSEN_PERIOD2 == 1 else detect_result[CHOSEN_PERIOD2
                                                                 - 2]
    end_element2 = detect_result[CHOSEN_PERIOD2 - 1]

    days = list(event_counts.keys())
    start_day1 = days[start_element1]
    end_day1 = days[end_element1 - 1]
    days_count1 = end_element1 - start_element1

    start_day2 = days[start_element2]
    end_day2 = days[end_element2 - 1]
    days_count2 = end_element2 - start_element2

    # Traces that are FULLY CONTAINED in the given timeframe
    period_1_log = timestamp_filter.filter_traces_contained(
        log, start_day1 + " 00:00:00", end_day1 + " 23:59:59")
    period_2_log = timestamp_filter.filter_traces_contained(
        log, start_day2 + " 00:00:00", end_day2 + " 23:59:59")

    # Traces that INTERSECT with the given timeframe
    # period_1_log = timestamp_filter.filter_traces_intersecting(log, start_day+" 00:00:00", end_day+" 23:59:59")

    dfg1 = dfg_discovery.apply(period_1_log)
    dfg2 = dfg_discovery.apply(period_2_log)

    gviz1 = dfg_visualization.apply(
        dfg1, log=period_1_log, variant=dfg_visualization.Variants.FREQUENCY)
    dfg_visualization.view(gviz1)

    # Saving the DFG as SVG. The format is keyed with the FREQUENCY
    # Parameters enum to match the FREQUENCY variant applied below; the
    # original keyed it with the PERFORMANCE enum, which only worked because
    # both enums share the "format" value.
    parameters = {
        dfg_visualization.Variants.FREQUENCY.value.Parameters.FORMAT: "svg"
    }
    gviz1 = dfg_visualization.apply(
        dfg1,
        log=period_1_log,
        variant=dfg_visualization.Variants.FREQUENCY,
        parameters=parameters)
    dfg_visualization.save(gviz1, "dfg1.svg")

    nodes_period1, edges_period1 = dot_to_df(gviz1)

    gviz2 = dfg_visualization.apply(
        dfg2, log=period_2_log, variant=dfg_visualization.Variants.FREQUENCY)
    dfg_visualization.view(gviz2)

    # Saving the DFG of the second period with the same parameters
    gviz2 = dfg_visualization.apply(
        dfg2,
        log=period_2_log,
        variant=dfg_visualization.Variants.FREQUENCY,
        parameters=parameters)
    dfg_visualization.save(gviz2, "dfg2.svg")

    return days_count1, days_count2, period_1_log, period_2_log, gviz1, gviz2
コード例 #24
0
def dfg_discovery_frequency(log):
    """Discover the frequency DFG of ``log`` and return its visualization
    (paired with ``None`` to match the caller's expected tuple shape)."""
    frequency_dfg = dfg_discovery.apply(log)
    gviz = dfg_visualization.apply(
        frequency_dfg, log=log,
        variant=dfg_visualization.Variants.FREQUENCY)
    return gviz, None
コード例 #25
0
def dfg_discovery_active_time(log):
    """Discover the performance (active-time) DFG of ``log`` and return its
    visualization (paired with ``None`` to match the caller's tuple shape)."""
    performance_dfg = dfg_discovery.apply(
        log, variant=dfg_discovery.Variants.PERFORMANCE)
    gviz = dfg_visualization.apply(
        performance_dfg, log=log,
        variant=dfg_visualization.Variants.PERFORMANCE)
    return gviz, None
コード例 #26
0
def discover_process_models(log_path, log_name):
    """Import an XES event log, keep the variants covering 90% of the
    traces, and run the enabled process-discovery algorithms on it.

    Each miner below is guarded by a ``run_*`` flag; only the flags set to
    True are executed (originally these were literal ``if (1 == 2)`` /
    ``if (1 == 1)`` toggles — the flags are a readability fix with
    identical behavior).

    Parameters
    ----------
    log_path : str
        Filesystem path of the XES log to import.
    log_name : str
        Currently unused; kept for backward compatibility with callers.
    """
    # Per-miner toggles (False == disabled experiment, kept for reference).
    run_im = False
    run_imf_02 = False
    run_imf_05 = True
    run_imf_08 = False
    run_imd = False
    run_alpha = False
    run_hm_05 = False
    run_hm_099 = False
    run_hm_01 = False
    run_hm_10 = False
    run_dfg = False

    custom_print('Importando log')

    log_complete = xes_importer.apply(log_path)
    # Keep only the most frequent variants that together cover 90% of traces.
    log = variants_filter.filter_log_variants_percentage(log_complete, 0.9)

    #A_ACTIVATED, A_DECLINED, A_CANCELLED
    #log = attributes_filter.apply(log_complete, ["A_ACTIVATED"], parameters={attributes_filter.Parameters.ATTRIBUTE_KEY: "concept:name", attributes_filter.Parameters.POSITIVE: True})

    custom_print('Log importado')

    if run_im:
        #Inductive Miner
        custom_print('Iniciando Inductive Miner')

        parameters = {
            inductive_miner.Variants.IM.value.Parameters.CASE_ID_KEY:
            'case:concept:name',
            inductive_miner.Variants.IM.value.Parameters.TIMESTAMP_KEY:
            'time:timestamp'
        }
        variant = inductive_miner.Variants.IM

        petrinet = inductive_miner.apply(log,
                                         parameters=parameters,
                                         variant=variant)
        print_statistics(petrinet[0], 'IM')

        custom_print('Inductive Miner finalizado\n')

    if run_imf_02:
        #Inductive Miner Infrequent 0.2
        custom_print('Iniciando Inductive Miner Infrequent 0.2')

        parameters = {
            inductive_miner.Variants.IMf.value.Parameters.NOISE_THRESHOLD:
            0.2,
            inductive_miner.Variants.IMf.value.Parameters.CASE_ID_KEY:
            'case:concept:name',
            inductive_miner.Variants.IMf.value.Parameters.TIMESTAMP_KEY:
            'time:timestamp'
        }
        variant = inductive_miner.Variants.IMf

        petrinet = inductive_miner.apply(log,
                                         parameters=parameters,
                                         variant=variant)
        print_statistics(petrinet[0], 'IMf0.2')

        custom_print('Inductive Miner Infrequent 0.2 finalizado\n')

    if run_imf_05:
        #Inductive Miner Infrequent 0.5
        custom_print('Iniciando Inductive Miner Infrequent 0.5')

        parameters = {
            inductive_miner.Variants.IMf.value.Parameters.NOISE_THRESHOLD:
            0.5,
            inductive_miner.Variants.IMf.value.Parameters.CASE_ID_KEY:
            'case:concept:name',
            inductive_miner.Variants.IMf.value.Parameters.TIMESTAMP_KEY:
            'time:timestamp'
        }
        variant = inductive_miner.Variants.IMf

        petrinet, initial_marking, final_marking = inductive_miner.apply(
            log, parameters=parameters, variant=variant)
        print_statistics(petrinet, 'IMf0.5')

        custom_print('Inductive Miner Infrequent 0.5 finalizado\n')

        # Additionally export the reachability graph (PNG) and the Petri
        # net (PNML) for the IMf0.5 model only.
        ts = reachability_graph.construct_reachability_graph(
            petrinet, initial_marking)
        gviz = ts_visualizer.apply(
            ts,
            parameters={
                ts_visualizer.Variants.VIEW_BASED.value.Parameters.FORMAT:
                "png"
            })
        gviz.render('petrinets/simple-reach', cleanup=True)

        pnml_exporter.apply(petrinet, initial_marking,
                            "petrinets/simple-petri.pnml")

    if run_imf_08:
        #Inductive Miner Infrequent 0.8
        custom_print('Iniciando Inductive Miner Infrequent 0.8')

        parameters = {
            inductive_miner.Variants.IMf.value.Parameters.NOISE_THRESHOLD:
            0.8,
            inductive_miner.Variants.IMf.value.Parameters.CASE_ID_KEY:
            'case:concept:name',
            inductive_miner.Variants.IMf.value.Parameters.TIMESTAMP_KEY:
            'time:timestamp'
        }
        variant = inductive_miner.Variants.IMf

        petrinet = inductive_miner.apply(log,
                                         parameters=parameters,
                                         variant=variant)
        print_statistics(petrinet[0], 'IMf0.8')

        custom_print('Inductive Miner Infrequent 0.8 finalizado\n')

    if run_imd:
        #Inductive Miner Directly-Follows
        custom_print('Iniciando Inductive Miner Directly-Follows')

        parameters = {
            inductive_miner.Variants.IMd.value.Parameters.CASE_ID_KEY:
            'case:concept:name',
            inductive_miner.Variants.IMd.value.Parameters.TIMESTAMP_KEY:
            'time:timestamp'
        }
        variant = inductive_miner.Variants.IMd

        petrinet = inductive_miner.apply(log,
                                         parameters=parameters,
                                         variant=variant)
        print_statistics(petrinet[0], 'IMd')

        custom_print('Inductive Miner Infrequent Directly-Follows\n')

    if run_alpha:
        #Alpha Miner
        custom_print('Iniciando Alpha Miner')

        parameters = {}
        variant = alpha_miner.Variants.ALPHA_VERSION_CLASSIC

        petrinet = alpha_miner.apply(log,
                                     parameters=parameters,
                                     variant=variant)
        print_statistics(petrinet[0], 'Alpha')

        custom_print('Alpha Miner finalizado\n')

    if run_hm_05:
        #Heuristic Miner 0.5
        custom_print('Iniciando Heuristic Miner 0.5')

        parameters = {
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH:
            0.5
        }

        petrinet = heuristics_miner.apply(log, parameters=parameters)
        print_statistics(petrinet[0], 'HM0.5')

        custom_print('Heuristic Miner 0.5 finalizado\n')

    if run_hm_099:
        #Heuristic Miner 0.99
        custom_print('Iniciando Heuristic Miner 0.99')

        parameters = {
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH:
            0.99
        }

        petrinet = heuristics_miner.apply(log, parameters=parameters)
        print_statistics(petrinet[0], 'HM0.99')

        custom_print('Heuristic Miner 0.99 finalizado\n')

    if run_hm_01:
        #Heuristic Miner 0.1
        custom_print('Iniciando Heuristic Miner 0.1')

        parameters = {
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH:
            0.1
        }

        petrinet = heuristics_miner.apply(log, parameters=parameters)
        print_statistics(petrinet[0], 'HM0.1')

        custom_print('Heuristic Miner 0.1 finalizado\n')

    if run_hm_10:
        #Heuristic Miner 1.0
        custom_print('Iniciando Heuristic Miner 1.0')

        parameters = {
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH:
            1.0
        }

        petrinet = heuristics_miner.apply(log, parameters=parameters)
        print_statistics(petrinet[0], 'HM1.0')

        custom_print('Heuristic Miner 1.0 finalizado\n')

    if run_dfg:
        #DFG
        custom_print('Iniciando DFG')

        dfg = dfg_discovery.apply(log)
        parameters = {
            dfg_visualization.Variants.FREQUENCY.value.Parameters.FORMAT: 'png'
        }
        gviz = dfg_visualization.apply(
            dfg,
            log=log,
            variant=dfg_visualization.Variants.FREQUENCY,
            parameters=parameters)
        dfg_visualization.save(gviz, 'petrinets/simple-DFG.png')

        custom_print('DFG finalizado\n')
コード例 #27
0
# Script: build and display a frequency DFG from a pandas event-log
# dataframe. NOTE(review): `dataset` is not defined here — presumably it is
# injected by the hosting environment (the comment below mentions PowerBI);
# verify before reusing this snippet standalone.
import pandas as pd

# this part is required because the dataframe provided by PowerBI has strings
dataset["time:timestamp"] = pd.to_datetime(dataset["time:timestamp"])

from pm4py.algo.discovery.dfg.adapters.pandas import df_statistics

# Directly-follows graph with frequency measure, straight from the dataframe.
dfg = df_statistics.get_dfg_graph(dataset, measure="frequency")

from pm4py.statistics.attributes.pandas import get as attributes_get

# Per-activity event counts, used to decorate the DFG nodes.
activities_count = attributes_get.get_attribute_values(dataset, "concept:name")

from pm4py.statistics.start_activities.pandas import get as sa_get

start_activities = sa_get.get_start_activities(dataset)
from pm4py.statistics.end_activities.pandas import get as ea_get

end_activities = ea_get.get_end_activities(dataset)

from pm4py.visualization.dfg import visualizer

# Render the frequency DFG, marking start/end activities, via matplotlib
# (matplotlib_view instead of view — suits embedded environments).
gviz = visualizer.apply(dfg,
                        activities_count=activities_count,
                        variant=visualizer.Variants.FREQUENCY,
                        parameters={
                            "start_activities": start_activities,
                            "end_activities": end_activities
                        })
visualizer.matplotlib_view(gviz)
コード例 #28
0
def apply_filter(req):
	"""Django view: apply up to five user-selected filters to an event log.

	The request body (JSON) carries interval lists ``filter1``..``filter5``
	(time, variants, performance, activities, additional attribute), the
	attribute name ``filter5attribute``, a visualization choice
	(``dfgf`` / ``dfgp`` / heuristics net) and a ``distance`` flag.
	The kept traces (l1) and the discarded traces (l2) are each rendered
	to PNG and exported to XES under ``webapp/static``; the response JSON
	reports per-stage timings, trace counts and (optionally) the average
	Levenshtein distance between l1 and l2.

	Returns an ``HttpResponse`` with status 200 in all cases, including
	validation errors (overlapping intervals), which are reported via an
	``error`` key in the JSON body.
	"""
	# Track session activity; sessions expire after 2 hours.
	sessions[req.session["id"]] = datetime.now()
	# Each filter is assumed active until its interval list turns out empty.
	filters = {
		"time": True,
		"variants": True,
		"performance": True,
		"activities": True,
		"attribute": True
	}
	req.session.set_expiry(7200)
	#print(str(req.body))
	o = json.loads(req.body)
	print(str(o))
	# --- Parse the five interval lists from the request body -------------
	custom_time_range = []
	for pair in o["filter1"]:
		#custom_time_range.append((dateutil.parser.parse(pair[0]),dateutil.parser.parse(pair[1])))
		custom_time_range.append((pair[0],pair[1]))
	if o["filter1"] == []:
		filters["time"] = False
	#print(o["filter1"][0])
	#print(custom_time_range[0][0])
	#print(custom_time_range)
	custom_path_range = []
	for pair in o["filter2"]:
		custom_path_range.append((float(pair[0]),float(pair[1])))
	if o["filter2"] == []:
		filters["variants"] = False
		#custom_path_range = [(0,1)] #filter2
	custom_performance_range = []
	for pair in o["filter3"]:
		custom_performance_range.append((float(pair[0]),float(pair[1])))
	if o["filter3"] == []:
		filters["performance"] = False
	custom_activitiy_range = []
	for pair in o["filter4"]:
		custom_activitiy_range.append((float(pair[0]),float(pair[1])))
	if o["filter4"] == []:
		filters["activities"] = False
		#custom_activitiy_range = [(0,1)] #filter3
	custom_attribute_range = []
	for pair in o["filter5"]:
		custom_attribute_range.append((float(pair[0]),float(pair[1])))
	if o["filter5"] == [] or o["filter5attribute"] == "Empty":
		filters["attribute"] = False
	additional_attribute = o["filter5attribute"]

	selected_viz = o["visualization"]
	calc_lev = o["distance"]
	# NOTE(review): the per-session input file is commented out; a fixed
	# sepsis.xes is loaded instead — presumably a demo/debug setting.
	#input_file = os.path.join("webapp","static", req.session["id"] + "_l0.xes")
	input_file = os.path.join("webapp","static", "sepsis.xes")
	input_log = xes_importer.apply(input_file)
	# not_filtered_logs collects, per stage, the traces/events removed there.
	not_filtered_logs = {}
	flatten = lambda l: [item for sublist in l for item in sublist]

	time_timestamp_started = datetime.now()
	# --- Stage 1: timestamp filter (traces fully contained in ranges) ----
	if filters["time"]:
		#TODO check overlapping for filter
		custom_time_range = sorted(custom_time_range, reverse=False)
		# Reject overlapping time intervals.
		for i in range(0,len(custom_time_range)-1):
			if(custom_time_range[i][1] > custom_time_range[i+1][0]):
				response = HttpResponse(json.dumps({'error': "Wrong intervals for time filter"}))
				response.status_code = 200
				return response
				#raise ValueError("Overlapping time ranges")

		logs = []
		for (x,y) in custom_time_range:
			logs.append(timestamp_filter.filter_traces_contained(input_log, x, y))

		#log = timestamp_filter.filter_traces_contained(input_log, custom_time_range[0][0], custom_time_range[0][1])
		# Merge the per-interval slices into one log.
		log = pm4py.objects.log.log.EventLog()
		for timeslice in logs:
			for trace in timeslice:
				log.append(trace)
		print(len(input_log))
		print(len(log))
		#l2
		not_filtered_logs["timestamp_filter"] = pm4py.objects.log.log.EventLog()
		for trace in input_log:
			if trace not in log:
				not_filtered_logs["timestamp_filter"].append(trace)
		print(len(not_filtered_logs["timestamp_filter"]))
	else:
		log = input_log

	time_variants_started = datetime.now() # where should I start?

	# --- Stage 2: variants filter (keep ranges of the variant ranking) ---
	if filters["variants"]:
		variants = variants_filter.get_variants(log)
		variants_count = case_statistics.get_variant_statistics(log)
		# Rank variants by ascending frequency; ranges index this ranking.
		variants_count = sorted(variants_count, key=lambda x: x['count'], reverse=False)

		custom_path_range = sorted(custom_path_range, reverse=False)
		# check overlapping
		for i in range(0,len(custom_path_range)-1):
			if(custom_path_range[i][1] > custom_path_range[i+1][0]):
				response = HttpResponse(json.dumps({'error': "Wrong intervals for variants filter"}))
				response.status_code = 200
				return response
				#raise ValueError("Overlapping variants ranges")

		nr_variants = len(variants_count)
		# NOTE(review): the next statement has no effect (result discarded)
		# — presumably leftover from an earlier scaling approach; verify.
		custom_path_range * nr_variants
		# Map fractional [x, y] ranges to index ranges over the ranking.
		idx = [(math.floor(x*nr_variants), math.ceil(y*nr_variants)) for (x,y) in custom_path_range]
		variants_subset = [variants_count[x:y+1] for (x,y) in idx]
		variants_subset = flatten(variants_subset)
		filtered_variants = {k:v for k,v in variants.items() if k in [x["variant"] for x in variants_subset]}
		#l2
		not_filtered_variants = {k:v for k,v in variants.items() if k not in [x["variant"] for x in variants_subset]}

		filtered_log = variants_filter.apply(log, filtered_variants)
		#l2
		not_filtered_logs["variant_filter"] = variants_filter.apply(log, not_filtered_variants)
	else:
		filtered_log = log

	time_variants_finished = datetime.now() # note: incl log2 generation

	# --- Stage 3: performance filter (ranges of case throughput time) ----
	if filters["performance"]:
		custom_performance_range = sorted(custom_performance_range, reverse=False)
		# check overlapping
		for i in range(0,len(custom_performance_range)-1):
			if(custom_performance_range[i][1] > custom_performance_range[i+1][0]):
				response = HttpResponse(json.dumps({'error': "Wrong intervals for performance filter"}))
				response.status_code = 200
				return response
				#raise ValueError("Overlapping performance ranges")

		#all_case_durations = case_statistics.get_all_casedurations(log, parameters={case_statistics.Parameters.TIMESTAMP_KEY: "time:timestamp"})
		#case_filter.filter_case_performance(log, 86400, 864000)
		# Annotate every trace with its throughput (last minus first event
		# timestamp, in seconds) and rank the values.
		performances = []
		for i in range(len(filtered_log)):
			filtered_log[i].attributes["throughput"] = (max([event["time:timestamp"]for event in filtered_log[i]])-min([event["time:timestamp"] for event in filtered_log[i]])).total_seconds()
			performances.append(filtered_log[i].attributes["throughput"])

		nr_cases = len(filtered_log)
		performances = sorted(performances, reverse=False)
		idx = [(math.floor(x*nr_cases), math.ceil(y*nr_cases)) for (x,y) in custom_performance_range]
		perf_subset = [performances[x:y+1] for (x,y) in idx]
		perf_subset = flatten(perf_subset)

		performance_log = pm4py.objects.log.log.EventLog([trace for trace in filtered_log if trace.attributes["throughput"] in perf_subset])
		#l2
		not_filtered_logs["performance_filter"] = pm4py.objects.log.log.EventLog([trace for trace in filtered_log if trace.attributes["throughput"] not in perf_subset])
		#print(str(len(not_filtered_logs["performance_filter"])))

	else:
		performance_log = filtered_log

	time_performance_finished = datetime.now()

	# --- Stage 4: activities filter (ranges of the activity ranking) -----
	if filters["activities"]:
		variants = variants_filter.get_variants(performance_log)
		variants_count = case_statistics.get_variant_statistics(performance_log)
		variants_count = sorted(variants_count, key=lambda x: x['count'], reverse=False)

		# Count activity occurrences by summing the counts of the variants
		# (comma-separated activity strings) they appear in.
		activities = dict()
		for variant in variants_count:
			for activity in variant["variant"].split(","):
				if (activity not in activities.keys()):
					activities[activity] = variant["count"]
				else:
					activities[activity] += variant["count"]

		sorted_activities = {k: v for k, v in sorted(activities.items(), key=lambda item: item[1])}
		activities_sorted_list = list(sorted_activities)
		custom_activitiy_range = sorted(custom_activitiy_range, reverse=False)
		# check overlapping
		for i in range(0,len(custom_activitiy_range)-1):
			if(custom_activitiy_range[i][1] > custom_activitiy_range[i+1][0]):
				response = HttpResponse(json.dumps({'error': "Wrong intervals for activities filter"}))
				response.status_code = 200
				return response
				#raise ValueError("Overlapping activities ranges")
		nr_activities = len(activities_sorted_list)
		idx = [(math.floor(x*nr_activities), math.ceil(y*nr_activities)) for (x,y) in custom_activitiy_range]
		activities_to_keep = [activities_sorted_list[x:y+1] for (x,y) in idx]
		activities_to_keep = flatten(activities_to_keep)
		# Keep every variant containing at least one activity to keep.
		variants_idx = []
		for i in range(len(variants_count)):
			for activity in activities_to_keep:
				if (activity in variants_count[i]["variant"].split(",") and (i not in variants_idx)):
					variants_idx.append(i)
		variants_subset = [variants_count[i] for i in variants_idx]
		filtered_variants = {k:v for k,v in variants.items() if k in [x["variant"] for x in variants_subset]}
		#l2
		not_filtered_variants = {k:v for k,v in variants.items() if k not in [x["variant"] for x in variants_subset]}

		filtered_log = variants_filter.apply(performance_log, filtered_variants)

		#l2
		not_filtered_logs["activities_filter"] = variants_filter.apply(performance_log, not_filtered_variants)

		# Then drop individual events whose activity is not kept; removed
		# events are accumulated on the l2 side.
		new_log = pm4py.objects.log.log.EventLog()
		#not_filtered_logs["activities_filter_traces"] = pm4py.objects.log.log.EventLog()
		for trace in filtered_log:
			new_trace = pm4py.objects.log.log.Trace()
			not_new_trace = pm4py.objects.log.log.Trace()
			for event in trace:
				if(event['concept:name'] in activities_to_keep):
					new_trace.append(event)
				else:
					not_new_trace.append(event)
			if(len(new_trace)>0):
				new_log.append(new_trace)
			if(len(not_new_trace)>0):
				not_filtered_logs["activities_filter"].append(not_new_trace)
	else:
		new_log = performance_log

	time_activities_finished = datetime.now()

	# --- Stage 5: additional-attribute filter -----------------------------
	# Works on a trace attribute when present, else on an event attribute;
	# numeric values are range-matched directly, string values are ranked
	# by frequency and range-matched on the ranking.
	if filters["attribute"]:
		custom_attribute_range = sorted(custom_attribute_range, reverse=False)
		# check overlapping
		for i in range(0,len(custom_attribute_range)-1):
			if(custom_attribute_range[i][1] > custom_attribute_range[i+1][0]):
				response = HttpResponse(json.dumps({'error': "Wrong intervals for additional attribute filter"}))
				response.status_code = 200
				return response

		newest_log = pm4py.objects.log.log.EventLog()
		not_filtered_logs["additional_filter"] = pm4py.objects.log.log.EventLog()

		traces_with_attr = []
		not_traces_with_attr = []
		for trace in new_log:
			if additional_attribute in trace.attributes.keys():
				traces_with_attr.append(trace)
			else:
				not_traces_with_attr.append(trace)
		#check if trace attribute
		if len(traces_with_attr)>0:
			#check if numeric
			if type(traces_with_attr[0].attributes[additional_attribute]) in [int, float]:
				for trace in traces_with_attr:
					if any([trace.attributes[additional_attribute] >= x and trace.attributes[additional_attribute] <= y for (x,y) in custom_attribute_range]):
						newest_log.append(trace)
					else:
						not_filtered_logs["additional_filter"].append(trace)
				for trace in not_traces_with_attr:
					not_filtered_logs["additional_filter"].append(trace)
			else: #string
				attribute_frequencies = dict()
				for trace in traces_with_attr:
					if trace.attributes[additional_attribute] not in attribute_frequencies.keys():
						attribute_frequencies[trace.attributes[additional_attribute]] = 0
					attribute_frequencies[trace.attributes[additional_attribute]] += 1

				sorted_frequencies = {k: v for k, v in sorted(attribute_frequencies.items(), key=lambda item: item[1])}
				frequencies_sorted_list = list(sorted_frequencies)

				nr_values = len(frequencies_sorted_list)
				idx = [(math.floor(x*nr_values), math.ceil(y*nr_values)) for (x,y) in custom_attribute_range]
				values_to_keep = [frequencies_sorted_list[x:y+1] for (x,y) in idx]
				values_to_keep = flatten(values_to_keep)

				for trace in traces_with_attr:
					if trace.attributes[additional_attribute] in values_to_keep:
						newest_log.append(trace)
					else:
						not_filtered_logs["additional_filter"].append(trace)
				for trace in not_traces_with_attr:
					not_filtered_logs["additional_filter"].append(trace)

		else: #event attribute
			# NOTE(review): assumes at least one event in new_log carries the
			# attribute — the [0] below raises IndexError otherwise; verify.
			if [type(event[additional_attribute]) for trace in new_log for event in trace if additional_attribute in event.keys()][0] in [int, float]:
				for trace in new_log:
					new_trace = pm4py.objects.log.log.Trace()
					not_new_trace = pm4py.objects.log.log.Trace()
					for event in trace:
						if(additional_attribute in event.keys() and any([event[additional_attribute] >= x and event[additional_attribute] <= y for (x,y) in custom_attribute_range ])):
							new_trace.append(event)
						else:
							not_new_trace.append(event)
					if(len(new_trace)>0):
						newest_log.append(new_trace)
					if(len(not_new_trace)>0):
						not_filtered_logs["additional_filter"].append(not_new_trace)
			else: #string
				attribute_frequencies = dict()
				for trace in new_log:
					for event in trace:
						if additional_attribute in event.keys():
							if event[additional_attribute] not in attribute_frequencies.keys():
								attribute_frequencies[event[additional_attribute]] = 0
							attribute_frequencies[event[additional_attribute]] += 1

				sorted_frequencies = {k: v for k, v in sorted(attribute_frequencies.items(), key=lambda item: item[1])}
				frequencies_sorted_list = list(sorted_frequencies)

				nr_values = len(frequencies_sorted_list)
				idx = [(math.floor(x*nr_values), math.ceil(y*nr_values)) for (x,y) in custom_attribute_range]
				values_to_keep = [frequencies_sorted_list[x:y+1] for (x,y) in idx]
				values_to_keep = flatten(values_to_keep)

				for trace in new_log:
					new_trace = pm4py.objects.log.log.Trace()
					not_new_trace = pm4py.objects.log.log.Trace()
					for event in trace:
						if(additional_attribute in event.keys() and event[additional_attribute] in values_to_keep):
							new_trace.append(event)
						else:
							not_new_trace.append(event)
					if(len(new_trace)>0):
						newest_log.append(new_trace)
					if(len(not_new_trace)>0):
						not_filtered_logs["additional_filter"].append(not_new_trace)


	else:
		newest_log = new_log

	time_attribute_finished = datetime.now()

	# --- Render and export the kept log (l1) ------------------------------
	if(selected_viz=="dfgf"):
		dfg = dfg_discovery.apply(newest_log)
		gviz = dfg_visualization.apply(dfg, log=newest_log, variant=dfg_visualization.Variants.FREQUENCY)
		dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l1.png"))
	elif(selected_viz=="dfgp"):
		dfg = dfg_discovery.apply(newest_log)
		gviz = dfg_visualization.apply(dfg, log=newest_log, variant=dfg_visualization.Variants.PERFORMANCE)
		dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l1.png"))
	else:
		heu_net = heuristics_miner.apply_heu(newest_log, parameters={"dependency_thresh": 0.99})
		gviz = hn_vis_factory.apply(heu_net)
		hn_vis_factory.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l1.png"))

	xes_exporter.apply(newest_log, os.path.join("webapp","static", req.session["id"] + "_l1.xes"))


	#l2
	# --- Assemble, render and export the discarded log (l2) ---------------
	not_filtered_log = pm4py.objects.log.log.EventLog()
	for part in not_filtered_logs.keys():
		for trace in not_filtered_logs[part]:
			not_filtered_log.append(trace)

	if(selected_viz=="dfgf"):
		dfg = dfg_discovery.apply(not_filtered_log)
		gviz = dfg_visualization.apply(dfg, log=not_filtered_log, variant=dfg_visualization.Variants.FREQUENCY)
		dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l2.png"))
	elif(selected_viz=="dfgp"):
		dfg = dfg_discovery.apply(not_filtered_log)
		gviz = dfg_visualization.apply(dfg, log=not_filtered_log, variant=dfg_visualization.Variants.PERFORMANCE)
		dfg_visualization.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l2.png"))
	else:
		heu_net = heuristics_miner.apply_heu(not_filtered_log, parameters={"dependency_thresh": 0.99})
		gviz = hn_vis_factory.apply(heu_net)
		hn_vis_factory.save(gviz, os.path.join("webapp","static", req.session["id"] + "_l2.png"))
	xes_exporter.apply(not_filtered_log, os.path.join("webapp","static", req.session["id"] + "_l2.xes"))

	# --- Optional: mean pairwise Levenshtein distance between l1 and l2 ---
	# Traces are first encoded as sequences of activity-name hashes.
	if(calc_lev):
		lev_new = [0]*len(newest_log)
		for i in range(len(newest_log)):
			lev_new[i] = [hash(event['concept:name']) for event in newest_log[i]]

		lev_not = [0]*len(not_filtered_log)
		for i in range(len(not_filtered_log)):
			lev_not[i] = [hash(event['concept:name']) for event in not_filtered_log[i]]

		distances = []
		for i in range(len(lev_new)):
			for j in range(len(lev_not)):
				distances.append(lev_dist(lev_new[i], lev_not[j]))
		lev_d = sum(distances)/len(distances)
		print("Levenshtein's distance: "+str(lev_d))
	else:
		lev_d = "null"

	used_paths = 0
	for lower, higher in custom_path_range:
		used_paths += round((higher-lower)*100)
	print(f"Using {used_paths}% of paths. {100-used_paths}% of paths are discarded.")

	print("Timestamp filter: {} seconds. \nVariants filter: {} seconds. \nPerformance filter: {} seconds. \nActivities filter: {} seconds. \nAttribute filter: {} seconds.".format((time_variants_started - time_timestamp_started).total_seconds(), (time_variants_finished - time_variants_started).total_seconds(), (time_performance_finished - time_variants_finished).total_seconds(), (time_activities_finished - time_performance_finished).total_seconds(), (time_attribute_finished - time_activities_finished).total_seconds()))
	response = HttpResponse(json.dumps({'time':(time_variants_started - time_timestamp_started).total_seconds(), 'variants':(time_variants_finished - time_variants_started).total_seconds(),'performance':(time_performance_finished - time_variants_finished).total_seconds(), 'activities':(time_activities_finished - time_performance_finished).total_seconds(), 'attribute':(time_attribute_finished - time_activities_finished).total_seconds(), 'traces':[len(newest_log), len(not_filtered_log)], 'distance':lev_d}))
	response.status_code = 200
	return response
コード例 #29
0
# Script: run the correlation miner on a pm4py-distributed service and view
# the resulting DFG. NOTE(review): requires a pm4pydistr service listening on
# 127.0.0.1:5001 with keyphrase "hello" and the "receipt" log loaded — verify
# the deployment before running.
from pm4pydistr.remote_wrapper import factory as wrapper_factory
from pm4py.visualization.dfg import visualizer as dfg_visualizer

wrapper = wrapper_factory.apply("127.0.0.1", "5001", "hello", "receipt")
# Mine correlations, ignoring activities occurring fewer than 100 times.
dfg, performance_dfg, activities_counter = wrapper.correlation_miner(parameters={"min_act_freq": 100})
gviz = dfg_visualizer.apply(dfg, activities_count=activities_counter, parameters={"format": "svg"})
dfg_visualizer.view(gviz)
コード例 #30
0
# Script fragment: draw a networkx graph colored by community partition,
# report modularity, run Girvan-Newman community detection, then view a DFG.
# NOTE(review): G, pos, partition, cmap, graphLearning, graph, dfg and
# ex1_personal_log_1_converted are defined elsewhere — verify before reuse.
nx.draw_networkx_nodes(G,
                       pos,
                       partition.keys(),
                       node_size=40,
                       cmap=cmap,
                       node_color=list(partition.values()))
nx.draw_networkx_edges(G, pos, alpha=0.5)
plt.show()

# Modularity score of the Louvain partition.
community_louvain.modularity(partition, G)

girvannewman = graphLearning.community_dection_graph(graph, mst=False)

from pm4py.visualization.dfg import visualizer as dfg_visualization
# Frequency-decorated DFG view of the converted personal log.
gviz = dfg_visualization.apply(dfg,
                               log=ex1_personal_log_1_converted,
                               variant=dfg_visualization.Variants.FREQUENCY)
dfg_visualization.view(gviz)


def fixDfg(dfg,
           activityList=[
               'Read_Labsheet', 'Read_Lecture_Note', 'Excercise',
               'Check_solution'
           ]):
    result = {}
    transitionList = []
    for i in activityList:
        for j in activityList:
            transitionList.append((i, j))
    for t in transitionList: