Code Example #1
File: process_mining.py Project: bpm-diag/smartRPA
    def save_dfg(self, name="DFG", high_level=False):
        """
        Save DFG to file

        :param name: optional name of dfg file
        :param high_level: generate high level dfg
        """
        dfg, log = self._createDFG()
        parameters = self._createImageParameters(log=log, high_level=high_level)
        if high_level:
            gviz = dfg_vis_factory.apply(dfg, log=log, variant="frequency", parameters=parameters)
        else:
            gviz = dfg_vis_factory.apply(dfg, log=self._log, variant="frequency", parameters=parameters)
        self._create_image(gviz, name)
Code Example #2
def execute_script():
    log_input_directory = "xesinput"
    all_logs_names = os.listdir(log_input_directory)
    all_logs_names = [log for log in all_logs_names if ".xe" in log]

    for logName in all_logs_names:
        # logPath = os.path.join("..", "tests", "inputData", logName)
        log_path = log_input_directory + "\\" + logName
        log = xes_importer.import_log(log_path)
        print("\n\n")
        print("log loaded")
        print("Number of traces - ", len(log))
        event_log = log_conv_fact.apply(log,
                                        variant=log_conv_fact.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
        exp_log_name = "xescert_exportlogs" + "\\" + "exp_" + logName
        print("exporting log", exp_log_name)
        xes_exporter.export_log(log, exp_log_name)
        print("exported log", exp_log_name)

        log, classifier_attr_key = insert_classifier.search_act_class_attr(log)

        classifiers = list(log.classifiers.keys())
        if classifier_attr_key is None and classifiers:
            try:
                print(classifiers)
                log, classifier_attr_key = insert_classifier.insert_activity_classifier_attribute(
                    log, classifiers[0])
                print(classifier_attr_key)
            except Exception:
                print("exception in handling classifier")

        if classifier_attr_key is None:
            classifier_attr_key = "concept:name"

        if len(event_log) > 0 and classifier_attr_key in event_log[0]:
            parameters = {
                constants.PARAMETER_CONSTANT_ACTIVITY_KEY: classifier_attr_key
            }

            dfg = dfg_factory.apply(log, parameters=parameters)
            gviz = dfg_vis_factory.apply(dfg,
                                         log=log,
                                         variant="frequency",
                                         parameters=parameters)
            # dfg_vis_factory.view(gviz)

            dfg_vis_factory.save(
                gviz, "xescert_images\\" + logName.replace("xes", "png"))

        print("Reimporting log file just exported - ", exp_log_name)

        log = xes_importer.import_log(exp_log_name)
        print("log loaded", exp_log_name)
        print("Number of traces - ", len(log))
        event_log = log_conv_fact.apply(log,
                                        variant=log_conv_fact.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
Code Example #3
def bot_dfg(chat_id):
    log = get_current_log(chat_id)
    dfg = dfg_factory.apply(log)
    gviz = dfg_vis_factory.apply(dfg, log=log, variant="frequency")
    # use a ".png" suffix (with the dot) so the temporary file gets a proper extension
    new_file, filename = tempfile.mkstemp(suffix=".png")
    dfg_vis_factory.save(gviz, filename)
    return filename
Code Example #4
def execute_script():
    log_path = os.path.join("..", "tests", "input_data", "running-example.xes")
    log = xes_importer.apply(log_path)
    dfg = dfg_factory.apply(log)
    dfg_gv = dfg_vis_fact.apply(dfg, log, parameters={"format": "svg"})
    dfg_vis_fact.view(dfg_gv)
    net, im, fm = dfg_conv_factory.apply(dfg)
    gviz = pn_vis_factory.apply(net, im, fm, parameters={"format": "svg"})
    pn_vis_factory.view(gviz)
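
A minimal sketch of the same pipeline that saves the rendered graphs instead of opening a viewer, reusing the imports and aliases of the snippet above; the output file names are placeholders:

def execute_script_save():
    log_path = os.path.join("..", "tests", "input_data", "running-example.xes")
    log = xes_importer.apply(log_path)
    dfg = dfg_factory.apply(log)
    # save the DFG rendering as SVG (placeholder file name)
    dfg_gv = dfg_vis_fact.apply(dfg, log, parameters={"format": "svg"})
    dfg_vis_fact.save(dfg_gv, "running-example_dfg.svg")
    # convert the DFG into a Petri net and save that rendering as well
    net, im, fm = dfg_conv_factory.apply(dfg)
    gviz = pn_vis_factory.apply(net, im, fm, parameters={"format": "svg"})
    pn_vis_factory.save(gviz, "running-example_petri.svg")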
Code Example #5
File: visualizing_dfg.py Project: Elkoumy/amun
def draw_DFG(dfg):
    parameters = {"format": "svg"}
    gviz = dfg_vis_factory.apply(dfg,
                                 variant="performance",
                                 parameters=parameters)
    # log = xes_importer.import_log(r"")
    # dfg = dfg_factory.apply(log)
    # gviz = dfg_vis_factory.apply(dfg, log=log, variant="performance", parameters=parameters)
    dfg_vis_factory.save(gviz, "dfg.svg")
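
A possible way to call draw_DFG, assuming the old factory-style pm4py imports used elsewhere on this page; the XES path is a placeholder:

from pm4py.objects.log.importer.xes import factory as xes_importer
from pm4py.algo.discovery.dfg import factory as dfg_factory

log = xes_importer.import_log("event_log.xes")       # placeholder path
dfg = dfg_factory.apply(log, variant="performance")  # performance DFG, as drawn above
draw_DFG(dfg)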
Code Example #6
def uselog(loginput):
    log = xes_import_factory.apply(loginput)
    log = sorting.sort_timestamp(log)
    # print(log)
    dfg = dfg_factory.apply(log)
    dfg_gv = dfg_vis_fact.apply(dfg, log, parameters={"format": "svg"})
    this_data = dfg_to_g6.dfg_to_g6(dfg)

    # dfg_vis_fact.view(dfg_gv)
    return this_data
    # grouplist = get_groups(log)
Code Example #7
def visualize_dfg(log, filename):
    """
    Visualizes an event log as a DFG
    :param log: event log that will be visualized
    :param filename: filename for the created DFG
    """
    dfg = dfg_factory.apply(log)
    parameters = {"format": "svg"}
    gviz = dfg_vis_factory.apply(dfg,
                                 log=log,
                                 parameters=parameters,
                                 variant='frequency')
    dfg_vis_factory.save(gviz, filename)
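
A short usage sketch for visualize_dfg, assuming the factory-style XES importer used in the other examples on this page; file names are placeholders:

from pm4py.objects.log.importer.xes import factory as xes_import_factory

log = xes_import_factory.apply("event_log.xes")   # placeholder path
visualize_dfg(log, "event_log_dfg.svg")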
Code Example #8
def Hueristics(file):
    #import os
    from pm4py.objects.log.importer.xes import factory as xes_importer
    log = xes_importer.import_log(file)

    from pm4py.algo.discovery.dfg import factory as dfg_factory
    dfg = dfg_factory.apply(log)

    from pm4py.visualization.dfg import factory as dfg_vis_factory
    gviz = dfg_vis_factory.apply(dfg, log=log, variant="frequency")
    location = "/mnt/c/Users/harim/Downloads/dfg.png"
    dfg_vis_factory.save(gviz, location)
    return location
Code Example #9
File: process_mining.py Project: bpm-diag/smartRPA
 def highLevelDFG(self):
     """
     Create a high-level DFG of the entire process
     """
     try:
         df, log, parameters = modules.eventAbstraction.aggregateData(self.dataframe, remove_duplicates=False)
         dfg = dfg_factory.apply(log, variant="frequency", parameters=parameters)
         gviz_parameters = self._createImageParameters(log=log, high_level=True)
         gviz = dfg_vis_factory.apply(dfg, log=log, variant="frequency", parameters=gviz_parameters)
         self._create_image(gviz, "DFG_model")
     except Exception as e:
         print(f"[PROCESS MINING] Could not create DFG: {e}")
         return False
Code Example #10
File: utils.py Project: madhubs08/UIFramework_pm4py
def generate_process_model(log):
    '''
    Description: to generate graphical process model in
                .svg format using pm4py library function
    Used: generate a process model from the provided log
    Input: log file
    Output: Display process model
    '''

    dfg = dfg_factory.apply(log)
    '''To decorate DFG with the frequency of activities'''
    gviz = dfg_vis_factory.apply(dfg, log=log, variant="frequency")
    dfg_vis_factory.view(gviz)
    return dfg
Code Example #11
File: utils.py Project: madhubs08/UIFramework_pm4py
def export_process_model(dfgModel, log, filename):
    '''
    Description: to export graphical process model in .svg format
    using pm4py library function
    Used: generate and export a process model under the provided file name
    Input: dfgModel, log file, file name
    Output: N/A
    '''

    # dfg = dfg_factory.apply(log, variant="performance")
    parameters = {"format": "svg"}
    gviz = dfg_vis_factory.apply(dfgModel,
                                 log=log,
                                 variant="frequency",
                                 parameters=parameters)
    dfg_vis_factory.save(gviz, filename)
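
The two utils.py helpers above are presumably intended to be used together; a hedged sketch (log loading and file name are placeholders):

from pm4py.objects.log.importer.xes import factory as xes_importer

log = xes_importer.apply("event_log.xes")   # placeholder path
dfg_model = generate_process_model(log)     # shows the frequency-decorated DFG
export_process_model(dfg_model, log, "process_model.svg")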
Code Example #12
def execute_script():
    # import csv & create log
    dataframe = csv_import_adapter.import_dataframe_from_path(
        datasourceMockdata(), sep=";")
    dataframe = dataframe.rename(columns={
        'coID': 'case:concept:name',
        'Activity': 'concept:name'
    })
    log = conversion_factory.apply(dataframe)

    # option 1: Directly-Follows Graph, represent frequency or performance
    parameters = {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name"}
    variant = 'frequency'
    dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
    gviz1 = dfg_vis_factory.apply(dfg,
                                  log=log,
                                  variant=variant,
                                  parameters=parameters)
    dfg_vis_factory.view(gviz1)

    # option 2: Heuristics Miner, acts on the Directly-Follows Graph, find common structures, output: Heuristic Net (.svg)
    heu_net = heuristics_miner.apply_heu(
        log,
        parameters={
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH:
            0.00
        })
    gviz2 = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "svg"})
    hn_vis.view(gviz2)

    # option 3: Petri Net based on Heuristic Miner (.png)
    net, im, fm = heuristics_miner.apply(
        log,
        parameters={
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH:
            0.00
        })
    gviz3 = petri_vis.apply(
        net,
        im,
        fm,
        parameters={
            petri_vis.Variants.WO_DECORATION.value.Parameters.FORMAT: "png"
        })
    petri_vis.view(gviz3)
Code Example #13
File: model_visualization.py Project: Elkoumy/amun
def view_model(dfg, dir=""):
    #rounding for better plot
    for key in dfg.keys():
        dfg[key] = round(dfg[key])

    parameters = {"format": "svg"}

    gviz = dfg_vis_factory.apply(dfg,
                                 variant="frequency",
                                 parameters=parameters)
    # gviz = dfg_vis_factory.apply(dfg, variant="performance", parameters=parameters)# for time
    dfg_vis_factory.save(gviz, dir + ".svg")
    drawing = svg2rlg(dir + ".svg")
    renderPDF.drawToFile(drawing, dir + ".pdf")
    return


# log = xes_import_factory.apply(r"C:\Gamal Elkoumy\PhD\OneDrive - Tartu Ülikool\Data\Data XES\Sepsis Cases - Event Log.xes")
# data=get_dataframe_from_event_stream(log)
# dfg_freq = dfg_factory.apply(log,variant="frequency")
#
# view_model(dfg_freq,r"C:\Gamal Elkoumy\PhD\OneDrive - Tartu Ülikool\Differential Privacy\source code\experiment_figures\temp")
Code Example #14
File: get_vis.py Project: zhengyuxin/pm4py-ws
def apply(dataframe, parameters=None):
    """
    Gets the performance DFG

    Parameters
    ------------
    dataframe
        Dataframe
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    if parameters is None:
        parameters = {}

    decreasingFactor = parameters[
        "decreasingFactor"] if "decreasingFactor" in parameters else constants.DEFAULT_DEC_FACTOR

    activity_key = parameters[
        pm4_constants.
        PARAMETER_CONSTANT_ACTIVITY_KEY] if pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY
    timestamp_key = parameters[
        pm4_constants.
        PARAMETER_CONSTANT_TIMESTAMP_KEY] if pm4_constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in parameters else xes.DEFAULT_TIMESTAMP_KEY
    case_id_glue = parameters[
        pm4_constants.
        PARAMETER_CONSTANT_CASEID_KEY] if pm4_constants.PARAMETER_CONSTANT_CASEID_KEY in parameters else CASE_CONCEPT_NAME

    parameters[pm4_constants.RETURN_EA_COUNT_DICT_AUTOFILTER] = True
    dataframe = attributes_filter.filter_df_keeping_spno_activities(
        dataframe,
        activity_key=activity_key,
        max_no_activities=constants.MAX_NO_ACTIVITIES)
    dataframe, end_activities = auto_filter.apply_auto_filter(
        dataframe, parameters=parameters)
    end_activities = list(end_activities.keys())
    [dfg, dfg_perf
     ] = df_statistics.get_dfg_graph(dataframe,
                                     activity_key=activity_key,
                                     timestamp_key=timestamp_key,
                                     case_id_glue=case_id_glue,
                                     sort_caseid_required=False,
                                     sort_timestamp_along_case_id=False,
                                     measure="both")
    activities_count = attributes_filter.get_attribute_values(
        dataframe, activity_key, parameters=parameters)
    activities = list(activities_count.keys())
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasingFactor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)
    dfg_perf = {x: y for x, y in dfg_perf.items() if x in dfg}
    start_activities = list(
        start_activities_filter.get_start_activities(
            dataframe, parameters=parameters).keys())
    gviz = dfg_vis_factory.apply(dfg_perf,
                                 activities_count=activities_count,
                                 variant="performance",
                                 parameters={
                                     "format": "svg",
                                     "start_activities": start_activities,
                                     "end_activities": end_activities
                                 })

    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    ret_graph = get_graph.get_graph_from_dfg(dfg, start_activities,
                                             end_activities)

    net, im, fm = dfg_conv_factory.apply(dfg,
                                         parameters={
                                             "start_activities":
                                             start_activities,
                                             "end_activities": end_activities
                                         })

    return get_base64_from_gviz(gviz), export_petri_as_string(
        net, im, fm
    ), ".pnml", "parquet", activities, start_activities, end_activities, gviz_base64, ret_graph, "dfg", "perf", None, "", activity_key
Code Example #15
time3 = time.time()
print("time3 - time2: "+str(time3-time2))
#dataframe = df_filtering.filter_df_on_ncases(dataframe, case_id_glue="case:concept:name", max_no_cases=1000)
time4 = time.time()
print("time4 - time3: "+str(time4-time3))
#dataframe = df_filtering.filter_df_on_case_length(dataframe, case_id_glue="case:concept:name", min_trace_length=3, max_trace_length=50)
print(dataframe)
time5 = time.time()
print("time5 - time4: "+str(time5-time4))
dataframe = csv_importer.convert_timestamp_columns_in_df(dataframe)
time6 = time.time()
print("time6 - time5: "+str(time6-time5))
#dataframe = dataframe.sort_values('time:timestamp')
time7 = time.time()
print("time7 - time6: "+str(time7-time6))

# show the filtered dataframe on the screen
activities_count = df_statistics.get_activities_count(dataframe)
[dfg_frequency, dfg_performance] = df_statistics.get_dfg_graph(dataframe, measure="both")
#activities_count = df_statistics.get_activities_count(dataframe, activity_key="event")
#[dfg_frequency, dfg_performance] = df_statistics.get_dfg_graph(dataframe, measure="both", case_id_glue="case", activity_key="event", timestamp_key="startTime")
time8 = time.time()
print("time8 - time7: "+str(time8-time7))
gviz = dfg_vis_factory.apply(dfg_frequency, activities_count=activities_count)
gviz.view()
net, initial_marking, final_marking = inductive_factory.apply_dfg(dfg_frequency)
gviz = pn_viz.graphviz_visualization(net, initial_marking=initial_marking, final_marking=final_marking, debug=True)
gviz.view()
time9 = time.time()
print("time9 - time8: "+str(time9-time8))
print("time9 - time1: "+str(time9-time1))
Code Example #16
#####################################################################################################################
# Data transformation
#####################################################################################################################
# datetime64[Y]=year, datetime64[M]=month, datetime64[D]=day, datetime64[h]=hour, datetime64[m]=minute, datetime64[s]=second 
dataframe['Time'] = dataframe['Time'].astype('datetime64[D]')
# Rename the attributes
dataframe = dataframe.rename(columns={'CaseID':'case:concept:name', 'Time':'time:timestamp', 'Activity':'concept:name', 'Resource':'org:resource'})
#print(dataframe)

# Convert the dataframe to a log
log = conversion_factory.apply(dataframe)
#print(log)

#####################################################################################################################
# Create the Directly-Follows Graph (DFG)
#####################################################################################################################
# ACTIVITIES: ACTIVITY_KEY = "concept:name"
# RESOURCE: ACTIVITY_KEY = "org:resource"
parameters = {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name"}

# frequency = frequency counts of the activity / resource
# performance = average duration of the activity / resource
variant='frequency'

dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)

#####################################################################################################################
# Data visualization: render the Directly-Follows Graph (DFG) with Graphviz
#####################################################################################################################
gviz = dfg_vis_factory.apply(dfg, log=log, variant=variant, parameters=parameters)
dfg_vis_factory.view(gviz)
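
As the comments above note, the same pipeline can show average durations instead of frequencies by switching the variant; a minimal sketch reusing the log, parameters, and factory aliases already defined in this snippet:

# performance = average duration between directly-following activities
variant = 'performance'
dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
gviz = dfg_vis_factory.apply(dfg, log=log, variant=variant, parameters=parameters)
dfg_vis_factory.view(gviz)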
Code Example #17
 def visualize_dfg(self,
                   log,
                   save_file=False,
                   file_name="dfg",
                   variant="relevance"):
     """
     Visualises the event log as a directly-follows graph (DFG).
     :param log: event log as a list of traces [list].
     :param save_file: boolean flag indicating whether to save the DFG [bool].
     :param file_name: name of the file [str].
     :param variant: dfg version to be produced: "frequency", "time", "relevance" or "all" [str]
     :return: file_names [list].
     """
     parameters = {"format": "svg"}
     file_names = []
     relevance_scores = self.aggregate_relevance_scores(log)
     if variant == "relevance" or variant == "all":
         for label, items in relevance_scores.items():
             data = filter_log_by_caseid(log, items['traces'])
             dfg = dfg_factory.apply(data)
             gviz = dfg_vis_factory.apply(dfg,
                                          activities_count=items['scores'],
                                          parameters=parameters)
             if len(items['traces']) == 1:
                 title = "Prediction: " + str(
                     label) + ", Case ID: " + items['traces'][0]
             else:
                 title = "No of Service Orders: " + str(len(
                     log)) + ", Filter: Repair not on time (Label = " + str(
                         label) + ")"
             gviz.body.append('\t// title')
             gviz.body.append('\tfontsize = 50;')
             gviz.body.append('\tlabelloc = "t";')
             gviz.body.append('\tlabel = "' + title + '";')
             print("rel_sc: ", items['scores'])
             if save_file:
                 filen = file_name + "_rel_" + str(label) + ".svg"
                 dfg_vis_factory.save(gviz, filen)
                 print("Saved DFG image to: " + filen)
                 file_names.append(filen)
     if variant == "frequency" or variant == "all":
         for label, items in relevance_scores.items():
             data = filter_log_by_caseid(log, items['traces'])
             dfg = dfg_factory.apply(data)
             activities_cnt = attributes_filter.get_attribute_values(
                 log, attribute_key="concept:name")
             gviz = dfg_vis_factory.apply(dfg,
                                          activities_count=activities_cnt,
                                          parameters=parameters)
             if len(items['traces']) == 1:
                 title = "Prediction: " + str(
                     label) + ", Case ID: " + items['traces'][0]
             else:
                 title = "No of Service Orders: " + str(len(
                     log)) + ", Filter: Repair not on time (Label = " + str(
                         label) + ")"
             gviz.body.append('\t// title')
             gviz.body.append('\tfontsize = 50;')
             gviz.body.append('\tlabelloc = "t";')
             gviz.body.append('\tlabel = "' + title + '";')
             if save_file:
                 filen = file_name + "_freq_" + str(label) + ".svg"
                 dfg_vis_factory.save(gviz, filen)
                 print("Saved DFG image to: " + filen)
                 file_names.append(filen)
     if variant == "time" or variant == "all":
         for label, items in relevance_scores.items():
             data = filter_log_by_caseid(log, items['traces'])
             dfg = dfg_factory.apply(data)
             parameters = {"format": "svg", "AGGREGATION_MEASURE": "mean"}
             gviz = dfg_vis_factory.apply(dfg,
                                          variant="performance",
                                          parameters=parameters)
             if len(items['traces']) == 1:
                 title = "Prediction: " + str(
                     label) + ", Case ID: " + items['traces'][0]
             else:
                 title = "No of Service Orders: " + str(len(
                     log)) + ", Filter: Repair not on time (Label = " + str(
                         label) + ")"
             gviz.body.append('\t// title')
             gviz.body.append('\tfontsize = 50;')
             gviz.body.append('\tlabelloc = "t";')
             gviz.body.append('\tlabel = "' + title + '";')
             if save_file:
                 filen = file_name + "_time_" + str(label) + ".svg"
                 dfg_vis_factory.save(gviz, filen)
                 print("Saved DFG image to: " + filen)
                 file_names.append(filen)
     return file_names
Code Example #18
2	load-scroll	0.034766
5	load-selection	0.033676
20	blur-scroll	0.033074
26	selection-scroll	0.032246
31	click-0-load	0.029780
'''
#-----------------

from pm4py.objects.log.importer.csv import factory as csv_importer
excellentLog1A = csv_importer.import_event_stream('Excellent1A_fixed.csv')
from pm4py.objects.conversion.log import factory as conversion_factory
log1 = conversion_factory.apply(excellentLog1A)

from pm4py.visualization.dfg import factory as dfg_vis_factory

gviz = dfg_vis_factory.apply(dfg1, log=log1, variant="frequency")
dfg_vis_factory.view(gviz)

from pm4py.objects.conversion.dfg import factory as dfg_mining_factory

net, im, fm = dfg_mining_factory.apply(dfg1)

from pm4py.visualization.petrinet import factory as pn_vis_factory

gviz = pn_vis_factory.apply(net, im, fm)
pn_vis_factory.view(gviz)

from pm4py.evaluation.replay_fitness import factory as replay_factory
fitness_alpha = replay_factory.apply(log1, net, im, fm)

from pm4py.algo.conformance.alignments import factory as align_factory
Code Example #19
import os, sys, inspect
currentdir = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
from pm4py.algo.dfg import factory as dfg_factory, replacement as dfg_replacement
from pm4py.log.importer import xes as xes_importer
from pm4py.filtering.tracelog.auto_filter import auto_filter
from pm4py.filtering.tracelog.attributes import attributes_filter
from pm4py.visualization.dfg import factory as dfg_vis_factory

# measure could be frequency or performance
measure = "frequency"
log = xes_importer.import_from_file_xes(
    '..\\tests\\inputData\\running-example.xes')
filtered_log = auto_filter.apply_auto_filter(log)
filtered_log_activities_count = attributes_filter.get_activities_from_log(
    filtered_log)
intermediate_log = attributes_filter.filter_log_by_specified_attributes(
    log, list(filtered_log_activities_count.keys()))
dfg_filtered_log = dfg_factory.apply(filtered_log, variant=measure)
dfg_intermediate_log = dfg_factory.apply(intermediate_log, variant=measure)
dfg_filtered_log = dfg_replacement.replace_values(dfg_filtered_log,
                                                  dfg_intermediate_log)

gviz = dfg_vis_factory.apply(dfg_filtered_log, log=intermediate_log)
gviz.view()
#base64 = dfg_visualize.return_diagram_as_base64(activities_count, dfg_filtered_log, measure=measure)
#print(base64)
Code Example #20
    def makeDFG_connector(ConnectorBasicStructure, frequency_threshold,
                          dfg_path, **keyword_param):

        unique_activities = ConnectorBasicStructure['activity'].unique()
        unique_next_activities = ConnectorBasicStructure[
            'prev_activity'].unique()
        activitySet = set(unique_activities) | set(unique_next_activities)
        activityList = list(activitySet)
        activityList.sort()

        activityList.remove(':Start:')

        #edges
        groupedbyactivityPairs = ConnectorBasicStructure.groupby(
            ['prev_activity', 'activity']).size().reset_index(name='counts')

        #just to return as matrix
        ActActMatrix = np.zeros([len(activityList), len(activityList)])
        for prev_activity, activity in zip(
                ConnectorBasicStructure['prev_activity'],
                ConnectorBasicStructure['activity']):
            if (prev_activity == ":Start:"):
                continue
            ActActMatrix[activityList.index(prev_activity)][activityList.index(
                activity)] += 1

        edges_dict = {}

        sumFrequency = groupedbyactivityPairs['counts'].sum(
        ) - groupedbyactivityPairs.loc[
            groupedbyactivityPairs['prev_activity'] == ":Start:", 'counts'][0]

        #edges_list = []
        for index, row in groupedbyactivityPairs.iterrows():
            if (row['prev_activity'] == ":Start:"):
                continue
            #edge_dict = {}
            edge_list = []

            if (keyword_param['encryption']):
                edge_list.append(
                    Utilities.AES_ECB_Encrypt(
                        row['prev_activity'].encode('utf-8')[0:5],
                        keyword_param['key']))
                edge_list.append(
                    Utilities.AES_ECB_Encrypt(
                        row['activity'].encode('utf-8')[0:5],
                        keyword_param['key']))
            else:
                edge_list.append(row['prev_activity'])
                edge_list.append(row['activity'])
            edge_tuple = tuple(edge_list)
            if (row['counts'] / sumFrequency >= frequency_threshold):
                edges_dict[edge_tuple] = row['counts']
            #edges_list.append(edge_dict)
            #edges_dict.append(edge_dict)

        #nodes
        activity_frequencyDF = ConnectorBasicStructure.groupby(
            ['activity']).size().reset_index(name='counts')
        prev_activity_frequencyDF = ConnectorBasicStructure.groupby(
            ['prev_activity']).size().reset_index(name='counts')
        prev_activity_frequencyDF = prev_activity_frequencyDF.rename(
            columns={'prev_activity': 'activity'})
        final_activity_fequency = pd.concat([
            activity_frequencyDF, prev_activity_frequencyDF
        ]).drop_duplicates(subset='activity',
                           keep="first").reset_index(drop=True)

        nodes = final_activity_fequency.set_index('activity').T.to_dict(
            'records')
        nodes[0].pop(':Start:')
        #Making encrypted nodes
        nodes_new = {}
        for key, value in nodes[0].items():
            nodes_new[Utilities.AES_ECB_Encrypt(
                key.encode('utf-8'), keyword_param['key'])[0:5]] = value
        if (keyword_param['encryption']):
            gviz = dfg_vis_factory.apply(edges_dict,
                                         activities_count=nodes_new,
                                         parameters={"format": "svg"})
        else:
            gviz = dfg_vis_factory.apply(edges_dict,
                                         activities_count=nodes[0],
                                         parameters={"format": "svg"})

        if (keyword_param['visualization']):
            dfg_vis_factory.view(gviz)
            dfg_vis_factory.save(gviz, dfg_path)

        return ActActMatrix, activityList
Code Example #21
def create_graphs(without_error, log, approach):
    """
    Creates visualizations: a Directly-Follows Graph and a Heuristic Net
    """

    # create dfg frequency
    path = "common_path"
    vis_type = "dfg_frequency"
    naming_error = "with_error"
    if without_error:
        naming_error = "no_error"
    file = f"{vis_type}_{approach}_{naming_error}.svg"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.svg"
    parameters = {
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name",
        "format": "svg"
    }
    variant = 'frequency'
    dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
    gviz = dfg_vis_factory.apply(dfg,
                                 log=log,
                                 variant=variant,
                                 parameters=parameters)
    dfg_vis_factory.view(gviz)
    dfg_vis_factory.save(gviz, filename)
    log_info.info("DFG frequency has been stored in '%s' in file '%s'", path,
                  file)

    # create dfg performance
    vis_type = "dfg_performance"
    file = f"{vis_type}_{approach}_{naming_error}.svg"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.svg"
    variant = 'performance'
    dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
    gviz = dfg_vis_factory.apply(dfg,
                                 log=log,
                                 variant=variant,
                                 parameters=parameters)
    dfg_vis_factory.view(gviz)
    dfg_vis_factory.save(gviz, filename)
    log_info.info("DFG performance has been stored in '%s' in file '%s'", path,
                  file)

    # create heuristic net
    vis_type = "heuristicnet"
    file = f"{vis_type}_{approach}_{naming_error}.svg"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.svg"
    heu_net = heuristics_miner.apply_heu(
        log,
        parameters={
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH:
            0.60
        })
    gviz = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "svg"})
    hn_vis.view(gviz)
    hn_vis.save(gviz, filename)
    log_info.info("Heuristic Net has been stored in '%s' in file '%s'", path,
                  file)

    # save heuristic net in plain-ext format
    file = f"{vis_type}_{approach}_{naming_error}.plain-ext"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.plain-ext"
    gviz = hn_vis.apply(heu_net,
                        parameters={
                            hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT:
                            "plain-ext"
                        })
    hn_vis.save(gviz, filename)
    log_info.info(
        "Heuristic Net as .plain-ext has been stored in '%s' "
        "in file '%s'", path, file)

    # save heuristic net in dot format
    file = f"{vis_type}_{approach}_{naming_error}.dot"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.dot"
    gviz = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "dot"})
    hn_vis.save(gviz, filename)
    log_info.info(
        "Heuristic Net as .dot has been stored in '%s' "
        "in file '%s'", path, file)

    # save heuristic net in xdot format
    file = f"{vis_type}_{approach}_{naming_error}.xdot"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.xdot"
    gviz = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "xdot"})
    hn_vis.save(gviz, filename)
    log_info.info(
        "Heuristic Net as .xdot has been stored in '%s' "
        "in file '%s'", path, file)
Code Example #22
def _discover_draw_process_model(
    case_type, hl_activity_types, time_type):
    from .utilities import _trim_activity_label_tail

    # TODO: handle process model discovery for CSV inputs
    el = session['event_log']
    fn_server = '{}.log.{}'.format(
        session.sid[:32], session['last_upload_event_log_filetype']
    )
    from pm4py.objects.log.importer.xes import importer as xes_importer
    pm4py_log = xes_importer.apply(join(
        app.config['TEMP'], fn_server
    ))

    exec_mode_miner = session['exec_mode_miner']
    sel_cases = exec_mode_miner.get_values_by_type(case_type) \
        if case_type != '' else set(el['case_id'])

    '''
    # NOTE: CSV only - trim the additional markings appended by Disco
    hl_activity_types = [_trim_activity_label_tail(x, r'-complete')
        for x in hl_activity_types]
    '''

    # filter event log
    from pm4py.objects.log.log import EventLog, Trace
    pm4py_log_filtered = EventLog()
    # filter event log: keep selected cases only
    for trace in pm4py_log:
        if trace.attributes['concept:name'] in sel_cases:
            pm4py_log_filtered.append(trace)

    from pm4py.algo.discovery.dfg import factory as dfg_miner
    dfg = dfg_miner.apply(pm4py_log_filtered)
    from pm4py.visualization.dfg import factory as dfg_vis_factory
    gviz = dfg_vis_factory.apply(dfg, log=pm4py_log_filtered, 
        variant='frequency', 
        parameters={'maxNoOfEdgesInDiagram': 30}
    )
    gv_source = gviz.source

    # find activity labels that should be highlighted
    hl_activities = []
    for at in hl_activity_types:
        hl_activities.extend(exec_mode_miner.get_values_by_type(at))

    # TODO: delegate to front-end: edit and annotate the graph
    import pygraphviz as pgv
    graph = pgv.AGraph(gviz.source)
    for node in graph.nodes():
        if node.attr['shape'] == 'box' and node.attr['label'] != '':
            # trim the count in the labels from DFG
            node.attr['label'] = _trim_activity_label_tail(
                node.attr['label'], r' \(\d+\)')
            node.attr['fontname'] = 'Helvetica'

            # TODO: NOT an elegant solution for highlighting purpose - need rev.
            if exec_mode_miner._atypes[node.attr['label']] \
                in hl_activity_types:
                # highlight
                node.attr['style'] = 'bold'
                node.attr['fontcolor'] = 'red3'
            else:
                node.attr['style'] = 'filled'
                node.attr['fillcolor'] = 'gainsboro'

    gv_source = graph.string()
    return gv_source, hl_activities
Code Example #23
File: app.py Project: AakanchJoshi/Process-Mining
def directly_follows_graphs_perf(log_file):
    dfg = dfg_factory.apply(log_file)
    gviz = dfg_vis_factory.apply(dfg, log=log_file, variant="performance")
    pn_vis_factory.save(gviz, "static/dag_performance.png")
    return "success!"
Code Example #24
# 1. Import libraries
import os
import csv
from pm4py.objects.log.importer.xes import factory as xes_importer
from pm4py.objects.conversion.dfg import factory as dfg_mining_factory
from pm4py.algo.discovery.dfg import factory as dfg_factory
from pm4py.visualization.dfg import factory as dfg_vis_factory
from pm4py.visualization.petrinet import factory as pn_vis_factory

# 2. preprocessing
with open('dfr1.txt') as file:
    file_reader = csv.reader(file, delimiter='\t')
    dfg = dict()
    for row in file_reader:
        _from, _to = row[0].split(',')
        rel = (_from, _to)
        freq = int(row[1])
        dfg[rel] = freq

# 3. Visualize Directly-follows-graph (DFG)
gviz = dfg_vis_factory.apply(dfg)
dfg_vis_factory.view(gviz)

# 4. Discover and Visualize Workflow-Net
net, im, fm = dfg_mining_factory.apply(dfg)
gviz = pn_vis_factory.apply(net, im, fm)
pn_vis_factory.view(gviz)
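
For reference, the preprocessing step above expects dfr1.txt to hold one directly-follows relation per line: a comma-separated activity pair, a tab, and a frequency. A purely illustrative writer for such a file (activity names are invented):

# hypothetical contents of dfr1.txt: "<from>,<to>\t<frequency>"
with open('dfr1.txt', 'w') as f:
    f.write("A,B\t12\n")
    f.write("B,C\t7\n")
    f.write("B,D\t5\n")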
Code Example #25
def show(model, tel, file_name, parameters):
    '''
    Show model and its quality measures
    :param model: model type (transition system, state based region, DFG miner, alpha miner)
    :param tel: input log
    :param file_name: img file name to show model
    :param parameters: parameters for the transition system (afreq, sfreq)
    :return:
    '''

    tel_flag = False
    if isinstance(tel[0][0], tel_event):
        tel_flag = True

    if model in ['ts', 'sbr']:
        if tel_flag:
            output_file_path = os.path.join(
                "static", "images", file_name[:file_name.find('.')] + '_' +
                model + '_' + str(parameters['afreq_thresh']) + '_' +
                str(parameters['sfreq_thresh']) + ".png")
        else:
            output_file_path = os.path.join(
                "static", "images",
                "2" + "_" + file_name[:file_name.find('.')] + '_' + model +
                '_' + str(parameters[PARAM_KEY_DIRECTION]) + '_' +
                str(parameters[PARAM_KEY_WINDOW]) + "_" +
                str(parameters[PARAM_KEY_VIEW]) + ".png")
        auto = utils.discover_annotated_automaton(tel, parameters=parameters)

        max_thresh = {}
        max_afreq = 0
        max_sfreq = 0

        if tel_flag:
            for trans in auto.transitions:
                max_afreq = max(max_afreq, trans.afreq)
            for state in auto.states:
                max_sfreq = max(max_sfreq, state.sfreq)
        max_thresh['afreq'] = max_afreq
        max_thresh['sfreq'] = max_sfreq

        if model == 'ts':
            result = {}
            gviz = vis_factory.apply(auto)
            vis_factory.save(gviz, output_file_path)
            result['num of transitions'] = len(auto.transitions)
            result['num of states'] = len(auto.states)

        else:
            net, im, fm = sb.petri_net_synthesis(auto)
            gviz = petri_vis_factory.apply(net, im, fm)
            petri_vis_factory.save(gviz, output_file_path)
            result = evaluation(net, im, fm, tel)

    else:
        if tel_flag:
            output_file_path = os.path.join(
                "static", "images",
                file_name[:file_name.find('.')] + '_' + model + '_' + ".png")
        else:
            output_file_path = os.path.join(
                "static", "images", "2" + file_name[:file_name.find('.')] +
                '_' + model + '_' + ".png")

        if model == 'alpha':
            if isinstance(tel[0][0], Event):
                net, im, fm = trans_alpha(tel)
            else:
                net, im, fm = alpha_miner.apply(tel)
            gviz = petri_vis_factory.apply(net, im, fm)
            petri_vis_factory.save(gviz, output_file_path)
            result = evaluation(net, im, fm, tel)

        else:
            dfg = dfg_factory.apply(tel)
            if tel_flag:
                dfg_tel = inductive_revise.get_dfg_graph_trans(tel)
                #dfg = dfg_tel + dfg
                dfg = dfg_tel

            gviz = dfg_vis_factory.apply(dfg, log=tel)
            dfg_vis_factory.save(gviz, output_file_path)
            result = dict(
                sorted(dfg.items(), key=operator.itemgetter(1), reverse=True))

        max_thresh = None

    return output_file_path, result, max_thresh
Code Example #26
from pm4py.visualization.dfg import factory as dfg_vis_factory
from pm4pyspark.importer.csv import spark_df_imp as importer
from pm4pyspark.algo.discovery.dfg import factory as dfg_factory

parameters = {"format": "svg"}

event_stream_ex = importer.import_event_stream(
    os.path.join(INPUT_DATA_DIR, "running-example.csv"),
    parameters={"header": True})
log_ex = importer.transform_event_stream_to_event_log(event_stream_ex)
df_ex = importer.import_sparkdf_from_path(
    os.path.join(INPUT_DATA_DIR, "running-example.csv"),
    header=True,
    inferSchema=True)

dfg_freq = dfg_factory.apply(df_ex)
gviz_freq = dfg_vis_factory.apply(dfg_freq,
                                  log=log_ex,
                                  parameters=parameters,
                                  variant="frequency")
dfg_vis_factory.save(gviz_freq,
                     os.path.join(OUTPUT_DATA_DIR, "running-example_freq.svg"))

dfg_perf = dfg_factory.apply(df_ex, variant="performance")
gviz_perf = dfg_vis_factory.apply(dfg_perf,
                                  log=log_ex,
                                  parameters=parameters,
                                  variant="performance")
dfg_vis_factory.save(gviz_perf,
                     os.path.join(OUTPUT_DATA_DIR, "running-example_perf.svg"))
Code Example #27
File: app.py Project: AakanchJoshi/Process-Mining
def directly_follows_graphs_freq(log_file):
    dfg = dfg_factory.apply(log_file)
    gviz = dfg_vis_factory.apply(dfg, log=log_file, variant="frequency")
    pn_vis_factory.save(gviz, "static/dag_frequency.png")
    return "success!"
Code Example #28
# from pm4py.algo.discovery.heuristics import factory as heuristics_miner
# heu_net = heuristics_miner.apply_heu(log, parameters={"dependency_thresh": 0.99})

########################################

#### mean number of days between each pair of activities #######

from pm4py.algo.discovery.dfg import factory as dfg_factory

dfg = dfg_factory.apply(conv_log, variant="performance")
parameters = {"format": "png"}

from pm4py.visualization.dfg import factory as dfg_vis_factory

gviz = dfg_vis_factory.apply(dfg,
                             log=conv_log,
                             variant="performance",
                             parameters=parameters)
dfg_vis_factory.save(gviz, "performance_invoice.png")
dfg_vis_factory.view(gviz)

##################################

#### frequency of transitions between each pair of activities #######
from pm4py.algo.discovery.dfg import factory as dfg_factory

from pm4py.visualization.dfg import factory as dfg_vis_factory

dfg = dfg_factory.apply(conv_log)
parameters = {"format": "png"}

gviz1 = dfg_vis_factory.apply(dfg,
Code Example #29
def apply(log, parameters=None):
    """
    Gets the frequency DFG

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    """
    if parameters is None:
        parameters = {}

    decreasingFactor = parameters[
        "decreasingFactor"] if "decreasingFactor" in parameters else constants.DEFAULT_DEC_FACTOR

    activity_key = parameters[
        pm4_constants.
        PARAMETER_CONSTANT_ACTIVITY_KEY] if pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY

    log = attributes_filter.filter_log_on_max_no_activities(
        log,
        max_no_activities=constants.MAX_NO_ACTIVITIES,
        parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    activities_count = attributes_filter.get_attribute_values(
        filtered_log, activity_key)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            filtered_log, parameters=parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(filtered_log,
                                                 parameters=parameters).keys())

    dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasingFactor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)

    parameters["format"] = "svg"
    parameters["start_activities"] = start_activities
    parameters["end_activities"] = end_activities

    gviz = dfg_vis_factory.apply(dfg,
                                 log=filtered_log,
                                 variant="frequency",
                                 parameters=parameters)

    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    ret_graph = get_graph.get_graph_from_dfg(dfg, start_activities,
                                             end_activities)

    net, im, fm = dfg_conv_factory.apply(dfg,
                                         parameters={
                                             "start_activities":
                                             start_activities,
                                             "end_activities": end_activities
                                         })

    return get_base64_from_gviz(gviz), export_petri_as_string(
        net, im, fm
    ), ".pnml", "xes", activities, start_activities, end_activities, gviz_base64, ret_graph, "dfg", "freq", None, "", activity_key
Code Example #30
#-----------------------------
#libraries

import os
from pm4py.objects.log.importer.xes import factory as xes_importer
log = xes_importer.import_log(os.path.join("pmdata/", "running-example.xes"))
log
from pm4py.algo.discovery.alpha import factory as alpha_miner
net, initial_marking, final_marking = alpha_miner.apply(log)
net

# directly-follows graph (DFG)
import os
from pm4py.objects.log.importer.xes import factory as xes_importer
log = xes_importer.import_log(os.path.join("pmdata/running-example.xes"))

from pm4py.algo.discovery.dfg import factory as dfg_factory
dfg = dfg_factory.apply(log)

from pm4py.visualization.dfg import factory as dfg_vis_factory
gviz = dfg_vis_factory.apply(dfg, log=log, variant="frequency")
dfg_vis_factory.view(gviz)

#decoration
from pm4py.algo.discovery.dfg import factory as dfg_factory
from pm4py.visualization.dfg import factory as dfg_vis_factory

dfg = dfg_factory.apply(log, variant="performance")
gviz = dfg_vis_factory.apply(dfg, log=log, variant="performance")
dfg_vis_factory.view(gviz)
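
To keep the decorated graph instead of only viewing it, the save call used in the earlier examples applies here as well; the file name is a placeholder:

gviz = dfg_vis_factory.apply(dfg, log=log, variant="performance",
                             parameters={"format": "svg"})
dfg_vis_factory.save(gviz, "running-example_dfg_performance.svg")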