Example #1
0
def execute_script():
    log_path = os.path.join("..", "tests", "input_data", "running-example.xes")
    log = xes_importer.apply(log_path)
    dfg = dfg_factory.apply(log)
    dfg_gv = dfg_vis_fact.apply(dfg, log, parameters={"format": "svg"})
    dfg_vis_fact.view(dfg_gv)
    net, im, fm = dfg_conv_factory.apply(dfg)
    gviz = pn_vis_factory.apply(net, im, fm, parameters={"format": "svg"})
    pn_vis_factory.view(gviz)
Example #2
0
def generate_process_model(log):
    '''
    Description: to generate graphical process model in
                .svg format using pm4py library function
    Used: generate process model under provided log
    Input: log file
    Output: Display process model
    '''

    dfg = dfg_factory.apply(log)
    '''To decorate DFG with the frequency of activities'''
    gviz = dfg_vis_factory.apply(dfg, log=log, variant="frequency")
    dfg_vis_factory.view(gviz)
    return dfg
def execute_script():
    # import csv & create log
    dataframe = csv_import_adapter.import_dataframe_from_path(
        datasourceMockdata(), sep=";")
    dataframe = dataframe.rename(columns={
        'coID': 'case:concept:name',
        'Activity': 'concept:name'
    })
    log = conversion_factory.apply(dataframe)

    # option 1: Directly-Follows Graph, represent frequency or performance
    parameters = {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name"}
    variant = 'frequency'
    dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
    gviz1 = dfg_vis_factory.apply(dfg,
                                  log=log,
                                  variant=variant,
                                  parameters=parameters)
    dfg_vis_factory.view(gviz1)

    # option 2: Heuristics Miner, acts on the Directly-Follows Graph, find common structures, output: Heuristic Net (.svg)
    heu_net = heuristics_miner.apply_heu(
        log,
        parameters={
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH:
            0.00
        })
    gviz2 = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "svg"})
    hn_vis.view(gviz2)

    # option 3: Petri Net based on Heuristic Miner (.png)
    net, im, fm = heuristics_miner.apply(
        log,
        parameters={
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH:
            0.00
        })
    gviz3 = petri_vis.apply(
        net,
        im,
        fm,
        parameters={
            petri_vis.Variants.WO_DECORATION.value.Parameters.FORMAT: "png"
        })
    petri_vis.view(gviz3)
5	load-selection	0.033676
20	blur-scroll	0.033074
26	selection-scroll	0.032246
31	click-0-load	0.029780
'''
#-----------------

from pm4py.objects.log.importer.csv import factory as csv_importer
excellentLog1A = csv_importer.import_event_stream('Excellent1A_fixed.csv')
from pm4py.objects.conversion.log import factory as conversion_factory
log1 = conversion_factory.apply(excellentLog1A)

from pm4py.visualization.dfg import factory as dfg_vis_factory

gviz = dfg_vis_factory.apply(dfg1, log=log1, variant="frequency")
dfg_vis_factory.view(gviz)

from pm4py.objects.conversion.dfg import factory as dfg_mining_factory

net, im, fm = dfg_mining_factory.apply(dfg1)

from pm4py.visualization.petrinet import factory as pn_vis_factory

gviz = pn_vis_factory.apply(net, im, fm)
pn_vis_factory.view(gviz)

from pm4py.evaluation.replay_fitness import factory as replay_factory
fitness_alpha = replay_factory.apply(log1, net, im, fm)

from pm4py.algo.conformance.alignments import factory as align_factory
def create_graphs(without_error, log, approach):
    """
    creates visualization: Directly-Follows-Graph and Heuristic Net
    """

    # create dfg frequency
    path = "common_path"
    vis_type = "dfg_frequency"
    naming_error = "with_error"
    if without_error:
        naming_error = "no_error"
    file = f"{vis_type}_{approach}_{naming_error}.svg"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.svg"
    parameters = {
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name",
        "format": "svg"
    }
    variant = 'frequency'
    dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
    gviz = dfg_vis_factory.apply(dfg,
                                 log=log,
                                 variant=variant,
                                 parameters=parameters)
    dfg_vis_factory.view(gviz)
    dfg_vis_factory.save(gviz, filename)
    log_info.info("DFG frequency has been stored in '%s' in file '%s'", path,
                  file)

    # create dfg performance
    vis_type = "dfg_performance"
    file = f"{vis_type}_{approach}_{naming_error}.svg"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.svg"
    variant = 'performance'
    dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
    gviz = dfg_vis_factory.apply(dfg,
                                 log=log,
                                 variant=variant,
                                 parameters=parameters)
    dfg_vis_factory.view(gviz)
    dfg_vis_factory.save(gviz, filename)
    log_info.info("DFG performance has been stored in '%s' in file '%s'", path,
                  file)

    # create heuristic net
    vis_type = "heuristicnet"
    file = f"{vis_type}_{approach}_{naming_error}.svg"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.svg"
    heu_net = heuristics_miner.apply_heu(
        log,
        parameters={
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH:
            0.60
        })
    gviz = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "svg"})
    hn_vis.view(gviz)
    hn_vis.save(gviz, filename)
    log_info.info("Heuristic Net has been stored in '%s' in file '%s'", path,
                  file)

    # save heuristic net in plain-ext format
    file = f"{vis_type}_{approach}_{naming_error}.plain-ext"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.plain-ext"
    gviz = hn_vis.apply(heu_net,
                        parameters={
                            hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT:
                            "plain-ext"
                        })
    hn_vis.save(gviz, filename)
    log_info.info(
        "Heuristic Net as .plain-ext has been stored in '%s' "
        "in file '%s'", path, file)

    # save heuristic net in dot format
    file = f"{vis_type}_{approach}_{naming_error}.dot"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.dot"
    gviz = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "dot"})
    hn_vis.save(gviz, filename)
    log_info.info(
        "Heuristic Net as .dot has been stored in '%s' "
        "in file '%s'", path, file)

    # save heuristic net in xdot format
    file = f"{vis_type}_{approach}_{naming_error}.xdot"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.xdot"
    gviz = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "xdot"})
    hn_vis.save(gviz, filename)
    log_info.info(
        "Heuristic Net as .xdot has been stored in '%s' "
        "in file '%s'", path, file)
Example #6
0
    def makeDFG_connector(ConnectorBasicStructure, frequency_threshold,
                          dfg_path, **keyword_param):

        unique_activities = ConnectorBasicStructure['activity'].unique()
        unique_next_activities = ConnectorBasicStructure[
            'prev_activity'].unique()
        activitySet = set(unique_activities) | set(unique_next_activities)
        activityList = list(activitySet)
        activityList.sort()

        activityList.remove(':Start:')

        #edges
        groupedbyactivityPairs = ConnectorBasicStructure.groupby(
            ['prev_activity', 'activity']).size().reset_index(name='counts')

        #just to return as matrix
        ActActMatrix = np.zeros([len(activityList), len(activityList)])
        for prev_activity, activity in zip(
                ConnectorBasicStructure['prev_activity'],
                ConnectorBasicStructure['activity']):
            if (prev_activity == ":Start:"):
                continue
            ActActMatrix[activityList.index(prev_activity)][activityList.index(
                activity)] += 1

        edges_dict = {}

        sumFrequency = groupedbyactivityPairs['counts'].sum(
        ) - groupedbyactivityPairs.loc[
            groupedbyactivityPairs['prev_activity'] == ":Start:", 'counts'][0]

        #edges_list = []
        for index, row in groupedbyactivityPairs.iterrows():
            if (row['prev_activity'] == ":Start:"):
                continue
            #edge_dict = {}
            edge_list = []

            if (keyword_param['encryption']):
                edge_list.append(
                    Utilities.AES_ECB_Encrypt(
                        row['prev_activity'].encode('utf-8')[0:5],
                        keyword_param['key']))
                edge_list.append(
                    Utilities.AES_ECB_Encrypt(
                        row['activity'].encode('utf-8')[0:5],
                        keyword_param['key']))
            else:
                edge_list.append(row['prev_activity'])
                edge_list.append(row['activity'])
            edge_tuple = tuple(edge_list)
            if (row['counts'] / sumFrequency >= frequency_threshold):
                edges_dict[edge_tuple] = row['counts']
            #edges_list.append(edge_dict)
            #edges_dict.append(edge_dict)

        #nodes
        activity_frequencyDF = ConnectorBasicStructure.groupby(
            ['activity']).size().reset_index(name='counts')
        prev_activity_frequencyDF = ConnectorBasicStructure.groupby(
            ['prev_activity']).size().reset_index(name='counts')
        prev_activity_frequencyDF = prev_activity_frequencyDF.rename(
            columns={'prev_activity': 'activity'})
        final_activity_fequency = pd.concat([
            activity_frequencyDF, prev_activity_frequencyDF
        ]).drop_duplicates(subset='activity',
                           keep="first").reset_index(drop=True)

        nodes = final_activity_fequency.set_index('activity').T.to_dict(
            'records')
        nodes[0].pop(':Start:')
        #Making encrypted nodes
        nodes_new = {}
        for key, value in nodes[0].items():
            nodes_new[Utilities.AES_ECB_Encrypt(
                key.encode('utf-8'), keyword_param['key'])[0:5]] = value
        if (keyword_param['encryption']):
            gviz = dfg_vis_factory.apply(edges_dict,
                                         activities_count=nodes_new,
                                         parameters={"format": "svg"})
        else:
            gviz = dfg_vis_factory.apply(edges_dict,
                                         activities_count=nodes[0],
                                         parameters={"format": "svg"})

        if (keyword_param['visualization']):
            dfg_vis_factory.view(gviz)
            dfg_vis_factory.save(gviz, dfg_path)

        return ActActMatrix, activityList
Example #7
0
def execute_script():
    time1 = time.time()
    dataframe = csv_import_adapter.import_dataframe_from_path_wo_timeconversion(
        inputLog, sep=SEP, quotechar=QUOTECHAR)
    time2 = time.time()
    print("time2 - time1: " + str(time2 - time1))
    parameters_filtering = {
        constants.PARAMETER_CONSTANT_CASEID_KEY: CASEID_GLUE,
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: ACTIVITY_KEY
    }
    if enable_auto_filter:
        dataframe = auto_filter.apply_auto_filter(
            dataframe, parameters=parameters_filtering)
    else:
        dataframe = attributes_filter.apply_auto_filter(
            dataframe, parameters=parameters_filtering)
    time3 = time.time()
    print("time3 - time2: " + str(time3 - time2))
    if enable_filtering_on_cases:
        dataframe = case_filter.filter_on_ncases(dataframe,
                                                 case_id_glue=CASEID_GLUE,
                                                 max_no_cases=max_no_cases)
    time4 = time.time()
    dataframe = csv_import_adapter.convert_caseid_column_to_str(
        dataframe, case_id_glue=CASEID_GLUE)
    dataframe = csv_import_adapter.convert_timestamp_columns_in_df(
        dataframe, timest_columns=TIMEST_COLUMNS, timest_format=TIMEST_FORMAT)
    time6 = time.time()
    print("time6 - time4: " + str(time6 - time4))
    # dataframe = dataframe.sort_values('time:timestamp')
    time7 = time.time()
    print("time7 - time6: " + str(time7 - time6))

    # show the filtered dataframe on the screen
    activities_count = attributes_filter.get_attribute_values(
        dataframe, attribute_key=ACTIVITY_KEY)
    [dfg_frequency, dfg_performance
     ] = df_statistics.get_dfg_graph(dataframe,
                                     measure="both",
                                     perf_aggregation_key="median",
                                     case_id_glue=CASEID_GLUE,
                                     activity_key=ACTIVITY_KEY,
                                     timestamp_key=TIMEST_KEY)
    if enable_filtering_df:
        print("len dfg_frequency 0=", len(dfg_frequency))
        dfg_frequency = dfg_filtering.apply(
            dfg_frequency, {"noiseThreshold": filtering_df_noise})
        print("len dfg_frequency 1=", len(dfg_frequency))
    time8 = time.time()
    print("time8 - time7: " + str(time8 - time7))
    gviz = dfg_vis_factory.apply(dfg_frequency,
                                 activities_count=activities_count,
                                 parameters={"format": "svg"})
    dfg_vis_factory.view(gviz)
    net, initial_marking, final_marking = inductive_factory.apply_dfg(
        dfg_frequency)
    # net, initial_marking, final_marking = alpha_factory.apply_dfg(dfg_frequency)
    spaths = get_shortest_paths(net)
    time9 = time.time()
    print("time9 - time8: " + str(time9 - time8))
    aggregated_statistics = get_decorations_from_dfg_spaths_acticount(
        net, dfg_performance, spaths, activities_count, variant="performance")
    gviz = pn_vis_factory.apply(net,
                                initial_marking,
                                final_marking,
                                variant="performance",
                                aggregated_statistics=aggregated_statistics,
                                parameters={"format": "svg"})
    time10 = time.time()
    print("time10 - time9: " + str(time10 - time9))
    print("time10 - time1: " + str(time10 - time1))
    pn_vis_factory.view(gviz)