Example 1
 def test_csv1documentation(self):
     # to avoid static-method warnings in tests,
     # which by construction of the unittest package have to be instance methods
     self.dummy_variable = "dummy_value"
     import os
     from pm4py.objects.log.importer.csv import factory as csv_importer
     event_log = csv_importer.import_log(
         os.path.join("input_data", "running-example.csv"))
     event_log_length = len(event_log)
     del event_log_length
     from pm4py.objects.log import transform
     trace_log = transform.transform_event_log_to_trace_log(
         event_log, case_glue="case:concept:name")
     del trace_log
     from pm4py.objects.log.importer.csv.versions import pandas_df_imp
     dataframe = pandas_df_imp.import_dataframe_from_path(
         os.path.join("input_data", "running-example.csv"))
     event_log = pandas_df_imp.convert_dataframe_to_event_log(dataframe)
     trace_log = transform.transform_event_log_to_trace_log(
         event_log, case_glue="case:concept:name")
     from pm4py.objects.log.exporter.csv import factory as csv_exporter
     csv_exporter.export_log(event_log, "outputFile1.csv")
     os.remove("outputFile1.csv")
     csv_exporter.export_log(trace_log, "outputFile2.csv")
     os.remove("outputFile2.csv")
Example 2
 def test_importExportCSVtoCSV(self):
     # to avoid static-method warnings in tests,
     # which by construction of the unittest package have to be instance methods
     self.dummy_variable = "dummy_value"
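     # (module-level imports are omitted in this listing; the test presumably
     # relies on the csv importer/exporter factories, log_transform, and the
     # sorting/sampling/index_attribute utilities, plus the INPUT_DATA_DIR and
     # OUTPUT_DATA_DIR constants)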
     event_log = csv_importer.import_log(
         os.path.join(INPUT_DATA_DIR, "running-example.csv"))
     event_log = sorting.sort_timestamp(event_log)
     event_log = sampling.sample(event_log)
     event_log = index_attribute.insert_event_index_as_event_attribute(
         event_log)
     trace_log = log_transform.transform_event_log_to_trace_log(event_log)
     trace_log = sorting.sort_timestamp(trace_log)
     trace_log = sampling.sample(trace_log)
     trace_log = index_attribute.insert_trace_index_as_event_attribute(
         trace_log)
     event_log_transformed = log_transform.transform_trace_log_to_event_log(
         trace_log)
     csv_exporter.export_log(
         event_log_transformed,
         os.path.join(OUTPUT_DATA_DIR, "running-example-exported.csv"))
     event_log_imported_after_export = csv_importer.import_log(
         os.path.join(OUTPUT_DATA_DIR, "running-example-exported.csv"))
     trace_log_imported_after_export = log_transform.transform_event_log_to_trace_log(
         event_log_imported_after_export)
     self.assertEqual(len(trace_log), len(trace_log_imported_after_export))
     os.remove(os.path.join(OUTPUT_DATA_DIR,
                            "running-example-exported.csv"))
Example 3
def apply(object,
          petri_net,
          initial_marking,
          final_marking,
          parameters=None,
          version=VERSION_STATE_EQUATION_A_STAR):
    if isinstance(object, pm4py.objects.log.log.Trace):
        return apply_trace(object, petri_net, initial_marking, final_marking,
                           parameters, version)
    elif isinstance(object, pm4py.objects.log.log.TraceLog):
        return apply_log(object, petri_net, initial_marking, final_marking,
                         parameters, version)
    elif isinstance(object, pm4py.objects.log.log.EventLog):
        if parameters is None:
            parameters = {}
        if log_util.PARAMETER_KEY_CASE_GLUE in parameters:
            glue = parameters[log_util.PARAMETER_KEY_CASE_GLUE]
        else:
            glue = log_util.CASE_ATTRIBUTE_GLUE
        if log_util.PARAMETER_KEY_CASE_ATTRIBUTE_PRFIX in parameters:
            case_pref = parameters[log_util.PARAMETER_KEY_CASE_ATTRIBUTE_PRFIX]
        else:
            case_pref = log_util.CASE_ATTRIBUTE_PREFIX
        trace_log = log_transform.transform_event_log_to_trace_log(
            object,
            case_glue=glue,
            includes_case_attributes=False,
            case_attribute_prefix=case_pref)
        return apply_log(trace_log, petri_net, initial_marking,
                         final_marking, parameters, version)
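
A minimal usage sketch for the dispatcher above, assuming the legacy pm4py 1.x module layout used throughout these examples and an input file named running-example.xes (both are assumptions, not shown in the original snippet):

import os
from pm4py.objects.log.importer.xes import factory as xes_importer
from pm4py.algo.discovery.alpha import factory as alpha_miner

trace_log = xes_importer.import_log(
    os.path.join("input_data", "running-example.xes"))
net, initial_marking, final_marking = alpha_miner.apply(trace_log)
# the dispatcher recognizes the TraceLog instance and forwards to apply_log
alignments = apply(trace_log, net, initial_marking, final_marking,
                   parameters={})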
Example 4
def export_log_tree(log):
    """
    Get XES log XML tree from a PM4Py trace log

    Parameters
    -----------
    log
        PM4Py trace log

    Returns
    -----------
    tree
        XML tree
    """
    # if the log is an EventLog instance, transform it into log_instance.TraceLog format
    if type(log) is log_instance.EventLog:
        log = log_transform.transform_event_log_to_trace_log(log)
    root = etree.Element(xes_util.TAG_LOG)

    # add attributes at the log level
    export_attributes(log, root)
    # add extensions at the log level
    export_extensions(log, root)
    # add globals at the log level
    export_globals(log, root)
    # add classifiers at the log level
    export_classifiers(log, root)
    # add traces at the log level
    export_traces(log, root)

    tree = etree.ElementTree(root)

    return tree
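
A minimal serialization sketch for the tree returned above, assuming etree here is lxml.etree (whose ElementTree.write accepts the keyword arguments shown); the output file name is illustrative:

tree = export_log_tree(trace_log)
tree.write("exported-log.xes", pretty_print=True,
           xml_declaration=True, encoding="utf-8")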
Example 5
 def obtainPetriNetThroughImdf(self, log_name):
     # to avoid static-method warnings in tests,
     # which by construction of the unittest package have to be instance methods
     self.dummy_variable = "dummy_value"
     if ".xes" in log_name:
         trace_log = xes_importer.import_log(log_name)
     else:
         event_log = csv_importer.import_log(log_name)
         trace_log = log_transform.transform_event_log_to_trace_log(event_log)
     net, marking, final_marking = inductive_miner.apply(trace_log, None)
     return trace_log, net, marking, final_marking
Example 6
    def obtainPetriNetThroughAlphaMiner(self, log_name):
        # to avoid static-method warnings in tests,
        # which by construction of the unittest package have to be instance methods
        self.dummy_variable = "dummy_value"

        if ".xes" in log_name:
            trace_log = xes_importer.import_log(log_name)
        else:
            event_log = csv_importer.import_log(log_name)
            trace_log = log_transform.transform_event_log_to_trace_log(
                event_log)
        net, marking, fmarking = alpha_factory.apply(trace_log)
        soundness = check_soundness.check_petri_wfnet_and_soundness(net)
        del soundness

        return trace_log, net, marking, fmarking
Example 7
 def test_importExportXEStoCSV(self):
     # to avoid static-method warnings in tests,
     # which by construction of the unittest package have to be instance methods
     self.dummy_variable = "dummy_value"
     trace_log = xes_importer.import_log(
         os.path.join(INPUT_DATA_DIR, "running-example.xes"))
     event_log = log_transform.transform_trace_log_to_event_log(trace_log)
     csv_exporter.export_log(
         event_log,
         os.path.join(OUTPUT_DATA_DIR, "running-example-exported.csv"))
     event_log_newimport = csv_importer.import_log(
         os.path.join(OUTPUT_DATA_DIR, "running-example-exported.csv"))
     trace_log_imported_after_export = log_transform.transform_event_log_to_trace_log(
         event_log_newimport)
     self.assertEqual(len(trace_log), len(trace_log_imported_after_export))
     os.remove(os.path.join(OUTPUT_DATA_DIR,
                            "running-example-exported.csv"))
Example 8
 def test_prefiltering_dataframe(self):
     # to avoid static-method warnings in tests,
     # which by construction of the unittest package have to be instance methods
     self.dummy_variable = "dummy_value"
     input_log = os.path.join(INPUT_DATA_DIR, "running-example.csv")
     dataframe = csv_import_adapter.import_dataframe_from_path_wo_timeconversion(
         input_log, sep=',')
     dataframe = attributes_filter.filter_df_keeping_spno_activities(
         dataframe, activity_key="concept:name")
     dataframe = case_filter.filter_on_ncases(
         dataframe, case_id_glue="case:concept:name")
     dataframe = csv_import_adapter.convert_timestamp_columns_in_df(
         dataframe)
     dataframe = dataframe.sort_values('time:timestamp')
     event_log = pandas_df_imp.convert_dataframe_to_event_log(dataframe)
     trace_log = transform.transform_event_log_to_trace_log(event_log)
     del trace_log
Example 9
def apply_events(trace_log, dt1, dt2, parameters=None):
    """
    Get a new trace log containing all the events contained in the given interval

    Parameters
    -----------
    trace_log
        Trace log
    dt1
        Lower bound of the interval
    dt2
        Upper bound of the interval
    parameters
        Possible parameters of the algorithm, including:
            timestamp_key -> Attribute to use as timestamp

    Returns
    ------------
    filtered_log
        Filtered trace log
    """
    if parameters is None:
        parameters = {}
    timestamp_key = parameters.get(PARAMETER_CONSTANT_TIMESTAMP_KEY,
                                   DEFAULT_TIMESTAMP_KEY)
    dt1 = get_dt_from_string(dt1)
    dt2 = get_dt_from_string(dt2)

    event_log = transform.transform_trace_log_to_event_log(trace_log)
    filtered_event_log = EventLog([
        x for x in event_log
        if dt1 < x[timestamp_key].replace(tzinfo=None) < dt2
    ])
    filtered_trace_log = transform.transform_event_log_to_trace_log(
        filtered_event_log)

    return filtered_trace_log
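
A hypothetical call of the filter above, assuming the events carry the default time:timestamp attribute and that get_dt_from_string (defined elsewhere in the module) parses date strings of this form:

filtered_trace_log = apply_events(trace_log,
                                  "2011-01-01 00:00:00",
                                  "2011-03-01 00:00:00")
print(len(filtered_trace_log))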
Example 10
from pm4py.visualization.petrinet import factory as pn_vis_factory

import os
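
# (other imports are omitted in this listing; the script presumably relies
# on a JSON log importer factory, log_transform, and the alpha miner
# factory, referenced below as json_importer / log_transform / alpha_factory)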

rename = {
    "ActivityName": "concept:name",
    "TimeStamp": "time:timestamp",
    "Event_type": "lifecycle:transition",
    "ProcessInstanceId": "case:concept:name",
}

event_log = json_importer.import_log("form_json.json",
                                     parameters={"rename_map": rename})
event_log.sort()

trace_log = log_transform.transform_event_log_to_trace_log(event_log)
trace_log.sort()
trace_log.insert_trace_index_as_event_attribute()
net, marking, final_marking = alpha_factory.apply(trace_log)

for place in marking:
    print("initial marking " + place.name)
for place in final_marking:
    print("final marking " + place.name)
gviz = pn_vis_factory.apply(net,
                            marking,
                            final_marking,
                            parameters={"format": "svg"})
pn_vis_factory.view(gviz)

#xes_exporter.export_log(trace_log, "form_json.xes")

def apply(df, parameters=None):
    """
    Returns a Pandas dataframe from which a sound workflow net can be
    extracted, assuming a discovery algorithm that returns models with
    only visible transitions

    Parameters
    ------------
    df
        Pandas dataframe
    parameters
        Possible parameters of the algorithm, including:
            max_no_variants -> Maximum number of variants to consider to return a Petri net

    Returns
    ------------
    filtered_df
        Filtered dataframe
    """
    if parameters is None:
        parameters = {}

    if PARAMETER_CONSTANT_CASEID_KEY not in parameters:
        parameters[PARAMETER_CONSTANT_CASEID_KEY] = CASE_CONCEPT_NAME
    if PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[PARAMETER_CONSTANT_ACTIVITY_KEY] = DEFAULT_NAME_KEY
    if PARAMETER_CONSTANT_TIMESTAMP_KEY not in parameters:
        parameters[PARAMETER_CONSTANT_TIMESTAMP_KEY] = DEFAULT_TIMESTAMP_KEY
    if PARAMETER_CONSTANT_ATTRIBUTE_KEY not in parameters:
        parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = parameters[
            PARAMETER_CONSTANT_ACTIVITY_KEY]

    caseid_glue = parameters[PARAMETER_CONSTANT_CASEID_KEY]
    activity_key = parameters[PARAMETER_CONSTANT_ACTIVITY_KEY]
    timest_key = parameters[PARAMETER_CONSTANT_TIMESTAMP_KEY]

    max_no_variants = parameters.get("max_no_variants", 20)

    variants_df = case_statistics.get_variants_df(df, parameters=parameters)
    parameters["variants_df"] = variants_df

    variant_stats = case_statistics.get_variant_statistics(
        df, parameters=parameters)

    all_variants_list = []
    for var in variant_stats:
        all_variants_list.append([var["variant"], var[caseid_glue]])

    all_variants_list = sorted(all_variants_list,
                               key=lambda x: (x[1], x[0]),
                               reverse=True)

    considered_variants = []
    considered_traces = []

    i = 0
    while i < min(len(all_variants_list), max_no_variants):
        variant = all_variants_list[i][0]

        considered_variants.append(variant)

        filtered_df = variants_filter.apply(df,
                                            considered_variants,
                                            parameters=parameters)

        dfg_frequency = dfg_util.get_dfg_graph(filtered_df,
                                               measure="frequency",
                                               perf_aggregation_key="median",
                                               case_id_glue=caseid_glue,
                                               activity_key=activity_key,
                                               timestamp_key=timest_key)

        net, initial_marking, final_marking = alpha_miner.apply_dfg(
            dfg_frequency, parameters=parameters)

        is_sound = check_soundness.check_petri_wfnet_and_soundness(net)
        if not is_sound:
            del considered_variants[-1]
        else:
            traces_of_this_variant = variants_filter.apply(
                df, [variant], parameters=parameters).groupby(caseid_glue)
            traces_of_this_variant_keys = list(
                traces_of_this_variant.groups.keys())
            trace_of_this_variant = traces_of_this_variant.get_group(
                traces_of_this_variant_keys[0])

            this_trace = transform.transform_event_log_to_trace_log(
                pandas_df_imp.convert_dataframe_to_event_log(
                    trace_of_this_variant),
                case_glue=caseid_glue)[0]
            if activity_key != DEFAULT_NAME_KEY:
                for j in range(len(this_trace)):
                    this_trace[j][DEFAULT_NAME_KEY] = this_trace[j][
                        activity_key]
            considered_traces.append(this_trace)
            filtered_log = TraceLog(considered_traces)

            try:
                alignments = alignment_factory.apply(filtered_log, net,
                                                     initial_marking,
                                                     final_marking)
                del alignments
                fitness = replay_fitness_factory.apply(filtered_log,
                                                       net,
                                                       initial_marking,
                                                       final_marking,
                                                       parameters=parameters)
                if fitness["log_fitness"] < 0.99999:
                    del considered_variants[-1]
                    del considered_traces[-1]
            except TypeError:
                del considered_variants[-1]
                del considered_traces[-1]

        i = i + 1

    return variants_filter.apply(df,
                                 considered_variants,
                                 parameters=parameters)
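
A hypothetical usage of the function above, assuming a dataframe loaded with the csv_import_adapter seen in Example 8 and the default pm4py 1.x column names (the input path is illustrative):

import os
from pm4py.objects.log.adapters.pandas import csv_import_adapter

df = csv_import_adapter.import_dataframe_from_path(
    os.path.join("input_data", "running-example.csv"))
filtered_df = apply(df, parameters={"max_no_variants": 10})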