Example #1
import os

import pandas as pd
import pm4py
# import path assumed from the pm4py layout; not shown in the original snippet
from pm4py.algo.merging.case_relations import algorithm as case_relations_merging


def execute_script():
    dataframe1 = pd.read_csv(
        os.path.join("..", "tests", "input_data", "interleavings",
                     "receipt_even.csv"))
    dataframe1 = pm4py.format_dataframe(dataframe1)
    dataframe2 = pd.read_csv(
        os.path.join("..", "tests", "input_data", "interleavings",
                     "receipt_odd.csv"))
    dataframe2 = pm4py.format_dataframe(dataframe2)
    case_relations = pd.read_csv(
        os.path.join("..", "tests", "input_data", "interleavings",
                     "case_relations.csv"))
    merged = case_relations_merging.apply(dataframe1, dataframe2,
                                          case_relations)
    dfg, sa, ea = pm4py.discover_dfg(merged)
    pm4py.view_dfg(dfg, sa, ea, format="svg")
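If no display is attached (for example on a server), the DFG can be saved to a file instead of viewed; a one-line sketch reusing pm4py.save_vis_dfg as it also appears in Example #15 (file name illustrative):

    pm4py.save_vis_dfg(dfg, sa, ea, "merged_dfg.png")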
Example #2
import os

import pandas as pd
import pm4py


def execute_script():
    log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "receipt.xes"))
    pm4py.view_performance_spectrum(log, ["Confirmation of receipt", "T04 Determine confirmation of receipt",
                                         "T10 Determine necessity to stop indication"], format="svg")
    df = pd.read_csv(os.path.join("..", "tests", "input_data", "receipt.csv"))
    df = pm4py.format_dataframe(df)
    pm4py.view_performance_spectrum(df, ["Confirmation of receipt", "T04 Determine confirmation of receipt",
                                         "T10 Determine necessity to stop indication"], format="svg")
Example #3
 def test_csv(self):
     df = pd.read_csv("input_data/running-example.csv")
     df = pm4py.format_dataframe(df, case_id="case:concept:name", activity_key="concept:name",
                                 timestamp_key="time:timestamp")
     log2 = pm4py.convert_to_event_log(df)
     stream1 = pm4py.convert_to_event_stream(log2)
     df2 = pm4py.convert_to_dataframe(log2)
     pm4py.write_xes(log2, "test_output_data/log.xes")
     os.remove("test_output_data/log.xes")
Example #4
 def test_statistics_df(self):
     df = pd.read_csv("input_data/running-example.csv")
     df = pm4py.format_dataframe(df, case_id="case:concept:name", activity_key="concept:name",
                                 timestamp_key="time:timestamp")
     pm4py.get_start_activities(df)
     pm4py.get_end_activities(df)
     pm4py.get_attributes(df)
     pm4py.get_attribute_values(df, "org:resource")
     pm4py.get_variants(df)
Example #5
import os

import pandas as pd
import pm4py
# import paths assumed from the pm4py layout; not shown in the original snippet
from pm4py.algo.discovery.interleavings import algorithm as interleavings_miner
from pm4py.visualization.ocel.interleavings import visualizer as interleavings_visualizer


def execute_script():
    receipt_even = pd.read_csv(
        os.path.join("..", "tests", "input_data", "interleavings",
                     "receipt_even.csv"))
    receipt_even = pm4py.format_dataframe(receipt_even)
    receipt_odd = pd.read_csv(
        os.path.join("..", "tests", "input_data", "interleavings",
                     "receipt_odd.csv"))
    receipt_odd = pm4py.format_dataframe(receipt_odd)
    case_relations = pd.read_csv(
        os.path.join("..", "tests", "input_data", "interleavings",
                     "case_relations.csv"))
    interleavings_dataframe = interleavings_miner.apply(
        receipt_even, receipt_odd, case_relations)
    print(interleavings_dataframe)
    # print the frequency and the direction of the interleavings
    print(interleavings_dataframe[[
        "@@source_activity", "@@target_activity", "@@direction"
    ]].value_counts())
    # print the performance of the interleavings
    print(
        interleavings_dataframe.groupby(
            ["@@source_activity", "@@target_activity",
             "@@direction"])["@@timestamp_diff"].agg("mean"))
    # visualizes the frequency of the interleavings
    gviz_freq = interleavings_visualizer.apply(receipt_even,
                                               receipt_odd,
                                               interleavings_dataframe,
                                               parameters={
                                                   "annotation": "frequency",
                                                   "format": "svg"
                                               })
    interleavings_visualizer.view(gviz_freq)
    # visualizes the performance of the interleavings
    gviz_perf = interleavings_visualizer.apply(receipt_even,
                                               receipt_odd,
                                               interleavings_dataframe,
                                               parameters={
                                                   "annotation": "performance",
                                                   "aggregation_measure":
                                                   "median",
                                                   "format": "svg"
                                               })
    interleavings_visualizer.view(gviz_perf)
Example #6
import os

import pandas as pd
import pm4py
# import path assumed from the pm4py layout; not shown in the original snippet
from pm4py.algo.organizational_mining.resource_profiles import algorithm


def execute_script():
    log = pd.read_csv(
        os.path.join("..", "tests", "input_data", "running-example.csv"))
    log = pm4py.format_dataframe(log)
    # Metric RBI 1.1: Number of distinct activities done by a resource in a given time interval [t1, t2)
    print(
        algorithm.distinct_activities(log, "2010-12-30 00:00:00",
                                      "2011-01-25 00:00:00", "Sara"))
    # Metric RBI 1.3: Fraction of completions of a given activity a, by a given resource r,
    # during a given time slot, [t1, t2), with respect to the total number of activity completions by resource r
    # during [t1, t2)
    print(
        algorithm.activity_frequency(log, "2010-12-30 00:00:00",
                                     "2011-01-25 00:00:00", "Sara", "decide"))
    # Metric RBI 2.1: The number of activity instances completed by a given resource during a given time slot.
    print(
        algorithm.activity_completions(log, "2010-12-30 00:00:00",
                                       "2011-01-25 00:00:00", "Sara"))
    # Metric RBI 2.2: The number of cases completed during a given time slot in which a given resource was involved.
    print(
        algorithm.case_completions(log, "2010-12-30 00:00:00",
                                   "2011-01-25 00:00:00", "Pete"))
    # Metric RBI 2.3: The fraction of cases completed during a given time slot in which a given resource was involved
    # with respect to the total number of cases completed during the time slot.
    print(
        algorithm.fraction_case_completions(log, "2010-12-30 00:00:00",
                                            "2011-01-25 00:00:00", "Pete"))
    # Metric RBI 2.4: The average number of activities started by a given resource but not completed at a moment in time.
    print(
        algorithm.average_workload(log, "2010-12-30 00:00:00",
                                   "2011-01-15 00:00:00", "Mike"))
    # Metric RBI 3.1: The fraction of active time during which a given resource is involved in more than one activity
    # with respect to the resource's active time.
    print(
        algorithm.multitasking(log, "2010-12-30 00:00:00",
                               "2011-01-25 00:00:00", "Mike"))
    # Metric RBI 4.3: The average duration of instances of a given activity completed during a given time slot by
    # a given resource.
    print(
        algorithm.average_duration_activity(log, "2010-12-30 00:00:00",
                                            "2011-01-25 00:00:00", "Sue",
                                            "examine thoroughly"))
    # Metric RBI 4.4: The average duration of cases completed during a given time slot in which a given resource was involved.
    print(
        algorithm.average_case_duration(log, "2010-12-30 00:00:00",
                                        "2011-01-25 00:00:00", "Sue"))
    # Metric RBI 5.1: The number of cases completed during a given time slot in which two given resources were involved.
    print(
        algorithm.interaction_two_resources(log, "2010-12-30 00:00:00",
                                            "2011-01-25 00:00:00", "Mike",
                                            "Pete"))
    # Metric RBI 5.2: The fraction of resources involved in the same cases with a given resource during a given time slot
    # with respect to the total number of resources active during the time slot.
    print(
        algorithm.social_position(log, "2010-12-30 00:00:00",
                                  "2011-01-25 00:00:00", "Sue"))
Example #7
import os

import pandas as pd
import pm4py


def execute_script():
    dataframe = pd.read_csv(
        os.path.join("..", "tests", "input_data", "receipt.csv"))
    dataframe = pm4py.format_dataframe(dataframe)
    # prints the summary of the positions of two activities
    print(
        pm4py.get_activity_position_summary(dataframe,
                                            "Confirmation of receipt"))
    print(
        pm4py.get_activity_position_summary(
            dataframe, "T02 Check confirmation of receipt"))
Example #8
import pandas as pd
import pm4py
# import paths assumed from the pm4py layout; not shown in the original snippet
from pm4py.algo.discovery.heuristics.variants import plusplus
from pm4py.visualization.heuristics_net import visualizer
from pm4py.visualization.petri_net import visualizer as pn_visualizer


def execute_script():
    df = pd.read_csv("../tests/input_data/interval_event_log.csv")
    df = pm4py.format_dataframe(df)
    log = pm4py.read_xes("../tests/input_data/interval_event_log.xes")
    heu_net = plusplus.apply_heu(log, parameters={"heu_net_decoration": "performance"})
    heu_net_2 = plusplus.apply_heu_pandas(df, parameters={"heu_net_decoration": "performance"})
    gviz = visualizer.apply(heu_net, parameters={"format": "svg"})
    visualizer.view(gviz)
    gviz2 = visualizer.apply(heu_net_2, parameters={"format": "svg"})
    visualizer.view(gviz2)
    net1, im1, fm1 = plusplus.apply(log)
    # mine the dataframe as well, mirroring the heuristics-net pair above
    # (apply_pandas assumed available alongside apply_heu_pandas)
    net2, im2, fm2 = plusplus.apply_pandas(df)
    gviz3 = pn_visualizer.apply(net1, im1, fm1, parameters={"format": "svg"})
    pn_visualizer.view(gviz3)
    gviz4 = pn_visualizer.apply(net2, im2, fm2, parameters={"format": "svg"})
    pn_visualizer.view(gviz4)
Example #9
 def exportSubP(self, y_pred, centers, name, encoding, alg):
     path = os.pardir + '/outputLog/' + encoding + '/' + alg
     try:
         os.makedirs(path, exist_ok=True)
     except OSError:
         print("Creation of the directory %s failed" % path)
     else:
         print("Successfully created the directory %s " % path)
         frames = [[] for i in range(max(y_pred) + 1)]
         for i, s in enumerate(self.sessions):
             frames[y_pred[i]].extend(s.export(self.attrNames, i))
         for i in range(max(y_pred) + 1):
             ind = list(
                 np.flip(np.argsort(centers[i][:len(self.distinct)])[-1:]))
             subP = concatName(self.distinct, ind)
             newFile = name + str(i) + subP + '.xes'
             log = pd.concat(frames[i], ignore_index=True)
             log = pm.format_dataframe(log,
                                       case_id='case',
                                       activity_key='concept:name',
                                       timestamp_key='time:timestamp')
             log = pm.convert_to_event_log(log)
             pm.write_xes(log, os.path.join(path, newFile))
         print("Sessions exported")
Example #10
    def convertLog(self,
                   centers,
                   y_pred,
                   name,
                   encoding,
                   alg,
                   datapath,
                   exportSes=False):
        start = time.time()
        frames = []
        log = pd.DataFrame()
        for i, s in enumerate(self.sessions):
            abstracted = s.convertSession(centers[y_pred[i]], y_pred[i],
                                          self.distinct, self.attrNames)
            frames.append(abstracted)
        log = pd.concat(frames, ignore_index=True)
        log = pm.format_dataframe(log,
                                  case_id='case',
                                  activity_key='concept:name',
                                  timestamp_key='time:timestamp')
        # hold out the last 30% of the events for testing; note this splits on
        # event rows, so one case may span both partitions
        num = math.ceil(len(log) * 0.7)

        log1 = log[:num]
        log2 = log[num:]

        log = pm.convert_to_event_log(log)
        log1 = pm.convert_to_event_log(log1)
        log2 = pm.convert_to_event_log(log2)

        pm.write_xes(log1, os.path.join(datapath, name + "train.xes"))
        pm.write_xes(log2, os.path.join(datapath, name + "test.xes"))
        pm.write_xes(log, os.path.join(datapath, name + ".xes"))

        if exportSes:
            self.exportSubP(y_pred, centers, name, encoding, alg)
        print("Convertion Time:", time.time() - start)
Example #11
"""

import pandas as pd
import pm4py
'''
def import_csv(file_path):
    event_log = pd.read_csv(file_path, sep=';')
    event_log = pm4py.format_dataframe(event_log, case_id='case_id', activity_key='activity', timestamp_key='timestamp')
    start_activities = pm4py.get_start_activities(event_log)
    end_activities = pm4py.get_end_activities(event_log)
    print("Start activities: {}\nEnd activities: {}".format(start_activities, end_activities))
    
'''

file_path = r'file'
log = pm4py.format_dataframe(pd.read_csv(file_path, sep=';'),
                             case_id='case_id',
                             activity_key='activity',
                             timestamp_key='timestamp')
#log = log[log['@@index']< 40]
process_tree = pm4py.discover_tree_inductive(log)
bpmn_model = pm4py.convert_to_bpmn(process_tree)
pm4py.view_bpmn(bpmn_model)

from pm4py.algo.discovery.inductive import algorithm as inductive_miner
from pm4py.visualization.process_tree import visualizer as pt_visualizer

tree = inductive_miner.apply_tree(log)

gviz = pt_visualizer.apply(tree)
pt_visualizer.view(gviz)
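Where no display is attached, the rendered models can be written to disk instead of viewed; a short sketch using the save counterparts of the calls above (file names illustrative):

pm4py.save_vis_bpmn(bpmn_model, 'bpmn_model.svg')
pt_visualizer.save(gviz, 'process_tree.svg')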
Example #12
def gerar_log_eventos(ramo_justica,
                      codtribunal,
                      atuacao,
                      cluster,
                      grau,
                      codorgaoj,
                      codnatureza,
                      codclasse,
                      dtinicio,
                      dtfim,
                      baixado=None,
                      sensibility='60'):

    eventLog = None

    cacheKey = "{0}-{1}-{2}-{3}-{4}-{5}-{6}-{7}-{8}-{9}-{10}".format(
        ramo_justica, codtribunal, atuacao, cluster, grau, codorgaoj,
        codnatureza, codclasse, dtinicio, dtfim, baixado)

    cachedEventLog = eventLogCache.get(cacheKey)
    if cachedEventLog is not None:
        eventLog = cachedEventLog

    else:
        conn = psycopg2.connect(host=db_host,
                                port=db_port,
                                database=db_name,
                                user=db_user,
                                password=db_pass)

        sufixo_ramo = ramos_justica.get(ramo_justica, 'default')

        tabela_fato = "inovacnj.fat_movimento_" + sufixo_ramo

        qry = "SELECT "
        qry += "  fat.npu as npu, "
        qry += "  CASE "
        qry += "  WHEN f.descricao IS NULL THEN fat.mov_cod ||  ' - ' || mov.descricao "
        qry += "  ELSE f.descricao || ': ' || fat.mov_cod ||  ' - ' || mov.descricao "
        qry += "  END AS atividade, "
        qry += "  fat.mov_dtmov as mov_dtmov "
        qry += "FROM " + tabela_fato + " fat "
        qry += "INNER JOIN inovacnj.acervo_processo_" + sufixo_ramo + " ap ON ap.npu = fat.npu "
        qry += "INNER JOIN inovacnj.orgao_julgador oj ON oj.cod::varchar = fat.oj_cod "
        qry += "INNER JOIN inovacnj.clusteroj_orgjulg cojoj ON cojoj.cod_orgao_julg = oj.cod "
        qry += "INNER JOIN inovacnj.movimentocnj mov ON mov.cod = fat.mov_cod "
        qry += "INNER JOIN inovacnj.natureza_classe nc ON nc.cod_classe = fat.codclasse "
        qry += "INNER JOIN inovacnj.natureza nat ON nat.cod = nc.cod_natureza "
        qry += "LEFT JOIN inovacnj.fase_movimento fm ON fm.cod_movimento = fat.mov_cod "
        qry += "LEFT JOIN inovacnj.fase f ON f.cod = fm.cod_fase "
        qry += "WHERE (1=1) "

        if baixado is not None:
            qry += "AND ap.baixado = '" + baixado + "' "
        if codtribunal is not None:
            qry += "AND fat.codtribunal = '" + codtribunal + "' "
        if atuacao is not None:
            qry += "AND oj.atuacao_vara = '" + atuacao + "' "
        if cluster is not None:
            qry += "AND cojoj.cod_cluster = " + cluster + " "
        if codorgaoj is not None:
            qry += "AND fat.oj_cod = '" + codorgaoj + "' "
        if grau is not None:
            qry += "AND fat.grau = '" + grau + "' "
        if codnatureza is not None:
            qry += "AND nat.cod = " + str(codnatureza) + " "
        if codclasse is not None:
            qry += "AND fat.codclasse = " + str(codclasse) + " "

        if dtinicio is not None and dtfim is not None:
            qry += "AND fat.mov_dtmov BETWEEN to_timestamp('" + dtinicio + "', 'yyyy-MM-dd') AND to_timestamp('" + dtfim + "', 'yyyy-MM-dd') "

        qry += "ORDER BY fat.npu, fat.mov_dtmov ASC "

        df_logeventos_pd = pd.read_sql_query(qry, conn)

        if not df_logeventos_pd.empty:
            df_event_log = pm4py.format_dataframe(df_logeventos_pd,
                                                  case_id='npu',
                                                  activity_key='atividade',
                                                  timestamp_key='mov_dtmov')
            eventLog = pm4py.convert_to_event_log(df_event_log)

            eventLogCache[cacheKey] = eventLog
            # evict from the cache after 15 minutes
            timer3.apply_after(1000 * 60 * 15,
                               clear_eventlog_cache,
                               args=[cacheKey],
                               priority=0)

    if eventLog is not None:
        if sensibility is not None:
            eventLog = pm4py.filter_variants_percentage(
                eventLog, percentage=float(sensibility) / 100)

    return eventLog
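Note that the WHERE clauses above are assembled by string concatenation, which leaves the query open to SQL injection. A minimal sketch of two of the filters rewritten with psycopg2/pandas parameter binding (the table name still comes from the fixed ramos_justica lookup, since identifiers cannot be bound as parameters):

    qry = "SELECT fat.npu, fat.mov_dtmov FROM " + tabela_fato + " fat WHERE fat.codtribunal = %(codtribunal)s AND fat.grau = %(grau)s"
    df_logeventos_pd = pd.read_sql_query(qry, conn, params={"codtribunal": codtribunal, "grau": grau})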
Example #13
 def test_serialization_dataframe(self):
     df = pd.read_csv("input_data/running-example.csv")
     df = pm4py.format_dataframe(df)
     ser = pm4py.serialize(df)
     df2 = pm4py.deserialize(ser)
Example #14
 def test_new_statistics_df(self):
     df = pd.read_csv("input_data/running-example.csv")
     df = pm4py.format_dataframe(df)
     pm4py.get_trace_attribute_values(df, "case:creator")
     pm4py.discover_eventually_follows_graph(df)
     pm4py.get_case_arrival_average(df)
Example #15
import os

import pandas as pd
import pm4py


def execute_script():
    ENABLE_VISUALIZATION = True

    # reads a XES into an event log
    log1 = pm4py.read_xes("../tests/input_data/running-example.xes")

    # reads a CSV into a dataframe
    df = pd.read_csv("../tests/input_data/running-example.csv")
    # formats the dataframe with the mandatory columns for process mining purposes
    df = pm4py.format_dataframe(df,
                                case_id="case:concept:name",
                                activity_key="concept:name",
                                timestamp_key="time:timestamp")
    # converts the dataframe to an event log
    log2 = pm4py.convert_to_event_log(df)

    # converts the log read from XES into a stream and dataframe respectively
    stream1 = pm4py.convert_to_event_stream(log1)
    df2 = pm4py.convert_to_dataframe(log1)

    # writes log1 to a XES file
    pm4py.write_xes(log1, "ru1.xes")

    dfg, dfg_sa, dfg_ea = pm4py.discover_dfg(log1)
    petri_alpha, im_alpha, fm_alpha = pm4py.discover_petri_net_alpha(log1)
    petri_inductive, im_inductive, fm_inductive = pm4py.discover_petri_net_inductive(
        log1)
    petri_heuristics, im_heuristics, fm_heuristics = pm4py.discover_petri_net_heuristics(
        log1)
    tree_inductive = pm4py.discover_tree_inductive(log1)
    heu_net = pm4py.discover_heuristics_net(log1)

    pm4py.write_dfg(dfg, dfg_sa, dfg_ea, "ru_dfg.dfg")
    pm4py.write_petri_net(petri_alpha, im_alpha, fm_alpha, "ru_alpha.pnml")
    pm4py.write_petri_net(petri_inductive, im_inductive, fm_inductive,
                          "ru_inductive.pnml")
    pm4py.write_petri_net(petri_heuristics, im_heuristics, fm_heuristics,
                          "ru_heuristics.pnml")
    pm4py.write_process_tree(tree_inductive, "ru_inductive.ptml")

    dfg, dfg_sa, dfg_ea = pm4py.read_dfg("ru_dfg.dfg")
    petri_alpha, im_alpha, fm_alpha = pm4py.read_petri_net("ru_alpha.pnml")
    petri_inductive, im_inductive, fm_inductive = pm4py.read_petri_net(
        "ru_inductive.pnml")
    petri_heuristics, im_heuristics, fm_heuristics = pm4py.read_petri_net(
        "ru_heuristics.pnml")
    tree_inductive = pm4py.read_process_tree("ru_inductive.ptml")

    pm4py.save_vis_petri_net(petri_alpha, im_alpha, fm_alpha, "ru_alpha.png")
    pm4py.save_vis_petri_net(petri_inductive, im_inductive, fm_inductive,
                             "ru_inductive.png")
    pm4py.save_vis_petri_net(petri_heuristics, im_heuristics, fm_heuristics,
                             "ru_heuristics.png")
    pm4py.save_vis_process_tree(tree_inductive, "ru_inductive_tree.png")
    pm4py.save_vis_heuristics_net(heu_net, "ru_heunet.png")
    pm4py.save_vis_dfg(dfg, dfg_sa, dfg_ea, "ru_dfg.png")

    if ENABLE_VISUALIZATION:
        pm4py.view_petri_net(petri_alpha, im_alpha, fm_alpha, format="svg")
        pm4py.view_petri_net(petri_inductive,
                             im_inductive,
                             fm_inductive,
                             format="svg")
        pm4py.view_petri_net(petri_heuristics,
                             im_heuristics,
                             fm_heuristics,
                             format="svg")
        pm4py.view_process_tree(tree_inductive, format="svg")
        pm4py.view_heuristics_net(heu_net, format="svg")
        pm4py.view_dfg(dfg, dfg_sa, dfg_ea, format="svg")

    aligned_traces = pm4py.conformance_alignments(log1, petri_inductive,
                                                  im_inductive, fm_inductive)
    replayed_traces = pm4py.conformance_tbr(log1, petri_inductive,
                                            im_inductive, fm_inductive)

    fitness_tbr = pm4py.evaluate_fitness_tbr(log1, petri_inductive,
                                             im_inductive, fm_inductive)
    print("fitness_tbr", fitness_tbr)
    fitness_align = pm4py.evaluate_fitness_alignments(log1, petri_inductive,
                                                      im_inductive,
                                                      fm_inductive)
    print("fitness_align", fitness_align)
    precision_tbr = pm4py.evaluate_precision_tbr(log1, petri_inductive,
                                                 im_inductive, fm_inductive)
    print("precision_tbr", precision_tbr)
    precision_align = pm4py.evaluate_precision_alignments(
        log1, petri_inductive, im_inductive, fm_inductive)
    print("precision_align", precision_align)

    print("log start activities = ", pm4py.get_start_activities(log2))
    print("df start activities = ", pm4py.get_start_activities(df2))
    print("log end activities = ", pm4py.get_end_activities(log2))
    print("df end activities = ", pm4py.get_end_activities(df2))
    print("log attributes = ", pm4py.get_attributes(log2))
    print("df attributes = ", pm4py.get_attributes(df2))
    print("log org:resource values = ",
          pm4py.get_attribute_values(log2, "org:resource"))
    print("df org:resource values = ",
          pm4py.get_attribute_values(df2, "org:resource"))

    print("start_activities len(filt_log) = ",
          len(pm4py.filter_start_activities(log2, ["register request"])))
    print("start_activities len(filt_df) = ",
          len(pm4py.filter_start_activities(df2, ["register request"])))
    print("end_activities len(filt_log) = ",
          len(pm4py.filter_end_activities(log2, ["pay compensation"])))
    print("end_activities len(filt_df) = ",
          len(pm4py.filter_end_activities(df2, ["pay compensation"])))
    print(
        "attributes org:resource len(filt_log) (cases) cases = ",
        len(
            pm4py.filter_attribute_values(log2,
                                          "org:resource", ["Ellen"],
                                          level="case")))
    print(
        "attributes org:resource len(filt_log) (cases)  events = ",
        len(
            pm4py.filter_attribute_values(log2,
                                          "org:resource", ["Ellen"],
                                          level="event")))
    print(
        "attributes org:resource len(filt_df) (events) cases = ",
        len(
            pm4py.filter_attribute_values(df2,
                                          "org:resource", ["Ellen"],
                                          level="case")))
    print(
        "attributes org:resource len(filt_df) (events) events = ",
        len(
            pm4py.filter_attribute_values(df2,
                                          "org:resource", ["Ellen"],
                                          level="event")))
    print(
        "attributes org:resource len(filt_df) (events) events notpositive = ",
        len(
            pm4py.filter_attribute_values(df2,
                                          "org:resource", ["Ellen"],
                                          level="event",
                                          retain=False)))

    print("variants log = ", pm4py.get_variants(log2))
    print("variants df = ", pm4py.get_variants(df2))
    print(
        "variants filter log = ",
        len(
            pm4py.filter_variants(log2, [[
                "register request", "examine thoroughly", "check ticket",
                "decide", "reject request"
            ]])))
    print(
        "variants filter df = ",
        len(
            pm4py.filter_variants(df2, [[
                "register request", "examine thoroughly", "check ticket",
                "decide", "reject request"
            ]])))
    print("variants filter percentage = ",
          len(pm4py.filter_variants_percentage(log2, threshold=0.8)))

    print(
        "paths filter log len = ",
        len(
            pm4py.filter_directly_follows_relation(
                log2, [("register request", "examine casually")])))
    print(
        "paths filter dataframe len = ",
        len(
            pm4py.filter_directly_follows_relation(
                df2, [("register request", "examine casually")])))

    print(
        "timeframe filter log events len = ",
        len(
            pm4py.filter_time_range(log2,
                                    "2011-01-01 00:00:00",
                                    "2011-02-01 00:00:00",
                                    mode="events")))
    print(
        "timeframe filter log traces_contained len = ",
        len(
            pm4py.filter_time_range(log2,
                                    "2011-01-01 00:00:00",
                                    "2011-02-01 00:00:00",
                                    mode="traces_contained")))
    print(
        "timeframe filter log traces_intersecting len = ",
        len(
            pm4py.filter_time_range(log2,
                                    "2011-01-01 00:00:00",
                                    "2011-02-01 00:00:00",
                                    mode="traces_intersecting")))
    print(
        "timeframe filter df events len = ",
        len(
            pm4py.filter_time_range(df2,
                                    "2011-01-01 00:00:00",
                                    "2011-02-01 00:00:00",
                                    mode="events")))
    print(
        "timeframe filter df traces_contained len = ",
        len(
            pm4py.filter_time_range(df2,
                                    "2011-01-01 00:00:00",
                                    "2011-02-01 00:00:00",
                                    mode="traces_contained")))
    print(
        "timeframe filter df traces_intersecting len = ",
        len(
            pm4py.filter_time_range(df2,
                                    "2011-01-01 00:00:00",
                                    "2011-02-01 00:00:00",
                                    mode="traces_intersecting")))

    # remove the temporary files
    os.remove("ru1.xes")
    os.remove("ru_dfg.dfg")
    os.remove("ru_alpha.pnml")
    os.remove("ru_inductive.pnml")
    os.remove("ru_heuristics.pnml")
    os.remove("ru_inductive.ptml")
    os.remove("ru_alpha.png")
    os.remove("ru_inductive.png")
    os.remove("ru_heuristics.png")
    os.remove("ru_inductive_tree.png")
    os.remove("ru_heunet.png")
    os.remove("ru_dfg.png")
Example #16
import pandas as pd
import pm4py
from pm4py.objects.log.util import dataframe_utils
from pm4py.objects.conversion.log import converter as log_converter

# (start of the snippet is truncated) a counter loop reads three column names
# from the previously opened file handle `entrada`
contador = 0
for linha in entrada:
    if contador == 0:
        linha = linha.rstrip('\n')
        variavel1 = linha
    elif contador == 1:
        linha = linha.rstrip('\n')
        variavel2 = linha
    elif contador == 2:
        linha = linha.rstrip('\n')
        variavel3 = linha
    contador += 1
entrada.close()

pd.set_option('display.max_columns', None)

logCsv = pd.read_csv('Production_Data.csv', sep=',')
logCsv = dataframe_utils.convert_timestamp_columns_in_df(logCsv)

logCsv = pm4py.format_dataframe(logCsv,
                                case_id=variavel1,
                                activity_key=variavel2,
                                timestamp_key=variavel3)
logCsv = log_converter.apply(logCsv)

from pm4py.algo.discovery.heuristics import algorithm as heuristics_miner

net, im, fm = heuristics_miner.apply(
    logCsv,
    parameters={
        heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH:
        0.99
    })

from pm4py.visualization.petri_net import visualizer as pn_visualizer

gviz = pn_visualizer.apply(net, im, fm)
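The fragment ends after building the Petri net visualization without rendering it; a minimal completion (output file name illustrative):

pn_visualizer.view(gviz)
# or, in a headless environment:
# pn_visualizer.save(gviz, 'heuristics_net.png')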
Example #17
def log_import(file_path, file_format, log_information):
    """
    Imports the file using PM4PY functionalitites, formats
    it in a processable fashion, accoding to the Log information,
    if it is an CSV

    input: file_path str, file_format str, interval bool

    output: PM4PY default object dependent on Filetype, fromatted in case of csv
            The Set of all trace activities
    """

    activities = set()

    if file_format == "csv":

        # TODO Apply further file integrity checks

        log = pd.read_csv(file_path)

        # Transform the Timestamp to Datetime
        if log_information["log_type"] == "noninterval":

            log[log_information["timestamp"]] = pd.to_datetime(
                log[log_information["timestamp"]], utc=True)

            log = format_dataframe(
                log,
                case_id=log_information["case_id"],
                activity_key=log_information["concept_name"],
                timestamp_key=log_information["timestamp"],
            )

        # Transform the Timestamp to Datetime, and rename the transition:lifecycle
        elif log_information["log_type"] == "lifecycle":

            # Convert the Timestamps to Datetime
            log[log_information["timestamp"]] = pd.to_datetime(
                log[log_information["timestamp"]], utc=True)

            log = format_dataframe(
                log,
                case_id=log_information["case_id"],
                activity_key=log_information["concept_name"],
                timestamp_key=log_information["timestamp"],
            )
            # Rename the Columns to the XES defaults
            log = log.rename(
                {log_information["lifecycle"]: xes.DEFAULT_TRANSITION_KEY},
                axis=1)

        elif log_information["log_type"] == "timestamp":

            # Convert the Timestamps to Datetime
            log[log_information["end_timestamp"]] = pd.to_datetime(
                log[log_information["end_timestamp"]], utc=True)
            log[log_information["start_timestamp"]] = pd.to_datetime(
                log[log_information["start_timestamp"]], utc=True)

            log = format_dataframe(
                log,
                case_id=log_information["case_id"],
                activity_key=log_information["concept_name"],
                timestamp_key=log_information["end_timestamp"],
            )

            # Rename the Columns to the XES defaults
            log = log.rename(
                {
                    log_information["start_timestamp"]:
                    xes.DEFAULT_START_TIMESTAMP_KEY
                },
                axis=1,
            )

        activities = set(log[xes.DEFAULT_NAME_KEY].unique())

    # Simply load the log using XES
    elif file_format == "xes":

        log = xes_importer.apply(file_path,
                                 parameters={"show_progress_bar": False})

        for trace in log:
            for event in trace:
                activities.add(event[log_information["concept_name"]])

    else:

        # TODO Throw a proper warning / show a warning message in the console
        print("Invalid file format")

    return log, activities
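A hypothetical call illustrating the log_information keys the function reads for a plain (noninterval) CSV; the file and column names below are examples, not part of the original module:

log_information = {
    "log_type": "noninterval",
    "case_id": "case_id",
    "concept_name": "activity",
    "timestamp": "timestamp",
}
log, activities = log_import("event_log.csv", "csv", log_information)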
Example #18
File: main.py Project: joshelb/queuemining
try:
    if filename[-4:] == '.xes':
        variant = xes_importer.Variants.ITERPARSE
        parameters = {variant.value.Parameters.TIMESTAMP_SORT: True}
        log = xes_importer.apply(os.path.join(
            os.path.dirname(os.path.abspath(__file__)), filename),
                                 variant=variant,
                                 parameters=parameters)
        log.attributes['origin'] = 'xes'
    elif filename[-4:] == '.csv':
        log = pandas.read_csv(os.path.join(
            os.path.dirname(os.path.abspath(__file__)), filename),
                              sep=';')
        log = format_dataframe(log,
                               case_id='case_id',
                               activity_key='activity',
                               timestamp_key='timestamp',
                               timest_format='%Y-%m-%d %H:%M:%S%z')
        log = log_converter.apply(log)
except FileNotFoundError:
    print(f'No files with name "{filename}" found in folder.')
    exit()
except CustomException:
    print('Please only give the name of a file formatted in .xes or .csv')
    exit()
""" This section is for testing different abilities of pm4py and will be cleaned up later """
# filtered_log = filtered_log_events = timestamp_filter.apply_events(log, "2010-12-30 00:00:00", "2011-01-01 23:59:59")
# parameters = {inductive_miner.Variants.IM.value.Parameters.ACTIVITY_KEY: 'concept:name'}
# tree = inductive_miner.apply_tree(filtered_log, variant=inductive_miner.Variants.IM)
# net, im, fm = pt_converter.apply(tree, variant=pt_converter.Variants.TO_PETRI_NET)
# print(tree)
Example #19
import pandas as pd
import pm4py
import os

os.environ['PATH'] += os.pathsep + '/usr/local/bin'
##1
# Preparing the log
event_log = pd.read_csv('running-exampleNew.csv', sep=';')
event_log = pm4py.format_dataframe(event_log,
                                   case_id='case_id',
                                   activity_key='activity',
                                   timestamp_key='timestamp')
start_activities = pm4py.get_start_activities(event_log)
end_activities = pm4py.get_end_activities(event_log)
print("Start activities: {}\nEnd activities: {}".format(
    start_activities, end_activities))

# Convert from CSV to XES
pm4py.write_xes(event_log, 'running-example-exported.xes')

# Algorithm alpha
log = pm4py.read_xes('running-example-exported.xes')
# simplified discovery API (equivalent alpha miner call; see Example #15)
net, initial_marking, final_marking = pm4py.discover_petri_net_alpha(log)
pm4py.view_petri_net(net, initial_marking, final_marking)

##2
# Preparing the log --- Resource
event_log = pd.read_csv('running-exampleNew.csv', sep=';')
event_log = pm4py.format_dataframe(event_log,
                                   case_id='case_id',