def execute_script():
    """Round-trip a two-event object-centric log through the MDL exporter/importer.

    The function builds a small event table and a matching object table,
    exports them to ``prova.mdl``, re-imports both tables, prints the events
    plus the per-type object tables, and exports the re-imported data to
    ``prova2.mdl``.  Both files are deleted before returning, including when
    one of the steps raises.
    """
    first_export = "prova.mdl"
    second_export = "prova2.mdl"

    stream1 = [
        {
            "event_id": "1",
            "event_activity": "A",
            "event_timestamp": parser.parse("1970-01-01 00:00:00"),
            "order": ["O1"],
        },
        {
            "event_id": "2",
            "event_activity": "B",
            "event_timestamp": parser.parse("1970-01-01 00:00:00"),
            "order": ["O1"],
            # NOTE(review): this is ONE identifier, the string "I1, I2", not
            # the two identifiers "I1" and "I2" -- confirm that is intended.
            "item": ["I1, I2"],
        },
    ]
    stream2 = [
        {
            "object_id": "O1",
            "object_type": "order",
            "object_buyer": "Alessandro",
        },
        {
            "object_id": "I1",
            "object_type": "item",
            "object_cost": 600,
        },
    ]

    try:
        df = pd.DataFrame(stream1)
        df.type = "succint"
        obj_df = pd.DataFrame(stream2)

        mdl_exporter.apply(df, first_export, obj_df=obj_df)
        df, obj_df = mdl_importer.apply(first_export, return_obj_dataframe=True)

        # One table per object type, without the columns that are entirely
        # empty for that type.
        orders = obj_df[obj_df["object_type"] == "order"].dropna(how="all", axis=1)
        items = obj_df[obj_df["object_type"] == "item"].dropna(how="all", axis=1)
        print(df)
        print(orders)
        print(items)

        mdl_exporter.apply(df, second_export, obj_df=obj_df)
    finally:
        # Clean up whatever was actually written, even after a failure, and
        # without masking the original exception with FileNotFoundError.
        for path in (first_export, second_export):
            if os.path.exists(path):
                os.remove(path)
def __init__(self, name, mdl_path, shared_logs):
    """Load the log at ``mdl_path`` and reset all analysis state.

    Parameters
    ----------
    name
        Stored as ``self.name``.
    mdl_path
        Path of the log to load.  When the path contains the substring
        ``"ocel"`` it is read with ``ocel_importer``; otherwise with
        ``mdl_importer``.
    shared_logs
        Stored unchanged as ``self.shared_logs``.
    """
    # --- identity and relations to other logs ---
    self.name = name
    self.mdl_path = mdl_path
    self.shared_logs = shared_logs
    self.shared_logs_names = []
    self.parent = self

    # --- activities / object types ---
    self.obj_types_str = None
    self.act_obj_types = None
    self.initial_act_obj_types = None
    self.selected_act_obj_types = None
    self.activities = []
    self.obj_types = []

    # --- clustering state ---
    self.clusters = {}
    self.clustersrepr = ""
    self.clusterid = str(id(self))

    # --- derived structures, filled in later ---
    self.stream = None
    self.nodes = None
    self.events_corr = None
    self.events_corr2 = None
    self.matrix = None
    self.powered_matrix = None
    self.powered_matrix_2 = None
    self.graph = None
    self.row_sum = None
    self.overall_sum = 0

    # --- load the event table, then derive the exploded table from it ---
    if "ocel" not in mdl_path:
        events = mdl_importer.apply(mdl_path)
    else:
        # The OCEL importer returns a pair; only the first element is kept.
        events, _ = ocel_importer.apply(mdl_path)
    events = events.dropna(subset=["event_activity"])
    events.type = "succint"
    self.succint_dataframe = events

    exploded = succint_mdl_to_exploded_mdl.apply(events)
    exploded.type = "exploded"
    self.exploded_dataframe = exploded

    self.session_objects = {}

    # --- model selection and visualisation defaults ---
    self.possible_model_types = {
        "mvp_frequency": "MVP (frequency)",
        "mvp_performance": "MVP (performance)",
        "process_tree": "oc-PTree",
        "petri_alpha": "oc-Net-Alpha",
        "petri_inductive": "oc-Net-Inductive",
        "dfg": "oc-DFG",
        "multigraph": "OC Multigraph",
    }
    self.selected_model_type = defaults.DEFAULT_MODEL_TYPE
    self.possible_classifiers = {"activity", "combined"}
    self.selected_classifier = "activity"
    self.selected_aggregation_measure = "events"
    self.selected_decoration_measure = "frequency"
    self.selected_projection = "no"
    self.selected_min_acti_count = 800
    self.selected_min_edge_freq_count = 800
    self.epsilon = 0.0
    self.noise_threshold = 0.0
    self.model_view = ""
def newExtractorDownloadSvg():
    """Discover a model from the requested log and return it as an SVG string.

    The ``parameters`` query argument is decoded with ``__process_parameters``
    and its ``file_name`` entry is imported as a log.  A model is discovered
    from that log, rendered with ``format`` set to ``svg``, serialized and
    decoded as UTF-8.
    """
    from pm4pymdl.objects.mdl.importer import importer as mdl_importer
    from pm4pymdl.algo.mvp.gen_framework3 import discovery as mvp_discovery
    from pm4pymdl.visualization.mvp.gen_framework3 import visualizer as mvp_visualizer

    request_parameters = __process_parameters(request.args.get("parameters"))
    # NOTE(review): the path comes straight from the request -- confirm the
    # caller restricts which files may be read.
    log_path = request_parameters["file_name"]

    log_table = mdl_importer.apply(log_path)
    discovered_model = mvp_discovery.apply(log_table)
    rendering = mvp_visualizer.apply(discovered_model, parameters={"format": "svg"})

    serialized = pm4py.visualization.dfg.visualizer.serialize(rendering)
    return serialized.decode("utf-8")
import os

from pm4pymdl.objects.mdl.importer import importer as mdl_importer

# Example: import an MDL log together with its object table, print both, and
# write the events, all objects, and the per-type object tables to CSV.
# The CSV files are demonstration artefacts and are removed at the end.

df, obj_df = mdl_importer.apply(
    "../example_logs/mdl/mdl-running-example-w-objects.mdl",
    return_obj_dataframe=True)
print(df)
print(obj_df)

written_files = (
    "only_events.csv",
    "all_objects.csv",
    "products.csv",
    "customers.csv",
)

try:
    df.to_csv("only_events.csv", index=False)
    obj_df.to_csv("all_objects.csv", index=False)

    # One table per object type, without the columns that are entirely empty
    # for that type.
    products = obj_df[obj_df["object_type"] == "products"].dropna(how="all", axis=1)
    customers = obj_df[obj_df["object_type"] == "customers"].dropna(how="all", axis=1)
    print(products)
    print(customers)

    products.to_csv("products.csv", index=False)
    customers.to_csv("customers.csv", index=False)
finally:
    # Remove whatever was actually written, even when a step above failed,
    # so a crash does not leave stray CSV files in the working directory.
    for path in written_files:
        if os.path.exists(path):
            os.remove(path)
from pm4pymdl.objects.mdl.importer import importer as mdl_importer
from pm4pymdl.algo.mvp.gen_framework import algorithm as discovery
from pm4pymdl.visualization.mvp.gen_framework import visualizer as vis_factory

# Example: discover a model from the order-management log with a fixed
# combination of model / node-frequency / edge-frequency variants, then show
# it as SVG.
discovery_options = {
    "model_type_variant": "model3",
    "node_freq_variant": "type31",
    "edge_freq_variant": "type11",
}

df = mdl_importer.apply("../example_logs/mdl/order_management.mdl")
model = discovery.apply(df, **discovery_options)
gviz = vis_factory.apply(model, parameters={"format": "svg"})
vis_factory.view(gviz)
from pm4pymdl.objects.mdl.importer import importer as mdl_importer
from pm4pymdl.algo.mvp.utils import succint_mdl_to_exploded_mdl
from pm4pymdl.objects.mdl.exporter import exporter as mdl_exporter
import random
import pandas as pd

# Example: attach a randomly generated object table to the running-example
# log and export the result as a new MDL file.
succint_df = mdl_importer.apply("example_logs/mdl/mdl-running-example.mdl")
df = succint_mdl_to_exploded_mdl.apply(succint_df)

# Distinct, non-missing identifiers of each object type.
products = df["products"].dropna().unique()
customers = df["customers"].dropna().unique()

# Products first, then customers; within each record the random values are
# drawn in the order the keys are listed.
objects = [
    {
        "object_id": product_id,
        "object_type": "products",
        "object_cost": random.randrange(100, 500),
        "object_producer": random.choice(["A", "B", "C"]),
    }
    for product_id in products
]
objects += [
    {
        "object_id": customer_id,
        "object_type": "customers",
        "object_age": random.randrange(30, 60),
        "object_bankaccount": random.randrange(1000, 100000),
    }
    for customer_id in customers
]
print(objects)

obj_df = pd.DataFrame(objects)
mdl_exporter.apply(df, "mdl-running-example-w-objects.mdl", obj_df=obj_df)
#!/usr/bin/env python3.8
import sys

# The log to analyse is the first command-line argument.
filename = sys.argv[1]

from pm4pymdl.objects.mdl.importer import importer as mdl_importer
from pm4pymdl.algo.mvp.gen_framework import algorithm as discovery
from pm4pymdl.visualization.mvp.gen_framework import visualizer as vis_factory

# (model type, node frequency, edge frequency) variants, discovered and
# displayed one after the other in this order.
variant_combinations = (
    ("model1", "type1", "type11"),
    ("model2", "type21", "type211"),
    ("model3", "type31", "type11"),
)

df = mdl_importer.apply(filename)

for model_type, node_freq, edge_freq in variant_combinations:
    model = discovery.apply(df,
                            model_type_variant=model_type,
                            node_freq_variant=node_freq,
                            edge_freq_variant=edge_freq)
    gviz = vis_factory.apply(model, parameters={"format": "svg"})
    vis_factory.view(gviz)
from pm4pymdl.objects.mdl.importer import importer as mdl_importer
from pm4pymdl.algo.mvp.gen_framework3 import discovery, conformance
from pm4pymdl.visualization.mvp.gen_framework3 import visualizer as visualizer

# Example: discover a model from the log, check the same log against it,
# print the share of entries without deviations, show the model, and finally
# list every non-empty deviation entry.
dataframe = mdl_importer.apply("../example_logs/mdl/o2c_red.mdl")
model = discovery.apply(dataframe, parameters={
    "epsilon": 0.1,
    "noise_threshold": 0.1
})

conf_result = conformance.apply(dataframe, model)

# Fitness = fraction of conformance entries that are empty.  Guard the
# division: an empty result would otherwise raise ZeroDivisionError.
total_entries = len(conf_result)
if total_entries:
    fitting_entries = sum(1 for entry in conf_result if len(entry) == 0)
    fitness = fitting_entries / total_entries
    print(fitness)
else:
    fitness = None
    print("fitness is undefined: the conformance result is empty")

gviz = visualizer.apply(model, parameters={
    "min_act_freq": 500,
    "min_edge_freq": 500
})
visualizer.view(gviz)

# Report only the entries that actually contain something.
for el in conf_result:
    if el:
        print(el)
from pm4pymdl.objects.mdl.importer import importer as mdl_importer
from pm4pymdl.algo.mvp.gen_framework import algorithm as discovery
from pm4pymdl.visualization.mvp.gen_framework import visualizer as vis_factory
import pandas as pd


def _import_sorted(path):
    """Import the table at ``path`` and sort it by timestamp, then by original index."""
    table = mdl_importer.apply(path)
    # Keep the original index as a column so it can serve as a tie-breaker.
    table["event_idx_log_0"] = table.index
    return table.sort_values(["event_timestamp", "event_idx_log_0"])


# Example: merge the BPIC2017 offer and application tables into one table and
# discover a model from the combination.
df_offer = _import_sorted("../example_logs/parquet/bpic2017_offer.parquet")
df_application = _import_sorted(
    "../example_logs/parquet/bpic2017_application.parquet")

df = pd.concat([df_offer, df_application])
# Record the index each row carries after concatenation, then sort the merged
# table chronologically using that index as tie-breaker.
df["event_idx_log"] = df.index
df = df.reset_index().sort_values(["event_timestamp", "event_idx_log"])
# reset_index() moved the old index into an "index" column; it is not needed.
df = df.drop(columns="index")

model = discovery.apply(df)
visualization_parameters = {
    "dfg_cleaning_threshold": 0.15,
    "max_edge_ratio": 0.99,
    "format": "svg"
}
gviz = vis_factory.apply(model, parameters=visualization_parameters)
vis_factory.view(gviz)
from pm4pymdl.objects.mdl.importer import importer as mdl_importer

# Load a Parquet log that is stored as an exploded MDL table.
exploded_table = mdl_importer.apply("../example_logs/parquet/pkdd99.parquet")
print(exploded_table.columns)

# Only rows that reference an account are relevant: the goal is the time
# elapsed since the previous event of the same account.
exploded_table = exploded_table.dropna(subset=["account_id"], how="any")

# Work on a copy; remember the original index as a tie-breaker and order the
# rows per account, chronologically.
exploded_table_prev_ev = exploded_table.copy()
exploded_table_prev_ev["@@index"] = exploded_table_prev_ev.index
exploded_table_prev_ev = exploded_table_prev_ev.reset_index().sort_values(
    ["account_id", "event_timestamp", "@@index"])

# Each row of the shifted table holds the values of the row before it.
shifted_table = exploded_table_prev_ev.shift()
time_since_previous = (exploded_table_prev_ev["event_timestamp"]
                       - shifted_table["event_timestamp"])
# NOTE(review): the result of astype('timedelta64[s]') on a timedelta column
# may differ between pandas versions -- confirm against the pinned version.
exploded_table_prev_ev["@@time_prev"] = time_since_previous.astype('timedelta64[s]')

# Keep only rows whose predecessor belongs to the same account and for which
# a time difference exists.
same_account_as_previous = (
    exploded_table_prev_ev["account_id"] == shifted_table["account_id"])
exploded_table_prev_ev = exploded_table_prev_ev[same_account_as_previous]
exploded_table_prev_ev = exploded_table_prev_ev.dropna(subset=["@@time_prev"])

# Smallest difference first, so that first() keeps the lowest value per event.
exploded_table_prev_ev = exploded_table_prev_ev.sort_values(
    ["@@time_prev", "event_timestamp", "@@index"])
exploded_table_prev_ev = (
    exploded_table_prev_ev.groupby("event_id").first().reset_index())
print(exploded_table_prev_ev)
# focus on the account_id
from pm4pymdl.objects.mdl.importer import importer as mdl_importer
from pm4pymdl.algo.mvp.utils import succint_mdl_to_exploded_mdl
from pm4pymdl.algo.mvp.gen_framework import algorithm as discovery
from pm4pymdl.visualization.mvp.gen_framework import visualizer as vis_factory
from pm4pymdl.objects.mdl.exporter import exporter as mdl_exporter

# import a succint MDL table
succint_table = mdl_importer.apply("../example_logs/mdl/order_management.mdl")
print(len(succint_table), succint_table.type)

# convert it into an exploded MDL table
exploded_table = succint_mdl_to_exploded_mdl.apply(succint_table)
print(len(exploded_table), exploded_table.type)

# Keep only the events related to orders that have a profit >= 200.
# On the exploded table this takes three steps:
#   1. the rows whose event carries a profit >= 200,
#   2. every row that references one of the orders seen in step 1,
#   3. every row of the events seen in step 2.
#
# Series.isin() treats NaN as equal to NaN.  A row that does not reference an
# order (or a package) has NaN in that column, so the identifier lists are
# cleaned with dropna() first; otherwise every row with a missing value in the
# column would be selected as well.
f0 = exploded_table[exploded_table["event_profit"] >= 200]
f1 = exploded_table[exploded_table["order"].isin(f0["order"].dropna())]
filtered_exploded_table = exploded_table[exploded_table["event_id"].isin(
    f1["event_id"])]

# Additionally include the events of the packages related to the filtered
# orders.
f2 = exploded_table[exploded_table["package"].isin(
    filtered_exploded_table["package"].dropna())]
filtered_table_2 = exploded_table[
    exploded_table["event_id"].isin(filtered_exploded_table["event_id"])
    | exploded_table["event_id"].isin(f2["event_id"])]

# mine a process model out of the filtered table
model = discovery.apply(filtered_table_2)
gviz = vis_factory.apply(model)
vis_factory.view(gviz)
from pm4pymdl.objects.mdl.importer import importer as mdl_importer
from pm4pymdl.algo.mvp.utils import succint_mdl_to_exploded_mdl, succint_stream_to_exploded_stream
from copy import deepcopy
import numpy as np
from sklearn.metrics import pairwise_distances
from scipy.spatial.distance import cosine
import networkx as nx
from networkx.algorithms.community import asyn_lpa_communities
from networkx.algorithms.community import quality

log0 = mdl_importer.apply("example_logs/mdl/log_opp_red.mdl")
log = succint_mdl_to_exploded_mdl.apply(log0)
# One dict per row.  The full orient name is used because the 'r' shorthand
# is not accepted by current pandas releases.
stream = log.to_dict('records')

# Maps each label ("event_id=...", "event_activity=...", "object_id=...",
# "class=...") to a consecutive integer, in order of first appearance.
nodes = dict()
for ev in stream:
    # Drop the entries whose value prints as "nan" (missing values).
    ev2 = {x: y for x, y in ev.items() if str(y) != "nan"}
    # NOTE(review): `id` shadows the builtin; the name is kept unchanged in
    # case code outside this excerpt relies on it.
    id = "event_id=" + str(ev2["event_id"])
    activity = "event_activity=" + ev2["event_activity"]
    # setdefault only inserts when the label is new, so the first-seen
    # number of a label is never overwritten.
    nodes.setdefault(id, len(nodes))
    nodes.setdefault(activity, len(nodes))
    for col in ev2:
        if not col.startswith("event_"):
            # Every non-event column names an object type; its value is the
            # identifier of the related object.
            val = ev2[col]
            oid = "object_id=" + str(val)
            cla = "class=" + str(col)
            nodes.setdefault(oid, len(nodes))
            nodes.setdefault(cla, len(nodes))