Example #1
0
def execute_script():
    """Round-trip a small hand-built log through the MDL exporter and importer.

    Builds a two-event table and a two-row object table, exports them to
    ``prova.mdl``, re-imports that file, prints the events and the per-type
    object tables, exports the re-imported data to ``prova2.mdl`` and finally
    deletes both files.
    """
    first_path = "prova.mdl"
    second_path = "prova2.mdl"

    stream1 = [{
        "event_id": "1",
        "event_activity": "A",
        "event_timestamp": parser.parse("1970-01-01 00:00:00"),
        "order": ["O1"]
    }, {
        "event_id": "2",
        "event_activity": "B",
        "event_timestamp": parser.parse("1970-01-01 00:00:00"),
        "order": ["O1"],
        # NOTE(review): this is ONE id containing a comma, not the two ids
        # "I1" and "I2" — looks like a typo, confirm the intended test data.
        "item": ["I1, I2"]
    }]
    df = pd.DataFrame(stream1)
    df.type = "succint"

    stream2 = [{
        "object_id": "O1",
        "object_type": "order",
        "object_buyer": "Alessandro"
    }, {
        "object_id": "I1",
        "object_type": "item",
        "object_cost": 600
    }]
    obj_df = pd.DataFrame(stream2)

    try:
        mdl_exporter.apply(df, first_path, obj_df=obj_df)
        df, obj_df = mdl_importer.apply(first_path, return_obj_dataframe=True)
        # One table per object type, without the columns that are empty for it.
        orders = obj_df[obj_df["object_type"] == "order"].dropna(how="all", axis=1)
        items = obj_df[obj_df["object_type"] == "item"].dropna(how="all", axis=1)
        print(df)
        print(orders)
        print(items)
        mdl_exporter.apply(df, second_path, obj_df=obj_df)
    finally:
        # Remove whatever was written even when a step above raised, so a
        # failing run does not leave stray files in the working directory.
        for path in (first_path, second_path):
            if os.path.exists(path):
                os.remove(path)
Example #2
0
    def __init__(self, name, mdl_path, shared_logs):
        """Load the log stored at ``mdl_path`` and initialise all attributes.

        The file is read with ``ocel_importer`` when the path contains the
        substring ``"ocel"`` (only the first returned value is kept) and with
        ``mdl_importer`` otherwise. Rows without an ``event_activity`` are
        dropped, and an exploded copy of the table is derived from the result.
        All remaining attributes are set to empty or default values.
        """
        # --- bookkeeping and lazily computed state ---
        self.shared_logs = shared_logs
        self.shared_logs_names = []
        self.parent = self
        self.obj_types_str = self.act_obj_types = None
        self.initial_act_obj_types = None
        self.activities = []
        self.obj_types = []
        self.clusters = {}
        self.clustersrepr = ""
        self.clusterid = str(id(self))
        self.stream = self.nodes = None
        self.events_corr = self.events_corr2 = None
        self.matrix = None
        self.powered_matrix = self.powered_matrix_2 = None
        self.graph = self.row_sum = None
        self.overall_sum = 0
        self.selected_act_obj_types = None
        self.name = name
        self.mdl_path = mdl_path

        # --- load the succinct table and derive the exploded one ---
        if "ocel" in mdl_path:
            loaded, _ = ocel_importer.apply(mdl_path)
        else:
            loaded = mdl_importer.apply(mdl_path)
        self.succint_dataframe = loaded
        self.succint_dataframe = loaded.dropna(subset=["event_activity"])
        self.succint_dataframe.type = "succint"
        exploded = succint_mdl_to_exploded_mdl.apply(self.succint_dataframe)
        self.exploded_dataframe = exploded
        self.exploded_dataframe.type = "exploded"
        self.session_objects = {}

        # --- model selection: internal key -> label ---
        self.possible_model_types = dict([
            ("mvp_frequency", "MVP (frequency)"),
            ("mvp_performance", "MVP (performance)"),
            ("process_tree", "oc-PTree"),
            ("petri_alpha", "oc-Net-Alpha"),
            ("petri_inductive", "oc-Net-Inductive"),
            ("dfg", "oc-DFG"),
            ("multigraph", "OC Multigraph"),
        ])
        self.selected_model_type = defaults.DEFAULT_MODEL_TYPE
        self.possible_classifiers = {"activity", "combined"}
        self.selected_classifier = "activity"

        # --- default view settings ---
        self.selected_aggregation_measure = "events"
        self.selected_decoration_measure = "frequency"
        self.selected_projection = "no"
        self.selected_min_acti_count = 800
        self.selected_min_edge_freq_count = 800
        self.epsilon = 0.0
        self.noise_threshold = 0.0
        self.model_view = ""
Example #3
0
def newExtractorDownloadSvg():
    """Discover a model from the requested log and return it as SVG text.

    The ``parameters`` argument is read from ``request.args`` and decoded with
    ``__process_parameters``; its ``file_name`` entry is the log to import.

    Returns:
        The serialized SVG rendering, decoded as UTF-8.
    """
    raw_parameters = request.args.get("parameters")
    decoded = __process_parameters(raw_parameters)
    log_path = decoded["file_name"]

    # Imported lazily, only when this handler is actually invoked.
    from pm4pymdl.objects.mdl.importer import importer as mdl_importer
    from pm4pymdl.algo.mvp.gen_framework3 import discovery as mvp_discovery
    from pm4pymdl.visualization.mvp.gen_framework3 import visualizer as mvp_visualizer

    log_df = mdl_importer.apply(log_path)
    discovered = mvp_discovery.apply(log_df)
    rendering = mvp_visualizer.apply(discovered, parameters={"format": "svg"})

    svg_bytes = pm4py.visualization.dfg.visualizer.serialize(rendering)
    return svg_bytes.decode("utf-8")
Example #4
0
from pm4pymdl.objects.mdl.importer import importer as mdl_importer
import os


# Read the events and the object table stored in the same MDL file.
df, obj_df = mdl_importer.apply(
    "../example_logs/mdl/mdl-running-example-w-objects.mdl",
    return_obj_dataframe=True,
)
for frame in (df, obj_df):
    print(frame)

# Dump both tables as they were imported.
for frame, target in ((df, "only_events.csv"), (obj_df, "all_objects.csv")):
    frame.to_csv(target, index=False)

# One table per object type, without the columns that are empty for that type.
is_product = obj_df["object_type"] == "products"
is_customer = obj_df["object_type"] == "customers"
products = obj_df[is_product].dropna(how="all", axis=1)
customers = obj_df[is_customer].dropna(how="all", axis=1)

for frame in (products, customers):
    print(frame)

for frame, target in ((products, "products.csv"), (customers, "customers.csv")):
    frame.to_csv(target, index=False)

# The CSV files only demonstrate the export; delete them again.
for written in ("only_events.csv", "all_objects.csv", "products.csv",
                "customers.csv"):
    os.remove(written)
Example #5
0
from pm4pymdl.objects.mdl.importer import importer as mdl_importer
from pm4pymdl.algo.mvp.gen_framework import algorithm as discovery
from pm4pymdl.visualization.mvp.gen_framework import visualizer as vis_factory

# Variant names handed to the discovery algorithm as keyword arguments.
variant_options = {
    "model_type_variant": "model3",
    "node_freq_variant": "type31",
    "edge_freq_variant": "type11",
}

df = mdl_importer.apply("../example_logs/mdl/order_management.mdl")
model = discovery.apply(df, **variant_options)

# Render the discovered model as SVG and open it.
gviz = vis_factory.apply(model, parameters={"format": "svg"})
vis_factory.view(gviz)
Example #6
0
from pm4pymdl.objects.mdl.importer import importer as mdl_importer
from pm4pymdl.algo.mvp.utils import succint_mdl_to_exploded_mdl
from pm4pymdl.objects.mdl.exporter import exporter as mdl_exporter
import random
import pandas as pd

# Import the succinct log and convert it to the exploded representation.
succint_df = mdl_importer.apply("example_logs/mdl/mdl-running-example.mdl")
df = succint_mdl_to_exploded_mdl.apply(succint_df)

# Distinct non-null identifiers found in each object column.
products = df["products"].dropna().unique()
customers = df["customers"].dropna().unique()

# One record with random attribute values per object: products first, then
# customers.
product_records = [
    {
        "object_id": product_id,
        "object_type": "products",
        "object_cost": random.randrange(100, 500),
        "object_producer": random.choice(["A", "B", "C"]),
    }
    for product_id in products
]
customer_records = [
    {
        "object_id": customer_id,
        "object_type": "customers",
        "object_age": random.randrange(30, 60),
        "object_bankaccount": random.randrange(1000, 100000),
    }
    for customer_id in customers
]
objects = product_records + customer_records

print(objects)

# Export the exploded events together with the generated object table.
obj_df = pd.DataFrame(objects)
mdl_exporter.apply(df, "mdl-running-example-w-objects.mdl", obj_df=obj_df)
Example #7
0
#!/usr/bin/env python3.8
import sys

# Path of the log to analyse, taken from the first command-line argument.
filename = sys.argv[1]

from pm4pymdl.objects.mdl.importer import importer as mdl_importer
from pm4pymdl.algo.mvp.gen_framework import algorithm as discovery
from pm4pymdl.visualization.mvp.gen_framework import visualizer as vis_factory

df = mdl_importer.apply(filename)

# (model type, node frequency, edge frequency) variant names; one model is
# discovered and displayed per entry, in this order.
variant_triples = (
    ("model1", "type1", "type11"),
    ("model2", "type21", "type211"),
    ("model3", "type31", "type11"),
)

for model_type, node_freq, edge_freq in variant_triples:
    model = discovery.apply(df,
                            model_type_variant=model_type,
                            node_freq_variant=node_freq,
                            edge_freq_variant=edge_freq)
    gviz = vis_factory.apply(model, parameters={"format": "svg"})
    vis_factory.view(gviz)
from pm4pymdl.objects.mdl.importer import importer as mdl_importer
from pm4pymdl.algo.mvp.gen_framework3 import discovery, conformance
from pm4pymdl.visualization.mvp.gen_framework3 import visualizer as visualizer

dataframe = mdl_importer.apply("../example_logs/mdl/o2c_red.mdl")

# Discover a model, then check the same log against it.
discovery_parameters = {"epsilon": 0.1, "noise_threshold": 0.1}
model = discovery.apply(dataframe, parameters=discovery_parameters)
conf_result = conformance.apply(dataframe, model)

# Fitness: the share of conformance entries that are empty.
empty_entries = sum(1 for entry in conf_result if len(entry) == 0)
fitness = empty_entries / len(conf_result)
print(fitness)

visualization_parameters = {"min_act_freq": 500, "min_edge_freq": 500}
gviz = visualizer.apply(model, parameters=visualization_parameters)
visualizer.view(gviz)

# Print every non-empty conformance entry.
for entry in conf_result:
    if not entry:
        continue
    print(entry)
Example #9
0
from pm4pymdl.objects.mdl.importer import importer as mdl_importer
from pm4pymdl.algo.mvp.gen_framework import algorithm as discovery
from pm4pymdl.visualization.mvp.gen_framework import visualizer as vis_factory
import pandas as pd

def _load_sorted(path):
    """Import *path* and return it sorted by timestamp, then by its import-time index."""
    table = mdl_importer.apply(path)
    table["event_idx_log_0"] = table.index
    return table.sort_values(["event_timestamp", "event_idx_log_0"])


df_offer = _load_sorted("../example_logs/parquet/bpic2017_offer.parquet")
df_application = _load_sorted(
    "../example_logs/parquet/bpic2017_application.parquet")

# Stack both logs, keep the pre-merge index as a tie-breaker column, and order
# the merged table by timestamp.
df = pd.concat([df_offer, df_application])
df["event_idx_log"] = df.index
df = df.reset_index().sort_values(["event_timestamp", "event_idx_log"])
# reset_index() left the old index behind as a column named "index".
df = df.drop(columns="index")

model = discovery.apply(df)
visualization_parameters = {
    "dfg_cleaning_threshold": 0.15,
    "max_edge_ratio": 0.99,
    "format": "svg",
}
gviz = vis_factory.apply(model, parameters=visualization_parameters)
vis_factory.view(gviz)
from pm4pymdl.objects.mdl.importer import importer as mdl_importer

# import a log in Parquet format, that is stored as exploded MDL table
exploded_table = mdl_importer.apply("../example_logs/parquet/pkdd99.parquet")
print(exploded_table.columns)

# focus on the account ID: we wish to calculate the time from the previous event
# Only rows that actually carry an account_id take part in the computation.
exploded_table = exploded_table.dropna(subset=["account_id"], how="any")
# Work on a copy so the columns added below do not end up in exploded_table.
exploded_table_prev_ev = exploded_table.copy()
# Keep the current index as a column: it is the last tie-breaker in both sorts.
exploded_table_prev_ev["@@index"] = exploded_table_prev_ev.index
exploded_table_prev_ev = exploded_table_prev_ev.reset_index()
# Order the rows per account and, within an account, chronologically.
exploded_table_prev_ev = exploded_table_prev_ev.sort_values(["account_id", "event_timestamp", "@@index"])
# shift() moves every value one row down in the sorted order while keeping the
# index labels, so each label of shifted_table holds the data of the preceding row.
shifted_table = exploded_table_prev_ev.shift()
# Difference between each row's timestamp and the preceding row's timestamp.
# NOTE(review): the result of astype('timedelta64[s]') depends on the pandas
# version (float seconds in older releases, a timedelta dtype in 2.x) — confirm.
exploded_table_prev_ev["@@time_prev"] = (
            exploded_table_prev_ev["event_timestamp"] - shifted_table["event_timestamp"]).astype('timedelta64[s]')
# Discard rows whose preceding row belongs to a different account (this also
# removes the very first row, which has no predecessor).
exploded_table_prev_ev = exploded_table_prev_ev[exploded_table_prev_ev["account_id"] == shifted_table["account_id"]]

exploded_table_prev_ev = exploded_table_prev_ev.dropna(subset=["@@time_prev"])
# Ascending sort, so the smallest time difference comes first for each event.
exploded_table_prev_ev = exploded_table_prev_ev.sort_values(["@@time_prev", "event_timestamp", "@@index"])

# keep only the lowest difference between times
# NOTE(review): GroupBy.first() takes the first non-null value per column, so a
# column that is empty in the leading row is filled from a later row of the same
# event_id — confirm this is acceptable.
exploded_table_prev_ev = exploded_table_prev_ev.groupby("event_id").first().reset_index()

print(exploded_table_prev_ev)
# focus on the account_id
Example #11
0
from pm4pymdl.objects.mdl.importer import importer as mdl_importer
from pm4pymdl.algo.mvp.utils import succint_mdl_to_exploded_mdl
from pm4pymdl.algo.mvp.gen_framework import algorithm as discovery
from pm4pymdl.visualization.mvp.gen_framework import visualizer as vis_factory
from pm4pymdl.objects.mdl.exporter import exporter as mdl_exporter

# import a succint MDL table
succint_table = mdl_importer.apply("../example_logs/mdl/order_management.mdl")
print(len(succint_table), succint_table.type)
# convert it into an exploded MDL table
exploded_table = succint_mdl_to_exploded_mdl.apply(succint_table)
print(len(exploded_table), exploded_table.type)

# keeps only events related to orders that have a profit >= 200
# to make the filtering on the exploded table we have to follow the procedure:
#   1. select the rows whose profit is high enough,
#   2. collect the orders named on those rows,
#   3. keep every event that is related to one of those orders.
f0 = exploded_table[exploded_table["event_profit"] >= 200]
# Series.isin() treats a missing value in the reference series as matching
# every missing value in the tested column. Missing ids are therefore dropped
# first; otherwise all rows WITHOUT an order would be selected as well.
selected_orders = f0["order"].dropna()
f1 = exploded_table[exploded_table["order"].isin(selected_orders)]
filtered_exploded_table = exploded_table[exploded_table["event_id"].isin(
    f1["event_id"])]

# suppose that we want to get also the packages related to the filtered orders, then:
# (same precaution: ignore rows that carry no package id)
selected_packages = filtered_exploded_table["package"].dropna()
f2 = exploded_table[exploded_table["package"].isin(selected_packages)]
filtered_table_2 = exploded_table[
    exploded_table["event_id"].isin(filtered_exploded_table["event_id"])
    | exploded_table["event_id"].isin(f2["event_id"])]

# mine a process model out of the filtered table
model = discovery.apply(filtered_table_2)
gviz = vis_factory.apply(model)
vis_factory.view(gviz)
Example #12
0
from pm4pymdl.objects.mdl.importer import importer as mdl_importer
from pm4pymdl.algo.mvp.utils import succint_mdl_to_exploded_mdl, succint_stream_to_exploded_stream
from copy import deepcopy
import numpy as np
from sklearn.metrics import pairwise_distances
from scipy.spatial.distance import cosine
import networkx as nx
from networkx.algorithms.community import asyn_lpa_communities
from networkx.algorithms.community import quality

# Import the succinct log and convert it to the exploded representation.
log0 = mdl_importer.apply("example_logs/mdl/log_opp_red.mdl")
log = succint_mdl_to_exploded_mdl.apply(log0)
# One dict per row. The orient is spelled out in full because the abbreviated
# form 'r' was deprecated by pandas and is rejected from pandas 2.0 on.
stream = log.to_dict("records")
# Maps a node label to the integer assigned to it (labels are numbered in the
# order in which they are first seen).
nodes = {}
# Assign a consecutive integer to every distinct label met in the stream:
# event ids, activities, object ids and object classes (the column names).
for ev in stream:
    # Drop the entries whose value prints as "nan", i.e. the empty cells.
    ev2 = {x: y for x, y in ev.items() if str(y) != "nan"}
    # NOTE(review): `id` shadows the Python builtin of the same name.
    id = "event_id=" + str(ev2["event_id"])
    activity = "event_activity=" + ev2["event_activity"]
    # len(nodes) is the next free number, so labels are numbered 0, 1, 2, ...
    if id not in nodes:
        nodes[id] = len(nodes)
    if activity not in nodes:
        nodes[activity] = len(nodes)
    for col in ev2:
        # Every column that is not an event_* attribute is treated as an
        # object reference: its value is the object id, its name the class.
        if not col.startswith("event_"):
            val = ev2[col]
            oid = "object_id=" + str(val)
            cla = "class=" + str(col)
            if oid not in nodes:
                nodes[oid] = len(nodes)
            if cla not in nodes:
                nodes[cla] = len(nodes)