def execute_script():
    """Train and display two decision trees on the road-traffic sample log.

    The log is imported from ``../tests/input_data/roadtraffic50traces.xes``
    and converted to a trace-based feature matrix.  Two labelings of the
    traces are then used in turn, each producing a depth-7 decision tree
    that is rendered as SVG and handed to ``dt_vis.view``:

    1. labels from ``get_class_representation_by_str_ev_attr_value_value``
       on ``"concept:name"`` (per the original comment: the final
       ``concept:name`` value, i.e. the end activity);
    2. labels from ``get_class_representation_by_trace_duration`` with a
       threshold of ``2 * 8640000`` (200 days according to the original
       comment).
    """
    # (a commented-out alternative in the original pointed at "receipt.xes"
    # in the same folder)
    event_log = xes_importer.apply(
        os.path.join("..", "tests", "input_data", "roadtraffic50traces.xes"))

    # Default trace-based representation of the log; shared by both trees.
    data, feature_names = log_to_features.apply(
        event_log, variant=log_to_features.Variants.TRACE_BASED)

    def fit_and_show(labels, label_names):
        """Fit a depth-7 tree on ``data``/``labels`` and view it as SVG."""
        classifier = tree.DecisionTreeClassifier(max_depth=7)
        classifier.fit(data, labels)
        # A fresh parameters dict is built for every rendering.
        svg_params = {dt_vis.Variants.CLASSIC.value.Parameters.FORMAT: "svg"}
        graph = dt_vis.apply(
            classifier, feature_names, label_names, parameters=svg_params)
        dt_vis.view(graph)

    # First tree: classes derived from the "concept:name" event attribute.
    labels, label_names = (
        get_class_representation
        .get_class_representation_by_str_ev_attr_value_value(
            event_log, "concept:name"))
    fit_and_show(labels, label_names)

    # Second tree: two classes split on the trace-duration threshold.
    labels, label_names = (
        get_class_representation
        .get_class_representation_by_trace_duration(event_log, 2 * 8640000))
    fit_and_show(labels, label_names)
def test_61(self):
    """Run the decision-tree snippet end to end on the road-traffic log.

    Builds a feature matrix twice (first with an explicit attribute
    selection, then with the default representation, which replaces the
    first result), labels traces by duration, fits an unconstrained
    decision tree and passes it to the decision-tree visualizer.  Nothing
    is asserted; the test only checks that the calls complete.
    """
    import os

    from sklearn import tree

    from pm4py.objects.log.importer.xes import importer as xes_importer
    from pm4py.objects.log.util import get_class_representation
    from pm4py.objects.log.util import get_log_representation
    from pm4py.visualization.decisiontree import visualizer as dectree_visualizer

    event_log = xes_importer.apply(
        os.path.join("input_data", "roadtraffic50traces.xes"))

    # Explicit selection: no trace attributes, "concept:name" as the string
    # event attribute and "amount" as the numeric event attribute.
    features, columns = get_log_representation.get_representation(
        event_log, [], ["concept:name"], [], ["amount"])
    # Default selection; this overwrites the result computed just above.
    features, columns = get_log_representation.get_default_representation(
        event_log)

    labels, label_names = (
        get_class_representation.get_class_representation_by_trace_duration(
            event_log, 2 * 8640000))

    classifier = tree.DecisionTreeClassifier()
    classifier.fit(features, labels)

    # The rendering itself is not inspected.
    dectree_visualizer.apply(classifier, columns, label_names)
def execute_script():
    """Build and display a decision tree for one decision point of a model.

    Imports ``../tests/input_data/running-example.xes``, discovers a model
    with ``inductive_miner.apply`` and asks ``algorithm.get_decision_tree``
    for the tree at decision point ``"p_10"`` (the original author notes
    that the tree is obtained by alignments and that place ``p_10`` is a
    suitable decision point).  The tree is rendered as SVG and passed to
    ``visualizer.view``.
    """
    log_file = os.path.join("..", "tests", "input_data", "running-example.xes")
    event_log = xes_importer.apply(log_file)

    # The miner's result is unpacked into exactly three values.
    net, initial_marking, final_marking = inductive_miner.apply(event_log)

    # A decision point has to be named explicitly.
    classifier, feature_names, label_names = algorithm.get_decision_tree(
        event_log, net, initial_marking, final_marking, decision_point="p_10")

    svg_params = {visualizer.Variants.CLASSIC.value.Parameters.FORMAT: "svg"}
    graph = visualizer.apply(
        classifier, feature_names, label_names, parameters=svg_params)
    visualizer.view(graph)
def test_decisiontree_traceduration(self):
    """Fit and render a depth-7 decision tree labelled by trace duration.

    Uses ``input_data/roadtraffic50traces.xes`` with ``"concept:name"`` as
    string event attribute and ``"amount"`` as numeric event attribute.
    Nothing is asserted; the test passes when every call completes.
    """
    # Touching ``self`` avoids "method could be static" warnings, which the
    # unittest package's method-based structure would otherwise trigger.
    self.dummy_variable = "dummy_value"

    event_log = xes_importer.apply(
        os.path.join("input_data", "roadtraffic50traces.xes"))

    # Attribute selection passed positionally to get_representation.
    str_trace_attrs, str_event_attrs = [], ["concept:name"]
    num_trace_attrs, num_event_attrs = [], ["amount"]
    features, columns = get_log_representation.get_representation(
        event_log, str_trace_attrs, str_event_attrs,
        num_trace_attrs, num_event_attrs)

    labels, label_names = (
        get_class_representation.get_class_representation_by_trace_duration(
            event_log, 2 * 8640000))

    classifier = tree.DecisionTreeClassifier(max_depth=7)
    classifier.fit(features, labels)

    # Render as SVG; the resulting object is discarded immediately.
    svg_params = {dt_vis.Variants.CLASSIC.value.Parameters.FORMAT: "svg"}
    dt_vis.apply(classifier, columns, label_names, parameters=svg_params)
def test_decisiontree_evattrvalue(self):
    """Fit and render a depth-7 decision tree labelled by an event attribute.

    Features come from the trace-based ``log_to_features`` variant on
    ``input_data/roadtraffic50traces.xes``; labels come from
    ``get_class_representation_by_str_ev_attr_value_value`` on
    ``"concept:name"``.  Nothing is asserted; the test passes when every
    call completes.
    """
    # Touching ``self`` avoids "method could be static" warnings, which the
    # unittest package's method-based structure would otherwise trigger.
    self.dummy_variable = "dummy_value"

    event_log = xes_importer.apply(
        os.path.join("input_data", "roadtraffic50traces.xes"))

    # Attribute selection for the feature extraction.
    feature_params = {
        "str_tr_attr": [],
        "str_ev_attr": ["concept:name"],
        "num_tr_attr": [],
        "num_ev_attr": ["amount"],
    }
    features, columns = log_to_features.apply(
        event_log,
        variant=log_to_features.Variants.TRACE_BASED,
        parameters=feature_params)

    labels, label_names = (
        get_class_representation
        .get_class_representation_by_str_ev_attr_value_value(
            event_log, "concept:name"))

    classifier = tree.DecisionTreeClassifier(max_depth=7)
    classifier.fit(features, labels)

    # Render as SVG; the resulting object is discarded immediately.
    svg_params = {dt_vis.Variants.CLASSIC.value.Parameters.FORMAT: "svg"}
    dt_vis.apply(classifier, columns, label_names, parameters=svg_params)
# Script fragment: builds a feature matrix from an event log, exports it to
# CSV, fits a decision tree and renders it.  It begins mid-script: `log`,
# `str_trace_attributes`, `str_event_attributes`, `num_trace_attributes`,
# `target` and `classes` are not bound anywhere in the visible lines and are
# expected to exist already when this part runs.
num_event_attributes = ["amount"]
data, feature_names = get_log_representation.get_representation(log, str_trace_attributes, str_event_attributes, num_trace_attributes, num_event_attributes)
# NOTE(review): the original carried a bare "#error" marker at this point; it
# is unclear whether it refers to the call above or to the one below - confirm.
# The call below replaces `data` and `feature_names` computed just above.
data, feature_names = get_log_representation.get_default_representation(log)
import pandas as pd
# Wrap the feature matrix in a DataFrame, one column per feature name.
dataframe = pd.DataFrame(data, columns=feature_names)
# Bare expression: has no effect when run as a plain script (presumably kept
# from an interactive/notebook session to display the frame - TODO confirm).
dataframe
# Write the features to "features.csv" without the index column.
dataframe.to_csv("features.csv", index=False)
from sklearn import tree
# Decision tree with default settings, fitted on the features and `target`.
clf = tree.DecisionTreeClassifier()
clf.fit(data, target)
from pm4py.visualization.decisiontree import visualizer as dectree_visualizer
gviz = dectree_visualizer.apply(clf, feature_names, classes)
# ----- second part: re-imports the log and rebuilds the explicit representation -----
import os
from pm4py.objects.log.importer.xes import importer as xes_importer
# Path is relative to the current working directory.
log = xes_importer.apply(os.path.join("tests", "input_data", "roadtraffic50traces.xes"))
from pm4py.objects.log.util import get_log_representation
# Attribute selection: only "concept:name" (string, event level) and
# "amount" (numeric, event level); no trace-level attributes.
str_trace_attributes = []
str_event_attributes = ["concept:name"]
num_trace_attributes = []
num_event_attributes = ["amount"]
data, feature_names = get_log_representation.get_representation(log, str_trace_attributes, str_event_attributes, num_trace_attributes, num_event_attributes)