def get_default_representation_with_attribute_names(log, parameters=None, feature_names=None):
    """
    Builds the default data representation of an event log (for decision tree
    learning) and also returns the attribute names that were used to build it

    Parameters
    -------------
    log
        Trace log
    parameters
        Possible parameters of the algorithm:
        - ENABLE_ACTIVITY_DEF_REPRESENTATION: when true, the activity key is added
          to the string event attributes (default False)
        - ENABLE_SUCC_DEF_REPRESENTATION: when true, the activity key is used as
          the only "succession" string event attribute (default False)
        - constants.PARAMETER_CONSTANT_ACTIVITY_KEY: attribute used as activity
          (default xes.DEFAULT_NAME_KEY)
        - "blacklist": attribute names removed from every attribute list (default [])
    feature_names
        (If provided) Feature to use in the representation of the log

    Returns
    -------------
    data
        Data to provide for decision tree learning
    feature_names
        Names of the features, in order
    str_tr_attr
        String trace attributes used (after applying the blacklist)
    str_ev_attr
        String event attributes used (after applying the blacklist)
    num_tr_attr
        Numeric trace attributes used (after applying the blacklist)
    num_ev_attr
        Numeric event attributes used (after applying the blacklist)
    """
    if parameters is None:
        parameters = {}

    use_activity_repr = parameters.get(ENABLE_ACTIVITY_DEF_REPRESENTATION, False)
    use_succession_repr = parameters.get(ENABLE_SUCC_DEF_REPRESENTATION, False)
    activity_key = parameters.get(constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    blacklist = parameters.get("blacklist", [])

    def _drop_blacklisted(names):
        # Keep the original order, leaving out every blacklisted attribute.
        return [name for name in names if name not in blacklist]

    str_tr_attr, str_ev_attr, num_tr_attr, num_ev_attr = select_attributes_from_log_for_tree(log)

    # The succession representation, when enabled, is built on the activity only.
    str_evsucc_attr = [activity_key] if use_succession_repr else None

    if use_activity_repr and activity_key not in str_ev_attr:
        str_ev_attr.append(activity_key)

    str_tr_attr = _drop_blacklisted(str_tr_attr)
    str_ev_attr = _drop_blacklisted(str_ev_attr)
    num_tr_attr = _drop_blacklisted(num_tr_attr)
    num_ev_attr = _drop_blacklisted(num_ev_attr)
    if str_evsucc_attr is not None:
        str_evsucc_attr = _drop_blacklisted(str_evsucc_attr)

    data, feature_names = get_representation(
        log,
        str_tr_attr,
        str_ev_attr,
        num_tr_attr,
        num_ev_attr,
        str_evsucc_attr=str_evsucc_attr,
        feature_names=feature_names,
    )

    return data, feature_names, str_tr_attr, str_ev_attr, num_tr_attr, num_ev_attr
def apply(log, net, initial_marking, final_marking, decision_point=None, attributes=None, parameters=None):
    """
    Gets the essential information (features, target class and names of the target class)
    in order to learn a classifier

    Parameters
    --------------
    log
        Event log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    decision_point
        Point of the process in which a decision happens:
        - if not specified, the method raises an exception that carries the list
          of possible decision points
        - if specified, the method goes on and builds the feature table
    attributes
        Attributes of the log. If not specified, then an automatic attribute
        selection is performed (string and numeric event attributes).
    parameters
        Parameters of the algorithm

    Returns
    ---------------
    X
        Features: data frame with one row per entry of the decision table, where
        the selected attributes (activity key excluded) are one-hot encoded
    y
        Target class ('Target' column produced by encode_target)
    class_name
        Target class names (second value returned by encode_target)

    Raises
    ---------------
    Exception
        If decision_point is None; the exception arguments are the message and
        the possible decision points.
    """
    if parameters is None:
        parameters = {}

    log = log_converter.apply(log, parameters=parameters)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)

    if decision_point is None:
        decision_points_names = get_decision_points(net, labels=True, parameters=parameters)
        # The message and the names are kept as two separate exception arguments,
        # so the names remain retrievable from the exception's args.
        raise Exception(
            "please provide decision_point as argument of the method. Possible decision points: ",
            decision_points_names)

    if attributes is None:
        str_tr_attr, str_ev_attr, num_tr_attr, num_ev_attr = select_attributes_from_log_for_tree(log)
        attributes = list(str_ev_attr) + list(num_ev_attr)

    I, dp = get_decisions_table(log, net, initial_marking, final_marking, attributes=attributes,
                                pre_decision_points=[decision_point], parameters=parameters)

    # The activity itself is not used as a feature.
    x_attributes = [a for a in attributes if a != activity_key]

    x = []
    y = []
    for el in I[decision_point]:
        # el[0] holds the attribute values, el[1] the target of the decision.
        x.append({a: v for a, v in el[0].items() if a in x_attributes})
        y.append(el[1])

    X = pd.DataFrame(x)
    # Only encode attributes that actually occur in the collected rows: an
    # attribute that is absent from every row has no column in X.
    columns_to_encode = [a for a in x_attributes if a in X.columns]
    if columns_to_encode:
        X = pd.get_dummies(data=X, columns=columns_to_encode)

    Y = pd.DataFrame(y, columns=["Name"])
    Y, targets = encode_target(Y, "Name")
    y = Y['Target']

    return X, y, targets
def test_select_attributes(self):
    """
    Smoke test for the attribute selection helpers.

    Runs the trace attribute value extraction (on "concept:name") and the
    automatic attribute selection for trees on the log returned by
    self.get_log(). There are no assertions: the test passes when neither
    call raises.
    """
    from pm4py.statistics.attributes.log import select as attribute_select

    event_log = self.get_log()

    attribute_select.get_trace_attribute_values(event_log, "concept:name")
    attribute_select.select_attributes_from_log_for_tree(event_log)
def apply(
    log: Union[EventLog, pd.DataFrame],
    net: PetriNet,
    initial_marking: Marking,
    final_marking: Marking,
    decision_point=None,
    attributes=None,
    parameters: Optional[Dict[Union[str, Parameters], Any]] = None,
) -> Any:
    """
    Gets the essential information (features, target class and names of the target class)
    in order to learn a classifier

    Parameters
    --------------
    log
        Event log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    decision_point
        Point of the process in which a decision happens:
        - if not specified, the method raises an exception that carries the list
          of possible decision points
        - if specified, the method goes on and builds the feature table
    attributes
        Attributes of the log. If not specified, then an automatic attribute
        selection is performed (string and numeric event attributes).
    parameters
        Parameters of the algorithm (Parameters.LABELS, default True, is passed
        to get_decision_points; Parameters.ACTIVITY_KEY selects the activity
        attribute, which is excluded from the features)

    Returns
    ---------------
    X
        Features: one row per entry of the decision table. String-valued
        attributes are one-hot encoded, the remaining values are appended
        unchanged as additional columns.
    y
        Target class ('Target' column produced by encode_target)
    class_name
        Target class names (second value returned by encode_target)

    Raises
    ---------------
    Exception
        If decision_point is None; the exception arguments are the message and
        the possible decision points.
    """
    import pandas as pd

    if parameters is None:
        parameters = {}

    labels = exec_utils.get_param_value(Parameters.LABELS, parameters, True)

    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)

    if decision_point is None:
        decision_points_names = get_decision_points(net, labels=labels, parameters=parameters)
        # The message and the names are kept as two separate exception arguments,
        # so the names remain retrievable from the exception's args.
        raise Exception(
            "please provide decision_point as argument of the method. Possible decision points: ",
            decision_points_names)

    if attributes is None:
        str_tr_attr, str_ev_attr, num_tr_attr, num_ev_attr = select_attributes_from_log_for_tree(log)
        attributes = list(str_ev_attr) + list(num_ev_attr)

    I, dp = get_decisions_table(log, net, initial_marking, final_marking, attributes=attributes,
                                pre_decision_points=[decision_point], parameters=parameters)

    # The activity itself is not used as a feature.
    x_attributes = [a for a in attributes if a != activity_key]

    str_attributes = set()
    x = []   # string-valued features, one dict per decision (one-hot encoded below)
    x2 = []  # all other features, one dict per decision (kept as they are)
    y = []
    for el in I[decision_point]:
        # el[0] holds the attribute values, el[1] the target of the decision.
        str_values = {}
        other_values = {}
        for a, v in el[0].items():
            if a not in x_attributes:
                continue
            if isinstance(v, str):
                str_attributes.add(a)
                str_values[a] = v
            else:
                other_values[a] = v
        x.append(str_values)
        x2.append(other_values)
        y.append(el[1])

    X = pd.DataFrame(x)
    # Encode the string attributes in the order of x_attributes (without
    # repetitions) rather than in set-iteration order, so that the order of the
    # resulting feature columns does not change from one run to the next.
    columns_to_encode = list(dict.fromkeys(a for a in x_attributes if a in str_attributes))
    if columns_to_encode:
        X = pd.get_dummies(data=X, columns=columns_to_encode)

    X2 = pd.DataFrame(x2)
    X = pd.concat([X, X2], axis=1)

    Y = pd.DataFrame(y, columns=["Name"])
    Y, targets = encode_target(Y, "Name")
    y = Y['Target']

    return X, y, targets